| %line | %branch | |||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| net.sf.classifier4J.demo.Trainer | 
 | 
 | 
| 1 |  /* | |
| 2 |   * ==================================================================== | |
| 3 |   *  | |
| 4 |   * The Apache Software License, Version 1.1 | |
| 5 |   * | |
| 6 |   * Copyright (c) 2003 Nick Lothian. All rights reserved. | |
| 7 |   * | |
| 8 |   * Redistribution and use in source and binary forms, with or without | |
| 9 |   * modification, are permitted provided that the following conditions | |
| 10 |   * are met: | |
| 11 |   * | |
| 12 |   * 1. Redistributions of source code must retain the above copyright | |
| 13 |   *    notice, this list of conditions and the following disclaimer.  | |
| 14 |   * | |
| 15 |   * 2. Redistributions in binary form must reproduce the above copyright | |
| 16 |   *    notice, this list of conditions and the following disclaimer in | |
| 17 |   *    the documentation and/or other materials provided with the | |
| 18 |   *    distribution. | |
| 19 |   * | |
| 20 |   * 3. The end-user documentation included with the redistribution, if | |
| 21 |   *    any, must include the following acknowlegement:   | |
| 22 |   *       "This product includes software developed by the  | |
| 23 |   *        developers of Classifier4J (http://classifier4j.sf.net/)." | |
| 24 |   *    Alternately, this acknowlegement may appear in the software itself, | |
| 25 |   *    if and wherever such third-party acknowlegements normally appear. | |
| 26 |   * | |
| 27 |   * 4. The name "Classifier4J" must not be used to endorse or promote  | |
| 28 |   *    products derived from this software without prior written  | |
| 29 |   *    permission. For written permission, please contact    | |
| 30 |   *    http://sourceforge.net/users/nicklothian/. | |
| 31 |   * | |
| 32 |   * 5. Products derived from this software may not be called  | |
| 33 |   *    "Classifier4J", nor may "Classifier4J" appear in their names  | |
| 34 |   *    without prior written permission. For written permission, please  | |
| 35 |   *    contact http://sourceforge.net/users/nicklothian/. | |
| 36 |   * | |
| 37 |   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED | |
| 38 |   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
| 39 |   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 40 |   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR | |
| 41 |   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 42 |   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 43 |   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF | |
| 44 |   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |
| 45 |   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 46 |   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
| 47 |   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 48 |   * SUCH DAMAGE. | |
| 49 |   * ==================================================================== | |
| 50 |   */ | |
| 51 |  package net.sf.classifier4J.demo; | |
| 52 | ||
| 53 |  import java.io.File; | |
| 54 |  import java.io.FileInputStream; | |
| 55 |  import java.io.IOException; | |
| 56 |  import java.io.InputStream; | |
| 57 |  import java.sql.SQLException; | |
| 58 | ||
| 59 |  import net.sf.classifier4J.ClassifierException; | |
| 60 |  import net.sf.classifier4J.DefaultTokenizer; | |
| 61 |  import net.sf.classifier4J.ITokenizer; | |
| 62 |  import net.sf.classifier4J.ITrainableClassifier; | |
| 63 |  import net.sf.classifier4J.Utilities; | |
| 64 |  import net.sf.classifier4J.bayesian.BayesianClassifier; | |
| 65 |  import net.sf.classifier4J.bayesian.JDBMWordsDataSource; | |
| 66 | ||
| 67 |  /** | |
| 68 |   * @author Nick Lothian | |
| 69 |   * @author Peter Leschev | |
| 70 |   */ | |
| 71 | 0 |  public class Trainer { | 
| 72 | ||
| 73 |      /** | |
| 74 |       * Given an inputStream of data and a tokenizer, this method trains the | |
| 75 |       * specified classifier. | |
| 76 |       * | |
| 77 |       * @return Words Per Second  | |
| 78 |       */ | |
| 79 | public static double trainClassifier(ITokenizer tokenizer, | |
| 80 | ITrainableClassifier classifier, | |
| 81 |                                           boolean isMatch,  | |
| 82 |                                           InputStream inputStream) throws IOException, ClassifierException { | |
| 83 | ||
| 84 |  //        System.out.println("Training Classifier4J using " + classifier + " and " + | |
| 85 |  //                           tokenizer); | |
| 86 | ||
| 87 | 0 |          String contents = Utilities.getString(inputStream); | 
| 88 | 0 |          int length = tokenizer.tokenize(contents).length; | 
| 89 | ||
| 90 | 0 |          long startTime = System.currentTimeMillis(); | 
| 91 | ||
| 92 | 0 |          if (isMatch) { | 
| 93 |  //            System.out.println(length +  | |
| 94 |  //                               " matching words. This may take a while."); | |
| 95 | 0 |              classifier.teachMatch(contents); | 
| 96 |          } else { | |
| 97 |  //            System.out.println(length +  | |
| 98 |  //                               " non-matching words. This may take a while."); | |
| 99 | 0 |              classifier.teachNonMatch(contents); | 
| 100 | } | |
| 101 | ||
| 102 | 0 |          long endTime = System.currentTimeMillis(); | 
| 103 | ||
| 104 | 0 |          double time = (double)(endTime - startTime) / (double)1000; | 
| 105 | ||
| 106 | 0 |          if (Double.compare(time, 0) == 0) { | 
| 107 | 0 |              time = 1; | 
| 108 | } | |
| 109 | ||
| 110 | 0 |          double wordsPerSecond = length / time; | 
| 111 | ||
| 112 |  //        System.out.println("Done. Took " + time + " seconds, which is " +  | |
| 113 |  //                           wordsPerSecond + " words per second."); | |
| 114 | ||
| 115 | 0 |          return wordsPerSecond; | 
| 116 | } | |
| 117 | ||
| 118 | 0 |      public static String connectionString = "jdbc:hsqldb:./database/"; | 
| 119 | 0 |      public static String username = "sa"; | 
| 120 | 0 |      public static String password = ""; | 
| 121 | ||
| 122 |      static JDBMWordsDataSource wds; | |
| 123 | ||
| 124 | private static ITrainableClassifier setupClassifier(String connString, String user, String pw) throws SQLException, IOException { | |
| 125 |  /* | |
| 126 |  DriverMangerJDBCConnectionManager cm = new DriverMangerJDBCConnectionManager(connString, user, pw); | |
| 127 |  JDBCWordsDataSource wds = new JDBCWordsDataSource(cm); | |
| 128 |  wds.createTable(); | |
| 129 |  */ | |
| 130 | 0 |          wds = new JDBMWordsDataSource("./database/"); | 
| 131 | 0 |          wds.open(); | 
| 132 | 0 |          return new BayesianClassifier(wds); | 
| 133 | } | |
| 134 | ||
| 135 | public static void main(String[] args) throws Exception { | |
| 136 | 0 |          System.out.println("This program reads in two files, one of which is considered to define a match."); | 
| 137 | 0 |          System.out.println("These two files are analysed by Classifier4J and the resulting word probabilities are loaded into a JDBM database."); | 
| 138 | 0 |          System.out.println(""); | 
| 139 | 0 |          System.out.println("To reset the word probabilities, delete the \"database\" directory which is created."); | 
| 140 | ||
| 141 | 0 |          File dir = new File("./database"); | 
| 142 | 0 |          dir.mkdir(); | 
| 143 | ||
| 144 | 0 |          ITrainableClassifier classifier = setupClassifier(connectionString, username, password); | 
| 145 | 0 |          ITokenizer tokenizer = new DefaultTokenizer(); | 
| 146 | ||
| 147 | 0 |          trainClassifier(tokenizer,  | 
| 148 | classifier, | |
| 149 |                          true,  | |
| 150 | new FileInputStream("./demodata/match.txt")); | |
| 151 | ||
| 152 | 0 |          trainClassifier(tokenizer,  | 
| 153 | classifier, | |
| 154 | false, | |
| 155 | new FileInputStream("./demodata/nonmatch.txt")); | |
| 156 | ||
| 157 | 0 |          wds.close(); | 
| 158 | 0 |      } | 
| 159 |  	/* | |
| 160 |  	static { | |
| 161 |  		try { | |
| 162 |  			Class.forName("org.hsqldb.jdbcDriver"); | |
| 163 |  		} catch (ClassNotFoundException e) { | |
| 164 |  			e.printStackTrace(); | |
| 165 |  		} | |
| 166 |  	} | |
| 167 |  	*/ | |
| 168 | } | 
| This report is generated by jcoverage, Maven and Maven JCoverage Plugin. |