Coverage report

  %line %branch
net.sf.classifier4J.demo.Trainer
0% 
0% 

 1  
 /*
 2  
  * ====================================================================
 3  
  * 
 4  
  * The Apache Software License, Version 1.1
 5  
  *
 6  
  * Copyright (c) 2003 Nick Lothian. All rights reserved.
 7  
  *
 8  
  * Redistribution and use in source and binary forms, with or without
 9  
  * modification, are permitted provided that the following conditions
 10  
  * are met:
 11  
  *
 12  
  * 1. Redistributions of source code must retain the above copyright
 13  
  *    notice, this list of conditions and the following disclaimer. 
 14  
  *
 15  
  * 2. Redistributions in binary form must reproduce the above copyright
 16  
  *    notice, this list of conditions and the following disclaimer in
 17  
  *    the documentation and/or other materials provided with the
 18  
  *    distribution.
 19  
  *
 20  
  * 3. The end-user documentation included with the redistribution, if
 21  
  *    any, must include the following acknowlegement:  
 22  
  *       "This product includes software developed by the 
 23  
  *        developers of Classifier4J (http://classifier4j.sf.net/)."
 24  
  *    Alternately, this acknowlegement may appear in the software itself,
 25  
  *    if and wherever such third-party acknowlegements normally appear.
 26  
  *
 27  
  * 4. The name "Classifier4J" must not be used to endorse or promote 
 28  
  *    products derived from this software without prior written 
 29  
  *    permission. For written permission, please contact   
 30  
  *    http://sourceforge.net/users/nicklothian/.
 31  
  *
 32  
  * 5. Products derived from this software may not be called 
 33  
  *    "Classifier4J", nor may "Classifier4J" appear in their names 
 34  
  *    without prior written permission. For written permission, please 
 35  
  *    contact http://sourceforge.net/users/nicklothian/.
 36  
  *
 37  
  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 38  
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 39  
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 40  
  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 41  
  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 42  
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 43  
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 44  
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 45  
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 46  
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 47  
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 48  
  * SUCH DAMAGE.
 49  
  * ====================================================================
 50  
  */
 51  
 package net.sf.classifier4J.demo;
 52  
 
 53  
 import java.io.File;
 54  
 import java.io.FileInputStream;
 55  
 import java.io.IOException;
 56  
 import java.io.InputStream;
 57  
 import java.sql.SQLException;
 58  
 
 59  
 import net.sf.classifier4J.ClassifierException;
 60  
 import net.sf.classifier4J.DefaultTokenizer;
 61  
 import net.sf.classifier4J.ITokenizer;
 62  
 import net.sf.classifier4J.ITrainableClassifier;
 63  
 import net.sf.classifier4J.Utilities;
 64  
 import net.sf.classifier4J.bayesian.BayesianClassifier;
 65  
 import net.sf.classifier4J.bayesian.JDBMWordsDataSource;
 66  
 
 67  
 /**
 68  
  * @author Nick Lothian
 69  
  * @author Peter Leschev
 70  
  */
 71  0
 public class Trainer {
 72  
 
 73  
     /**
 74  
      * Given an inputStream of data, a tokenizer this method trains the 
 75  
      * specified classifier.
 76  
      *
 77  
      * @returns Words Per Second 
 78  
      */
 79  
     public static double trainClassifier(ITokenizer tokenizer,
 80  
                                          ITrainableClassifier classifier, 
 81  
                                          boolean isMatch, 
 82  
                                          InputStream inputStream) throws IOException, ClassifierException {
 83  
             
 84  
 //        System.out.println("Training Classifier4J using " + classifier + " and " +
 85  
 //                           tokenizer);
 86  
 
 87  0
         String contents = Utilities.getString(inputStream);
 88  0
         int length = tokenizer.tokenize(contents).length;
 89  
 
 90  0
         long startTime = System.currentTimeMillis();
 91  
         
 92  0
         if (isMatch) {
 93  
 //            System.out.println(length + 
 94  
 //                               " matching words. This may take a while.");
 95  0
             classifier.teachMatch(contents);
 96  
         } else {
 97  
 //            System.out.println(length + 
 98  
 //                               " non-matching words. This may take a while.");
 99  0
             classifier.teachNonMatch(contents);
 100  
         }
 101  
         
 102  0
         long endTime = System.currentTimeMillis();
 103  
         
 104  0
         double time = (double)(endTime - startTime) / (double)1000;
 105  
         
 106  0
         if (Double.compare(time, 0) == 0) {
 107  0
             time = 1;
 108  
         }
 109  
         
 110  0
         double wordsPerSecond = length / time;
 111  
         
 112  
 //        System.out.println("Done. Took " + time + " seconds, which is " + 
 113  
 //                           wordsPerSecond + " words per second.");
 114  
         
 115  0
         return wordsPerSecond;
 116  
     }
 117  
 
 118  0
     public static String connectionString = "jdbc:hsqldb:./database/";
 119  0
     public static String username = "sa";
 120  0
     public static String password = "";
 121  
 
 122  
     static JDBMWordsDataSource wds;
 123  
 
 124  
     private static ITrainableClassifier setupClassifier(String connString, String user, String pw) throws SQLException, IOException {
 125  
 /*
 126  
 DriverMangerJDBCConnectionManager cm = new DriverMangerJDBCConnectionManager(connString, user, pw);
 127  
 JDBCWordsDataSource wds = new JDBCWordsDataSource(cm);
 128  
 wds.createTable();
 129  
 */
 130  0
         wds = new JDBMWordsDataSource("./database/");
 131  0
         wds.open();
 132  0
         return new BayesianClassifier(wds);
 133  
     }
 134  
 
 135  
     public static void main(String[] args) throws Exception {
 136  0
         System.out.println("This program reads in two files, one of which is considered to define a match.");
 137  0
         System.out.println("These two files are analysed by Classifier4J and the resulting word probabilities are loaded into a JDBM database.");
 138  0
         System.out.println("");
 139  0
         System.out.println("To reset the word probabilities, delete the \"database\" directory which is created.");
 140  
 
 141  0
         File dir = new File("./database");
 142  0
         dir.mkdir();
 143  
 
 144  0
         ITrainableClassifier classifier = setupClassifier(connectionString, username, password);
 145  0
         ITokenizer tokenizer = new DefaultTokenizer();
 146  
         
 147  0
         trainClassifier(tokenizer, 
 148  
                         classifier, 
 149  
                         true, 
 150  
                         new FileInputStream("./demodata/match.txt"));
 151  
         
 152  0
         trainClassifier(tokenizer, 
 153  
                         classifier,
 154  
                         false, 
 155  
                         new FileInputStream("./demodata/nonmatch.txt"));
 156  
 		
 157  0
         wds.close();
 158  0
     }
 159  
 	/*
 160  
 	static {
 161  
 		try {
 162  
 			Class.forName("org.hsqldb.jdbcDriver");
 163  
 		} catch (ClassNotFoundException e) {
 164  
 			e.printStackTrace();
 165  
 		}
 166  
 	}
 167  
 	*/
 168  
 }

This report is generated by jcoverage, Maven and Maven JCoverage Plugin.