/*
 * ====================================================================
 *
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2003 Nick Lothian. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution, if
 *    any, must include the following acknowlegement:
 *       "This product includes software developed by the
 *        developers of Classifier4J (http://classifier4j.sf.net/)."
 *    Alternately, this acknowlegement may appear in the software itself,
 *    if and wherever such third-party acknowlegements normally appear.
 *
 * 4. The name "Classifier4J" must not be used to endorse or promote
 *    products derived from this software without prior written
 *    permission. For written permission, please contact
 *    http://sourceforge.net/users/nicklothian/.
 *
 * 5. Products derived from this software may not be called
 *    "Classifier4J", nor may "Classifier4J" appear in their names
 *    without prior written permission. For written permission, please
 *    contact http://sourceforge.net/users/nicklothian/.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 */
package net.sf.classifier4J.demo;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.sql.SQLException;

import net.sf.classifier4J.ClassifierException;
import net.sf.classifier4J.DefaultTokenizer;
import net.sf.classifier4J.IClassifier;
import net.sf.classifier4J.ITokenizer;
import net.sf.classifier4J.Utilities;
import net.sf.classifier4J.bayesian.BayesianClassifier;
import net.sf.classifier4J.bayesian.JDBMWordsDataSource;

/**
 * Demo program that reads a single text file and runs it through a
 * {@link BayesianClassifier} backed by a {@link JDBMWordsDataSource}.
 * It is meant to be run after the Trainer program.
 *
 * <p>The diagnostic output of {@link #useClassifier} is currently disabled
 * (commented out), so the only console output is the banner printed by
 * {@link #main}.</p>
 *
 * @author Nick Lothian
 * @author Peter Leschev
 */
public class Analyser {

    // Connection settings mirrored from Trainer. They are passed to
    // setupClassifier, which ignores them while the JDBM data source is in use.
    public static String connectionString = Trainer.connectionString;
    public static String username = Trainer.username;
    public static String password = Trainer.password;

    /** Words data source opened by setupClassifier and closed by main. */
    static JDBMWordsDataSource wds;

    /**
     * Opens the words data source and builds a Bayesian classifier on top of it.
     *
     * <p>As a side effect the opened data source is stored in the static
     * {@link #wds} field; the caller is responsible for closing it.</p>
     *
     * @param tokenizer  tokenizer handed to the classifier
     * @param connString unused while the JDBC variant below is disabled
     * @param user       unused while the JDBC variant below is disabled
     * @param pw         unused while the JDBC variant below is disabled
     * @return a classifier reading from the freshly opened data source
     * @throws SQLException declared for the disabled JDBC variant; kept in the
     *                      signature so that variant can be re-enabled
     * @throws IOException  declared by the signature; presumably raised when
     *                      the data source cannot be opened
     */
    private static IClassifier setupClassifier(ITokenizer tokenizer, String connString, String user, String pw)
            throws SQLException, IOException {
        /*
         * Disabled JDBC-backed alternative:
         *
         * DriverMangerJDBCConnectionManager cm = new DriverMangerJDBCConnectionManager(connString, user, pw);
         * JDBCWordsDataSource wds = new JDBCWordsDataSource(cm);
         * wds.createTable();
         */
        wds = new JDBMWordsDataSource("./database/");
        wds.open();
        return new BayesianClassifier(wds, tokenizer);
    }

    /**
     * Classifies the complete contents of a stream and measures the throughput.
     *
     * <p>The stream is read into a string via {@link Utilities#getString};
     * this method does not close it itself, so the caller should close it.</p>
     *
     * @param tokenizer   used only to count the words in the input
     * @param classifier  classifier to time
     * @param inputStream source of the text to classify
     * @return words per second: the token count divided by the elapsed
     *         classification time in seconds. When the elapsed time measures
     *         as zero, one second is substituted to avoid dividing by zero.
     * @throws IOException         declared by the signature; presumably raised
     *                             when the stream cannot be read
     * @throws ClassifierException declared by the signature; presumably raised
     *                             when classification fails
     */
    public static double useClassifier(ITokenizer tokenizer,
                                       IClassifier classifier,
                                       InputStream inputStream) throws IOException, ClassifierException {

        // System.out.println("Using Classifier4J with " + classifier + " and " +
        // tokenizer);

        String contents = Utilities.getString(inputStream);
        int length = tokenizer.tokenize(contents).length;

        // System.out.println("Analysing " + length + " words. This may take a while.");

        long startTime = System.currentTimeMillis();

        // The result is only consumed by the disabled diagnostics below.
        double matchProb = classifier.classify(contents);

        long endTime = System.currentTimeMillis();

        double time = (double) (endTime - startTime) / (double) 1000;

        // The clock has millisecond resolution; a run faster than that would
        // otherwise divide by zero below.
        if (Double.compare(time, 0) == 0) {
            time = 1;
        }

        double wordsPerSecond = length / time;

        // System.out.println("Done. Took " + time + " seconds, which is " +
        // wordsPerSecond + " words per second.");

        // System.out.println("Match Probability = " + matchProb);
        // System.out.println("Is considered a match: " + classifier.isMatch(matchProb));

        return wordsPerSecond;
    }

    /**
     * Runs the demo against {@code ./demodata/toanalyse.txt}.
     *
     * @param args ignored
     * @throws Exception if the input file cannot be opened or read, or if
     *                   setting up or running the classifier fails
     */
    public static void main(String[] args) throws Exception {
        System.out.println("This program reads in a single file and classifies it as a match or not.");
        System.out.println("It should be run after the Trainer program.");

        String filename = "./demodata/toanalyse.txt";

        InputStream input = new FileInputStream(filename);
        try {
            ITokenizer tokenizer = new DefaultTokenizer();
            IClassifier classifier = setupClassifier(tokenizer, connectionString, username, password);
            try {
                useClassifier(tokenizer, classifier, input);
            } finally {
                // Release the data source even when classification fails.
                wds.close();
            }
        } finally {
            // The file handle was previously never released.
            input.close();
        }
    }

    /*
     * Disabled driver registration for the JDBC-backed alternative:
     *
     * static {
     *     try {
     *         Class.forName("org.hsqldb.jdbcDriver");
     *         //Class.forName("org.gjt.mm.mysql.Driver");
     *     } catch (ClassNotFoundException e) {
     *         e.printStackTrace();
     *     }
     * }
     */
}