| %line | %branch | |||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| net.sf.classifier4J.demo.Analyser |
|
|
| 1 | /* |
|
| 2 | * ==================================================================== |
|
| 3 | * |
|
| 4 | * The Apache Software License, Version 1.1 |
|
| 5 | * |
|
| 6 | * Copyright (c) 2003 Nick Lothian. All rights reserved. |
|
| 7 | * |
|
| 8 | * Redistribution and use in source and binary forms, with or without |
|
| 9 | * modification, are permitted provided that the following conditions |
|
| 10 | * are met: |
|
| 11 | * |
|
| 12 | * 1. Redistributions of source code must retain the above copyright |
|
| 13 | * notice, this list of conditions and the following disclaimer. |
|
| 14 | * |
|
| 15 | * 2. Redistributions in binary form must reproduce the above copyright |
|
| 16 | * notice, this list of conditions and the following disclaimer in |
|
| 17 | * the documentation and/or other materials provided with the |
|
| 18 | * distribution. |
|
| 19 | * |
|
| 20 | * 3. The end-user documentation included with the redistribution, if |
|
| 21 | * any, must include the following acknowlegement: |
|
| 22 | * "This product includes software developed by the |
|
| 23 | * developers of Classifier4J (http://classifier4j.sf.net/)." |
|
| 24 | * Alternately, this acknowlegement may appear in the software itself, |
|
| 25 | * if and wherever such third-party acknowlegements normally appear. |
|
| 26 | * |
|
| 27 | * 4. The name "Classifier4J" must not be used to endorse or promote |
|
| 28 | * products derived from this software without prior written |
|
| 29 | * permission. For written permission, please contact |
|
| 30 | * http://sourceforge.net/users/nicklothian/. |
|
| 31 | * |
|
| 32 | * 5. Products derived from this software may not be called |
|
| 33 | * "Classifier4J", nor may "Classifier4J" appear in their names |
|
| 34 | * without prior written permission. For written permission, please |
|
| 35 | * contact http://sourceforge.net/users/nicklothian/. |
|
| 36 | * |
|
| 37 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
|
| 38 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
|
| 39 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|
| 40 | * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
|
| 41 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
| 42 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
| 43 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
|
| 44 | * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
|
| 45 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
|
| 46 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
|
| 47 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
| 48 | * SUCH DAMAGE. |
|
| 49 | * ==================================================================== |
|
| 50 | */ |
|
| 51 | package net.sf.classifier4J.demo; |
|
| 52 | ||
| 53 | import java.io.FileInputStream; |
|
| 54 | import java.io.IOException; |
|
| 55 | import java.io.InputStream; |
|
| 56 | import java.sql.SQLException; |
|
| 57 | ||
| 58 | import net.sf.classifier4J.ClassifierException; |
|
| 59 | import net.sf.classifier4J.DefaultTokenizer; |
|
| 60 | import net.sf.classifier4J.IClassifier; |
|
| 61 | import net.sf.classifier4J.ITokenizer; |
|
| 62 | import net.sf.classifier4J.Utilities; |
|
| 63 | import net.sf.classifier4J.bayesian.BayesianClassifier; |
|
| 64 | import net.sf.classifier4J.bayesian.JDBMWordsDataSource; |
|
| 65 | ||
| 66 | /** |
|
| 67 | * @author Nick Lothian |
|
| 68 | * @author Peter Leschev |
|
| 69 | */ |
|
| 70 | 0 | public class Analyser { |
| 71 | ||
| 72 | 0 | public static String connectionString = Trainer.connectionString; |
| 73 | 0 | public static String username = Trainer.username; |
| 74 | 0 | public static String password = Trainer.password; |
| 75 | ||
| 76 | static JDBMWordsDataSource wds; |
|
| 77 | ||
| 78 | private static IClassifier setupClassifier(ITokenizer tokenizer, String connString, String user, String pw) throws SQLException, IOException { |
|
| 79 | /* |
|
| 80 | DriverMangerJDBCConnectionManager cm = new DriverMangerJDBCConnectionManager(connString, user, pw); |
|
| 81 | JDBCWordsDataSource wds = new JDBCWordsDataSource(cm); |
|
| 82 | wds.createTable(); |
|
| 83 | */ |
|
| 84 | 0 | wds = new JDBMWordsDataSource("./database/"); |
| 85 | 0 | wds.open(); |
| 86 | 0 | return new BayesianClassifier(wds, tokenizer); |
| 87 | } |
|
| 88 | ||
| 89 | /** |
|
| 90 | * @return Words Per Second |
|
| 91 | */ |
|
| 92 | public static double useClassifier(ITokenizer tokenizer, |
|
| 93 | IClassifier classifier, |
|
| 94 | InputStream inputStream) throws IOException, ClassifierException { |
|
| 95 | ||
| 96 | // System.out.println("Using Classifier4J with " + classifier + " and " + |
|
| 97 | // tokenizer); |
|
| 98 | ||
| 99 | 0 | String contents = Utilities.getString(inputStream); |
| 100 | 0 | int length = tokenizer.tokenize(contents).length; |
| 101 | ||
| 102 | // System.out.println("Analysing " + length + " words. This may take a while."); |
|
| 103 | ||
| 104 | 0 | long startTime = System.currentTimeMillis(); |
| 105 | ||
| 106 | 0 | double matchProb = classifier.classify(contents); |
| 107 | ||
| 108 | 0 | long endTime = System.currentTimeMillis(); |
| 109 | ||
| 110 | 0 | double time = (double)(endTime - startTime) / (double)1000; |
| 111 | ||
| 112 | 0 | if (Double.compare(time, 0) == 0) { |
| 113 | 0 | time = 1; |
| 114 | } |
|
| 115 | ||
| 116 | 0 | double wordsPerSecond = length / time; |
| 117 | ||
| 118 | // System.out.println("Done. Took " + time + " seconds, which is " + |
|
| 119 | // wordsPerSecond + " words per second."); |
|
| 120 | ||
| 121 | // System.out.println("Match Probability = " + matchProb); |
|
| 122 | // System.out.println("Is considered a match: " + classifier.isMatch(matchProb)); |
|
| 123 | ||
| 124 | 0 | return wordsPerSecond; |
| 125 | } |
|
| 126 | ||
| 127 | public static void main(String[] args) throws Exception { |
|
| 128 | 0 | System.out.println("This program reads in a single file and classifies it as a match or not."); |
| 129 | 0 | System.out.println("It should be run after the Trainer program."); |
| 130 | ||
| 131 | ||
| 132 | 0 | String filename = "./demodata/toanalyse.txt"; |
| 133 | ||
| 134 | ||
| 135 | 0 | InputStream input = new FileInputStream(filename); |
| 136 | 0 | ITokenizer tokenizer = new DefaultTokenizer(); |
| 137 | 0 | IClassifier classifier = setupClassifier(tokenizer, connectionString, username, password); |
| 138 | ||
| 139 | 0 | useClassifier(tokenizer, classifier, input); |
| 140 | ||
| 141 | 0 | wds.close(); |
| 142 | 0 | } |
| 143 | /* |
|
| 144 | static { |
|
| 145 | try { |
|
| 146 | Class.forName("org.hsqldb.jdbcDriver"); |
|
| 147 | //Class.forName("org.gjt.mm.mysql.Driver"); |
|
| 148 | } catch (ClassNotFoundException e) { |
|
| 149 | e.printStackTrace(); |
|
| 150 | } |
|
| 151 | } |
|
| 152 | */ |
|
| 153 | } |
| This report is generated by jcoverage, Maven and Maven JCoverage Plugin. |