| %line | %branch | |||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| net.sf.classifier4J.bayesian.JDBCWordsDataSource |
|
|
| 1 | /* |
|
| 2 | * ==================================================================== |
|
| 3 | * |
|
| 4 | * The Apache Software License, Version 1.1 |
|
| 5 | * |
|
| 6 | * Copyright (c) 2003 Nick Lothian. All rights reserved. |
|
| 7 | * |
|
| 8 | * Redistribution and use in source and binary forms, with or without |
|
| 9 | * modification, are permitted provided that the following conditions |
|
| 10 | * are met: |
|
| 11 | * |
|
| 12 | * 1. Redistributions of source code must retain the above copyright |
|
| 13 | * notice, this list of conditions and the following disclaimer. |
|
| 14 | * |
|
| 15 | * 2. Redistributions in binary form must reproduce the above copyright |
|
| 16 | * notice, this list of conditions and the following disclaimer in |
|
| 17 | * the documentation and/or other materials provided with the |
|
| 18 | * distribution. |
|
| 19 | * |
|
| 20 | * 3. The end-user documentation included with the redistribution, if |
|
| 21 | * any, must include the following acknowlegement: |
|
| 22 | * "This product includes software developed by the |
|
| 23 | * developers of Classifier4J (http://classifier4j.sf.net/)." |
|
| 24 | * Alternately, this acknowlegement may appear in the software itself, |
|
| 25 | * if and wherever such third-party acknowlegements normally appear. |
|
| 26 | * |
|
| 27 | * 4. The name "Classifier4J" must not be used to endorse or promote |
|
| 28 | * products derived from this software without prior written |
|
| 29 | * permission. For written permission, please contact |
|
| 30 | * http://sourceforge.net/users/nicklothian/. |
|
| 31 | * |
|
| 32 | * 5. Products derived from this software may not be called |
|
| 33 | * "Classifier4J", nor may "Classifier4J" appear in their names |
|
| 34 | * without prior written permission. For written permission, please |
|
| 35 | * contact http://sourceforge.net/users/nicklothian/. |
|
| 36 | * |
|
| 37 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
|
| 38 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
|
| 39 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|
| 40 | * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
|
| 41 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
| 42 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
| 43 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
|
| 44 | * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
|
| 45 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
|
| 46 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
|
| 47 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
| 48 | * SUCH DAMAGE. |
|
| 49 | * ==================================================================== |
|
| 50 | */ |
|
| 51 | ||
| 52 | package net.sf.classifier4J.bayesian; |
|
| 53 | ||
| 54 | import java.sql.Connection; |
|
| 55 | import java.sql.DatabaseMetaData; |
|
| 56 | import java.sql.PreparedStatement; |
|
| 57 | import java.sql.ResultSet; |
|
| 58 | import java.sql.SQLException; |
|
| 59 | import java.sql.Statement; |
|
| 60 | ||
| 61 | import org.apache.commons.logging.Log; |
|
| 62 | import org.apache.commons.logging.LogFactory; |
|
| 63 | ||
| 64 | import net.sf.classifier4J.ICategorisedClassifier; |
|
| 65 | ||
| 66 | /** |
|
| 67 | * |
|
| 68 | * <p>A JDBC based datasource. It requires a table of the following structure (tested in MySQL 4): |
|
| 69 | * |
|
| 70 | * <pre> |
|
| 71 | * CREATE TABLE word_probability ( |
|
| 72 | * word VARCHAR(255) NOT NULL, |
|
| 73 | * category VARCHAR(20) NOT NULL, |
|
| 74 | * match_count INT DEFAULT 0 NOT NULL, |
|
| 75 | * nonmatch_count INT DEFAULT 0 NOT NULL, |
|
| 76 | * PRIMARY KEY(word, category) |
|
| 77 | * ) |
|
| 78 | * </pre> |
|
| 79 | * |
|
| 80 | *</p> |
|
| 81 | *<p>It will truncate any word longer than 255 characters to 255 characters</p> |
|
| 82 | * |
|
| 83 | * @author Nick Lothian |
|
| 84 | * @author Peter Leschev |
|
| 85 | * |
|
| 86 | */ |
|
| 87 | public class JDBCWordsDataSource implements ICategorisedWordsDataSource { |
|
| 88 | ||
| 89 | IJDBCConnectionManager connectionManager; |
|
| 90 | ||
| 91 | 0 | private Log log = LogFactory.getLog(this.getClass()); |
| 92 | ||
| 93 | /** |
|
| 94 | * Create a JDBCWordsDataSource using the DEFAULT_CATEGORY ("DEFAULT") |
|
| 95 | * |
|
| 96 | * @param cm The connection manager to use |
|
| 97 | */ |
|
| 98 | 0 | public JDBCWordsDataSource(IJDBCConnectionManager cm) throws WordsDataSourceException { |
| 99 | 0 | this.connectionManager = cm; |
| 100 | 0 | createTable(); |
| 101 | 0 | } |
| 102 | ||
| 103 | public WordProbability getWordProbability(String category, String word) throws WordsDataSourceException { |
|
| 104 | ||
| 105 | 0 | WordProbability wp = null; |
| 106 | 0 | String method = "getWordProbability()"; |
| 107 | ||
| 108 | 0 | int matchingCount = 0; |
| 109 | 0 | int nonMatchingCount = 0; |
| 110 | ||
| 111 | 0 | Connection conn = null; |
| 112 | try { |
|
| 113 | 0 | conn = connectionManager.getConnection(); |
| 114 | 0 | PreparedStatement ps = conn.prepareStatement("SELECT match_count, nonmatch_count FROM word_probability WHERE word = ? AND category = ?"); |
| 115 | 0 | ps.setString(1, word); |
| 116 | 0 | ps.setString(2, category); |
| 117 | ||
| 118 | 0 | ResultSet rs = ps.executeQuery(); |
| 119 | 0 | if (rs.next()) { |
| 120 | 0 | matchingCount = rs.getInt("match_count"); |
| 121 | 0 | nonMatchingCount = rs.getInt("nonmatch_count"); |
| 122 | } |
|
| 123 | 0 | wp = new WordProbability(word, matchingCount, nonMatchingCount); |
| 124 | ||
| 125 | 0 | } catch (SQLException e) { |
| 126 | 0 | throw new WordsDataSourceException("Problem obtaining WordProbability from database", e); |
| 127 | } finally { |
|
| 128 | 0 | if (conn != null) { |
| 129 | try { |
|
| 130 | 0 | connectionManager.returnConnection(conn); |
| 131 | 0 | } catch (SQLException e1) { |
| 132 | // ignore |
|
| 133 | 0 | } |
| 134 | } |
|
| 135 | } |
|
| 136 | ||
| 137 | 0 | if (log.isDebugEnabled()) { |
| 138 | 0 | log.debug(method + " WordProbability loaded [" + wp + "]"); |
| 139 | } |
|
| 140 | ||
| 141 | 0 | return wp; |
| 142 | ||
| 143 | } |
|
| 144 | ||
| 145 | public WordProbability getWordProbability(String word) throws WordsDataSourceException { |
|
| 146 | 0 | return getWordProbability(ICategorisedClassifier.DEFAULT_CATEGORY, word); |
| 147 | } |
|
| 148 | ||
| 149 | private void updateWordProbability(String category, String word, boolean isMatch) throws WordsDataSourceException { |
|
| 150 | 0 | String fieldname = "nonmatch_count"; |
| 151 | 0 | if (isMatch) { |
| 152 | 0 | fieldname = "match_count"; |
| 153 | } |
|
| 154 | ||
| 155 | // truncate word at 255 characters |
|
| 156 | 0 | if (word.length() > 255) { |
| 157 | 0 | word = word.substring(0, 254); |
| 158 | } |
|
| 159 | ||
| 160 | 0 | Connection conn = null; |
| 161 | try { |
|
| 162 | 0 | conn = connectionManager.getConnection(); |
| 163 | 0 | PreparedStatement insertStatement = conn.prepareStatement("INSERT INTO word_probability (word, category) VALUES (?, ?)"); |
| 164 | 0 | PreparedStatement selectStatement = conn.prepareStatement("SELECT 1 FROM word_probability WHERE word = ? AND category = ?"); |
| 165 | 0 | PreparedStatement updateStatement = conn.prepareStatement("UPDATE word_probability SET " + fieldname + " = " + fieldname + " + 1 WHERE word = ? AND category = ?"); |
| 166 | ||
| 167 | 0 | selectStatement.setString(1, word); |
| 168 | 0 | selectStatement.setString(2, category); |
| 169 | 0 | ResultSet rs = selectStatement.executeQuery(); |
| 170 | 0 | if (!rs.next()) { |
| 171 | // word is not in table |
|
| 172 | // insert the word |
|
| 173 | 0 | insertStatement.setString(1, word); |
| 174 | 0 | insertStatement.setString(2, category); |
| 175 | 0 | insertStatement.execute(); |
| 176 | } |
|
| 177 | // update the word count |
|
| 178 | 0 | updateStatement.setString(1, word); |
| 179 | 0 | updateStatement.setString(2, category); |
| 180 | 0 | updateStatement.execute(); |
| 181 | ||
| 182 | 0 | } catch (SQLException e) { |
| 183 | 0 | throw new WordsDataSourceException("Problem updating WordProbability", e); |
| 184 | } finally { |
|
| 185 | 0 | if (conn != null) { |
| 186 | try { |
|
| 187 | 0 | connectionManager.returnConnection(conn); |
| 188 | 0 | } catch (SQLException e1) { |
| 189 | // ignore |
|
| 190 | 0 | } |
| 191 | } |
|
| 192 | } |
|
| 193 | ||
| 194 | 0 | } |
| 195 | ||
| 196 | public void addMatch(String category, String word) throws WordsDataSourceException { |
|
| 197 | 0 | if (category == null) { |
| 198 | 0 | throw new IllegalArgumentException("category cannot be null"); |
| 199 | } |
|
| 200 | 0 | updateWordProbability(category, word, true); |
| 201 | 0 | } |
| 202 | ||
| 203 | public void addMatch(String word) throws WordsDataSourceException { |
|
| 204 | 0 | updateWordProbability(ICategorisedClassifier.DEFAULT_CATEGORY, word, true); |
| 205 | 0 | } |
| 206 | ||
| 207 | public void addNonMatch(String category, String word) throws WordsDataSourceException { |
|
| 208 | 0 | if (category == null) { |
| 209 | 0 | throw new IllegalArgumentException("category cannot be null"); |
| 210 | } |
|
| 211 | 0 | updateWordProbability(category, word, false); |
| 212 | 0 | } |
| 213 | ||
| 214 | public void addNonMatch(String word) throws WordsDataSourceException { |
|
| 215 | 0 | updateWordProbability(ICategorisedClassifier.DEFAULT_CATEGORY, word, false); |
| 216 | 0 | } |
| 217 | ||
| 218 | /** |
|
| 219 | * Create the word_probability table if it does not already |
|
| 220 | * exist. Tested successfully with MySQL 4 & HSQLDB. See |
|
| 221 | * comments in code for Axion 1.0M1 issues. |
|
| 222 | * |
|
| 223 | * |
|
| 224 | * @throws WordsDataSourceException |
|
| 225 | */ |
|
| 226 | private void createTable() throws WordsDataSourceException { |
|
| 227 | 0 | Connection con = null; |
| 228 | try { |
|
| 229 | 0 | con = connectionManager.getConnection(); |
| 230 | ||
| 231 | // check if the word_probability table exists |
|
| 232 | 0 | DatabaseMetaData dbm = con.getMetaData(); |
| 233 | 0 | ResultSet rs = dbm.getTables(null, class="keyword">null, "word_probability", null); |
| 234 | 0 | if (!rs.next()) { |
| 235 | // the table does not exist |
|
| 236 | 0 | Statement stmt = con.createStatement(); |
| 237 | // Under Axion 1.0M1, use |
|
| 238 | // stmt.executeUpdate( "CREATE TABLE word_probability ( " |
|
| 239 | // + " word VARCHAR(255) NOT NULL," |
|
| 240 | // + " category VARCHAR(20) NOT NULL," |
|
| 241 | // + " match_count INTEGER NOT NULL," |
|
| 242 | // + " nonmatch_count INTEGER NOT NULL, " |
|
| 243 | // + " PRIMARY KEY(word, category) ) "); |
|
| 244 | 0 | stmt.executeUpdate("CREATE TABLE word_probability ( " |
| 245 | + " word VARCHAR(255) NOT NULL," |
|
| 246 | + " category VARCHAR(20) NOT NULL," |
|
| 247 | + " match_count INT DEFAULT 0 NOT NULL," |
|
| 248 | + " nonmatch_count INT DEFAULT 0 NOT NULL, " |
|
| 249 | + " PRIMARY KEY(word, category) ) "); |
|
| 250 | } |
|
| 251 | 0 | } catch (SQLException e) { |
| 252 | 0 | throw new WordsDataSourceException("Problem creating table", e); // we can't recover from this |
| 253 | } finally { |
|
| 254 | 0 | if (con != null) { |
| 255 | try { |
|
| 256 | 0 | connectionManager.returnConnection(con); |
| 257 | 0 | } catch (SQLException e1) { |
| 258 | // ignore |
|
| 259 | 0 | } |
| 260 | } |
|
| 261 | } |
|
| 262 | 0 | } |
| 263 | } |
| This report is generated by jcoverage, Maven and Maven JCoverage Plugin. |