1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52 package net.sf.classifier4J.bayesian;
53
54 import java.sql.Connection;
55 import java.sql.DatabaseMetaData;
56 import java.sql.PreparedStatement;
57 import java.sql.ResultSet;
58 import java.sql.SQLException;
59 import java.sql.Statement;
60
61 import org.apache.commons.logging.Log;
62 import org.apache.commons.logging.LogFactory;
63
64 import net.sf.classifier4J.ICategorisedClassifier;
65
66 /***
67 *
68 * <p>A JDBC based datasource. It requires a table of the following structure (tested in MySQL 4):
69 *
70 * <pre>
71 * CREATE TABLE word_probability (
72 * word VARCHAR(255) NOT NULL,
73 * category VARCHAR(20) NOT NULL,
74 * match_count INT DEFAULT 0 NOT NULL,
75 * nonmatch_count INT DEFAULT 0 NOT NULL,
76 * PRIMARY KEY(word, category)
77 * )
78 * </pre>
79 *
80 *</p>
81 *<p>It will truncate any word longer than 255 characters to 255 characters</p>
82 *
83 * @author Nick Lothian
84 * @author Peter Leschev
85 *
86 */
87 public class JDBCWordsDataSource implements ICategorisedWordsDataSource {
88
89 IJDBCConnectionManager connectionManager;
90
91 private Log log = LogFactory.getLog(this.getClass());
92
93 /***
94 * Create a JDBCWordsDataSource using the DEFAULT_CATEGORY ("DEFAULT")
95 *
96 * @param cm The connection manager to use
97 */
98 public JDBCWordsDataSource(IJDBCConnectionManager cm) throws WordsDataSourceException {
99 this.connectionManager = cm;
100 createTable();
101 }
102
103 public WordProbability getWordProbability(String category, String word) throws WordsDataSourceException {
104
105 WordProbability wp = null;
106 String method = "getWordProbability()";
107
108 int matchingCount = 0;
109 int nonMatchingCount = 0;
110
111 Connection conn = null;
112 try {
113 conn = connectionManager.getConnection();
114 PreparedStatement ps = conn.prepareStatement("SELECT match_count, nonmatch_count FROM word_probability WHERE word = ? AND category = ?");
115 ps.setString(1, word);
116 ps.setString(2, category);
117
118 ResultSet rs = ps.executeQuery();
119 if (rs.next()) {
120 matchingCount = rs.getInt("match_count");
121 nonMatchingCount = rs.getInt("nonmatch_count");
122 }
123 wp = new WordProbability(word, matchingCount, nonMatchingCount);
124
125 } catch (SQLException e) {
126 throw new WordsDataSourceException("Problem obtaining WordProbability from database", e);
127 } finally {
128 if (conn != null) {
129 try {
130 connectionManager.returnConnection(conn);
131 } catch (SQLException e1) {
132
133 }
134 }
135 }
136
137 if (log.isDebugEnabled()) {
138 log.debug(method + " WordProbability loaded [" + wp + "]");
139 }
140
141 return wp;
142
143 }
144
145 public WordProbability getWordProbability(String word) throws WordsDataSourceException {
146 return getWordProbability(ICategorisedClassifier.DEFAULT_CATEGORY, word);
147 }
148
149 private void updateWordProbability(String category, String word, boolean isMatch) throws WordsDataSourceException {
150 String fieldname = "nonmatch_count";
151 if (isMatch) {
152 fieldname = "match_count";
153 }
154
155
156 if (word.length() > 255) {
157 word = word.substring(0, 254);
158 }
159
160 Connection conn = null;
161 try {
162 conn = connectionManager.getConnection();
163 PreparedStatement insertStatement = conn.prepareStatement("INSERT INTO word_probability (word, category) VALUES (?, ?)");
164 PreparedStatement selectStatement = conn.prepareStatement("SELECT 1 FROM word_probability WHERE word = ? AND category = ?");
165 PreparedStatement updateStatement = conn.prepareStatement("UPDATE word_probability SET " + fieldname + " = " + fieldname + " + 1 WHERE word = ? AND category = ?");
166
167 selectStatement.setString(1, word);
168 selectStatement.setString(2, category);
169 ResultSet rs = selectStatement.executeQuery();
170 if (!rs.next()) {
171
172
173 insertStatement.setString(1, word);
174 insertStatement.setString(2, category);
175 insertStatement.execute();
176 }
177
178 updateStatement.setString(1, word);
179 updateStatement.setString(2, category);
180 updateStatement.execute();
181
182 } catch (SQLException e) {
183 throw new WordsDataSourceException("Problem updating WordProbability", e);
184 } finally {
185 if (conn != null) {
186 try {
187 connectionManager.returnConnection(conn);
188 } catch (SQLException e1) {
189
190 }
191 }
192 }
193
194 }
195
196 public void addMatch(String category, String word) throws WordsDataSourceException {
197 if (category == null) {
198 throw new IllegalArgumentException("category cannot be null");
199 }
200 updateWordProbability(category, word, true);
201 }
202
203 public void addMatch(String word) throws WordsDataSourceException {
204 updateWordProbability(ICategorisedClassifier.DEFAULT_CATEGORY, word, true);
205 }
206
207 public void addNonMatch(String category, String word) throws WordsDataSourceException {
208 if (category == null) {
209 throw new IllegalArgumentException("category cannot be null");
210 }
211 updateWordProbability(category, word, false);
212 }
213
214 public void addNonMatch(String word) throws WordsDataSourceException {
215 updateWordProbability(ICategorisedClassifier.DEFAULT_CATEGORY, word, false);
216 }
217
218 /***
219 * Create the word_probability table if it does not already
220 * exist. Tested successfully with MySQL 4 & HSQLDB. See
221 * comments in code for Axion 1.0M1 issues.
222 *
223 *
224 * @throws WordsDataSourceException
225 */
226 private void createTable() throws WordsDataSourceException {
227 Connection con = null;
228 try {
229 con = connectionManager.getConnection();
230
231
232 DatabaseMetaData dbm = con.getMetaData();
233 ResultSet rs = dbm.getTables(null, null, "word_probability", null);
234 if (!rs.next()) {
235
236 Statement stmt = con.createStatement();
237
238
239
240
241
242
243
244 stmt.executeUpdate("CREATE TABLE word_probability ( "
245 + " word VARCHAR(255) NOT NULL,"
246 + " category VARCHAR(20) NOT NULL,"
247 + " match_count INT DEFAULT 0 NOT NULL,"
248 + " nonmatch_count INT DEFAULT 0 NOT NULL, "
249 + " PRIMARY KEY(word, category) ) ");
250 }
251 } catch (SQLException e) {
252 throw new WordsDataSourceException("Problem creating table", e);
253 } finally {
254 if (con != null) {
255 try {
256 connectionManager.returnConnection(con);
257 } catch (SQLException e1) {
258
259 }
260 }
261 }
262 }
263 }