1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52 package net.sf.classifier4J.bayesian;
53
54 import java.io.IOException;
55
56 import jdbm.btree.BTree;
57 import jdbm.helper.MRU;
58 import jdbm.helper.ObjectCache;
59 import jdbm.helper.StringComparator;
60 import jdbm.recman.RecordManager;
61 import net.sf.classifier4J.ICategorisedClassifier;
62
63 import org.apache.commons.logging.Log;
64 import org.apache.commons.logging.LogFactory;
65
66 public class JDBMWordsDataSource implements ICategorisedWordsDataSource {
67 Log log = LogFactory.getLog(this.getClass());
68
69 RecordManager recordManager = null;
70 BTree tree = null;
71
72 String dir = ".";
73 static String databaseName = "wordprobs";
74 static String tableName = "wordprobabilities";
75
76 public JDBMWordsDataSource() {
77 }
78
79 public JDBMWordsDataSource(String directory) {
80 this.dir = directory;
81 }
82
83 public void close() {
84 if (recordManager != null) {
85 try {
86 recordManager.commit();
87 } catch (IOException e) {
88
89 }
90 try {
91 recordManager.close();
92 } catch (IOException e1) {
93
94 }
95 }
96 }
97
98 public void open() throws IOException {
99 recordManager = new RecordManager(dir + "/" + databaseName);
100 ObjectCache cache = new ObjectCache(recordManager, new MRU(100));
101
102 long recid = recordManager.getNamedObject(tableName);
103 if (recid != 0) {
104
105 tree = BTree.load(recordManager, cache, recid);
106 } else {
107
108 tree = new BTree(recordManager, cache, new StringComparator());
109 recordManager.setNamedObject(tableName, tree.getRecid());
110 }
111 }
112
113 /***
114 * @see net.sf.classifier4J.bayesian.IWordsDataSource#addMatch(java.lang.String)
115 */
116 public void addMatch(String word) {
117 addMatch(ICategorisedClassifier.DEFAULT_CATEGORY, word);
118 }
119
120 /***
121 * @see net.sf.classifier4J.bayesian.IWordsDataSource#addNonMatch(java.lang.String)
122 */
123 public void addNonMatch(String word) {
124 addNonMatch(ICategorisedClassifier.DEFAULT_CATEGORY, word);
125 }
126
127 /***
128 * @see net.sf.classifier4J.bayesian.ICategorisedWordsDataSource#addMatch(java.lang.String, java.lang.String)
129 */
130 public void addMatch(String category, String word) {
131 try {
132 WordProbability wp = getWordProbability(category, word);
133 if (wp == null) {
134 wp = new WordProbability(word, 1, 0);
135 } else {
136 wp.setMatchingCount(wp.getMatchingCount() + 1);
137 }
138 tree.insert(getKey(category, word), wp, true);
139 } catch (IOException e) {
140 log.error("Error with JDBM datasource", e);
141 throw new RuntimeException("Error with JDBM datasource");
142 }
143
144 }
145
146 /***
147 * @see net.sf.classifier4J.bayesian.ICategorisedWordsDataSource#addNonMatch(java.lang.String, java.lang.String)
148 */
149 public void addNonMatch(String category, String word) {
150 try {
151 WordProbability wp = getWordProbability(category, word);
152 if (wp == null) {
153 wp = new WordProbability(word, 0, 1);
154 } else {
155 wp.setNonMatchingCount(wp.getNonMatchingCount() + 1);
156 }
157 tree.insert(getKey(category, word), wp, true);
158 } catch (IOException e) {
159 log.error("Error with JDBM datasource", e);
160 throw new RuntimeException("Error with JDBM datasource");
161 }
162 }
163
164 /***
165 * @see net.sf.classifier4J.bayesian.IWordsDataSource#getWordProbability(java.lang.String)
166 */
167 public WordProbability getWordProbability(String word) {
168 return getWordProbability(ICategorisedClassifier.DEFAULT_CATEGORY, word);
169 }
170
171 /***
172 * @see net.sf.classifier4J.bayesian.ICategorisedWordsDataSource#getWordProbability(java.lang.String, java.lang.String)
173 */
174 public WordProbability getWordProbability(String category, String word) {
175 try {
176 return (WordProbability) tree.find(getKey(category,word));
177 } catch (IOException e) {
178 log.error("Error in JDBM datasource", e);
179 throw new RuntimeException("Error in JDBM datasource");
180 }
181 }
182
183 /***
184 *
185 * @param category The category, or null for the default
186 * @param word The word, cannot be null
187 * @return the key for the category and word. By default this is "category : word"
188 * @throws IllegalArgumentException if word is null
189 */
190 protected String getKey(String category, String word) throws IllegalArgumentException {
191 if (word == null) {
192 throw new IllegalArgumentException("Word cannot be null");
193 }
194 StringBuffer result = new StringBuffer("");
195 if (category == null) {
196 result.append(ICategorisedClassifier.DEFAULT_CATEGORY);
197 } else {
198 result.append(category);
199 }
200 result.append(" : ");
201 result.append(word);
202
203 return result.toString();
204 }
205
206 protected void finalize() throws Throwable {
207 close();
208 }
209
210
211 }