%line | %branch | |||||||||
---|---|---|---|---|---|---|---|---|---|---|
net.sf.classifier4J.bayesian.JDBMWordsDataSource |
|
|
1 | /* |
|
2 | * ==================================================================== |
|
3 | * |
|
4 | * The Apache Software License, Version 1.1 |
|
5 | * |
|
6 | * Copyright (c) 2003 Nick Lothian. All rights reserved. |
|
7 | * |
|
8 | * Redistribution and use in source and binary forms, with or without |
|
9 | * modification, are permitted provided that the following conditions |
|
10 | * are met: |
|
11 | * |
|
12 | * 1. Redistributions of source code must retain the above copyright |
|
13 | * notice, this list of conditions and the following disclaimer. |
|
14 | * |
|
15 | * 2. Redistributions in binary form must reproduce the above copyright |
|
16 | * notice, this list of conditions and the following disclaimer in |
|
17 | * the documentation and/or other materials provided with the |
|
18 | * distribution. |
|
19 | * |
|
20 | * 3. The end-user documentation included with the redistribution, if |
|
21 | * any, must include the following acknowlegement: |
|
22 | * "This product includes software developed by the |
|
23 | * developers of Classifier4J (http://classifier4j.sf.net/)." |
|
24 | * Alternately, this acknowlegement may appear in the software itself, |
|
25 | * if and wherever such third-party acknowlegements normally appear. |
|
26 | * |
|
27 | * 4. The name "Classifier4J" must not be used to endorse or promote |
|
28 | * products derived from this software without prior written |
|
29 | * permission. For written permission, please contact |
|
30 | * http://sourceforge.net/users/nicklothian/. |
|
31 | * |
|
32 | * 5. Products derived from this software may not be called |
|
33 | * "Classifier4J", nor may "Classifier4J" appear in their names |
|
34 | * without prior written permission. For written permission, please |
|
35 | * contact http://sourceforge.net/users/nicklothian/. |
|
36 | * |
|
37 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
|
38 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
|
39 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|
40 | * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
|
41 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
42 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
43 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
|
44 | * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
|
45 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
|
46 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
|
47 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
48 | * SUCH DAMAGE. |
|
49 | * ==================================================================== |
|
50 | */ |
|
51 | ||
52 | package net.sf.classifier4J.bayesian; |
|
53 | ||
54 | import java.io.IOException; |
|
55 | ||
56 | import jdbm.btree.BTree; |
|
57 | import jdbm.helper.MRU; |
|
58 | import jdbm.helper.ObjectCache; |
|
59 | import jdbm.helper.StringComparator; |
|
60 | import jdbm.recman.RecordManager; |
|
61 | import net.sf.classifier4J.ICategorisedClassifier; |
|
62 | ||
63 | import org.apache.commons.logging.Log; |
|
64 | import org.apache.commons.logging.LogFactory; |
|
65 | ||
66 | public class JDBMWordsDataSource implements ICategorisedWordsDataSource { |
|
67 | 7 | Log log = LogFactory.getLog(this.getClass()); |
68 | ||
69 | 7 | RecordManager recordManager = null; |
70 | 7 | BTree tree = null; |
71 | ||
72 | 7 | String dir = "."; |
73 | 1 | static String databaseName = "wordprobs"; |
74 | 1 | static String tableName = "wordprobabilities"; |
75 | ||
76 | 7 | public JDBMWordsDataSource() { |
77 | 7 | } |
78 | ||
79 | 0 | public JDBMWordsDataSource(String directory) { |
80 | 0 | this.dir = directory; |
81 | 0 | } |
82 | ||
83 | public void close() { |
|
84 | 10 | if (recordManager != null) { |
85 | try { |
|
86 | 10 | recordManager.commit(); |
87 | 7 | } catch (IOException e) { |
88 | // do nothing |
|
89 | 0 | } |
90 | try { |
|
91 | 7 | recordManager.close(); |
92 | 7 | } catch (IOException e1) { |
93 | // do nothing |
|
94 | 0 | } |
95 | } |
|
96 | 7 | } |
97 | ||
98 | public void open() throws IOException { |
|
99 | 7 | recordManager = new RecordManager(dir + "/" + databaseName); |
100 | 7 | ObjectCache cache = new ObjectCache(recordManager, new MRU(100)); |
101 | ||
102 | 7 | long recid = recordManager.getNamedObject(tableName); |
103 | 7 | if (recid != 0) { |
104 | // already exists |
|
105 | 0 | tree = BTree.load(recordManager, cache, recid); |
106 | } else { |
|
107 | // does not exist |
|
108 | 7 | tree = new BTree(recordManager, cache, new StringComparator()); |
109 | 7 | recordManager.setNamedObject(tableName, tree.getRecid()); |
110 | } |
|
111 | 7 | } |
112 | ||
113 | /** |
|
114 | * @see net.sf.classifier4J.bayesian.IWordsDataSource#addMatch(java.lang.String) |
|
115 | */ |
|
116 | public void addMatch(String word) { |
|
117 | 12 | addMatch(ICategorisedClassifier.DEFAULT_CATEGORY, word); |
118 | 12 | } |
119 | ||
120 | /** |
|
121 | * @see net.sf.classifier4J.bayesian.IWordsDataSource#addNonMatch(java.lang.String) |
|
122 | */ |
|
123 | public void addNonMatch(String word) { |
|
124 | 512 | addNonMatch(ICategorisedClassifier.DEFAULT_CATEGORY, word); |
125 | 512 | } |
126 | ||
127 | /** |
|
128 | * @see net.sf.classifier4J.bayesian.ICategorisedWordsDataSource#addMatch(java.lang.String, java.lang.String) |
|
129 | */ |
|
130 | public void addMatch(String category, String word) { |
|
131 | try { |
|
132 | 14 | WordProbability wp = getWordProbability(category, word); |
133 | 14 | if (wp == null) { |
134 | 2 | wp = new WordProbability(word, 1, 0); |
135 | } else { |
|
136 | 12 | wp.setMatchingCount(wp.getMatchingCount() + 1); |
137 | } |
|
138 | 14 | tree.insert(getKey(category, word), wp, true); |
139 | 14 | } catch (IOException e) { |
140 | 0 | log.error("Error with JDBM datasource", e); |
141 | 0 | throw new RuntimeException("Error with JDBM datasource"); |
142 | } |
|
143 | ||
144 | 14 | } |
145 | ||
146 | /** |
|
147 | * @see net.sf.classifier4J.bayesian.ICategorisedWordsDataSource#addNonMatch(java.lang.String, java.lang.String) |
|
148 | */ |
|
149 | public void addNonMatch(String category, String word) { |
|
150 | try { |
|
151 | 513 | WordProbability wp = getWordProbability(category, word); |
152 | 513 | if (wp == null) { |
153 | 4 | wp = new WordProbability(word, 0, 1); |
154 | } else { |
|
155 | 509 | wp.setNonMatchingCount(wp.getNonMatchingCount() + 1); |
156 | } |
|
157 | 513 | tree.insert(getKey(category, word), wp, true); |
158 | 513 | } catch (IOException e) { |
159 | 0 | log.error("Error with JDBM datasource", e); |
160 | 0 | throw new RuntimeException("Error with JDBM datasource"); |
161 | } |
|
162 | 513 | } |
163 | ||
164 | /** |
|
165 | * @see net.sf.classifier4J.bayesian.IWordsDataSource#getWordProbability(java.lang.String) |
|
166 | */ |
|
167 | public WordProbability getWordProbability(String word) { |
|
168 | 8 | return getWordProbability(ICategorisedClassifier.DEFAULT_CATEGORY, word); |
169 | } |
|
170 | ||
171 | /** |
|
172 | * @see net.sf.classifier4J.bayesian.ICategorisedWordsDataSource#getWordProbability(java.lang.String, java.lang.String) |
|
173 | */ |
|
174 | public WordProbability getWordProbability(String category, String word) { |
|
175 | try { |
|
176 | 536 | return (WordProbability) tree.find(getKey(category,word)); |
177 | } catch (IOException e) { |
|
178 | 0 | log.error("Error in JDBM datasource", e); |
179 | 0 | throw new RuntimeException("Error in JDBM datasource"); |
180 | } |
|
181 | } |
|
182 | ||
183 | /** |
|
184 | * |
|
185 | * @param category The category, or null for the default |
|
186 | * @param word The word, cannot be null |
|
187 | * @return the key for the category and word. By default this is "category : word" |
|
188 | * @throws IllegalArgumentException if word is null |
|
189 | */ |
|
190 | protected String getKey(String category, String word) throws IllegalArgumentException { |
|
191 | 1063 | if (word == null) { |
192 | 0 | throw new IllegalArgumentException("Word cannot be null"); |
193 | } |
|
194 | 1063 | StringBuffer result = new StringBuffer(""); |
195 | 1063 | if (category == null) { |
196 | 0 | result.append(ICategorisedClassifier.DEFAULT_CATEGORY); |
197 | } else { |
|
198 | 1063 | result.append(category); |
199 | } |
|
200 | 1063 | result.append(" : "); // space:space |
201 | 1063 | result.append(word); |
202 | ||
203 | 1063 | return result.toString(); |
204 | } |
|
205 | ||
206 | protected void finalize() throws Throwable { |
|
207 | 3 | close(); |
208 | 0 | } |
209 | ||
210 | ||
211 | } |
This report is generated by jcoverage, Maven and Maven JCoverage Plugin. |