1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53 package net.sf.classifier4J.bayesian;
54
55 import junit.framework.TestCase;
56 import junit.textui.TestRunner;
57 import net.sf.classifier4J.DefaultStopWordsProvider;
58 import net.sf.classifier4J.DefaultTokenizer;
59 import net.sf.classifier4J.ICategorisedClassifier;
60 import net.sf.classifier4J.IClassifier;
61 import net.sf.classifier4J.IStopWordProvider;
62 import net.sf.classifier4J.ITokenizer;
63
64 import org.apache.commons.logging.Log;
65 import org.apache.commons.logging.LogFactory;
66
67
68
69
70
71 public class BayesianClassifierTest extends TestCase {
72
73 private Log log = LogFactory.getLog(this.getClass());
74
75 public BayesianClassifierTest(String name) {
76 super(name);
77 }
78
79 public void testClassify() throws Exception {
80
81 SimpleWordsDataSource wds = new SimpleWordsDataSource();
82 BayesianClassifier classifier = new BayesianClassifier(wds);
83
84 String sentence[] = { "This", "is", "a", "sentence", "about", "java" };
85
86 assertEquals(IClassifier.NEUTRAL_PROBABILITY, classifier.classify(ICategorisedClassifier.DEFAULT_CATEGORY, sentence), 0d);
87
88 wds.setWordProbability(new WordProbability("This", 0.5d));
89 wds.setWordProbability(new WordProbability("is", 0.5d));
90 wds.setWordProbability(new WordProbability("a", 0.5d));
91 wds.setWordProbability(new WordProbability("sentence", 0.2d));
92 wds.setWordProbability(new WordProbability("about", 0.5d));
93 wds.setWordProbability(new WordProbability("java", 0.99d));
94
95 assertEquals(0.96d, classifier.classify(ICategorisedClassifier.DEFAULT_CATEGORY, sentence), 0.009d);
96 }
97
98 public void testTeaching() throws Exception {
99 BayesianClassifier classifier = new BayesianClassifier();
100
101 String sentence1[] = {"The", "menu", "tag", "library", "manages", "the",
102 "complex", "process", "of", "creating", "menus", "in",
103 "JavaScript", "The", "menu", "tag", "itself", "is",
104 "an", "abstract", "class", "that", "extends", "the",
105 "TagSupport", "class", "and", "overrides", "the",
106 "doStartTag", "and", "doEndTag", "methods.", "The",
107 "getMenu", "method,", "which", "is", "a", "template",
108 "method", "and", "should", "be", "overridden", "in",
109 "the", "subclasses,", "provides", "JavaScript", "to",
110 "add", "menu", "items", "in", "the", "menu",
111 "structure", "created", "in", "the", "doStartTag",
112 "method", "Subclasses", "of", "the", "menu", "tag",
113 "override", "the", "getMenu", "method,", "which",
114 "uses", "menu", "builders", "to", "render", "menu",
115 "data", "from", "the", "data", "source"};
116
117 String sentence2[] = {"I", "witness", "a", "more", "subtle",
118 "demonstration", "of", "real", "time", "physics",
119 "simulation", "at", "the", "tiny", "Palo", "Alto",
120 "office", "of", "Havok", "a", "competing", "physics",
121 "engine", "shop", "On", "the", "screen", "a",
122 "computer", "generated", "sailboat", "floats", "in",
123 "a", "stone", "lined", "pool", "of", "water", "The",
124 "company's", "genial", "Irish", "born", "cofounder",
125 "Hugh", "Reynolds", "shows", "me", "how", "to",
126 "push", "the", "boat", "with", "a", "mouse", "When",
127 "I", "nudge", "it", "air", "fills", "the", "sail",
128 "causing", "the", "ship", "to", "tilt", "leeward",
129 "Ripples", "in", "the", "water", "deflect", "off",
130 "the", "stones", "intersecting", "with", "one",
131 "another", "I", "urge", "the", "boat", "onward",
132 "and", "it", "glides", "effortlessly", "into", "the",
133 "wall", "Reynolds", "tosses", "in", "a", "handful",
134 "of", "virtual", "coins", "they", "spin", "through",
135 "the", "air,", "splash", "into", "the", "water,",
136 "and", "sink"};
137
138 String sentence3[] = {"The", "New", "Input", "Output", "NIO", "libraries",
139 "introduced", "in", "Java", "2", "Platform",
140 "Standard", "Edition", "J2SE", "1.4", "address",
141 "this", "problem", "NIO", "uses", "a", "buffer",
142 "oriented", "model", "That", "is", "NIO", "deals",
143 "with", "data", "primarily", "in", "large", "blocks",
144 "This", "eliminates", "the", "overhead", "caused",
145 "by", "the", "stream", "model", "and", "even", "makes",
146 "use", "of", "OS", "level", "facilities", "where",
147 "possible", "to", "maximize", "throughput"};
148
149 String sentence4[] = {"As", "governments", "scramble", "to", "contain",
150 "SARS", "the", "World", "Health", "Organisation",
151 "said", "it", "was", "extending", "the", "scope", "of",
152 "its", "April", "2", "travel", "alert", "to",
153 "include", "Beijing", "and", "the", "northern",
154 "Chinese", "province", "of", "Shanxi", "together",
155 "with", "Toronto", "the", "epicentre", "of", "the",
156 "SARS", "outbreak", "in", "Canada"};
157
158 String sentence5[] = {"That", "was", "our", "worst", "problem", "I",
159 "tried", "to", "see", "it", "the", "XP", "way", "Well",
160 "what", "we", "can", "do", "is", "implement",
161 "something", "I", "can't", "give", "any", "guarantees",
162 "as", "to", "how", "much", "of", "it", "will", "be",
163 "implemented", "in", "a", "month", "I", "won't",
164 "even", "hazard", "a", "guess", "as", "to", "how",
165 "long", "it", "would", "take", "to", "implement", "as",
166 "a", "whole", "I", "can't", "draw", "UML", "diagrams",
167 "for", "it", "or", "write", "technical", "specs",
168 "that", "would", "take", "time", "from", "coding",
169 "it", "which", "we", "can't", "afford", "Oh", "and",
170 "I", "have", "two", "kids", "I", "can't", "do", "much",
171 "OverTime", "But", "I", "should", "be", "able", "to",
172 "do", "something", "simple", "that", "will", "have",
173 "very", "few", "bugs", "and", "show", "a", "working",
174 "program", "early", "and", "often"};
175
176
177 classifier.teachMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence1);
178 classifier.teachNonMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence2);
179 classifier.teachMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence3);
180 classifier.teachNonMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence4);
181 classifier.teachMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence5);
182
183 assertTrue(classifier.isMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence1));
184 assertTrue(!classifier.isMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence2));
185 assertTrue(classifier.isMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence3));
186 assertTrue(!classifier.isMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence4));
187 assertTrue(classifier.isMatch(ICategorisedClassifier.DEFAULT_CATEGORY, sentence5));
188 }
189
190 public void testGetWordsDataSource() throws Exception {
191 SimpleWordsDataSource wds = new SimpleWordsDataSource();
192 BayesianClassifier classifier = new BayesianClassifier(wds);
193
194 assertEquals(wds, classifier.getWordsDataSource());
195 }
196
197 public void testGetTokenizer() throws Exception {
198 SimpleWordsDataSource wds = new SimpleWordsDataSource();
199 ITokenizer tokenizer = new DefaultTokenizer(DefaultTokenizer.BREAK_ON_WORD_BREAKS);
200 BayesianClassifier classifier = new BayesianClassifier(wds, tokenizer);
201
202 assertEquals(tokenizer, classifier.getTokenizer());
203 }
204
205 public void testGetStopWordProvider() throws Exception {
206 SimpleWordsDataSource wds = new SimpleWordsDataSource();
207 ITokenizer tokenizer = new DefaultTokenizer(DefaultTokenizer.BREAK_ON_WORD_BREAKS);
208 IStopWordProvider stopWordProvider = new DefaultStopWordsProvider();
209 BayesianClassifier classifier = new BayesianClassifier(wds, tokenizer, stopWordProvider);
210
211 assertEquals(stopWordProvider, classifier.getStopWordProvider());
212 }
213
214 public void testCaseSensitive() throws Exception {
215 BayesianClassifier classifier = new BayesianClassifier();
216 assertFalse(classifier.isCaseSensitive());
217 classifier.setCaseSensitive(true);
218 assertTrue(classifier.isCaseSensitive());
219 }
220
221 public void testTransformWord() throws Exception {
222 BayesianClassifier classifier = new BayesianClassifier();
223 assertFalse(classifier.isCaseSensitive());
224
225 String word = null;
226 try {
227 classifier.transformWord(word);
228 fail("No exception thrown when null passed");
229 } catch (IllegalArgumentException e) {
230
231 }
232
233 word = "myWord";
234 assertEquals(word.toLowerCase(), classifier.transformWord(word));
235
236 classifier.setCaseSensitive(true);
237 assertNotSame(word.toLowerCase(), classifier.transformWord(word));
238 assertEquals(word, classifier.transformWord(word));
239 }
240
241 public void testCalculateOverallProbability() throws Exception {
242 double prob = 0.3d;
243 WordProbability wp1 = new WordProbability("myWord1", prob);
244 WordProbability wp2 = new WordProbability("myWord2", prob);
245 WordProbability wp3 = new WordProbability("myWord3", prob);
246
247 WordProbability[] wps = {wp1, wp2, wp3};
248 double errorMargin = 0.0001d;
249
250 double xy = (prob * prob * prob);
251 double z = (1-prob)*(1-prob)*(1-prob);
252
253 double result = xy/(xy + z);
254
255 BayesianClassifier classifier = new BayesianClassifier();
256
257 assertEquals(result, classifier.calculateOverallProbability(wps), errorMargin);
258 }
259
260
261 public static void main(String[] args) throws Exception {
262 TestRunner.run(BayesianClassifierTest.class);
263 }
264 }