1 /*
2 * ====================================================================
3 *
4 * The Apache Software License, Version 1.1
5 *
6 * Copyright (c) 2003 Nick Lothian. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. The end-user documentation included with the redistribution, if
21 * any, must include the following acknowlegement:
22 * "This product includes software developed by the
23 * developers of Classifier4J (http://classifier4j.sf.net/)."
24 * Alternately, this acknowlegement may appear in the software itself,
25 * if and wherever such third-party acknowlegements normally appear.
26 *
27 * 4. The name "Classifier4J" must not be used to endorse or promote
28 * products derived from this software without prior written
29 * permission. For written permission, please contact
30 * http://sourceforge.net/users/nicklothian/.
31 *
32 * 5. Products derived from this software may not be called
33 * "Classifier4J", nor may "Classifier4J" appear in their names
34 * without prior written permission. For written permission, please
35 * contact http://sourceforge.net/users/nicklothian/.
36 *
37 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48 * SUCH DAMAGE.
49 * ====================================================================
50 */
51
52
53 package net.sf.classifier4J;
54
55 import org.apache.commons.logging.Log;
56 import org.apache.commons.logging.LogFactory;
57
58 import junit.framework.TestCase;
59 import junit.textui.TestRunner;
60
61 public class DefaultTokenizerTest extends TestCase {
62
63 private Log log = LogFactory.getLog(this.getClass());
64
65 public DefaultTokenizerTest(String name) {
66 super(name);
67 }
68
69 public void testConstructors() {
70 ITokenizer tok = null;
71
72 try {
73 tok = new DefaultTokenizer(null);
74 fail("Shouldn't be able to set a tokenizer of null");
75 }
76 catch(IllegalArgumentException e) {
77 assertTrue(true);
78 }
79
80 tok = new DefaultTokenizer("");
81
82 tok = new DefaultTokenizer(DefaultTokenizer.BREAK_ON_WHITESPACE);
83
84 tok = new DefaultTokenizer(DefaultTokenizer.BREAK_ON_WORD_BREAKS);
85
86 try {
87 tok = new DefaultTokenizer(43);
88 fail("Shouldn't be able to set a tokenizer of type 43");
89 }
90 catch(IllegalArgumentException e) {
91 assertTrue(true);
92 }
93 }
94
95 public void testTokenize() {
96
97 ITokenizer tok = null;
98 String words[] = null;
99
100 tok = new DefaultTokenizer(DefaultTokenizer.BREAK_ON_WHITESPACE);
101 words = tok.tokenize("My very,new string!");
102
103 assertEquals(3, words.length);
104 assertEquals("My", words[0]);
105 assertEquals("very,new", words[1]);
106 assertEquals("string!", words[2]);
107
108 tok = new DefaultTokenizer(DefaultTokenizer.BREAK_ON_WORD_BREAKS);
109 words = tok.tokenize("My very,new-string!and/more(NIO)peter's 1.4");
110
111 assertEquals(11, words.length);
112 assertEquals("My", words[0]);
113 assertEquals("very", words[1]);
114 assertEquals("new", words[2]);
115 assertEquals("string", words[3]);
116 assertEquals("and", words[4]);
117 assertEquals("more", words[5]);
118 assertEquals("NIO", words[6]);
119
120 //todo: Shouldn't this be "peter's", instead of "peter" & "s"?
121 assertEquals("peter", words[7]);
122 assertEquals("s", words[8]);
123
124 //todo: Shouldn't this be "1.4", instead of "1" & "4"?
125 assertEquals("1", words[9]);
126 assertEquals("4", words[10]);
127 }
128
129 public static void main(String[] args) throws Exception {
130 TestRunner.run(DefaultTokenizerTest.class);
131 }
132 }