1   /*
2    * ====================================================================
3    * 
4    * The Apache Software License, Version 1.1
5    *
6    * Copyright (c) 2003 Nick Lothian. All rights reserved.
7    *
8    * Redistribution and use in source and binary forms, with or without
9    * modification, are permitted provided that the following conditions
10   * are met:
11   *
12   * 1. Redistributions of source code must retain the above copyright
13   *    notice, this list of conditions and the following disclaimer. 
14   *
15   * 2. Redistributions in binary form must reproduce the above copyright
16   *    notice, this list of conditions and the following disclaimer in
17   *    the documentation and/or other materials provided with the
18   *    distribution.
19   *
20   * 3. The end-user documentation included with the redistribution, if
21   *    any, must include the following acknowlegement:  
22   *       "This product includes software developed by the 
23   *        developers of Classifier4J (http://classifier4j.sf.net/)."
24   *    Alternately, this acknowlegement may appear in the software itself,
25   *    if and wherever such third-party acknowlegements normally appear.
26   *
27   * 4. The name "Classifier4J" must not be used to endorse or promote 
28   *    products derived from this software without prior written 
29   *    permission. For written permission, please contact   
30   *    http://sourceforge.net/users/nicklothian/.
31   *
32   * 5. Products derived from this software may not be called 
33   *    "Classifier4J", nor may "Classifier4J" appear in their names 
34   *    without prior written permission. For written permission, please 
35   *    contact http://sourceforge.net/users/nicklothian/.
36   *
37   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48   * SUCH DAMAGE.
49   * ====================================================================
50   */
51  
52  
53  package net.sf.classifier4J;
54  
55  import org.apache.commons.logging.Log;
56  import org.apache.commons.logging.LogFactory;
57  
58  import junit.framework.TestCase;
59  import junit.textui.TestRunner;
60  
61  public class DefaultTokenizerTest extends TestCase {
62    
63    private Log log = LogFactory.getLog(this.getClass());
64    
65    public DefaultTokenizerTest(String name) {
66      super(name);
67    }
68    
69    public void testConstructors() {
70      ITokenizer tok = null;
71      
72      try {
73        tok = new DefaultTokenizer(null);
74        fail("Shouldn't be able to set a tokenizer of null");
75      }
76      catch(IllegalArgumentException e) {
77        assertTrue(true);
78      }
79      
80      tok = new DefaultTokenizer("");
81      
82      tok = new DefaultTokenizer(DefaultTokenizer.BREAK_ON_WHITESPACE);
83      
84      tok = new DefaultTokenizer(DefaultTokenizer.BREAK_ON_WORD_BREAKS);
85      
86      try {
87        tok = new DefaultTokenizer(43);
88        fail("Shouldn't be able to set a tokenizer of type 43");
89      }
90      catch(IllegalArgumentException e) {
91        assertTrue(true);
92      }
93    }
94    
95    public void testTokenize() {
96      
97      ITokenizer tok = null;
98      String words[] = null;
99      
100     tok = new DefaultTokenizer(DefaultTokenizer.BREAK_ON_WHITESPACE);
101     words = tok.tokenize("My very,new string!");
102     
103     assertEquals(3, words.length);
104     assertEquals("My",       words[0]);
105     assertEquals("very,new", words[1]);
106     assertEquals("string!",  words[2]);
107     
108     tok = new DefaultTokenizer(DefaultTokenizer.BREAK_ON_WORD_BREAKS);
109     words = tok.tokenize("My very,new-string!and/more(NIO)peter's 1.4");
110     
111     assertEquals(11, words.length);
112     assertEquals("My",       words[0]);
113     assertEquals("very",     words[1]);
114     assertEquals("new",      words[2]);
115     assertEquals("string",   words[3]);
116     assertEquals("and",      words[4]);
117     assertEquals("more",     words[5]);
118     assertEquals("NIO",      words[6]);
119     
120     //todo: Shouldn't this be "peter's", instead of "peter" & "s"?
121     assertEquals("peter",    words[7]);
122     assertEquals("s",        words[8]);  
123     
124     //todo: Shouldn't this be "1.4", instead of "1" & "4"?
125     assertEquals("1",        words[9]); 
126     assertEquals("4",        words[10]); 
127   }
128   
129   public static void main(String[] args) throws Exception {
130     TestRunner.run(DefaultTokenizerTest.class);
131   }
132 }