1   /*
2    * ====================================================================
3    * 
4    * The Apache Software License, Version 1.1
5    *
6    * Copyright (c) 2003 Nick Lothian. All rights reserved.
7    *
8    * Redistribution and use in source and binary forms, with or without
9    * modification, are permitted provided that the following conditions
10   * are met:
11   *
12   * 1. Redistributions of source code must retain the above copyright
13   *    notice, this list of conditions and the following disclaimer. 
14   *
15   * 2. Redistributions in binary form must reproduce the above copyright
16   *    notice, this list of conditions and the following disclaimer in
17   *    the documentation and/or other materials provided with the
18   *    distribution.
19   *
20   * 3. The end-user documentation included with the redistribution, if
21   *    any, must include the following acknowlegement:  
22   *       "This product includes software developed by the 
23   *        developers of Classifier4J (http://classifier4j.sf.net/)."
24   *    Alternately, this acknowlegement may appear in the software itself,
25   *    if and wherever such third-party acknowlegements normally appear.
26   *
27   * 4. The name "Classifier4J" must not be used to endorse or promote 
28   *    products derived from this software without prior written 
29   *    permission. For written permission, please contact   
30   *    http://sourceforge.net/users/nicklothian/.
31   *
32   * 5. Products derived from this software may not be called 
33   *    "Classifier4J", nor may "Classifier4J" appear in their names 
34   *    without prior written permission. For written permission, please 
35   *    contact http://sourceforge.net/users/nicklothian/.
36   *
37   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48   * SUCH DAMAGE.
49   * ====================================================================
50   */
51  
52  package net.sf.classifier4J.summariser;
53  
54  import java.util.HashMap;
55  import java.util.Iterator;
56  import java.util.Map;
57  import java.util.Set;
58  
59  import junit.framework.TestCase;
60  
61  public class SimpleSummariserTest extends TestCase {
62  
63      SimpleSummariser summariser = null;
64  
65      /*
66       * @see TestCase#setUp()
67       */
68      protected void setUp() throws Exception {
69          summariser = new SimpleSummariser();
70          super.setUp();
71      }
72  
73      /*
74       * @see TestCase#tearDown()
75       */
76      protected void tearDown() throws Exception {
77          summariser = null;
78          super.tearDown();
79      }
80  
81      public void testSummarise() {
82  
83          String input = "Classifier4J is a java package for working with text. Classifier4J includes a summariser.";
84          String expectedResult = "Classifier4J is a java package for working with text.";
85  
86          String result = summariser.summarise(input, 1);
87          assertEquals(expectedResult, result);
88  
89          input = "Classifier4J is a java package for working with text. Classifier4J includes a summariser. A Summariser allows the summary of text. A Summariser is really cool. I don't think there are any other java summarisers.";
90          expectedResult = "Classifier4J is a java package for working with text. Classifier4J includes a summariser.";
91          result = summariser.summarise(input, 2);
92          assertEquals(expectedResult, result);
93  
94          /*
95          // This fails due to appending "." instead of whatever the correct punctuation is 		 
96          input = "Classifier4J is a java package for working with text! Classifier4J includes a summariser.";
97          expectedResult = "Classifier4J is a java package for working with text";
98          result = summariser.summarise(input, 1);
99          System.out.println(expectedResult);
100         System.out.println(result);		
101         assertEquals(expectedResult, result);
102         */
103 
104     }
105 
106     public void testGetMostFrequentWords() {
107         Map input = new HashMap();
108         String[] values = { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten" };
109         for (int i = 0; i < values.length; i++) {
110             input.put(values[i], new Integer(i));
111         }
112 
113         Set result = summariser.getMostFrequentWords(3, input);
114         assertNotNull(result);
115         assertEquals(3, result.size());
116 
117         Iterator it = result.iterator();
118         int count = 1;
119         while (it.hasNext()) {
120             String resultValue = (String) it.next();
121             assertEquals(values[values.length - count], resultValue);
122             count++;
123         }
124 
125         result = summariser.getMostFrequentWords(4, input);
126         assertNotNull(result);
127         assertEquals(4, result.size());
128 
129         it = result.iterator();
130         count = 1;
131         while (it.hasNext()) {
132             String resultValue = (String) it.next();
133             assertEquals(values[values.length - count], resultValue);
134             count++;
135         }
136 
137     }
138 
139 }