1 /*
2 * ====================================================================
3 *
4 * The Apache Software License, Version 1.1
5 *
6 * Copyright (c) 2003 Nick Lothian. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. The end-user documentation included with the redistribution, if
21 * any, must include the following acknowlegement:
22 * "This product includes software developed by the
23 * developers of Classifier4J (http://classifier4j.sf.net/)."
24 * Alternately, this acknowlegement may appear in the software itself,
25 * if and wherever such third-party acknowlegements normally appear.
26 *
27 * 4. The name "Classifier4J" must not be used to endorse or promote
28 * products derived from this software without prior written
29 * permission. For written permission, please contact
30 * http://sourceforge.net/users/nicklothian/.
31 *
32 * 5. Products derived from this software may not be called
33 * "Classifier4J", nor may "Classifier4J" appear in their names
34 * without prior written permission. For written permission, please
35 * contact http://sourceforge.net/users/nicklothian/.
36 *
37 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48 * SUCH DAMAGE.
49 * ====================================================================
50 */
51
52 package net.sf.classifier4J.summariser;
53
54 import java.util.HashMap;
55 import java.util.Iterator;
56 import java.util.Map;
57 import java.util.Set;
58
59 import junit.framework.TestCase;
60
61 public class SimpleSummariserTest extends TestCase {
62
63 SimpleSummariser summariser = null;
64
65 /*
66 * @see TestCase#setUp()
67 */
68 protected void setUp() throws Exception {
69 summariser = new SimpleSummariser();
70 super.setUp();
71 }
72
73 /*
74 * @see TestCase#tearDown()
75 */
76 protected void tearDown() throws Exception {
77 summariser = null;
78 super.tearDown();
79 }
80
81 public void testSummarise() {
82
83 String input = "Classifier4J is a java package for working with text. Classifier4J includes a summariser.";
84 String expectedResult = "Classifier4J is a java package for working with text.";
85
86 String result = summariser.summarise(input, 1);
87 assertEquals(expectedResult, result);
88
89 input = "Classifier4J is a java package for working with text. Classifier4J includes a summariser. A Summariser allows the summary of text. A Summariser is really cool. I don't think there are any other java summarisers.";
90 expectedResult = "Classifier4J is a java package for working with text. Classifier4J includes a summariser.";
91 result = summariser.summarise(input, 2);
92 assertEquals(expectedResult, result);
93
94 /*
95 // This fails due to appending "." instead of whatever the correct punctuation is
96 input = "Classifier4J is a java package for working with text! Classifier4J includes a summariser.";
97 expectedResult = "Classifier4J is a java package for working with text";
98 result = summariser.summarise(input, 1);
99 System.out.println(expectedResult);
100 System.out.println(result);
101 assertEquals(expectedResult, result);
102 */
103
104 }
105
106 public void testGetMostFrequentWords() {
107 Map input = new HashMap();
108 String[] values = { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten" };
109 for (int i = 0; i < values.length; i++) {
110 input.put(values[i], new Integer(i));
111 }
112
113 Set result = summariser.getMostFrequentWords(3, input);
114 assertNotNull(result);
115 assertEquals(3, result.size());
116
117 Iterator it = result.iterator();
118 int count = 1;
119 while (it.hasNext()) {
120 String resultValue = (String) it.next();
121 assertEquals(values[values.length - count], resultValue);
122 count++;
123 }
124
125 result = summariser.getMostFrequentWords(4, input);
126 assertNotNull(result);
127 assertEquals(4, result.size());
128
129 it = result.iterator();
130 count = 1;
131 while (it.hasNext()) {
132 String resultValue = (String) it.next();
133 assertEquals(values[values.length - count], resultValue);
134 count++;
135 }
136
137 }
138
139 }