%line | %branch | |||||||||
---|---|---|---|---|---|---|---|---|---|---|
net.sf.classifier4J.DefaultStopWordsProvider |
|
|
1 | /* |
|
2 | * ==================================================================== |
|
3 | * |
|
4 | * The Apache Software License, Version 1.1 |
|
5 | * |
|
6 | * Copyright (c) 2003 Nick Lothian. All rights reserved. |
|
7 | * |
|
8 | * Redistribution and use in source and binary forms, with or without |
|
9 | * modification, are permitted provided that the following conditions |
|
10 | * are met: |
|
11 | * |
|
12 | * 1. Redistributions of source code must retain the above copyright |
|
13 | * notice, this list of conditions and the following disclaimer. |
|
14 | * |
|
15 | * 2. Redistributions in binary form must reproduce the above copyright |
|
16 | * notice, this list of conditions and the following disclaimer in |
|
17 | * the documentation and/or other materials provided with the |
|
18 | * distribution. |
|
19 | * |
|
20 | * 3. The end-user documentation included with the redistribution, if |
|
21 | * any, must include the following acknowledgment: |
|
22 | * "This product includes software developed by the |
|
23 | * developers of Classifier4J (http://classifier4j.sf.net/)." |
|
24 | * Alternately, this acknowledgment may appear in the software itself, |
|
25 | * if and wherever such third-party acknowledgments normally appear. |
|
26 | * |
|
27 | * 4. The name "Classifier4J" must not be used to endorse or promote |
|
28 | * products derived from this software without prior written |
|
29 | * permission. For written permission, please contact |
|
30 | * http://sourceforge.net/users/nicklothian/. |
|
31 | * |
|
32 | * 5. Products derived from this software may not be called |
|
33 | * "Classifier4J", nor may "Classifier4J" appear in their names |
|
34 | * without prior written permission. For written permission, please |
|
35 | * contact http://sourceforge.net/users/nicklothian/. |
|
36 | * |
|
37 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
|
38 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
|
39 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|
40 | * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
|
41 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
42 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
43 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
|
44 | * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
|
45 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
|
46 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
|
47 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
48 | * SUCH DAMAGE. |
|
49 | * ==================================================================== |
|
50 | */ |
|
51 | ||
52 | package net.sf.classifier4J; |
|
53 | ||
54 | import java.util.Arrays; |
|
55 | ||
56 | import net.sf.classifier4J.util.ToStringBuilder; |
|
57 | ||
58 | ||
59 | /** |
|
60 | * @author Nick Lothian |
|
61 | * @author Peter Leschev |
|
62 | */ |
|
63 | public class DefaultStopWordsProvider implements IStopWordProvider { |
|
64 | // This array is sorted in the constructor |
|
65 | 32 | private String[] stopWords = { "a", "and", "the", "me", "i", "of", "if", "it", "is", "they", "there", "but", "or", "to", "this", "you", "in", "your", "on", "for", "as", "are", "that", "with", "have", "be", "at", "or", "was", "so", "out", "not", "an" }; |
66 | 32 | private String[] sortedStopWords = null; |
67 | ||
68 | 32 | public DefaultStopWordsProvider() { |
69 | 32 | sortedStopWords = getStopWords(); |
70 | 32 | Arrays.sort(sortedStopWords); |
71 | 32 | } |
72 | ||
73 | /** |
|
74 | * Getter method which can be overridden to supply the stop words. |
|
75 | * The constructor calls this method, sorts the returned array in |
|
76 | * place, and then uses that array internally for lookups. |
|
77 | * |
|
78 | * @return the array of stop words |
|
79 | */ |
|
80 | public String[] getStopWords() { |
|
81 | 32 | return stopWords; |
82 | } |
|
83 | ||
84 | /** |
|
85 | * @see net.sf.classifier4J.IStopWordProvider#isStopWord(java.lang.String) |
|
86 | */ |
|
87 | public boolean isStopWord(String word) { |
|
88 | 1836 | if (word == null || "".equals(word)) { |
89 | 0 | return false; |
90 | } else { |
|
91 | // search the sorted array for the word, converted to lowercase |
|
92 | // if it is found, the index will be >= 0 |
|
93 | 1836 | return (Arrays.binarySearch(sortedStopWords, word.toLowerCase()) >= 0); |
94 | } |
|
95 | } |
|
96 | ||
97 | public String toString() { |
|
98 | 0 | return new ToStringBuilder(this).append("stopWords.size()", sortedStopWords.length).toString(); |
99 | } |
|
100 | } |
This report is generated by jcoverage, Maven and Maven JCoverage Plugin. |