| %line | %branch | |||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| net.sf.classifier4J.DefaultStopWordsProvider |
|
|
| 1 | /* |
|
| 2 | * ==================================================================== |
|
| 3 | * |
|
| 4 | * The Apache Software License, Version 1.1 |
|
| 5 | * |
|
| 6 | * Copyright (c) 2003 Nick Lothian. All rights reserved. |
|
| 7 | * |
|
| 8 | * Redistribution and use in source and binary forms, with or without |
|
| 9 | * modification, are permitted provided that the following conditions |
|
| 10 | * are met: |
|
| 11 | * |
|
| 12 | * 1. Redistributions of source code must retain the above copyright |
|
| 13 | * notice, this list of conditions and the following disclaimer. |
|
| 14 | * |
|
| 15 | * 2. Redistributions in binary form must reproduce the above copyright |
|
| 16 | * notice, this list of conditions and the following disclaimer in |
|
| 17 | * the documentation and/or other materials provided with the |
|
| 18 | * distribution. |
|
| 19 | * |
|
| 20 | * 3. The end-user documentation included with the redistribution, if |
|
| 21 | * any, must include the following acknowlegement: |
|
| 22 | * "This product includes software developed by the |
|
| 23 | * developers of Classifier4J (http://classifier4j.sf.net/)." |
|
| 24 | * Alternately, this acknowlegement may appear in the software itself, |
|
| 25 | * if and wherever such third-party acknowlegements normally appear. |
|
| 26 | * |
|
| 27 | * 4. The name "Classifier4J" must not be used to endorse or promote |
|
| 28 | * products derived from this software without prior written |
|
| 29 | * permission. For written permission, please contact |
|
| 30 | * http://sourceforge.net/users/nicklothian/. |
|
| 31 | * |
|
| 32 | * 5. Products derived from this software may not be called |
|
| 33 | * "Classifier4J", nor may "Classifier4J" appear in their names |
|
| 34 | * without prior written permission. For written permission, please |
|
| 35 | * contact http://sourceforge.net/users/nicklothian/. |
|
| 36 | * |
|
| 37 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
|
| 38 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
|
| 39 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|
| 40 | * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
|
| 41 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
| 42 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
| 43 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
|
| 44 | * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
|
| 45 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
|
| 46 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
|
| 47 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
| 48 | * SUCH DAMAGE. |
|
| 49 | * ==================================================================== |
|
| 50 | */ |
|
| 51 | ||
| 52 | package net.sf.classifier4J; |
|
| 53 | ||
| 54 | import java.util.Arrays; |
|
| 55 | ||
| 56 | import net.sf.classifier4J.util.ToStringBuilder; |
|
| 57 | ||
| 58 | ||
| 59 | /** |
|
| 60 | * @author Nick Lothian |
|
| 61 | * @author Peter Leschev |
|
| 62 | */ |
|
| 63 | public class DefaultStopWordsProvider implements IStopWordProvider { |
|
| 64 | // This array is sorted in the constructor |
|
| 65 | 32 | private String[] stopWords = { "a", "and", "the", "me", "i", "of", "if", "it", "is", "they", "there", "but", "or", "to", "this", "you", "in", "your", "on", "for", "as", "are", "that", "with", "have", "be", "at", "or", "was", "so", "out", "not", "an" }; |
| 66 | 32 | private String[] sortedStopWords = null; |
| 67 | ||
| 68 | 32 | public DefaultStopWordsProvider() { |
| 69 | 32 | sortedStopWords = getStopWords(); |
| 70 | 32 | Arrays.sort(sortedStopWords); |
| 71 | 32 | } |
| 72 | ||
| 73 | /** |
|
| 74 | * getter method which can be overridden to |
|
| 75 | * supply the stop words. The array returned by this |
|
| 76 | * method is sorted and then used internally |
|
| 77 | * |
|
| 78 | * @return the array of stop words |
|
| 79 | */ |
|
| 80 | public String[] getStopWords() { |
|
| 81 | 32 | return stopWords; |
| 82 | } |
|
| 83 | ||
| 84 | /** |
|
| 85 | * @see net.sf.classifier4J.IStopWordProvider#isStopWord(java.lang.String) |
|
| 86 | */ |
|
| 87 | public boolean isStopWord(String word) { |
|
| 88 | 1836 | if (word == null || "".equals(word)) { |
| 89 | 0 | return false; |
| 90 | } else { |
|
| 91 | // search the sorted array for the word, converted to lowercase |
|
| 92 | // if it is found, the index will be >= 0 |
|
| 93 | 1836 | return (Arrays.binarySearch(sortedStopWords, word.toLowerCase()) >= 0); |
| 94 | } |
|
| 95 | } |
|
| 96 | ||
| 97 | public String toString() { |
|
| 98 | 0 | return new ToStringBuilder(this).append("stopWords.size()", sortedStopWords.length).toString(); |
| 99 | } |
|
| 100 | } |
| This report is generated by jcoverage, Maven and Maven JCoverage Plugin. |