Coverage report

  %line %branch
net.sf.classifier4J.summariser.SimpleSummariser$1
100% 
100% 

 1  
 /*
 2  
  * ====================================================================
 3  
  * 
 4  
  * The Apache Software License, Version 1.1
 5  
  *
 6  
  * Copyright (c) 2003-2005 Nick Lothian. All rights reserved.
 7  
  *
 8  
  * Redistribution and use in source and binary forms, with or without
 9  
  * modification, are permitted provided that the following conditions
 10  
  * are met:
 11  
  *
 12  
  * 1. Redistributions of source code must retain the above copyright
 13  
  *    notice, this list of conditions and the following disclaimer. 
 14  
  *
 15  
  * 2. Redistributions in binary form must reproduce the above copyright
 16  
  *    notice, this list of conditions and the following disclaimer in
 17  
  *    the documentation and/or other materials provided with the
 18  
  *    distribution.
 19  
  *
 20  
  * 3. The end-user documentation included with the redistribution, if
 21  
  *    any, must include the following acknowlegement:  
 22  
  *       "This product includes software developed by the 
 23  
  *        developers of Classifier4J (http://classifier4j.sf.net/)."
 24  
  *    Alternately, this acknowlegement may appear in the software itself,
 25  
  *    if and wherever such third-party acknowlegements normally appear.
 26  
  *
 27  
  * 4. The name "Classifier4J" must not be used to endorse or promote 
 28  
  *    products derived from this software without prior written 
 29  
  *    permission. For written permission, please contact   
 30  
  *    http://sourceforge.net/users/nicklothian/.
 31  
  *
 32  
  * 5. Products derived from this software may not be called 
 33  
  *    "Classifier4J", nor may "Classifier4J" appear in their names 
 34  
  *    without prior written permission. For written permission, please 
 35  
  *    contact http://sourceforge.net/users/nicklothian/.
 36  
  *
 37  
  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 38  
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 39  
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 40  
  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 41  
  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 42  
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 43  
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 44  
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 45  
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 46  
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 47  
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 48  
  * SUCH DAMAGE.
 49  
  * ====================================================================
 50  
  */
 51  
 
 52  
 package net.sf.classifier4J.summariser;
 53  
 
 54  
 import java.util.ArrayList;
 55  
 import java.util.Collections;
 56  
 import java.util.Comparator;
 57  
 import java.util.Iterator;
 58  
 import java.util.LinkedHashSet;
 59  
 import java.util.List;
 60  
 import java.util.Map;
 61  
 import java.util.Set;
 62  
 
 63  
 import net.sf.classifier4J.Utilities;
 64  
 
 65  
 public class SimpleSummariser implements ISummariser {
 66  
 
 67  
     private Integer findMaxValue(List input) {
 68  
         Collections.sort(input);
 69  
         return (Integer) input.get(0);
 70  
     }
 71  
 
 72  
 
 73  
     protected Set getMostFrequentWords(int count, Map wordFrequencies) {
 74  
         return Utilities.getMostFrequentWords(count, wordFrequencies);
 75  
     }
 76  
 
 77  
     /**
 78  
      * @see net.sf.classifier4J.summariser.ISummariser#summarise(java.lang.String)
 79  
      */
 80  
     public String summarise(String input, int numSentences) {
 81  
         // get the frequency of each word in the input
 82  
         Map wordFrequencies = Utilities.getWordFrequency(input);
 83  
 
 84  
         // now create a set of the X most frequent words
 85  
         Set mostFrequentWords = getMostFrequentWords(100, wordFrequencies);
 86  
 
 87  
         // break the input up into sentences
 88  
         // workingSentences is used for the analysis, but
 89  
         // actualSentences is used in the results so that the 
 90  
         // capitalisation will be correct.
 91  
         String[] workingSentences = Utilities.getSentences(input.toLowerCase());
 92  
         String[] actualSentences = Utilities.getSentences(input);
 93  
 
 94  
         // iterate over the most frequent words, and add the first sentence 
 95  
         // that includes each word to the result
 96  
         Set outputSentences = new LinkedHashSet();
 97  
         Iterator it = mostFrequentWords.iterator();
 98  
         while (it.hasNext()) {
 99  
             String word = (String) it.next();
 100  
             for (int i = 0; i < workingSentences.length; i++) {
 101  
                 if (workingSentences[i].indexOf(word) >= 0) {
 102  
                     outputSentences.add(actualSentences[i]);
 103  
                     break;
 104  
                 }
 105  
                 if (outputSentences.size() >= numSentences) {
 106  
                     break;
 107  
                 }
 108  
             }
 109  
             if (outputSentences.size() >= numSentences) {
 110  
                 break;
 111  
             }
 112  
 
 113  
         }
 114  
 
 115  
         List reorderedOutputSentences = reorderSentences(outputSentences, input);
 116  
 
 117  
         StringBuffer result = new StringBuffer("");
 118  
         it = reorderedOutputSentences.iterator();
 119  
         while (it.hasNext()) {
 120  
             String sentence = (String) it.next();
 121  
             result.append(sentence);
 122  
             result.append("."); // This isn't always correct - perhaps it should be whatever symbol the sentence finished with
 123  
             if (it.hasNext()) {
 124  
                 result.append(" ");
 125  
             }
 126  
         }
 127  
 
 128  
         return result.toString();
 129  
     }
 130  
 
 131  
     /**
 132  
      * @param outputSentences
 133  
      * @param input
 134  
      * @return
 135  
      */
 136  
     private List reorderSentences(Set outputSentences, final String input) {
 137  
         // reorder the sentences to the order they were in the 
 138  
         // original text
 139  
         ArrayList result = new ArrayList(outputSentences);
 140  
 
 141  
         Collections.sort(result, new Comparator() {
 142  4
             public int compare(Object arg0, Object arg1) {
 143  2
                 String sentence1 = (String) arg0;
 144  2
                 String sentence2 = (String) arg1;
 145  
 
 146  2
                 int indexOfSentence1 = input.indexOf(sentence1.trim());
 147  2
                 int indexOfSentence2 = input.indexOf(sentence2.trim());
 148  2
                 int result = indexOfSentence1 - indexOfSentence2;
 149  
 
 150  2
                 return result;
 151  
             }
 152  
 
 153  
         });
 154  
         return result;
 155  
     }
 156  
 
 157  
 }

This report is generated by jcoverage, Maven and Maven JCoverage Plugin.