Coverage report

  %line %branch
net.sf.classifier4J.bayesian.WordProbability
67% 
93% 

 1  
 /*
 2  
  * ====================================================================
 3  
  * 
 4  
  * The Apache Software License, Version 1.1
 5  
  *
 6  
  * Copyright (c) 2003 Nick Lothian. All rights reserved.
 7  
  *
 8  
  * Redistribution and use in source and binary forms, with or without
 9  
  * modification, are permitted provided that the following conditions
 10  
  * are met:
 11  
  *
 12  
  * 1. Redistributions of source code must retain the above copyright
 13  
  *    notice, this list of conditions and the following disclaimer. 
 14  
  *
 15  
  * 2. Redistributions in binary form must reproduce the above copyright
 16  
  *    notice, this list of conditions and the following disclaimer in
 17  
  *    the documentation and/or other materials provided with the
 18  
  *    distribution.
 19  
  *
 20  
  * 3. The end-user documentation included with the redistribution, if
 21  
  *    any, must include the following acknowlegement:  
 22  
  *       "This product includes software developed by the 
 23  
  *        developers of Classifier4J (http://classifier4j.sf.net/)."
 24  
  *    Alternately, this acknowlegement may appear in the software itself,
 25  
  *    if and wherever such third-party acknowlegements normally appear.
 26  
  *
 27  
  * 4. The name "Classifier4J" must not be used to endorse or promote 
 28  
  *    products derived from this software without prior written 
 29  
  *    permission. For written permission, please contact   
 30  
  *    http://sourceforge.net/users/nicklothian/.
 31  
  *
 32  
  * 5. Products derived from this software may not be called 
 33  
  *    "Classifier4J", nor may "Classifier4J" appear in their names 
 34  
  *    without prior written permission. For written permission, please 
 35  
  *    contact http://sourceforge.net/users/nicklothian/.
 36  
  *
 37  
  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 38  
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 39  
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 40  
  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 41  
  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 42  
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 43  
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 44  
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 45  
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 46  
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 47  
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 48  
  * SUCH DAMAGE.
 49  
  * ====================================================================
 50  
  */
 51  
 
 52  
 package net.sf.classifier4J.bayesian;
 53  
 
 54  
 import java.io.Serializable;
 55  
 
 56  
 import net.sf.classifier4J.IClassifier;
 57  
 import net.sf.classifier4J.ICategorisedClassifier;
 58  
 import net.sf.classifier4J.util.*;
 59  
 import net.sf.classifier4J.util.CompareToBuilder;
 60  
 import net.sf.classifier4J.util.EqualsBuilder;
 61  
 import net.sf.classifier4J.util.ToStringBuilder;
 62  
 
 63  
 import org.apache.commons.logging.Log;
 64  
 import org.apache.commons.logging.LogFactory;
 65  
 
 66  
 /**
 67  
  * Represents the probability of a particular word. The user of this object
 68  
  * can either:
 69  
  * <ol>
 70  
  * 		<li>Set a specific probability for a particular word <I>or</I></li>
 71  
  * 		<li>Define the matching and non-matching counts for the particular word. 
 72  
  *        This class then calculates the probability for you.</li>
 73  
  * </ol>
 74  
  * 
 75  
  * @author Nick Lothian
 76  
  * @author Peter Leschev
 77  
  */
 78  
 public class WordProbability implements Comparable, Serializable {
 79  
 
 80  
     private static final int UNDEFINED = -1;
 81  
 
 82  502
     private String word = "";
 83  502
     private String category = ICategorisedClassifier.DEFAULT_CATEGORY;
 84  
 
 85  502
     private long matchingCount = UNDEFINED;
 86  502
     private long nonMatchingCount = UNDEFINED;
 87  
 
 88  502
     private double probability = IClassifier.NEUTRAL_PROBABILITY;
 89  
 
 90  0
     public WordProbability() {
 91  0
         setMatchingCount(0);
 92  0
         setNonMatchingCount(0);
 93  0
     }
 94  
 
 95  0
     public WordProbability(String w) {
 96  0
         setWord(w);
 97  0
         setMatchingCount(0);
 98  0
         setNonMatchingCount(0);
 99  0
     }
 100  
 
 101  0
     public WordProbability(String c, String w) {
 102  0
         setCategory(c);
 103  0
         setWord(w);
 104  0
         setMatchingCount(0);
 105  0
         setNonMatchingCount(0);
 106  0
     }
 107  
 
 108  20
     public WordProbability(String w, double probability) {
 109  20
         setWord(w);
 110  20
         setProbability(probability);
 111  20
     }
 112  
 
 113  482
     public WordProbability(String w, long matchingCount, long nonMatchingCount) {
 114  482
         setWord(w);
 115  482
         setMatchingCount(matchingCount);
 116  482
         setNonMatchingCount(nonMatchingCount);
 117  482
     }
 118  
 
 119  
     public void setWord(String w) {
 120  502
         this.word = w;
 121  502
     }
 122  
 
 123  
     public void setCategory(String category) {
 124  0
         this.category = category;
 125  0
     }
 126  
 
 127  
     public void setProbability(double probability) {
 128  20
         this.probability = probability;
 129  20
         this.matchingCount = UNDEFINED;
 130  20
         this.nonMatchingCount = UNDEFINED;
 131  20
     }
 132  
 
 133  
     public void setMatchingCount(long matchingCount) {
 134  586
         if (matchingCount < 0) {
 135  2
             throw new IllegalArgumentException("matchingCount must be greater than 0");
 136  
         }
 137  584
         this.matchingCount = matchingCount;
 138  584
         calculateProbability();
 139  584
     }
 140  
 
 141  
     public void setNonMatchingCount(long nonMatchingCount) {
 142  1516
         if (nonMatchingCount < 0) {
 143  2
             throw new IllegalArgumentException("nonMatchingCount must be greater than 0");
 144  
         }
 145  1514
         this.nonMatchingCount = nonMatchingCount;
 146  1514
         calculateProbability();
 147  1514
     }
 148  
 
 149  
     public void registerMatch() {
 150  2
         if (matchingCount == Long.MAX_VALUE) {
 151  2
             throw new UnsupportedOperationException("Long.MAX_VALUE reached, can't register more matches");
 152  
         }
 153  0
         matchingCount++;
 154  0
         calculateProbability();
 155  0
     }
 156  
 
 157  
     public void registerNonMatch() {
 158  2
         if (nonMatchingCount == Long.MAX_VALUE) {
 159  2
             throw new UnsupportedOperationException("Long.MAX_VALUE reached, can't register more matches");
 160  
         }
 161  0
         nonMatchingCount++;
 162  0
         calculateProbability();
 163  0
     }
 164  
 
 165  
     private void calculateProbability() {
 166  
         // the logger can't be a field because this class might be serialized 
 167  2098
         Log log = LogFactory.getLog(this.getClass());
 168  
 
 169  2098
         String method = "calculateProbability() ";
 170  
 
 171  2098
         if (log.isDebugEnabled()) {
 172  0
             log.debug(method + "START");
 173  
 
 174  0
             log.debug(method + "matchingCount = " + matchingCount);
 175  0
             log.debug(method + "nonMatchingCount = " + nonMatchingCount);
 176  
         }
 177  
 
 178  2098
         double result = IClassifier.NEUTRAL_PROBABILITY;
 179  
 
 180  2098
         if (matchingCount == 0) {
 181  1430
             if (nonMatchingCount == 0) {
 182  4
                 result = IClassifier.NEUTRAL_PROBABILITY;
 183  
             } else {
 184  1426
                 result = IClassifier.LOWER_BOUND;
 185  
             }
 186  
         } else {
 187  668
             result = BayesianClassifier.normaliseSignificance((double) matchingCount / (double) (matchingCount + nonMatchingCount));
 188  
         }
 189  
 
 190  2098
         probability = result;
 191  
 
 192  2098
         if (log.isDebugEnabled()) {
 193  0
             log.debug(method + "END Calculated [" + probability + "]");
 194  
         }
 195  2098
     }
 196  
 
 197  
     /**
 198  
      * @return the probability currently held for this word
 199  
          */
 200  
     public double getProbability() {
 201  1122
         return probability;
 202  
     }
 203  
 
 204  
     public long getMatchingCount() {
 205  
 
 206  116
         if (matchingCount == UNDEFINED) {
 207  2
             throw new UnsupportedOperationException("MatchingCount has not been defined");
 208  
         }
 209  
 
 210  114
         return matchingCount;
 211  
     }
 212  
 
 213  
     public long getNonMatchingCount() {
 214  
 
 215  1046
         if (nonMatchingCount == UNDEFINED) {
 216  2
             throw new UnsupportedOperationException("nonMatchingCount has not been defined");
 217  
         }
 218  
 
 219  1044
         return nonMatchingCount;
 220  
     }
 221  
 
 222  
     /**
 223  
      * @return the word this probability applies to
 224  
      */
 225  
     public String getWord() {
 226  1614
         return word;
 227  
     }
 228  
 
 229  
     public String getCategory() {
 230  12
         return category;
 231  
     }
 232  
 
 233  
     public boolean equals(Object o) {
 234  2
         if (!(o instanceof WordProbability)) {
 235  0
             return false;
 236  
         }
 237  2
         WordProbability rhs = (WordProbability) o;
 238  2
         return new EqualsBuilder().append(getWord(), rhs.getWord()).append(getCategory(), rhs.getCategory()).isEquals();
 239  
     }
 240  
 
 241  
     public int compareTo(java.lang.Object o) {
 242  6
         if (!(o instanceof WordProbability)) {
 243  2
             throw new ClassCastException(o.getClass() + " is not a " + this.getClass());
 244  
         }
 245  4
         WordProbability rhs = (WordProbability) o;
 246  4
         return new CompareToBuilder().append(this.getCategory(), rhs.getCategory()).append(this.getWord(), rhs.getWord()).toComparison();
 247  
     }
 248  
 
 249  
     public String toString() {
 250  0
         return new ToStringBuilder(this).append("word", word).append("category", category).append("probability", probability).append("matchingCount", matchingCount).append("nonMatchingCount", nonMatchingCount).toString();
 251  
     }
 252  
 
 253  
     public int hashCode() {
 254  
         // you pick a hard-coded, randomly chosen, non-zero, odd number
 255  
         // ideally different for each class
 256  0
         return new HashCodeBuilder(17, 37).append(word).append(category).toHashCode();
 257  
     }
 258  
 }

This report is generated by jcoverage, Maven and Maven JCoverage Plugin.