View Javadoc

1   /*
2    * ====================================================================
3    * 
4    * The Apache Software License, Version 1.1
5    *
6    * Copyright (c) 2003 Nick Lothian. All rights reserved.
7    *
8    * Redistribution and use in source and binary forms, with or without
9    * modification, are permitted provided that the following conditions
10   * are met:
11   *
12   * 1. Redistributions of source code must retain the above copyright
13   *    notice, this list of conditions and the following disclaimer. 
14   *
15   * 2. Redistributions in binary form must reproduce the above copyright
16   *    notice, this list of conditions and the following disclaimer in
17   *    the documentation and/or other materials provided with the
18   *    distribution.
19   *
20   * 3. The end-user documentation included with the redistribution, if
21   *    any, must include the following acknowlegement:  
22   *       "This product includes software developed by the 
23   *        developers of Classifier4J (http://classifier4j.sf.net/)."
24   *    Alternately, this acknowlegement may appear in the software itself,
25   *    if and wherever such third-party acknowlegements normally appear.
26   *
27   * 4. The name "Classifier4J" must not be used to endorse or promote 
28   *    products derived from this software without prior written 
29   *    permission. For written permission, please contact   
30   *    http://sourceforge.net/users/nicklothian/.
31   *
32   * 5. Products derived from this software may not be called 
33   *    "Classifier4J", nor may "Classifier4J" appear in their names 
34   *    without prior written permission. For written permission, please 
35   *    contact http://sourceforge.net/users/nicklothian/.
36   *
37   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48   * SUCH DAMAGE.
49   * ====================================================================
50   */
51  
52  package net.sf.classifier4J;
53  
54  /*** 
55   * <p>Defines an interface for the classification of Strings.</p>
56   * 
57   * <p>Use the isMatch method if you want true/false matching, or use the
58   * classify method if you match probability.</p>
59   * 
60   * <p>The isMatch method will return a boolean representing if a string
61   * matches whatever criteria the implmentation is matching on. In the 
62   * default implemnetation this is done by calling classify and checking 
63   * if the returned match probability is greater than or equal to DEFAULT_CUTOFF.</p>
64   * 
65   * <p>The classify method will return a double value representing the
66   * likelyhood that the string passed to it is a match on whatever 
67   * criteria the implmentation is matching on.</p> 
68   * 
69   * <p>When implementing this class, it is recommended that 
70   * the classify method should not return the values 1 or 0 
71   * except in the cases there the classifier can guarentee that the string is 
72   * a match doe does not match. For non-absolute matching algorithms LOWER_BOUND
73   * and UPPER_BOUND should be used.</p> 
74   * 
75   * @author Nick Lothian
76   * @author Peter Leschev
77   * 
78   * @see AbstractClassifier
79   */
80  public interface IClassifier {
81      /***
82       * Default value to use if the implementation cannot work out how
83       * well a string matches.
84       */
85      public static double NEUTRAL_PROBABILITY = 0.5d;
86  
87      /***
88       * The minimum likelyhood that a string matches
89       */
90      public static double LOWER_BOUND = 0.01d;
91  
92      /***
93       * The maximum likelyhood that a string matches
94       */
95      public static double UPPER_BOUND = 0.99d;
96  
97      /***
98       * Default cutoff value used by defautl implmentation of 
99       * isMatch. Any match probability greater than or equal to this
100      * value will be classified as a match. 
101      * 
102      * The value is 0.9d
103      * 
104      */
105     public static double DEFAULT_CUTOFF = 0.9d;
106 
107     /***
108      * 
109      * Sets the cutoff below which the input is not considered a match
110      * 
111      * @param cutoff the level below which isMatch will return false. Should be between 0 and 1.
112      */
113     public void setMatchCutoff(double cutoff);
114 
115     /***
116      *
117      * Function to determine the probability string matches a criteria.
118      *   
119      * @param input the string to classify
120      * @return the likelyhood that this string is a match for this net.sf.classifier4J. 1 means 100% likely.
121          *
122          * @throws ClassifierException If a non-recoverable problem occurs
123      */
124     public double classify(String input) throws ClassifierException;
125 
126     /***
127      * 
128      * Function to determine if a string matches a criteria.
129      * 
130      * @param input the string to classify
131      * @return true if the input string has a probability >= the cutoff probability of 
132      * matching
133          *
134          * @throws ClassifierException If a non-recoverable problem occurs
135      */
136     public boolean isMatch(String input) throws ClassifierException;
137 
138     /***
139      * Convenience method which takes a match probability
140      * (calculated by {@link net.sf.classifier4J.IClassifier#classify(java.lang.String)})
141      * and checks if it would be classified as a match or not
142      * 
143      * @param matchProbability 
144      * @return true if match, false otherwise
145      */
146     public boolean isMatch(double matchProbability);
147 }