1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52 package net.sf.classifier4J;
53
54 /***
55 * <p>Defines an interface for the classification of Strings.</p>
56 *
57 * <p>Use the isMatch method if you want true/false matching, or use the
58 * classify method if you match probability.</p>
59 *
60 * <p>The isMatch method will return a boolean representing if a string
61 * matches whatever criteria the implmentation is matching on. In the
62 * default implemnetation this is done by calling classify and checking
63 * if the returned match probability is greater than or equal to DEFAULT_CUTOFF.</p>
64 *
65 * <p>The classify method will return a double value representing the
66 * likelyhood that the string passed to it is a match on whatever
67 * criteria the implmentation is matching on.</p>
68 *
69 * <p>When implementing this class, it is recommended that
70 * the classify method should not return the values 1 or 0
71 * except in the cases there the classifier can guarentee that the string is
72 * a match doe does not match. For non-absolute matching algorithms LOWER_BOUND
73 * and UPPER_BOUND should be used.</p>
74 *
75 * @author Nick Lothian
76 * @author Peter Leschev
77 *
78 * @see AbstractClassifier
79 */
80 public interface IClassifier {
81 /***
82 * Default value to use if the implementation cannot work out how
83 * well a string matches.
84 */
85 public static double NEUTRAL_PROBABILITY = 0.5d;
86
87 /***
88 * The minimum likelyhood that a string matches
89 */
90 public static double LOWER_BOUND = 0.01d;
91
92 /***
93 * The maximum likelyhood that a string matches
94 */
95 public static double UPPER_BOUND = 0.99d;
96
97 /***
98 * Default cutoff value used by defautl implmentation of
99 * isMatch. Any match probability greater than or equal to this
100 * value will be classified as a match.
101 *
102 * The value is 0.9d
103 *
104 */
105 public static double DEFAULT_CUTOFF = 0.9d;
106
107 /***
108 *
109 * Sets the cutoff below which the input is not considered a match
110 *
111 * @param cutoff the level below which isMatch will return false. Should be between 0 and 1.
112 */
113 public void setMatchCutoff(double cutoff);
114
115 /***
116 *
117 * Function to determine the probability string matches a criteria.
118 *
119 * @param input the string to classify
120 * @return the likelyhood that this string is a match for this net.sf.classifier4J. 1 means 100% likely.
121 *
122 * @throws ClassifierException If a non-recoverable problem occurs
123 */
124 public double classify(String input) throws ClassifierException;
125
126 /***
127 *
128 * Function to determine if a string matches a criteria.
129 *
130 * @param input the string to classify
131 * @return true if the input string has a probability >= the cutoff probability of
132 * matching
133 *
134 * @throws ClassifierException If a non-recoverable problem occurs
135 */
136 public boolean isMatch(String input) throws ClassifierException;
137
138 /***
139 * Convenience method which takes a match probability
140 * (calculated by {@link net.sf.classifier4J.IClassifier#classify(java.lang.String)})
141 * and checks if it would be classified as a match or not
142 *
143 * @param matchProbability
144 * @return true if match, false otherwise
145 */
146 public boolean isMatch(double matchProbability);
147 }