-
Notifications
You must be signed in to change notification settings - Fork 22
Expand file tree
/
Copy pathStringDistanceTeacher.java
More file actions
54 lines (44 loc) · 1.86 KB
/
StringDistanceTeacher.java
File metadata and controls
54 lines (44 loc) · 1.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
package com.wcohen.ss.api;
/**
 * Drives the training of a {@code StringDistanceLearner} and hands back
 * the {@code StringDistance} it produces.  Where the training data
 * actually comes from is left to subclasses, which supply it through
 * the abstract methods declared below.
 */
public abstract class StringDistanceTeacher
{
    /**
     * Run the complete training protocol against the given learner.
     *
     * <p>The stages are executed in a fixed order:
     * <ol>
     * <li>the learner prepares and receives the strings from
     *     {@code stringWrapperIterator()};</li>
     * <li>the learner prepares and receives the pairs from
     *     {@code distanceInstancePool()};</li>
     * <li>every pair from {@code distanceExamplePool()} is passed to
     *     {@code addExample};</li>
     * <li>for as long as the learner has another query and
     *     {@code hasAnswers()} is true, the query is given to
     *     {@code labelInstance} and any non-null answer is added as an
     *     example.</li>
     * </ol>
     *
     * @param learner the learner to train
     * @return whatever {@code learner.getDistance()} yields once all
     *         stages have run
     */
    public final StringDistance train(StringDistanceLearner learner)
    {
        // Stage 1: TFIDF-style 'training' -- the learner gets to observe
        // corpus statistics over the raw strings.
        StringWrapperIterator corpus = stringWrapperIterator();
        learner.setStringWrapperPool(learner.prepare(corpus));

        // Stage 2: pairs made available for unsupervised/semi-supervised
        // training.
        DistanceInstanceIterator pool = distanceInstancePool();
        learner.setDistanceInstancePool(learner.prepare(pool));

        // Stage 3: supervised training, one example at a time.
        DistanceInstanceIterator examples = distanceExamplePool();
        while (examples.hasNext()) {
            learner.addExample(examples.nextDistanceInstance());
        }

        // Stage 4: active or passive learning -- keep answering the
        // learner's queries until either side runs out.
        while (learner.hasNextQuery() && hasAnswers()) {
            DistanceInstance labeled = labelInstance(learner.nextQuery());
            if (labeled == null) {
                // The query could not be answered; move on to the next one.
                continue;
            }
            learner.addExample(labeled);
        }

        // Final result.
        return learner.getDistance();
    }

    /**
     * Strings over which distances will be computed.  {@code train}
     * passes them through {@code learner.prepare} and installs the
     * result as the learner's string wrapper pool.
     */
    protected abstract StringWrapperIterator stringWrapperIterator();

    /**
     * A pool of unlabeled pairs of strings over which distances will be
     * computed, to be used for active or semi-supervised learning.
     * {@code train} passes them through {@code learner.prepare} and
     * installs the result as the learner's distance instance pool.
     */
    protected abstract DistanceInstanceIterator distanceInstancePool();

    /**
     * A pool of pairs of strings to be used for supervised learning;
     * {@code train} feeds each one to the learner via {@code addExample}.
     * NOTE(review): presumably these pairs carry labels, since they are
     * used as supervised examples -- confirm against implementations.
     */
    protected abstract DistanceInstanceIterator distanceExamplePool();

    /**
     * Label an instance queried by the learner.
     *
     * @param distanceInstance the query obtained from the learner
     * @return the answered query, or null if the query can't be
     *         answered (in which case {@code train} skips it)
     */
    protected abstract DistanceInstance labelInstance(DistanceInstance distanceInstance);

    /**
     * Return true if this teacher can answer more queries.  {@code train}
     * stops querying as soon as this is false.
     */
    protected abstract boolean hasAnswers();
}