|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
  uk.ac.man.entitytagger.evaluate.Evaluate
public class Evaluate
| Nested Class Summary | |
|---|---|
(package private) class |
Evaluate.Tag
Simple dataholder class for a single mention |
| Constructor Summary | |
|---|---|
Evaluate()
|
|
| Method Summary | |
|---|---|
private java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.List<Evaluate.Tag>>> |
convert(java.util.Map<java.lang.String,java.util.List<Mention>> hash)
|
private static void |
filterTagsByDocLength(java.util.Map<java.lang.String,java.util.List<Mention>> mainTaggedSet,
java.util.Map<java.lang.String,java.lang.Integer> docLengthFilters)
|
static void |
filterTagsByRegexp(java.util.Map<java.lang.String,java.util.List<Mention>> mainTaggedSet,
java.util.Map<java.lang.String,java.util.regex.Pattern> filters)
|
(package private) static java.util.Set<java.lang.String> |
getDocumentSelection(java.lang.String[] documents,
int n)
Randomly returns n document ids from an array of document ids. |
private static java.util.Set<java.lang.String> |
getValidDocs(java.util.Set<java.lang.String> mainSet,
java.util.Set<java.lang.String> mainTaggedSet,
java.util.Set<java.lang.String> refSet,
java.util.Set<java.lang.String> refTaggedSet,
java.util.HashMap<java.lang.String,java.lang.String> articleConversions)
Function which given document id sets will return a set of IDs that are relevant for evaluation. |
private static java.util.HashMap<java.lang.String,java.lang.Integer> |
loadDocLengthFilters(DocumentIterator documents)
|
private static java.util.Set<java.lang.String> |
loadDocumentIDSet(java.io.File file)
|
private static java.util.HashMap<java.lang.String,java.lang.String> |
loadIndexfile(java.io.File file)
Loads an index file correlating two sets of document ids with each other, so that they can be mapped to each other (and treated as equivalent during the evaluation) |
private static java.util.Set<java.lang.String> |
loadValidEntities(java.io.File file,
java.lang.String prefix)
Loads a list of entity ids that should be processed during evaluation (anything else will be ignored) |
static void |
main(java.lang.String[] args)
|
private static void |
printEffectiveStats(java.lang.String title,
java.util.Map<java.lang.String,java.util.List<Mention>> mainTaggedSet,
java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.List<Evaluate.Tag>>> mainTagsBySpecies,
java.util.Set<java.lang.String> doclist_a,
java.util.Set<java.lang.String> doclist_b,
java.util.Map<java.lang.String,java.lang.String> conversionMap)
Function which will print a few statistics to System.out |
(package private) Result[] |
process(java.util.Map<java.lang.String,java.util.List<Mention>> mainTags,
java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.List<Evaluate.Tag>>> mainTagsByDoc,
java.util.Map<java.lang.String,java.util.List<Mention>> refTags,
java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.List<Evaluate.Tag>>> refTagsByDoc,
java.util.Map<java.lang.String,java.lang.String> articleConversionMap,
java.util.Set<java.lang.String> validEntities,
java.io.File logFile,
boolean print,
java.util.Set<java.lang.String> validDocIDs,
java.lang.String title)
The main evaluation processing method |
private static void |
reduceIDs(java.util.Map<java.lang.String,java.util.List<Mention>> mainTaggedSet)
|
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Constructor Detail |
|---|
public Evaluate()
| Method Detail |
|---|
Result[] process(java.util.Map<java.lang.String,java.util.List<Mention>> mainTags,
java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.List<Evaluate.Tag>>> mainTagsByDoc,
java.util.Map<java.lang.String,java.util.List<Mention>> refTags,
java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.List<Evaluate.Tag>>> refTagsByDoc,
java.util.Map<java.lang.String,java.lang.String> articleConversionMap,
java.util.Set<java.lang.String> validEntities,
java.io.File logFile,
boolean print,
java.util.Set<java.lang.String> validDocIDs,
java.lang.String title)
mainTags - main tag set
mainTagsByDoc - main tag set, separated by document
refTags - reference (gold-standard) tag set
refTagsByDoc - reference (gold-standard) tag set, separated by document
articleConversionMap - mapping between equivalent article IDs (e.g. PMID <-> PMCID)
validEntities - a set of all entities that are valid (anything not in this set is ignored)
logFile - the file where a list of TPs, FPs and FNs should be listed (may be null)
print - if true, will print evaluation results to System.out
validDocIDs - similar to validEntities, a set of document IDs that are valid (mentions from any documents not in this set will be ignored)
title - job title (will be printed before results)
private java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.List<Evaluate.Tag>>> convert(java.util.Map<java.lang.String,java.util.List<Mention>> hash)
private static java.util.HashMap<java.lang.String,java.lang.String> loadIndexfile(java.io.File file)
file -
private static java.util.Set<java.lang.String> loadDocumentIDSet(java.io.File file)
private static java.util.Set<java.lang.String> loadValidEntities(java.io.File file,
java.lang.String prefix)
file -
prefix - prefix which will be added to the beginning of each ID (may be null)
static java.util.Set<java.lang.String> getDocumentSelection(java.lang.String[] documents,
int n)
documents - an array of document ids
n - the number of document ids to randomly select
public static void main(java.lang.String[] args)
args -
private static void reduceIDs(java.util.Map<java.lang.String,java.util.List<Mention>> mainTaggedSet)
private static void filterTagsByDocLength(java.util.Map<java.lang.String,java.util.List<Mention>> mainTaggedSet,
java.util.Map<java.lang.String,java.lang.Integer> docLengthFilters)
private static java.util.HashMap<java.lang.String,java.lang.Integer> loadDocLengthFilters(DocumentIterator documents)
public static void filterTagsByRegexp(java.util.Map<java.lang.String,java.util.List<Mention>> mainTaggedSet,
java.util.Map<java.lang.String,java.util.regex.Pattern> filters)
mainTaggedSet -
filters -
private static void printEffectiveStats(java.lang.String title,
java.util.Map<java.lang.String,java.util.List<Mention>> mainTaggedSet,
java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.List<Evaluate.Tag>>> mainTagsBySpecies,
java.util.Set<java.lang.String> doclist_a,
java.util.Set<java.lang.String> doclist_b,
java.util.Map<java.lang.String,java.lang.String> conversionMap)
title -
mainTaggedSet -
mainTagsBySpecies -
doclist_a -
doclist_b -
conversionMap -
private static java.util.Set<java.lang.String> getValidDocs(java.util.Set<java.lang.String> mainSet,
java.util.Set<java.lang.String> mainTaggedSet,
java.util.Set<java.lang.String> refSet,
java.util.Set<java.lang.String> refTaggedSet,
java.util.HashMap<java.lang.String,java.lang.String> articleConversions)
mainSet - the set of document IDs in our main set that could _potentially_ have been tagged (some may not have any tags due to not containing any entities).
mainTaggedSet - the set of document IDs in our main set that have been tagged as containing entities
refSet - the set of document IDs in our reference set that could _potentially_ have been tagged (some may not have any tags due to not containing any entities).
refTaggedSet - the set of document IDs in our reference set that have been tagged as containing entities
articleConversions - String <-> String conversion map, for mapping e.g. PMIDs to PMCIDs.
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||