|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  uk.ac.man.entitytagger.evaluate.Evaluate
public class Evaluate
Nested Class Summary | |
---|---|
(package private) class |
Evaluate.Tag
Simple dataholder class for a single mention |
Constructor Summary | |
---|---|
Evaluate()
|
Method Summary | |
---|---|
private java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.List<Evaluate.Tag>>> |
convert(java.util.Map<java.lang.String,java.util.List<Mention>> hash)
|
private static void |
filterTagsByDocLength(java.util.Map<java.lang.String,java.util.List<Mention>> mainTaggedSet,
java.util.Map<java.lang.String,java.lang.Integer> docLengthFilters)
|
static void |
filterTagsByRegexp(java.util.Map<java.lang.String,java.util.List<Mention>> mainTaggedSet,
java.util.Map<java.lang.String,java.util.regex.Pattern> filters)
|
(package private) static java.util.Set<java.lang.String> |
getDocumentSelection(java.lang.String[] documents,
int n)
Randomly returns n document ids from an array of document ids. |
private static java.util.Set<java.lang.String> |
getValidDocs(java.util.Set<java.lang.String> mainSet,
java.util.Set<java.lang.String> mainTaggedSet,
java.util.Set<java.lang.String> refSet,
java.util.Set<java.lang.String> refTaggedSet,
java.util.HashMap<java.lang.String,java.lang.String> articleConversions)
Given sets of document IDs, returns the set of IDs that are relevant for evaluation. |
private static java.util.HashMap<java.lang.String,java.lang.Integer> |
loadDocLengthFilters(DocumentIterator documents)
|
private static java.util.Set<java.lang.String> |
loadDocumentIDSet(java.io.File file)
|
private static java.util.HashMap<java.lang.String,java.lang.String> |
loadIndexfile(java.io.File file)
Loads an index file correlating two sets of document IDs with each other, so that they can be mapped to each other (becoming equivalent during the evaluation). |
private static java.util.Set<java.lang.String> |
loadValidEntities(java.io.File file,
java.lang.String prefix)
Loads a list of entity ids that should be processed during evaluation (anything else will be ignored) |
static void |
main(java.lang.String[] args)
|
private static void |
printEffectiveStats(java.lang.String title,
java.util.Map<java.lang.String,java.util.List<Mention>> mainTaggedSet,
java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.List<Evaluate.Tag>>> mainTagsBySpecies,
java.util.Set<java.lang.String> doclist_a,
java.util.Set<java.lang.String> doclist_b,
java.util.Map<java.lang.String,java.lang.String> conversionMap)
Function which will print a few statistics to System.out |
(package private) Result[] |
process(java.util.Map<java.lang.String,java.util.List<Mention>> mainTags,
java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.List<Evaluate.Tag>>> mainTagsByDoc,
java.util.Map<java.lang.String,java.util.List<Mention>> refTags,
java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.List<Evaluate.Tag>>> refTagsByDoc,
java.util.Map<java.lang.String,java.lang.String> articleConversionMap,
java.util.Set<java.lang.String> validEntities,
java.io.File logFile,
boolean print,
java.util.Set<java.lang.String> validDocIDs,
java.lang.String title)
The main evaluation processing method |
private static void |
reduceIDs(java.util.Map<java.lang.String,java.util.List<Mention>> mainTaggedSet)
|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Constructor Detail |
---|
public Evaluate()
Method Detail |
---|
Result[] process(java.util.Map<java.lang.String,java.util.List<Mention>> mainTags, java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.List<Evaluate.Tag>>> mainTagsByDoc, java.util.Map<java.lang.String,java.util.List<Mention>> refTags, java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.List<Evaluate.Tag>>> refTagsByDoc, java.util.Map<java.lang.String,java.lang.String> articleConversionMap, java.util.Set<java.lang.String> validEntities, java.io.File logFile, boolean print, java.util.Set<java.lang.String> validDocIDs, java.lang.String title)
mainTags - main tag set
mainTagsByDoc - main tag set, separated by document
refTags - reference (gold-standard) tag set
refTagsByDoc - reference (gold-standard) tag set, separated by document
articleConversionMap - mapping between equivalent article IDs (e.g. PMID <-> PMCID)
validEntities - a set of all entities that are valid (anything not in this set is ignored)
logFile - the file where a list of TPs, FPs and FNs should be listed (may be null)
print - if true, will print evaluation results to System.out
validDocIDs - similar to validEntities, a set of document IDs that are valid (mentions from any documents not in this set will be ignored)
title - job title (will be printed before results)
private java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.List<Evaluate.Tag>>> convert(java.util.Map<java.lang.String,java.util.List<Mention>> hash)
private static java.util.HashMap<java.lang.String,java.lang.String> loadIndexfile(java.io.File file)
file -
private static java.util.Set<java.lang.String> loadDocumentIDSet(java.io.File file)
private static java.util.Set<java.lang.String> loadValidEntities(java.io.File file, java.lang.String prefix)
file -
prefix - prefix which will be added to the beginning of each ID (may be null)
static java.util.Set<java.lang.String> getDocumentSelection(java.lang.String[] documents, int n)
documents - an array of document ids
n - the number of document ids to randomly return
public static void main(java.lang.String[] args)
args -
private static void reduceIDs(java.util.Map<java.lang.String,java.util.List<Mention>> mainTaggedSet)
private static void filterTagsByDocLength(java.util.Map<java.lang.String,java.util.List<Mention>> mainTaggedSet, java.util.Map<java.lang.String,java.lang.Integer> docLengthFilters)
private static java.util.HashMap<java.lang.String,java.lang.Integer> loadDocLengthFilters(DocumentIterator documents)
public static void filterTagsByRegexp(java.util.Map<java.lang.String,java.util.List<Mention>> mainTaggedSet, java.util.Map<java.lang.String,java.util.regex.Pattern> filters)
mainTaggedSet -
filters -
private static void printEffectiveStats(java.lang.String title, java.util.Map<java.lang.String,java.util.List<Mention>> mainTaggedSet, java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.util.List<Evaluate.Tag>>> mainTagsBySpecies, java.util.Set<java.lang.String> doclist_a, java.util.Set<java.lang.String> doclist_b, java.util.Map<java.lang.String,java.lang.String> conversionMap)
title -
mainTaggedSet -
mainTagsBySpecies -
doclist_a -
doclist_b -
conversionMap -
private static java.util.Set<java.lang.String> getValidDocs(java.util.Set<java.lang.String> mainSet, java.util.Set<java.lang.String> mainTaggedSet, java.util.Set<java.lang.String> refSet, java.util.Set<java.lang.String> refTaggedSet, java.util.HashMap<java.lang.String,java.lang.String> articleConversions)
mainSet - the set of document IDs in our main set that could _potentially_ have been tagged (some may not have any tags due to not containing any entities).
mainTaggedSet - the set of document IDs in our main set that have been tagged as containing entities
refSet - the set of document IDs in our reference set that could _potentially_ have been tagged (some may not have any tags due to not containing any entities).
refTaggedSet - the set of document IDs in our reference set that have been tagged as containing entities
articleConversions - String <-> String conversion map, for mapping e.g. PMIDs to PMCIDs.
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |