|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object uk.ac.man.entitytagger.Mention
public class Mention
Class for representing a text match, containing the matched text, document coordinates and normalized IDs.
Nested Class Summary | |
---|---|
private class |
Mention.IDPair
|
class |
Mention.SimpleMention
|
Field Summary | |
---|---|
private java.lang.String |
comment
|
static java.lang.String |
COMMENT_SEPARATOR
|
private java.lang.String |
docid
|
private int |
end
|
private java.lang.String[] |
idLines
|
private java.lang.String[] |
ids
|
private java.lang.Double[] |
probabilities
|
private static long |
serialVersionUID
|
private int |
start
|
private java.lang.String |
text
|
Constructor Summary | |
---|---|
Mention(java.lang.String[] ids)
|
|
Mention(java.lang.String[] ids,
int start,
int end,
java.lang.String text)
|
|
Mention(java.lang.String id,
int start,
int end,
java.lang.String text)
|
Method Summary | |
---|---|
void |
addToPstmtBatch(java.sql.PreparedStatement pstmt)
Saves the match to a database using PreparedStatements. |
Mention |
clone()
|
int |
compareTo(Mention o)
|
boolean |
containsID(java.lang.String id)
|
void |
disambiguate(java.lang.String id)
If containsID(id) is true, will set ids to id only (any other ids will be deleted), otherwise throws an exception |
boolean |
equals(java.lang.Object o)
|
static Mention |
findClosestMention(java.util.List<Mention> mentions,
int pos)
|
java.lang.String |
getComment()
|
java.lang.String |
getDocid()
|
int |
getEnd()
|
java.lang.String[] |
getIds()
|
java.lang.String |
getIdsToString()
|
java.lang.String[] |
getIdsWithLineNumbers()
|
static java.util.List<Mention> |
getMentionsInRange(java.util.List<Mention> mentions,
int start,
int end)
|
java.lang.String |
getMostProbableID()
|
java.lang.String |
getMostProbableIDWithIdLine()
|
java.lang.Double[] |
getProbabilities()
|
int |
getStart()
|
java.lang.String |
getText()
|
boolean |
isAmbigous()
|
static java.util.List<Mention> |
loadFromFile(java.io.File file)
|
static java.util.List<Mention> |
loadFromFile(java.io.File file,
java.util.Set<java.lang.String> validDocumentIDs,
java.lang.String restrictPostfix,
java.util.HashMap<java.lang.String,java.lang.String> conversionMap)
Will load a set of matches from a file, constrained such that only matches from documents in validDocumentIDs are returned. |
static java.util.Map<java.lang.String,java.util.List<Mention>> |
loadFromFileToHash(java.io.File file,
java.util.Set<java.lang.String> validDocumentIDs,
java.lang.String restrictPostfix,
java.util.HashMap<java.lang.String,java.lang.String> conversionMap)
Loads matches using loadFromFile(file,validDocumentIDs,conversionMap) and then splits them up to allow access by document ID. |
boolean |
overlaps(Mention m2)
|
static boolean |
overlaps(Mention m1,
Mention m2)
|
boolean |
overlapsIgnoreDoc(Mention n)
|
static void |
saveToFile(java.util.ArrayList<Mention> matches,
java.io.File file)
Saves the list of matches to file, sorted by their start and end coordinates. |
static void |
saveToStreamInBCFormat(java.io.BufferedWriter outStream,
java.util.List<Mention> mentions,
java.lang.String restrictBySpecies,
java.util.Map<java.lang.String,java.lang.String> toSpeciesMap)
Saves a list of mentions to a stream, in BioCreative 2 format, suitable for evaluation. |
void |
setComment(java.lang.String comment)
|
void |
setDocid(java.lang.String docid)
|
void |
setEnd(int end)
|
void |
setIds(java.lang.String[] ids)
Will also clear the probabilities. |
void |
setProbabilities(java.lang.Double[] probabilities)
|
void |
setStart(int start)
|
void |
setText(java.lang.String text)
|
Mention.SimpleMention |
simplify(java.util.Map<java.lang.String,java.lang.String> descriptionMap)
|
static void |
sort(java.util.List<Mention> mentions)
|
void |
sortIDsByProbabilities()
|
java.lang.String |
toString()
|
Methods inherited from class java.lang.Object |
---|
finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait |
Field Detail |
---|
private static final long serialVersionUID
public static final java.lang.String COMMENT_SEPARATOR
private int start
private int end
private java.lang.String text
private java.lang.String[] ids
private java.lang.String comment
private java.lang.String docid
private java.lang.Double[] probabilities
private java.lang.String[] idLines
Constructor Detail |
---|
public Mention(java.lang.String[] ids)
public Mention(java.lang.String id, int start, int end, java.lang.String text)
public Mention(java.lang.String[] ids, int start, int end, java.lang.String text)
Method Detail |
---|
public Mention clone()
clone
in class java.lang.Object
public static void saveToStreamInBCFormat(java.io.BufferedWriter outStream, java.util.List<Mention> mentions, java.lang.String restrictBySpecies, java.util.Map<java.lang.String,java.lang.String> toSpeciesMap)
outStream -
mentions - The mentions, from a _single_ document.
restrictBySpecies - if not null, any mentions not from the specified species will be ignored
toSpeciesMap - map from mention ids to species ids, used if restrictBySpecies is not null
public boolean containsID(java.lang.String id)
id
-
public Mention.SimpleMention simplify(java.util.Map<java.lang.String,java.lang.String> descriptionMap)
public void disambiguate(java.lang.String id)
id
-
java.lang.IllegalStateException - if containsID(id) == false
public java.lang.String toString()
toString
in class java.lang.Object
public java.lang.String[] getIds()
public int getStart()
public int getEnd()
public boolean isAmbigous()
public boolean equals(java.lang.Object o)
equals
in class java.lang.Object
public static boolean overlaps(Mention m1, Mention m2)
m1
- m2
-
public boolean overlaps(Mention m2)
m2
-
public static java.util.Map<java.lang.String,java.util.List<Mention>> loadFromFileToHash(java.io.File file, java.util.Set<java.lang.String> validDocumentIDs, java.lang.String restrictPostfix, java.util.HashMap<java.lang.String,java.lang.String> conversionMap)
file
- validDocumentIDs
- conversionMap
-
public static java.util.List<Mention> loadFromFile(java.io.File file)
public static java.util.List<Mention> loadFromFile(java.io.File file, java.util.Set<java.lang.String> validDocumentIDs, java.lang.String restrictPostfix, java.util.HashMap<java.lang.String,java.lang.String> conversionMap)
file
- validDocumentIDs
- conversionMap
-
public static void saveToFile(java.util.ArrayList<Mention> matches, java.io.File file)
matches -
file -
public int compareTo(Mention o)
compareTo
in interface java.lang.Comparable<Mention>
public void setStart(int start)
public void setEnd(int end)
public java.lang.String getText()
public java.lang.String getComment()
public void setComment(java.lang.String comment)
comment - extra data that can be associated with the object
public java.lang.String getDocid()
public void setDocid(java.lang.String docid)
docid - the document id to set
public java.lang.Double[] getProbabilities()
public void setProbabilities(java.lang.Double[] probabilities)
probabilities - an array of probabilities associated with the IDs; there must be as many probabilities as IDs
public void setText(java.lang.String text)
text - the text to set
public java.lang.String getMostProbableID()
public void addToPstmtBatch(java.sql.PreparedStatement pstmt)
pstmt - Statement for inserting the match to a database, with the following fields: 1: entity id, 2: document, 3: start, 4: end, 5: text, 6: comment
public void setIds(java.lang.String[] ids)
ids - the ids to set
public java.lang.String getMostProbableIDWithIdLine()
public java.lang.String[] getIdsWithLineNumbers()
public java.lang.String getIdsToString()
public static Mention findClosestMention(java.util.List<Mention> mentions, int pos)
public static java.util.List<Mention> getMentionsInRange(java.util.List<Mention> mentions, int start, int end)
public void sortIDsByProbabilities()
public static void sort(java.util.List<Mention> mentions)
public boolean overlapsIgnoreDoc(Mention n)
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |