martin.common
Class ExtractAbbrev
java.lang.Object
martin.common.ExtractAbbrev
public class ExtractAbbrev
- extends java.lang.Object
The ExtractAbbrev class implements a simple algorithm for
extracting abbreviations and their definitions from biomedical text.
Abbreviations (short forms) are extracted from the input file; each abbreviation
for which a definition (long form) is found is printed together with that definition,
one pair per line.
A file of tab-separated short-form/long-form pairs can be specified
together with the -testlist option in order to evaluate the algorithm.
- Version:
- 03/12/03
- Author:
- Ariel Schwartz
- See Also:
A.S. Schwartz, M.A. Hearst: "A Simple Algorithm for Identifying Abbreviation Definitions in Biomedical Text",
Pacific Symposium on Biocomputing 8:451-462 (2003),
for a detailed description of the algorithm.
http://biotext.berkeley.edu/software.html
Method Summary
private Pair<java.lang.String> | extractAbbrPair(java.lang.String shortForm, java.lang.String longForm)
java.util.Map<java.lang.String,java.lang.String> | extractAbbrPairs(java.io.File inFile)
private java.lang.String | findBestLongForm(java.lang.String shortForm, java.lang.String longForm)
private boolean | hasCapital(java.lang.String str)
private boolean | hasLetter(java.lang.String str)
private boolean | isTrueDefinition(java.lang.String shortForm, java.lang.String longForm)
private boolean | isValidShortForm(java.lang.String str)
private void | loadTrueDefinitions(java.lang.String inFile)
static void | main(java.lang.String[] args)
private static void | usage()
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
mTestDefinitions
java.util.HashMap mTestDefinitions
mStats
java.util.HashMap mStats
truePositives
int truePositives
falsePositives
int falsePositives
falseNegatives
int falseNegatives
trueNegatives
int trueNegatives
delimiter
char delimiter
testMode
boolean testMode
ExtractAbbrev
public ExtractAbbrev()
isValidShortForm
private boolean isValidShortForm(java.lang.String str)
hasLetter
private boolean hasLetter(java.lang.String str)
hasCapital
private boolean hasCapital(java.lang.String str)
loadTrueDefinitions
private void loadTrueDefinitions(java.lang.String inFile)
isTrueDefinition
private boolean isTrueDefinition(java.lang.String shortForm,
java.lang.String longForm)
extractAbbrPairs
public java.util.Map<java.lang.String,java.lang.String> extractAbbrPairs(java.io.File inFile)
findBestLongForm
private java.lang.String findBestLongForm(java.lang.String shortForm,
java.lang.String longForm)
extractAbbrPair
private Pair<java.lang.String> extractAbbrPair(java.lang.String shortForm,
java.lang.String longForm)
usage
private static void usage()
main
public static void main(java.lang.String[] args)