martin.common
Class ExtractAbbrev

java.lang.Object
  extended by martin.common.ExtractAbbrev

public class ExtractAbbrev
extends java.lang.Object

The ExtractAbbrev class implements a simple algorithm for extracting abbreviations and their definitions from biomedical text. Abbreviations (short forms) are extracted from the input file; each abbreviation for which a definition (long form) is found is printed, together with that definition, one pair per line. To evaluate the algorithm, a file of tab-separated short-form/long-form pairs can be supplied with the -testlist option.

Version:
03/12/03
Author:
Ariel Schwartz
See Also:
"A Simple Algorithm for Identifying Abbreviation Definitions in Biomedical Text", A.S. Schwartz and M.A. Hearst, Pacific Symposium on Biocomputing 8:451-462 (2003), for a detailed description of the algorithm; http://biotext.berkeley.edu/software.html

Field Summary
(package private)  char delimiter
           
(package private)  int falseNegatives
           
(package private)  int falsePositives
           
(package private)  java.util.HashMap mStats
           
(package private)  java.util.HashMap mTestDefinitions
           
(package private)  boolean testMode
           
(package private)  int trueNegatives
           
(package private)  int truePositives
           
 
Constructor Summary
ExtractAbbrev()
           
 
Method Summary
private  Pair<java.lang.String> extractAbbrPair(java.lang.String shortForm, java.lang.String longForm)
           
 java.util.Map<java.lang.String,java.lang.String> extractAbbrPairs(java.io.File inFile)
           
private  java.lang.String findBestLongForm(java.lang.String shortForm, java.lang.String longForm)
           
private  boolean hasCapital(java.lang.String str)
           
private  boolean hasLetter(java.lang.String str)
           
private  boolean isTrueDefinition(java.lang.String shortForm, java.lang.String longForm)
           
private  boolean isValidShortForm(java.lang.String str)
           
private  void loadTrueDefinitions(java.lang.String inFile)
           
static void main(java.lang.String[] args)
           
private static void usage()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

mTestDefinitions

java.util.HashMap mTestDefinitions

mStats

java.util.HashMap mStats

truePositives

int truePositives

falsePositives

int falsePositives

falseNegatives

int falseNegatives

trueNegatives

int trueNegatives

delimiter

char delimiter

testMode

boolean testMode
Constructor Detail

ExtractAbbrev

public ExtractAbbrev()
Method Detail

isValidShortForm

private boolean isValidShortForm(java.lang.String str)

hasLetter

private boolean hasLetter(java.lang.String str)

hasCapital

private boolean hasCapital(java.lang.String str)

loadTrueDefinitions

private void loadTrueDefinitions(java.lang.String inFile)

isTrueDefinition

private boolean isTrueDefinition(java.lang.String shortForm,
                                 java.lang.String longForm)

extractAbbrPairs

public java.util.Map<java.lang.String,java.lang.String> extractAbbrPairs(java.io.File inFile)

findBestLongForm

private java.lang.String findBestLongForm(java.lang.String shortForm,
                                          java.lang.String longForm)

extractAbbrPair

private Pair<java.lang.String> extractAbbrPair(java.lang.String shortForm,
                                               java.lang.String longForm)

usage

private static void usage()

main

public static void main(java.lang.String[] args)