uk.ac.man.documentparser.input
Class PMC
java.lang.Object
uk.ac.man.documentparser.input.PMC
- All Implemented Interfaces:
- java.lang.Iterable<Document>, java.util.Iterator<Document>, DocumentIterator
public class PMC
- extends java.lang.Object
- implements DocumentIterator
Constructor Summary
PMC(java.io.File xmlLocation,
java.lang.String[] dtdLocations)
PMC(java.lang.StringBuffer data,
java.lang.String[] dtdLocation)
PMC(java.lang.String basePath,
java.lang.String[] dtdLocation,
java.lang.String pmcID,
boolean hasXML,
boolean hasXMLBody,
boolean hasOCR,
boolean hasPTT)
Methods inherited from class java.lang.Object:
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
doc
private org.w3c.dom.Document doc
numArticles
private int numArticles
nextArticle
private int nextArticle
basePath
private java.lang.String basePath
pmcID
private java.lang.String pmcID
hasOCR
private boolean hasOCR
hasPTT
private boolean hasPTT
staticid
private static int staticid
xml
private java.lang.String xml
PMC
public PMC(java.io.File xmlLocation,
java.lang.String[] dtdLocations)
PMC
public PMC(java.lang.StringBuffer data,
java.lang.String[] dtdLocation)
PMC
public PMC(java.lang.String basePath,
java.lang.String[] dtdLocation,
java.lang.String pmcID,
boolean hasXML,
boolean hasXMLBody,
boolean hasOCR,
boolean hasPTT)
iterator
public java.util.Iterator<Document> iterator()
- Specified by:
iterator
in interface java.lang.Iterable<Document>
hasNext
public boolean hasNext()
- Specified by:
hasNext
in interface java.util.Iterator<Document>
getSection
private Section[] getSection(org.w3c.dom.NodeList mainElement)
loadFile
private java.lang.String loadFile(java.io.File f)
next
public Document next()
- Specified by:
next
in interface java.util.Iterator<Document>
removeSections
private void removeSections(Section[] sections,
java.lang.String keyword)
remove
public void remove()
- Specified by:
remove
in interface java.util.Iterator<Document>
skip
public void skip()
- Specified by:
skip
in interface DocumentIterator