protected class WebcrawlerConnector.ProcessActivityHTMLHandler extends WebcrawlerConnector.ProcessActivityLinkHandler implements IHTMLHandler
activities, baseDocumentIdentifier, contextDescription, documentIdentifier, filter, linkType
Constructor and Description |
---|
ProcessActivityHTMLHandler(java.lang.String documentIdentifier,
org.apache.manifoldcf.crawler.interfaces.IProcessActivity activities,
WebcrawlerConnector.DocumentURLFilter filter,
int metaRobotTagsUsage)
Constructor.
|
Modifier and Type | Method and Description |
---|---|
void |
finishUp()
Done with the document.
|
void |
noteAHREF(java.lang.String rawURL)
Note discovered href
|
void |
noteBASEHREF(java.lang.String rawURL)
Note discovered base
|
void |
noteFormEnd()
Note the end of a form
|
void |
noteFormInput(java.util.Map inputAttributes)
Note an input tag
|
void |
noteFormStart(java.util.Map formAttributes)
Note the start of a form
|
void |
noteFRAMESRC(java.lang.String rawURL)
Note discovered FRAME SRC
|
void |
noteIMGSRC(java.lang.String rawURL)
Note discovered IMG SRC
|
void |
noteLINKHREF(java.lang.String rawURL)
Note discovered LINK HREF
|
void |
noteMetaTag(java.util.Map metaAttributes)
Note a meta tag
|
void |
noteTextCharacter(char textCharacter)
Note a character of text.
|
boolean |
shouldIndex()
Decide whether we should index.
|
noteDiscoveredBase, noteDiscoveredLink
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
noteDiscoveredBase, noteDiscoveredLink
public ProcessActivityHTMLHandler(java.lang.String documentIdentifier, org.apache.manifoldcf.crawler.interfaces.IProcessActivity activities, WebcrawlerConnector.DocumentURLFilter filter, int metaRobotTagsUsage)
public boolean shouldIndex()
public void noteTextCharacter(char textCharacter) throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
noteTextCharacter
in interface IHTMLHandler
org.apache.manifoldcf.core.interfaces.ManifoldCFException
public void noteMetaTag(java.util.Map metaAttributes) throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
noteMetaTag
in interface IMetaTagHandler
metaAttributes
- are the attributes that belong to the tag.
org.apache.manifoldcf.core.interfaces.ManifoldCFException
public void noteFormStart(java.util.Map formAttributes) throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
noteFormStart
in interface IHTMLHandler
org.apache.manifoldcf.core.interfaces.ManifoldCFException
public void noteFormInput(java.util.Map inputAttributes) throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
noteFormInput
in interface IHTMLHandler
org.apache.manifoldcf.core.interfaces.ManifoldCFException
public void noteFormEnd() throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
noteFormEnd
in interface IHTMLHandler
org.apache.manifoldcf.core.interfaces.ManifoldCFException
public void noteBASEHREF(java.lang.String rawURL) throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
noteBASEHREF
in interface IHTMLHandler
org.apache.manifoldcf.core.interfaces.ManifoldCFException
public void noteAHREF(java.lang.String rawURL) throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
noteAHREF
in interface IHTMLHandler
org.apache.manifoldcf.core.interfaces.ManifoldCFException
public void noteLINKHREF(java.lang.String rawURL) throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
noteLINKHREF
in interface IHTMLHandler
org.apache.manifoldcf.core.interfaces.ManifoldCFException
public void noteIMGSRC(java.lang.String rawURL) throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
noteIMGSRC
in interface IHTMLHandler
org.apache.manifoldcf.core.interfaces.ManifoldCFException
public void noteFRAMESRC(java.lang.String rawURL) throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
noteFRAMESRC
in interface IHTMLHandler
org.apache.manifoldcf.core.interfaces.ManifoldCFException
public void finishUp() throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
IHTMLHandler
finishUp
in interface IHTMLHandler
org.apache.manifoldcf.core.interfaces.ManifoldCFException