protected class WebcrawlerConnector.ProcessActivityHTMLHandler extends WebcrawlerConnector.ProcessActivityLinkHandler implements IHTMLHandler
Fields inherited from class WebcrawlerConnector.ProcessActivityLinkHandler: activities, baseDocumentIdentifier, contextDescription, documentIdentifier, filter, linkType

| Constructor and Description |
|---|
| ProcessActivityHTMLHandler(java.lang.String documentIdentifier, org.apache.manifoldcf.crawler.interfaces.IProcessActivity activities, WebcrawlerConnector.DocumentURLFilter filter, int metaRobotTagsUsage) — Constructor. |
| Modifier and Type | Method and Description |
|---|---|
| void | finishUp() — Done with the document. |
| void | noteAHREF(java.lang.String rawURL) — Note discovered href |
| void | noteBASEHREF(java.lang.String rawURL) — Note discovered base |
| void | noteFormEnd() — Note the end of a form |
| void | noteFormInput(java.util.Map inputAttributes) — Note an input tag |
| void | noteFormStart(java.util.Map formAttributes) — Note the start of a form |
| void | noteFRAMESRC(java.lang.String rawURL) — Note discovered FRAME SRC |
| void | noteIMGSRC(java.lang.String rawURL) — Note discovered IMG SRC |
| void | noteLINKHREF(java.lang.String rawURL) — Note discovered href |
| void | noteMetaTag(java.util.Map metaAttributes) — Note a meta tag |
| void | noteTextCharacter(char textCharacter) — Note a character of text. |
| boolean | shouldIndex() — Decide whether we should index. |
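Methods inherited from class WebcrawlerConnector.ProcessActivityLinkHandler: noteDiscoveredBase, noteDiscoveredLink

Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait

Methods inherited from interface (name lost in extraction; presumably a superinterface of IHTMLHandler): noteDiscoveredBase, noteDiscoveredLink

public ProcessActivityHTMLHandler(java.lang.String documentIdentifier,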
org.apache.manifoldcf.crawler.interfaces.IProcessActivity activities,
WebcrawlerConnector.DocumentURLFilter filter,
int metaRobotTagsUsage)
public boolean shouldIndex()
public void noteTextCharacter(char textCharacter)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteTextCharacter in interface IHTMLHandler
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException

public void noteMetaTag(java.util.Map metaAttributes)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteMetaTag in interface IMetaTagHandler
Parameters: metaAttributes - the attributes that belong to the tag.
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException

public void noteFormStart(java.util.Map formAttributes)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteFormStart in interface IHTMLHandler
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException

public void noteFormInput(java.util.Map inputAttributes)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteFormInput in interface IHTMLHandler
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException

public void noteFormEnd()
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteFormEnd in interface IHTMLHandler
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException

public void noteBASEHREF(java.lang.String rawURL)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteBASEHREF in interface IHTMLHandler
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException

public void noteAHREF(java.lang.String rawURL)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteAHREF in interface IHTMLHandler
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException

public void noteLINKHREF(java.lang.String rawURL)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteLINKHREF in interface IHTMLHandler
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException

public void noteIMGSRC(java.lang.String rawURL)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteIMGSRC in interface IHTMLHandler
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException

public void noteFRAMESRC(java.lang.String rawURL)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteFRAMESRC in interface IHTMLHandler
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException

public void finishUp()
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Description copied from interface: IHTMLHandler
Specified by: finishUp in interface IHTMLHandler
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException