public interface IProcessActivity extends IHistoryActivity, IEventActivity, IAbortActivity, IFingerprintActivity, ICarrydownActivity
Modifier and Type | Field and Description |
---|---|
static java.lang.String | _rcsid |
BAD_URL, EXCLUDED_CONTENT, EXCLUDED_DATE, EXCLUDED_LENGTH, EXCLUDED_MIMETYPE, EXCLUDED_URL, NULL_URL
Modifier and Type | Method and Description |
---|---|
void | addDocumentReference(java.lang.String documentIdentifier)<br>Add a document description to the current job's queue. |
void | addDocumentReference(java.lang.String documentIdentifier, java.lang.String parentIdentifier, java.lang.String relationshipType)<br>Add a document description to the current job's queue. |
void | addDocumentReference(java.lang.String documentIdentifier, java.lang.String parentIdentifier, java.lang.String relationshipType, java.lang.String[] dataNames, java.lang.Object[][] dataValues)<br>Add a document description to the current job's queue. |
void | addDocumentReference(java.lang.String documentIdentifier, java.lang.String parentIdentifier, java.lang.String relationshipType, java.lang.String[] dataNames, java.lang.Object[][] dataValues, java.lang.Long originationTime)<br>Add a document description to the current job's queue. |
void | addDocumentReference(java.lang.String documentIdentifier, java.lang.String parentIdentifier, java.lang.String relationshipType, java.lang.String[] dataNames, java.lang.Object[][] dataValues, java.lang.Long originationTime, java.lang.String[] prereqEventNames)<br>Add a document description to the current job's queue. |
boolean | checkDocumentNeedsReindexing(java.lang.String documentIdentifier, java.lang.String newVersionString)<br>Check if a document needs to be reindexed, based on a computed version string. |
boolean | checkDocumentNeedsReindexing(java.lang.String documentIdentifier, java.lang.String componentIdentifier, java.lang.String newVersionString)<br>Check if a document needs to be reindexed, based on a computed version string. |
void | deleteDocument(java.lang.String documentIdentifier)<br>Delete the specified document permanently from the search engine index, and from the status table, along with all its components. |
void | ingestDocumentWithException(java.lang.String documentIdentifier, java.lang.String version, java.lang.String documentURI, RepositoryDocument data)<br>Ingest the current document. |
void | ingestDocumentWithException(java.lang.String documentIdentifier, java.lang.String componentIdentifier, java.lang.String version, java.lang.String documentURI, RepositoryDocument data)<br>Ingest the current document. |
void | noDocument(java.lang.String documentIdentifier, java.lang.String version)<br>Remove the specified document from the search engine index, and update the recorded version information for the document. |
void | noDocument(java.lang.String documentIdentifier, java.lang.String componentIdentifier, java.lang.String version)<br>Remove the specified document from the search engine index, and update the recorded version information for the document. |
void | recordDocument(java.lang.String documentIdentifier, java.lang.String version)<br>Record a document version, WITHOUT reindexing it, or removing it. |
void | recordDocument(java.lang.String documentIdentifier, java.lang.String componentIdentifier, java.lang.String version)<br>Record a document version, WITHOUT reindexing it, or removing it. |
void | removeDocument(java.lang.String documentIdentifier)<br>Remove the specified document primary component permanently from the search engine index, and from the status table. |
void | retainAllComponentDocument(java.lang.String documentIdentifier)<br>Retain all existing document components of a primary document. |
void | retainDocument(java.lang.String documentIdentifier, java.lang.String componentIdentifier)<br>Retain existing document component. |
void | setDocumentOriginationTime(java.lang.String documentIdentifier, java.lang.Long originationTime)<br>Override a document's origination time. |
void | setDocumentScheduleBounds(java.lang.String documentIdentifier, java.lang.Long lowerRecrawlBoundTime, java.lang.Long upperRecrawlBoundTime, java.lang.Long lowerExpireBoundTime, java.lang.Long upperExpireBoundTime)<br>Override the schedule for the next time a document is crawled. |
recordActivity
beginEventSequence, completeEventSequence, retryDocumentProcessing
createConnectionSpecificString, createGlobalString, createJobSpecificString
checkJobStillActive
checkDateIndexable, checkDocumentIndexable, checkLengthIndexable, checkMimeTypeIndexable, checkURLIndexable
retrieveParentData, retrieveParentDataAsFiles
static final java.lang.String _rcsid
boolean checkDocumentNeedsReindexing(java.lang.String documentIdentifier, java.lang.String newVersionString) throws ManifoldCFException
Parameters:
documentIdentifier - is the document identifier.
newVersionString - is the newly-computed version string.
Throws:
ManifoldCFException
boolean checkDocumentNeedsReindexing(java.lang.String documentIdentifier, java.lang.String componentIdentifier, java.lang.String newVersionString) throws ManifoldCFException
Parameters:
documentIdentifier - is the document identifier.
componentIdentifier - is the component document identifier, if any.
newVersionString - is the newly-computed version string.
Throws:
ManifoldCFException
void addDocumentReference(java.lang.String documentIdentifier, java.lang.String parentIdentifier, java.lang.String relationshipType, java.lang.String[] dataNames, java.lang.Object[][] dataValues, java.lang.Long originationTime, java.lang.String[] prereqEventNames) throws ManifoldCFException
Parameters:
documentIdentifier - is the local document identifier to add (for the connector that fetched the document).
parentIdentifier - is the document identifier that is considered to be the "parent" of this identifier. May be null, if no hopcount filtering is desired for this kind of relationship. MUST be present in the case of carrydown information.
relationshipType - is the string describing the kind of relationship described by this reference. This must be one of the strings returned by the IRepositoryConnector method "getRelationshipTypes()". May be null.
dataNames - is the list of carry-down data from the parent to the child. May be null. Each name is limited to 255 characters!
dataValues - are the values that correspond to the data names in the dataNames parameter. May be null only if dataNames is null. The type of each object must be either a String or a CharacterInput.
originationTime - is the time, in ms since epoch, that the document originated. Pass null if none or unknown.
prereqEventNames - are the names of the prerequisite events which this document requires prior to processing. Pass null if none.
Throws:
ManifoldCFException
void addDocumentReference(java.lang.String documentIdentifier, java.lang.String parentIdentifier, java.lang.String relationshipType, java.lang.String[] dataNames, java.lang.Object[][] dataValues, java.lang.Long originationTime) throws ManifoldCFException
Parameters:
documentIdentifier - is the document identifier to add (for the connector that fetched the document).
parentIdentifier - is the document identifier that is considered to be the "parent" of this identifier. May be null, if no hopcount filtering is desired for this kind of relationship. MUST be present in the case of carrydown information.
relationshipType - is the string describing the kind of relationship described by this reference. This must be one of the strings returned by the IRepositoryConnector method "getRelationshipTypes()". May be null.
dataNames - is the list of carry-down data from the parent to the child. May be null. Each name is limited to 255 characters!
dataValues - are the values that correspond to the data names in the dataNames parameter. May be null only if dataNames is null. The type of each object must be either a String or a CharacterInput.
originationTime - is the time, in ms since epoch, that the document originated. Pass null if none or unknown.
Throws:
ManifoldCFException
void addDocumentReference(java.lang.String documentIdentifier, java.lang.String parentIdentifier, java.lang.String relationshipType, java.lang.String[] dataNames, java.lang.Object[][] dataValues) throws ManifoldCFException
Parameters:
documentIdentifier - is the document identifier to add (for the connector that fetched the document).
parentIdentifier - is the document identifier that is considered to be the "parent" of this identifier. May be null, if no hopcount filtering is desired for this kind of relationship. MUST be present in the case of carrydown information.
relationshipType - is the string describing the kind of relationship described by this reference. This must be one of the strings returned by the IRepositoryConnector method "getRelationshipTypes()". May be null.
dataNames - is the list of carry-down data from the parent to the child. May be null. Each name is limited to 255 characters!
dataValues - are the values that correspond to the data names in the dataNames parameter. May be null only if dataNames is null. The type of each object must be either a String or a CharacterInput.
Throws:
ManifoldCFException
void addDocumentReference(java.lang.String documentIdentifier, java.lang.String parentIdentifier, java.lang.String relationshipType) throws ManifoldCFException
Parameters:
documentIdentifier - is the document identifier to add (for the connector that fetched the document).
parentIdentifier - is the document identifier that is considered to be the "parent" of this identifier. May be null, if no hopcount filtering is desired for this kind of relationship.
relationshipType - is the string describing the kind of relationship described by this reference. This must be one of the strings returned by the IRepositoryConnector method "getRelationshipTypes()". May be null.
Throws:
ManifoldCFException
void addDocumentReference(java.lang.String documentIdentifier) throws ManifoldCFException
Parameters:
documentIdentifier - is the document identifier to add (for the connector that fetched the document).
Throws:
ManifoldCFException
void ingestDocumentWithException(java.lang.String documentIdentifier, java.lang.String version, java.lang.String documentURI, RepositoryDocument data) throws ManifoldCFException, ServiceInterruption, java.io.IOException
Parameters:
documentIdentifier - is the document's identifier.
version - is the version of the document, as reported by the getDocumentVersions() method of the corresponding repository connector. An empty version string signals that there is no calculable document version string, and that the document should always be indexed.
documentURI - is the URI to use to retrieve this document from the search interface (and is also the unique key in the index).
data - is the document data. The data is closed after ingestion is complete.
Throws:
java.io.IOException - only when data stream reading fails.
ManifoldCFException
ServiceInterruption
void ingestDocumentWithException(java.lang.String documentIdentifier, java.lang.String componentIdentifier, java.lang.String version, java.lang.String documentURI, RepositoryDocument data) throws ManifoldCFException, ServiceInterruption, java.io.IOException
Parameters:
documentIdentifier - is the document's identifier.
componentIdentifier - is the component document identifier, if any.
version - is the version of the document, as reported by the getDocumentVersions() method of the corresponding repository connector.
documentURI - is the URI to use to retrieve this document from the search interface (and is also the unique key in the index).
data - is the document data. The data is closed after ingestion is complete.
Throws:
java.io.IOException - only when data stream reading fails.
ManifoldCFException
ServiceInterruption
void noDocument(java.lang.String documentIdentifier, java.lang.String version) throws ManifoldCFException, ServiceInterruption
Parameters:
documentIdentifier - is the document's local identifier.
version - is the version string to be recorded for the document.
Throws:
ManifoldCFException
ServiceInterruption
void noDocument(java.lang.String documentIdentifier, java.lang.String componentIdentifier, java.lang.String version) throws ManifoldCFException, ServiceInterruption
Parameters:
documentIdentifier - is the document's local identifier.
componentIdentifier - is the component document identifier, if any.
version - is the version string to be recorded for the document.
Throws:
ManifoldCFException
ServiceInterruption
void removeDocument(java.lang.String documentIdentifier) throws ManifoldCFException, ServiceInterruption
Parameters:
documentIdentifier - is the document's identifier.
Throws:
ManifoldCFException
ServiceInterruption
void retainDocument(java.lang.String documentIdentifier, java.lang.String componentIdentifier) throws ManifoldCFException
Parameters:
documentIdentifier - is the document's identifier.
componentIdentifier - is the component document identifier, which cannot be null.
Throws:
ManifoldCFException
void retainAllComponentDocument(java.lang.String documentIdentifier) throws ManifoldCFException
Parameters:
documentIdentifier - is the document's identifier.
Throws:
ManifoldCFException
void recordDocument(java.lang.String documentIdentifier, java.lang.String version) throws ManifoldCFException
Parameters:
documentIdentifier - is the document identifier.
version - is the document version.
Throws:
ManifoldCFException
void recordDocument(java.lang.String documentIdentifier, java.lang.String componentIdentifier, java.lang.String version) throws ManifoldCFException
Parameters:
documentIdentifier - is the document identifier.
componentIdentifier - is the component document identifier, if any.
version - is the document version.
Throws:
ManifoldCFException
void deleteDocument(java.lang.String documentIdentifier) throws ManifoldCFException
Parameters:
documentIdentifier - is the document's identifier.
Throws:
ManifoldCFException
void setDocumentScheduleBounds(java.lang.String documentIdentifier, java.lang.Long lowerRecrawlBoundTime, java.lang.Long upperRecrawlBoundTime, java.lang.Long lowerExpireBoundTime, java.lang.Long upperExpireBoundTime) throws ManifoldCFException
Parameters:
documentIdentifier - is the document's identifier.
lowerRecrawlBoundTime - is the time in ms since epoch that the reschedule time should not fall BELOW, or null if none.
upperRecrawlBoundTime - is the time in ms since epoch that the reschedule time should not rise ABOVE, or null if none.
lowerExpireBoundTime - is the time in ms since epoch that the expire time should not fall BELOW, or null if none.
upperExpireBoundTime - is the time in ms since epoch that the expire time should not rise ABOVE, or null if none.
Throws:
ManifoldCFException
void setDocumentOriginationTime(java.lang.String documentIdentifier, java.lang.Long originationTime) throws ManifoldCFException
Parameters:
documentIdentifier - is the document's identifier.
originationTime - is the document's origination time, or null if unknown.
Throws:
ManifoldCFException