protected static class WorkerThread.ProcessActivity extends java.lang.Object implements IProcessActivity
Modifier and Type | Field and Description |
---|---|
protected java.util.Set<java.lang.String> |
abortSet |
protected java.util.Set<java.lang.String> |
allComponentsSet |
protected IRepositoryConnection |
connection |
protected java.lang.String |
connectionName |
protected IRepositoryConnector |
connector |
protected IRepositoryConnectionManager |
connMgr |
protected long |
currentTime |
protected java.util.Set<java.lang.String> |
documentDeletedSet |
protected java.lang.Long |
expireInterval |
protected int |
hopcountMode |
protected IIncrementalIngester |
ingester |
protected WorkerThread.OutputActivity |
ingestLogger |
protected java.lang.Long |
jobID |
protected IJobManager |
jobManager |
protected java.lang.String[] |
legalLinkTypes |
protected java.util.Map<java.lang.String,java.lang.Long> |
lowerExpireBounds |
protected java.util.Map<java.lang.String,java.lang.Long> |
lowerRescheduleBounds |
protected java.lang.Long |
maxInterval |
protected java.util.Map<java.lang.String,java.lang.Long> |
originationTimes |
protected IPipelineSpecification |
pipelineSpecification |
protected java.util.Map<java.lang.String,QueuedDocument> |
previousDocuments |
protected java.lang.String |
processID |
protected java.lang.Long |
recrawlInterval |
protected java.util.Map<WorkerThread.DocumentReference,WorkerThread.DocumentReference> |
referenceList |
protected IReprioritizationTracker |
rt |
protected java.util.Map<java.lang.String,java.util.Set<java.lang.String>> |
touchedComponentSet |
protected java.util.Set<java.lang.String> |
touchedPrimarySet |
protected java.util.Set<java.lang.String> |
touchedSet |
protected java.util.Map<java.lang.String,java.lang.Long> |
upperExpireBounds |
protected java.util.Map<java.lang.String,java.lang.Long> |
upperRescheduleBounds |
_rcsid
BAD_URL, EXCLUDED_CONTENT, EXCLUDED_DATE, EXCLUDED_LENGTH, EXCLUDED_MIMETYPE, EXCLUDED_URL, NULL_URL
Constructor and Description |
---|
ProcessActivity(java.lang.Long jobID,
java.lang.String processID,
IReprioritizationTracker rt,
IJobManager jobManager,
IIncrementalIngester ingester,
java.lang.String connectionName,
IPipelineSpecification pipelineSpecification,
java.util.Map<java.lang.String,QueuedDocument> previousDocuments,
long currentTime,
java.lang.Long expireInterval,
java.lang.Long recrawlInterval,
java.lang.Long maxInterval,
int hopcountMode,
IRepositoryConnection connection,
IRepositoryConnector connector,
IRepositoryConnectionManager connMgr,
java.lang.String[] legalLinkTypes,
WorkerThread.OutputActivity ingestLogger)
Constructor.
|
Modifier and Type | Method and Description |
---|---|
void |
addDocumentReference(java.lang.String localIdentifier)
Add a document description to the current job's queue.
|
void |
addDocumentReference(java.lang.String localIdentifier,
java.lang.String parentIdentifier,
java.lang.String relationshipType)
Add a document description to the current job's queue.
|
void |
addDocumentReference(java.lang.String localIdentifier,
java.lang.String parentIdentifier,
java.lang.String relationshipType,
java.lang.String[] dataNames,
java.lang.Object[][] dataValues)
Add a document description to the current job's queue.
|
void |
addDocumentReference(java.lang.String localIdentifier,
java.lang.String parentIdentifier,
java.lang.String relationshipType,
java.lang.String[] dataNames,
java.lang.Object[][] dataValues,
java.lang.Long originationTime)
Add a document description to the current job's queue.
|
void |
addDocumentReference(java.lang.String localIdentifier,
java.lang.String parentIdentifier,
java.lang.String relationshipType,
java.lang.String[] dataNames,
java.lang.Object[][] dataValues,
java.lang.Long originationTime,
java.lang.String[] prereqEventNames)
Add a document description to the current job's queue.
|
boolean |
beginEventSequence(java.lang.String eventName)
Begin an event sequence.
|
java.lang.Long |
calculateDocumentExpireTime(long currentTime,
java.lang.String localIdentifier) |
java.lang.Long |
calculateDocumentRescheduleTime(long currentTime,
long timeAmt,
java.lang.String localIdentifier) |
protected void |
checkAllComponentsMultipleDispositions(java.lang.String documentIdentifier) |
boolean |
checkDateIndexable(java.util.Date date)
Detect if a date is indexable or not.
|
boolean |
checkDocumentIndexable(java.io.File localFile)
Check whether a document is indexable by the currently specified output connector.
|
boolean |
checkDocumentNeedsReindexing(java.lang.String documentIdentifier,
java.lang.String newVersionString)
Check if a document needs to be reindexed, based on a computed version string.
|
boolean |
checkDocumentNeedsReindexing(java.lang.String documentIdentifier,
java.lang.String componentIdentifier,
java.lang.String newVersionString)
Check if a document needs to be reindexed, based on a computed version string.
|
void |
checkJobStillActive()
Check whether current job is still active.
|
boolean |
checkLengthIndexable(long length)
Check whether a document of a specified length is indexable by the currently specified output connector.
|
boolean |
checkMimeTypeIndexable(java.lang.String mimeType)
Check whether a mime type is indexable by the currently specified output connector.
|
protected void |
checkMultipleDispositions(java.lang.String documentIdentifier,
java.lang.String componentIdentifier,
java.lang.String componentIdentifierHash) |
boolean |
checkURLIndexable(java.lang.String url)
Pre-determine whether a document's URL is indexable by this connector.
|
void |
completeEventSequence(java.lang.String eventName)
Complete an event sequence.
|
protected IPipelineSpecificationWithVersions |
computePipelineSpecificationWithVersions(java.lang.String documentIdentifierHash,
java.lang.String componentIdentifierHash,
java.lang.String documentIdentifier) |
java.lang.String |
createConnectionSpecificString(java.lang.String simpleString)
Create a connection-specific string from a simple string.
|
java.lang.String |
createGlobalString(java.lang.String simpleString)
Create a global string from a simple string.
|
java.lang.String |
createJobSpecificString(java.lang.String simpleString)
Create a job-based string from a simple string.
|
void |
deleteDocument(java.lang.String documentIdentifier)
Delete the specified document from the search engine index, and from the status table.
|
void |
discard()
Clean up any dangling information before abandoning this process activity object.
|
void |
flush()
Flush the outstanding references into the database.
|
java.lang.Long |
getDocumentExpirationLowerBoundTime(java.lang.String localIdentifier)
Find a document's lower expiration time bound, if any
|
java.lang.Long |
getDocumentExpirationUpperBoundTime(java.lang.String localIdentifier)
Find a document's upper expiration time bound, if any
|
java.lang.Long |
getDocumentOriginationTime(java.lang.String localIdentifier)
Get a document's origination time
|
java.lang.Long |
getDocumentRescheduleLowerBoundTime(java.lang.String localIdentifier)
Find a document's lower rescheduling time bound, if any
|
java.lang.Long |
getDocumentRescheduleUpperBoundTime(java.lang.String localIdentifier)
Find a document's upper rescheduling time bound, if any
|
void |
ingestDocumentWithException(java.lang.String documentIdentifier,
java.lang.String version,
java.lang.String documentURI,
RepositoryDocument data)
Ingest the current document.
|
void |
ingestDocumentWithException(java.lang.String documentIdentifier,
java.lang.String componentIdentifier,
java.lang.String version,
java.lang.String documentURI,
RepositoryDocument data)
Ingest the current document.
|
void |
noDocument(java.lang.String documentIdentifier,
java.lang.String version)
Remove the specified document from the search engine index, while keeping track of the version information
for it (to reduce churn).
|
void |
noDocument(java.lang.String documentIdentifier,
java.lang.String componentIdentifier,
java.lang.String version)
Remove the specified document from the search engine index, and update the
recorded version information for the document.
|
protected void |
processDocumentReferences()
Process outstanding document references, in batch.
|
void |
recordActivity(java.lang.Long startTime,
java.lang.String activityType,
java.lang.Long dataSize,
java.lang.String entityIdentifier,
java.lang.String resultCode,
java.lang.String resultDescription,
java.lang.String[] childIdentifiers)
Record time-stamped information about the activity of the connector.
|
void |
recordDocument(java.lang.String documentIdentifier,
java.lang.String version)
Record a document version, but don't ingest it.
|
void |
recordDocument(java.lang.String documentIdentifier,
java.lang.String componentIdentifier,
java.lang.String version)
Record a document version, WITHOUT reindexing it, or removing it.
|
void |
removeDocument(java.lang.String documentIdentifier)
Remove the specified document primary component permanently from the search engine index,
and from the status table.
|
void |
resetTimes()
Reset the recorded times
|
void |
retainAllComponentDocument(java.lang.String documentIdentifier)
Retain all existing document components of a primary document.
|
void |
retainDocument(java.lang.String documentIdentifier,
java.lang.String componentIdentifier)
Retain existing document component.
|
java.lang.String[] |
retrieveParentData(java.lang.String localIdentifier,
java.lang.String dataName)
Retrieve data passed from parents to a specified child document.
|
CharacterInput[] |
retrieveParentDataAsFiles(java.lang.String localIdentifier,
java.lang.String dataName)
Retrieve data passed from parents to a specified child document.
|
void |
retryDocumentProcessing(java.lang.String localIdentifier)
Abort processing a document (for sequencing reasons).
|
void |
setDocumentOriginationTime(java.lang.String localIdentifier,
java.lang.Long originationTime)
Override a document's origination time.
|
void |
setDocumentScheduleBounds(java.lang.String localIdentifier,
java.lang.Long lowerRecrawlBoundTime,
java.lang.Long upperRecrawlBoundTime,
java.lang.Long lowerExpireBoundTime,
java.lang.Long upperExpireBoundTime)
Override the schedule for the next time a document is crawled.
|
protected void |
touchAllComponentsSet(java.lang.String documentIdentifier) |
protected void |
touchComponentSet(java.lang.String documentIdentifier,
java.lang.String componentIdentifierHash) |
boolean |
wasDocumentAborted(java.lang.String documentIdentifier)
Check whether a document was aborted or not.
|
boolean |
wasDocumentComponentTouched(java.lang.String documentIdentifier,
java.lang.String componentIdentifierHash)
Check whether a document component was touched or not.
|
boolean |
wasDocumentDeleted(java.lang.String documentIdentifier)
Check whether document was deleted or not.
|
boolean |
wasDocumentTouched(java.lang.String documentIdentifier)
Check whether a document (and its version string) was touched or not.
|
protected final java.lang.Long jobID
protected final java.lang.String processID
protected final IJobManager jobManager
protected final IIncrementalIngester ingester
protected final java.lang.String connectionName
protected final IPipelineSpecification pipelineSpecification
protected final java.util.Map<java.lang.String,QueuedDocument> previousDocuments
protected final long currentTime
protected final java.lang.Long expireInterval
protected final java.lang.Long recrawlInterval
protected final java.lang.Long maxInterval
protected final int hopcountMode
protected final IRepositoryConnection connection
protected final IRepositoryConnector connector
protected final IRepositoryConnectionManager connMgr
protected final java.lang.String[] legalLinkTypes
protected final WorkerThread.OutputActivity ingestLogger
protected final IReprioritizationTracker rt
protected final java.util.Map<WorkerThread.DocumentReference,WorkerThread.DocumentReference> referenceList
protected final java.util.Map<java.lang.String,java.lang.Long> lowerRescheduleBounds
protected final java.util.Map<java.lang.String,java.lang.Long> upperRescheduleBounds
protected final java.util.Map<java.lang.String,java.lang.Long> lowerExpireBounds
protected final java.util.Map<java.lang.String,java.lang.Long> upperExpireBounds
protected final java.util.Map<java.lang.String,java.lang.Long> originationTimes
protected final java.util.Set<java.lang.String> abortSet
protected final java.util.Set<java.lang.String> touchedSet
protected final java.util.Set<java.lang.String> documentDeletedSet
protected final java.util.Set<java.lang.String> allComponentsSet
protected final java.util.Map<java.lang.String,java.util.Set<java.lang.String>> touchedComponentSet
protected final java.util.Set<java.lang.String> touchedPrimarySet
public ProcessActivity(java.lang.Long jobID, java.lang.String processID, IReprioritizationTracker rt, IJobManager jobManager, IIncrementalIngester ingester, java.lang.String connectionName, IPipelineSpecification pipelineSpecification, java.util.Map<java.lang.String,QueuedDocument> previousDocuments, long currentTime, java.lang.Long expireInterval, java.lang.Long recrawlInterval, java.lang.Long maxInterval, int hopcountMode, IRepositoryConnection connection, IRepositoryConnector connector, IRepositoryConnectionManager connMgr, java.lang.String[] legalLinkTypes, WorkerThread.OutputActivity ingestLogger)
jobManager
- is the job manager
ingester
- is the ingester
public void discard() throws ManifoldCFException
ManifoldCFException
public boolean wasDocumentTouched(java.lang.String documentIdentifier)
public boolean wasDocumentComponentTouched(java.lang.String documentIdentifier, java.lang.String componentIdentifierHash)
public boolean wasDocumentDeleted(java.lang.String documentIdentifier)
public boolean wasDocumentAborted(java.lang.String documentIdentifier)
public boolean checkDocumentNeedsReindexing(java.lang.String documentIdentifier, java.lang.String newVersionString) throws ManifoldCFException
checkDocumentNeedsReindexing
in interface IProcessActivity
documentIdentifier
- is the document identifier.
newVersionString
- is the newly-computed version string.
ManifoldCFException
public boolean checkDocumentNeedsReindexing(java.lang.String documentIdentifier, java.lang.String componentIdentifier, java.lang.String newVersionString) throws ManifoldCFException
checkDocumentNeedsReindexing
in interface IProcessActivity
documentIdentifier
- is the document identifier.
componentIdentifier
- is the component document identifier, if any.
newVersionString
- is the newly-computed version string.
ManifoldCFException
public void addDocumentReference(java.lang.String localIdentifier, java.lang.String parentIdentifier, java.lang.String relationshipType, java.lang.String[] dataNames, java.lang.Object[][] dataValues, java.lang.Long originationTime, java.lang.String[] prereqEventNames) throws ManifoldCFException
addDocumentReference
in interface IProcessActivity
localIdentifier
- is the local document identifier to add (for the connector that
fetched the document).
parentIdentifier
- is the document identifier that is considered to be the "parent"
of this identifier. May be null, if no hopcount filtering is desired for this kind of relationship.
relationshipType
- is the string describing the kind of relationship described by this
reference. This must be one of the strings returned by the IRepositoryConnector method
"getRelationshipTypes()". May be null.
dataNames
- is the list of carry-down data from the parent to the child. May be null. Each name is limited to 255 characters!
dataValues
- are the values that correspond to the data names in the dataNames parameter. May be null only if dataNames is null.
The type of each object must be either a String or a CharacterInput.
originationTime
- is the time, in ms since epoch, that the document originated. Pass null if none or unknown.
prereqEventNames
- are the names of the prerequisite events which this document requires prior to processing. Pass null if none.
ManifoldCFException
public void addDocumentReference(java.lang.String localIdentifier, java.lang.String parentIdentifier, java.lang.String relationshipType, java.lang.String[] dataNames, java.lang.Object[][] dataValues, java.lang.Long originationTime) throws ManifoldCFException
addDocumentReference
in interface IProcessActivity
localIdentifier
- is the local document identifier to add (for the connector that
fetched the document).
parentIdentifier
- is the document identifier that is considered to be the "parent"
of this identifier. May be null, if no hopcount filtering is desired for this kind of relationship.
relationshipType
- is the string describing the kind of relationship described by this
reference. This must be one of the strings returned by the IRepositoryConnector method
"getRelationshipTypes()". May be null.
dataNames
- is the list of carry-down data from the parent to the child. May be null. Each name is limited to 255 characters!
dataValues
- are the values that correspond to the data names in the dataNames parameter. May be null only if dataNames is null.
originationTime
- is the time, in ms since epoch, that the document originated. Pass null if none or unknown.
ManifoldCFException
public void addDocumentReference(java.lang.String localIdentifier, java.lang.String parentIdentifier, java.lang.String relationshipType, java.lang.String[] dataNames, java.lang.Object[][] dataValues) throws ManifoldCFException
addDocumentReference
in interface IProcessActivity
localIdentifier
- is the local document identifier to add (for the connector that
fetched the document).
parentIdentifier
- is the document identifier that is considered to be the "parent"
of this identifier. May be null, if no hopcount filtering is desired for this kind of relationship.
relationshipType
- is the string describing the kind of relationship described by this
reference. This must be one of the strings returned by the IRepositoryConnector method
"getRelationshipTypes()". May be null.
dataNames
- is the list of carry-down data from the parent to the child. May be null. Each name is limited to 255 characters!
dataValues
- are the values that correspond to the data names in the dataNames parameter. May be null only if dataNames is null.
ManifoldCFException
public void addDocumentReference(java.lang.String localIdentifier, java.lang.String parentIdentifier, java.lang.String relationshipType) throws ManifoldCFException
addDocumentReference
in interface IProcessActivity
localIdentifier
- is the local document identifier to add (for the connector that
fetched the document).
parentIdentifier
- is the document identifier that is considered to be the "parent"
of this identifier. May be null, if no hopcount filtering is desired for this kind of relationship.
relationshipType
- is the string describing the kind of relationship described by this
reference. This must be one of the strings returned by the IRepositoryConnector method
"getRelationshipTypes()". May be null.
ManifoldCFException
public void addDocumentReference(java.lang.String localIdentifier) throws ManifoldCFException
addDocumentReference
in interface IProcessActivity
localIdentifier
- is the local document identifier to add (for the connector that
fetched the document).
ManifoldCFException
public java.lang.String[] retrieveParentData(java.lang.String localIdentifier, java.lang.String dataName) throws ManifoldCFException
retrieveParentData
in interface ICarrydownActivity
localIdentifier
- is the document identifier of the document we want the recorded data for.
dataName
- is the name of the data items to retrieve.
ManifoldCFException
public CharacterInput[] retrieveParentDataAsFiles(java.lang.String localIdentifier, java.lang.String dataName) throws ManifoldCFException
retrieveParentDataAsFiles
in interface ICarrydownActivity
localIdentifier
- is the document identifier of the document we want the recorded data for.
dataName
- is the name of the data items to retrieve.
ManifoldCFException
public void recordDocument(java.lang.String documentIdentifier, java.lang.String version) throws ManifoldCFException
recordDocument
in interface IProcessActivity
documentIdentifier
- is the document identifier.
version
- is the document version.
ManifoldCFException
public void recordDocument(java.lang.String documentIdentifier, java.lang.String componentIdentifier, java.lang.String version) throws ManifoldCFException
recordDocument
in interface IProcessActivity
documentIdentifier
- is the document identifier.
componentIdentifier
- is the component document identifier, if any.
version
- is the document version.
ManifoldCFException
public void ingestDocumentWithException(java.lang.String documentIdentifier, java.lang.String version, java.lang.String documentURI, RepositoryDocument data) throws ManifoldCFException, ServiceInterruption, java.io.IOException
ingestDocumentWithException
in interface IProcessActivity
documentIdentifier
- is the document's local identifier.
version
- is the version of the document, as reported by the getDocumentVersions() method of the
corresponding repository connector.
documentURI
- is the URI to use to retrieve this document from the search interface (and is
also the unique key in the index).
data
- is the document data. The data is closed after ingestion is complete.
java.io.IOException
- only when data stream reading fails.
ManifoldCFException
ServiceInterruption
public void ingestDocumentWithException(java.lang.String documentIdentifier, java.lang.String componentIdentifier, java.lang.String version, java.lang.String documentURI, RepositoryDocument data) throws ManifoldCFException, ServiceInterruption, java.io.IOException
ingestDocumentWithException
in interface IProcessActivity
documentIdentifier
- is the document's identifier.
componentIdentifier
- is the component document identifier, if any.
version
- is the version of the document, as reported by the getDocumentVersions() method of the
corresponding repository connector.
documentURI
- is the URI to use to retrieve this document from the search interface (and is
also the unique key in the index).
data
- is the document data. The data is closed after ingestion is complete.
java.io.IOException
- only when data stream reading fails.
ManifoldCFException
ServiceInterruption
public void noDocument(java.lang.String documentIdentifier, java.lang.String version) throws ManifoldCFException, ServiceInterruption
noDocument
in interface IProcessActivity
documentIdentifier
- is the document's local identifier.
version
- is the version string to be recorded for the document.
ManifoldCFException
ServiceInterruption
public void noDocument(java.lang.String documentIdentifier, java.lang.String componentIdentifier, java.lang.String version) throws ManifoldCFException, ServiceInterruption
noDocument
in interface IProcessActivity
documentIdentifier
- is the document's local identifier.
componentIdentifier
- is the component document identifier, if any.
version
- is the version string to be recorded for the document.
ManifoldCFException
ServiceInterruption
public void removeDocument(java.lang.String documentIdentifier) throws ManifoldCFException, ServiceInterruption
removeDocument
in interface IProcessActivity
documentIdentifier
- is the document's identifier.
ManifoldCFException
ServiceInterruption
public void retainDocument(java.lang.String documentIdentifier, java.lang.String componentIdentifier) throws ManifoldCFException
retainDocument
in interface IProcessActivity
documentIdentifier
- is the document's identifier.
componentIdentifier
- is the component document identifier, which cannot be null.
ManifoldCFException
public void retainAllComponentDocument(java.lang.String documentIdentifier) throws ManifoldCFException
retainAllComponentDocument
in interface IProcessActivity
documentIdentifier
- is the document's identifier.
ManifoldCFException
public void deleteDocument(java.lang.String documentIdentifier) throws ManifoldCFException
deleteDocument
in interface IProcessActivity
documentIdentifier
- is the document's identifier.
ManifoldCFException
public void setDocumentScheduleBounds(java.lang.String localIdentifier, java.lang.Long lowerRecrawlBoundTime, java.lang.Long upperRecrawlBoundTime, java.lang.Long lowerExpireBoundTime, java.lang.Long upperExpireBoundTime) throws ManifoldCFException
setDocumentScheduleBounds
in interface IProcessActivity
localIdentifier
- is the document's local identifier.
lowerRecrawlBoundTime
- is the time in ms since epoch that the reschedule time should not fall BELOW, or null if none.
upperRecrawlBoundTime
- is the time in ms since epoch that the reschedule time should not rise ABOVE, or null if none.
lowerExpireBoundTime
- is the time in ms since epoch that the expire time should not fall BELOW, or null if none.
upperExpireBoundTime
- is the time in ms since epoch that the expire time should not rise ABOVE, or null if none.
ManifoldCFException
public void setDocumentOriginationTime(java.lang.String localIdentifier, java.lang.Long originationTime) throws ManifoldCFException
setDocumentOriginationTime
in interface IProcessActivity
localIdentifier
- is the document's local identifier.
originationTime
- is the document's origination time, or null if unknown.
ManifoldCFException
public java.lang.Long getDocumentRescheduleLowerBoundTime(java.lang.String localIdentifier)
public java.lang.Long getDocumentRescheduleUpperBoundTime(java.lang.String localIdentifier)
public java.lang.Long getDocumentExpirationLowerBoundTime(java.lang.String localIdentifier)
public java.lang.Long getDocumentExpirationUpperBoundTime(java.lang.String localIdentifier)
public java.lang.Long getDocumentOriginationTime(java.lang.String localIdentifier)
public java.lang.Long calculateDocumentRescheduleTime(long currentTime, long timeAmt, java.lang.String localIdentifier)
public java.lang.Long calculateDocumentExpireTime(long currentTime, java.lang.String localIdentifier)
public void resetTimes()
public void recordActivity(java.lang.Long startTime, java.lang.String activityType, java.lang.Long dataSize, java.lang.String entityIdentifier, java.lang.String resultCode, java.lang.String resultDescription, java.lang.String[] childIdentifiers) throws ManifoldCFException
recordActivity
in interface IHistoryActivity
startTime
- is either null or the time since the start of epoch in milliseconds (Jan 1, 1970). Every
activity has an associated time; the startTime field records when the activity began. A null value
indicates that the start time and the finishing time are the same.
activityType
- is a string which is fully interpretable only in the context of the connector involved, which is
used to categorize what kind of activity is being recorded. For example, a web connector might record a
"fetch document" activity. Cannot be null.
dataSize
- is the number of bytes of data involved in the activity, or null if not applicable.
entityIdentifier
- is a (possibly long) string which identifies the object involved in the history record.
The interpretation of this field will differ from connector to connector. May be null.
resultCode
- contains a terse description of the result of the activity. The description is limited in
size to 255 characters, and can be interpreted only in the context of the current connector. May be null.
resultDescription
- is a (possibly long) human-readable string which adds detail, if required, to the result
described in the resultCode field. This field is not meant to be queried on. May be null.
childIdentifiers
- is a set of child entity identifiers associated with this activity. May be null.
ManifoldCFException
public void flush() throws ManifoldCFException
ManifoldCFException
protected void processDocumentReferences() throws ManifoldCFException
ManifoldCFException
public void checkJobStillActive() throws ManifoldCFException, ServiceInterruption
checkJobStillActive
in interface IAbortActivity
ManifoldCFException
ServiceInterruption
public boolean beginEventSequence(java.lang.String eventName) throws ManifoldCFException
beginEventSequence
in interface IEventActivity
eventName
- is the event name.
ManifoldCFException
public void completeEventSequence(java.lang.String eventName) throws ManifoldCFException
completeEventSequence
in interface IEventActivity
eventName
- is the event name.
ManifoldCFException
public void retryDocumentProcessing(java.lang.String localIdentifier) throws ManifoldCFException
retryDocumentProcessing
in interface IEventActivity
localIdentifier
- is the document identifier to requeue.
ManifoldCFException
public boolean checkDateIndexable(java.util.Date date) throws ManifoldCFException, ServiceInterruption
checkDateIndexable
in interface IFingerprintActivity
date
- is the date of the document; may be null.
ManifoldCFException
ServiceInterruption
public boolean checkMimeTypeIndexable(java.lang.String mimeType) throws ManifoldCFException, ServiceInterruption
checkMimeTypeIndexable
in interface IFingerprintActivity
mimeType
- is the mime type to check, not including any character set specification.
ManifoldCFException
ServiceInterruption
public boolean checkDocumentIndexable(java.io.File localFile) throws ManifoldCFException, ServiceInterruption
checkDocumentIndexable
in interface IFingerprintActivity
localFile
- is the local copy of the file to check.
ManifoldCFException
ServiceInterruption
public boolean checkLengthIndexable(long length) throws ManifoldCFException, ServiceInterruption
checkLengthIndexable
in interface IFingerprintActivity
length
- is the length to check.
ManifoldCFException
ServiceInterruption
public boolean checkURLIndexable(java.lang.String url) throws ManifoldCFException, ServiceInterruption
checkURLIndexable
in interface IFingerprintActivity
url
- is the URL of the document.
ManifoldCFException
ServiceInterruption
public java.lang.String createGlobalString(java.lang.String simpleString)
createGlobalString
in interface INamingActivity
simpleString
- is the simple string.
public java.lang.String createConnectionSpecificString(java.lang.String simpleString)
createConnectionSpecificString
in interface INamingActivity
simpleString
- is the simple string.
public java.lang.String createJobSpecificString(java.lang.String simpleString)
createJobSpecificString
in interface INamingActivity
simpleString
- is the simple string.
protected void checkAllComponentsMultipleDispositions(java.lang.String documentIdentifier)
protected void checkMultipleDispositions(java.lang.String documentIdentifier, java.lang.String componentIdentifier, java.lang.String componentIdentifierHash)
protected void touchAllComponentsSet(java.lang.String documentIdentifier)
protected void touchComponentSet(java.lang.String documentIdentifier, java.lang.String componentIdentifierHash)
protected IPipelineSpecificationWithVersions computePipelineSpecificationWithVersions(java.lang.String documentIdentifierHash, java.lang.String componentIdentifierHash, java.lang.String documentIdentifier)