public class FileConnector
extends org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector
| Modifier and Type | Field and Description |
|---|---|
static java.lang.String |
_rcsid |
protected static java.lang.String[] |
activitiesList |
protected static java.lang.String |
ACTIVITY_READ |
protected static java.lang.String |
RELATIONSHIP_CHILD |
currentContext, params
GLOBAL_DENY_TOKEN, JOBMODE_CONTINUOUS, JOBMODE_ONCEONLY, MODEL_ADD, MODEL_ADD_CHANGE, MODEL_ADD_CHANGE_DELETE, MODEL_ALL, MODEL_CHAINED_ADD, MODEL_CHAINED_ADD_CHANGE, MODEL_CHAINED_ADD_CHANGE_DELETE, MODEL_PARTIAL
| Constructor and Description |
|---|
FileConnector()
Constructor.
|
| Modifier and Type | Method and Description |
|---|---|
java.lang.String |
addSeedDocuments(org.apache.manifoldcf.crawler.interfaces.ISeedingActivity activities,
org.apache.manifoldcf.core.interfaces.Specification spec,
java.lang.String lastSeedVersion,
long seedTime,
int jobMode)
Queue "seed" documents.
|
protected static boolean |
checkInclude(java.io.File file,
java.lang.String fileName,
org.apache.manifoldcf.core.interfaces.Specification documentSpecification)
Check if a file or directory should be included, given a document specification.
|
protected static boolean |
checkIngest(java.io.File file,
org.apache.manifoldcf.core.interfaces.Specification documentSpecification)
Check if a file should be ingested, given a document specification.
|
protected static boolean |
checkMatch(java.lang.String sourceMatch,
int sourceIndex,
java.lang.String match)
Check a match between two strings with wildcards.
|
protected static java.lang.String |
convertToURI(java.lang.String documentIdentifier)
Convert a document identifier to a URI.
|
protected static java.lang.String |
convertToWGETURI(java.lang.String path)
Convert a document file path to a URI.
|
protected static java.lang.String |
findConvertPath(org.apache.manifoldcf.core.interfaces.Specification spec,
java.io.File theFile)
This method finds the part of the path that should be converted to a URI.
|
java.lang.String[] |
getActivitiesList()
List the activities we might report on.
|
java.lang.String[] |
getBinNames(java.lang.String documentIdentifier)
For any given document, list the bins that it is a member of.
|
int |
getConnectorModel()
Tell the world what model this connector uses for getDocumentIdentifiers().
|
java.lang.String[] |
getRelationshipTypes()
Return the list of relationship types that this connector recognizes.
|
protected static java.lang.String |
mapExtensionToMimeType(java.lang.String fileName)
Map an extension to a mime type.
|
protected static int |
matchSubPath(java.lang.String subPath,
java.lang.String fullPath)
Match a sub-path.
|
void |
outputSpecificationBody(org.apache.manifoldcf.core.interfaces.IHTTPOutput out,
java.util.Locale locale,
org.apache.manifoldcf.core.interfaces.Specification ds,
int connectionSequenceNumber,
int actualSequenceNumber,
java.lang.String tabName)
Output the specification body section.
|
void |
outputSpecificationHeader(org.apache.manifoldcf.core.interfaces.IHTTPOutput out,
java.util.Locale locale,
org.apache.manifoldcf.core.interfaces.Specification ds,
int connectionSequenceNumber,
java.util.List<java.lang.String> tabsArray)
Output the specification header section.
|
protected static boolean |
processCheck(boolean caseSensitive,
java.lang.String sourceMatch,
int sourceIndex,
java.lang.String match,
int matchIndex)
Recursive worker method for checkMatch.
|
void |
processDocuments(java.lang.String[] documentIdentifiers,
org.apache.manifoldcf.crawler.interfaces.IExistingVersions statuses,
org.apache.manifoldcf.core.interfaces.Specification spec,
org.apache.manifoldcf.crawler.interfaces.IProcessActivity activities,
int jobMode,
boolean usesDefaultAuthority)
Process a set of documents.
|
java.lang.String |
processSpecificationPost(org.apache.manifoldcf.core.interfaces.IPostParameters variableContext,
java.util.Locale locale,
org.apache.manifoldcf.core.interfaces.Specification ds,
int connectionSequenceNumber)
Process a specification post.
|
void |
viewSpecification(org.apache.manifoldcf.core.interfaces.IHTTPOutput out,
java.util.Locale locale,
org.apache.manifoldcf.core.interfaces.Specification ds,
int connectionSequenceNumber)
View specification.
|
getFormCheckJavascriptMethodName, getFormPresaveCheckJavascriptMethodName, getMaxDocumentRequest, requestInfo
check, clearThreadContext, connect, deinstall, disconnect, getConfiguration, install, isConnected, outputConfigurationBody, outputConfigurationBody, outputConfigurationHeader, outputConfigurationHeader, outputConfigurationHeader, pack, packFixedList, packList, packList, poll, processConfigurationPost, processConfigurationPost, setThreadContext, unpack, unpackFixedList, unpackList, viewConfiguration, viewConfiguration
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
check, clearThreadContext, connect, deinstall, disconnect, getConfiguration, install, isConnected, outputConfigurationBody, outputConfigurationHeader, poll, processConfigurationPost, setThreadContext, viewConfiguration

public static final java.lang.String _rcsid
protected static final java.lang.String ACTIVITY_READ
protected static final java.lang.String RELATIONSHIP_CHILD
protected static final java.lang.String[] activitiesList
public int getConnectorModel()
Specified by: getConnectorModel in interface org.apache.manifoldcf.crawler.interfaces.IRepositoryConnector
Overrides: getConnectorModel in class org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector

public java.lang.String[] getRelationshipTypes()
Specified by: getRelationshipTypes in interface org.apache.manifoldcf.crawler.interfaces.IRepositoryConnector
Overrides: getRelationshipTypes in class org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector

public java.lang.String[] getActivitiesList()
Specified by: getActivitiesList in interface org.apache.manifoldcf.crawler.interfaces.IRepositoryConnector
Overrides: getActivitiesList in class org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector

public java.lang.String[] getBinNames(java.lang.String documentIdentifier)
Specified by: getBinNames in interface org.apache.manifoldcf.crawler.interfaces.IRepositoryConnector
Overrides: getBinNames in class org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector

protected static java.lang.String convertToWGETURI(java.lang.String path)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Parameters:
path - is the document filePath.
Throws:
org.apache.manifoldcf.core.interfaces.ManifoldCFException

protected static java.lang.String convertToURI(java.lang.String documentIdentifier)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Parameters:
documentIdentifier - is the document identifier.
Throws:
org.apache.manifoldcf.core.interfaces.ManifoldCFException

public java.lang.String addSeedDocuments(org.apache.manifoldcf.crawler.interfaces.ISeedingActivity activities,
org.apache.manifoldcf.core.interfaces.Specification spec,
java.lang.String lastSeedVersion,
long seedTime,
int jobMode)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException,
org.apache.manifoldcf.agents.interfaces.ServiceInterruption
Specified by: addSeedDocuments in interface org.apache.manifoldcf.crawler.interfaces.IRepositoryConnector
Overrides: addSeedDocuments in class org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector
Parameters:
activities - is the interface this method should use to perform whatever framework actions are desired.
spec - is a document specification (that comes from the job).
seedTime - is the end of the time range of documents to consider, exclusive.
lastSeedVersion - is the last seeding version string for this job, or null if the job has no previous seeding version string.
jobMode - is an integer describing how the job is being run, whether continuous or once-only.
Throws:
org.apache.manifoldcf.core.interfaces.ManifoldCFException
org.apache.manifoldcf.agents.interfaces.ServiceInterruption

public void processDocuments(java.lang.String[] documentIdentifiers,
org.apache.manifoldcf.crawler.interfaces.IExistingVersions statuses,
org.apache.manifoldcf.core.interfaces.Specification spec,
org.apache.manifoldcf.crawler.interfaces.IProcessActivity activities,
int jobMode,
boolean usesDefaultAuthority)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException,
org.apache.manifoldcf.agents.interfaces.ServiceInterruption
Specified by: processDocuments in interface org.apache.manifoldcf.crawler.interfaces.IRepositoryConnector
Overrides: processDocuments in class org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector
Parameters:
documentIdentifiers - is the set of document identifiers to process.
statuses - are the currently-stored document versions for each document in the set of document identifiers passed in above.
activities - is the interface this method should use to queue up new document references and ingest documents.
jobMode - is an integer describing how the job is being run, whether continuous or once-only.
usesDefaultAuthority - will be true only if the authority in use for these documents is the default one.
Throws:
org.apache.manifoldcf.core.interfaces.ManifoldCFException
org.apache.manifoldcf.agents.interfaces.ServiceInterruption

protected static java.lang.String findConvertPath(org.apache.manifoldcf.core.interfaces.Specification spec,
java.io.File theFile)
Parameters:
spec - is the document specification.

protected static java.lang.String mapExtensionToMimeType(java.lang.String fileName)
public void outputSpecificationHeader(org.apache.manifoldcf.core.interfaces.IHTTPOutput out,
java.util.Locale locale,
org.apache.manifoldcf.core.interfaces.Specification ds,
int connectionSequenceNumber,
java.util.List<java.lang.String> tabsArray)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException,
java.io.IOException
Specified by: outputSpecificationHeader in interface org.apache.manifoldcf.crawler.interfaces.IRepositoryConnector
Overrides: outputSpecificationHeader in class org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector
Parameters:
out - is the output to which any HTML should be sent.
locale - is the locale the output is preferred to be in.
ds - is the current document specification for this job.
connectionSequenceNumber - is the unique number of this connection within the job.
tabsArray - is an array of tab names. Add to this array any tab names that are specific to the connector.
Throws:
org.apache.manifoldcf.core.interfaces.ManifoldCFException
java.io.IOException

public void outputSpecificationBody(org.apache.manifoldcf.core.interfaces.IHTTPOutput out,
java.util.Locale locale,
org.apache.manifoldcf.core.interfaces.Specification ds,
int connectionSequenceNumber,
int actualSequenceNumber,
java.lang.String tabName)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException,
java.io.IOException
Specified by: outputSpecificationBody in interface org.apache.manifoldcf.crawler.interfaces.IRepositoryConnector
Overrides: outputSpecificationBody in class org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector
Parameters:
out - is the output to which any HTML should be sent.
locale - is the locale the output is preferred to be in.
ds - is the current document specification for this job.
connectionSequenceNumber - is the unique number of this connection within the job.
actualSequenceNumber - is the connection within the job that has currently been selected.
tabName - is the current tab name. (actualSequenceNumber, tabName) form a unique tuple within the job.
Throws:
org.apache.manifoldcf.core.interfaces.ManifoldCFException
java.io.IOException

public java.lang.String processSpecificationPost(org.apache.manifoldcf.core.interfaces.IPostParameters variableContext,
java.util.Locale locale,
org.apache.manifoldcf.core.interfaces.Specification ds,
int connectionSequenceNumber)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: processSpecificationPost in interface org.apache.manifoldcf.crawler.interfaces.IRepositoryConnector
Overrides: processSpecificationPost in class org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector
Parameters:
variableContext - contains the post data, including binary file-upload information.
locale - is the locale the output is preferred to be in.
ds - is the current document specification for this job.
connectionSequenceNumber - is the unique number of this connection within the job.
Throws:
org.apache.manifoldcf.core.interfaces.ManifoldCFException

public void viewSpecification(org.apache.manifoldcf.core.interfaces.IHTTPOutput out,
java.util.Locale locale,
org.apache.manifoldcf.core.interfaces.Specification ds,
int connectionSequenceNumber)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException,
java.io.IOException
Specified by: viewSpecification in interface org.apache.manifoldcf.crawler.interfaces.IRepositoryConnector
Overrides: viewSpecification in class org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector
Parameters:
out - is the output to which any HTML should be sent.
locale - is the locale the output is preferred to be in.
ds - is the current document specification for this job.
connectionSequenceNumber - is the unique number of this connection within the job.
Throws:
org.apache.manifoldcf.core.interfaces.ManifoldCFException
java.io.IOException

protected static boolean checkInclude(java.io.File file,
java.lang.String fileName,
org.apache.manifoldcf.core.interfaces.Specification documentSpecification)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Parameters:
fileName - is the canonical file name.
documentSpecification - is the specification.
Throws:
org.apache.manifoldcf.core.interfaces.ManifoldCFException

protected static boolean checkIngest(java.io.File file,
org.apache.manifoldcf.core.interfaces.Specification documentSpecification)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Parameters:
file - is the file.
documentSpecification - is the specification.
Throws:
org.apache.manifoldcf.core.interfaces.ManifoldCFException

protected static int matchSubPath(java.lang.String subPath,
java.lang.String fullPath)
Parameters:
subPath - is the sub path.
fullPath - is the full path.

protected static boolean checkMatch(java.lang.String sourceMatch,
int sourceIndex,
java.lang.String match)
Parameters:
sourceMatch - is the expanded string (no wildcards).
sourceIndex - is the starting point in the expanded string.
match - is the wildcard-based string.

protected static boolean processCheck(boolean caseSensitive,
java.lang.String sourceMatch,
int sourceIndex,
java.lang.String match,
int matchIndex)
Parameters:
caseSensitive - is true if file names are case sensitive.
sourceMatch - is the source string (w/o wildcards).
sourceIndex - is the current point in the source string.
match - is the match string (w/wildcards).
matchIndex - is the current point in the match string.