Class WebcrawlerConfig


  • public class WebcrawlerConfig
    extends java.lang.Object
    Constants for the Webcrawler connector configuration.
    • Field Detail

      • PARAMETER_ROBOTSUSAGE

        public static final java.lang.String PARAMETER_ROBOTSUSAGE
        Robots usage (a parameter)
        See Also:
        Constant Field Values
      • PARAMETER_META_ROBOTS_TAGS_USAGE

        public static final java.lang.String PARAMETER_META_ROBOTS_TAGS_USAGE
        Meta robots tags usage (a parameter)
        See Also:
        Constant Field Values
      • PARAMETER_EMAIL

        public static final java.lang.String PARAMETER_EMAIL
        Email (a parameter)
        See Also:
        Constant Field Values
      • PARAMETER_PROXYHOST

        public static final java.lang.String PARAMETER_PROXYHOST
        Proxy host name (parameter)
        See Also:
        Constant Field Values
      • PARAMETER_PROXYPORT

        public static final java.lang.String PARAMETER_PROXYPORT
        Proxy port (parameter)
        See Also:
        Constant Field Values
      • PARAMETER_PROXYAUTHDOMAIN

        public static final java.lang.String PARAMETER_PROXYAUTHDOMAIN
        Proxy auth domain (parameter)
        See Also:
        Constant Field Values
      • PARAMETER_PROXYAUTHUSERNAME

        public static final java.lang.String PARAMETER_PROXYAUTHUSERNAME
        Proxy auth username (parameter)
        See Also:
        Constant Field Values
      • PARAMETER_PROXYAUTHPASSWORD

        public static final java.lang.String PARAMETER_PROXYAUTHPASSWORD
        Proxy auth password (parameter)
        See Also:
        Constant Field Values
      • NODE_BINDESC

        public static final java.lang.String NODE_BINDESC
        The bin description node
        See Also:
        Constant Field Values
      • ATTR_BINREGEXP

        public static final java.lang.String ATTR_BINREGEXP
        The bin regular expression
        See Also:
        Constant Field Values
      • ATTR_INSENSITIVE

        public static final java.lang.String ATTR_INSENSITIVE
        Whether the match is case insensitive
        See Also:
        Constant Field Values
      • NODE_MAXCONNECTIONS

        public static final java.lang.String NODE_MAXCONNECTIONS
        The max connections node
        See Also:
        Constant Field Values
      • NODE_MAXKBPERSECOND

        public static final java.lang.String NODE_MAXKBPERSECOND
        The bandwidth node
        See Also:
        Constant Field Values
      • NODE_MAXFETCHESPERMINUTE

        public static final java.lang.String NODE_MAXFETCHESPERMINUTE
        The max fetch rate node
        See Also:
        Constant Field Values
      • ATTR_VALUE

        public static final java.lang.String ATTR_VALUE
        The value attribute (used for maxconnections and maxkbpersecond)
        See Also:
        Constant Field Values
      • NODE_ACCESSCREDENTIAL

        public static final java.lang.String NODE_ACCESSCREDENTIAL
        Access control description node
        See Also:
        Constant Field Values
      • ATTR_URLREGEXP

        public static final java.lang.String ATTR_URLREGEXP
        Regexp for access control node
        See Also:
        Constant Field Values
      • ATTR_TYPE

        public static final java.lang.String ATTR_TYPE
        Type of security
        See Also:
        Constant Field Values
      • ATTRVALUE_BASIC

        public static final java.lang.String ATTRVALUE_BASIC
        Type value for basic authentication
        See Also:
        Constant Field Values
      • ATTRVALUE_NTLM

        public static final java.lang.String ATTRVALUE_NTLM
        Type value for NTLM authentication
        See Also:
        Constant Field Values
      • ATTRVALUE_SESSION

        public static final java.lang.String ATTRVALUE_SESSION
        Type value for session-based authentication
        See Also:
        Constant Field Values
      • ATTR_DOMAIN

        public static final java.lang.String ATTR_DOMAIN
        Domain/realm part of credentials (if any)
        See Also:
        Constant Field Values
      • ATTR_USERNAME

        public static final java.lang.String ATTR_USERNAME
        Username part of credentials
        See Also:
        Constant Field Values
      • ATTR_PASSWORD

        public static final java.lang.String ATTR_PASSWORD
        Password part of credentials
        See Also:
        Constant Field Values
      • NODE_AUTHPAGE

        public static final java.lang.String NODE_AUTHPAGE
        Authentication page description node
        See Also:
        Constant Field Values
      • ATTRVALUE_FORM

        public static final java.lang.String ATTRVALUE_FORM
        Authentication page type: Form
        See Also:
        Constant Field Values
      • ATTRVALUE_LINK

        public static final java.lang.String ATTRVALUE_LINK
        Authentication page type: Link
        See Also:
        Constant Field Values
      • ATTRVALUE_REDIRECTION

        public static final java.lang.String ATTRVALUE_REDIRECTION
        Authentication page type: Redirection
        See Also:
        Constant Field Values
      • ATTRVALUE_CONTENT

        public static final java.lang.String ATTRVALUE_CONTENT
        Authentication page type: Content
        See Also:
        Constant Field Values
      • ATTR_MATCHREGEXP

        public static final java.lang.String ATTR_MATCHREGEXP
        Form name or link target regexp for authentication page
        See Also:
        Constant Field Values
      • ATTR_OVERRIDETARGETURL

        public static final java.lang.String ATTR_OVERRIDETARGETURL
        URL to fetch next in a sequence (an override)
        See Also:
        Constant Field Values
      • NODE_AUTHPARAMETER

        public static final java.lang.String NODE_AUTHPARAMETER
        Authentication parameter node
        See Also:
        Constant Field Values
      • ATTR_NAMEREGEXP

        public static final java.lang.String ATTR_NAMEREGEXP
        Authentication parameter name regexp
        See Also:
        Constant Field Values
      • NODE_TRUST

        public static final java.lang.String NODE_TRUST
        Trust store description node
        See Also:
        Constant Field Values
      • ATTR_TRUSTSTORE

        public static final java.lang.String ATTR_TRUSTSTORE
        Trust store section of authentication record
        See Also:
        Constant Field Values
      • ATTR_TRUSTEVERYTHING

        public static final java.lang.String ATTR_TRUSTEVERYTHING
        "Trust everything" attribute. When set to 'true', it replaces the trust store.
        See Also:
        Constant Field Values
      • NODE_MAP

        public static final java.lang.String NODE_MAP
        Map entry specification node. Has two attributes: 'match' and 'map'.
        See Also:
        Constant Field Values
      • NODE_SEEDS

        public static final java.lang.String NODE_SEEDS
        The seeds node. The value of this node contains the seeds, as a large text area.
        See Also:
        Constant Field Values
      • NODE_INCLUDES

        public static final java.lang.String NODE_INCLUDES
        Include regexps node. The value of this node contains the regexps that must match the canonical URL in order for that URL to be included in the crawl. These regexps are newline separated, and # starts a comment.
        See Also:
        Constant Field Values
      • NODE_EXCLUDES

        public static final java.lang.String NODE_EXCLUDES
        Exclude regexps node. The value of this node contains the regexps; if any one of them matches, the URL is excluded from the crawl. These regexps are newline separated, and # starts a comment.
        See Also:
        Constant Field Values
      • NODE_INCLUDESINDEX

        public static final java.lang.String NODE_INCLUDESINDEX
        Include regexps node. The value of this node contains the regexps that must match the canonical URL in order for that URL to be included for indexing. These regexps are newline separated, and # starts a comment.
        See Also:
        Constant Field Values
      • NODE_EXCLUDESINDEX

        public static final java.lang.String NODE_EXCLUDESINDEX
        Exclude regexps node. The value of this node contains the regexps; if any one of them matches, the URL is excluded from indexing. These regexps are newline separated, and # starts a comment.
        See Also:
        Constant Field Values
      • NODE_EXCLUDESCONTENTINDEX

        public static final java.lang.String NODE_EXCLUDESCONTENTINDEX
        Excludes from the index any page whose body matches the specified regex.
        See Also:
        Constant Field Values
      • NODE_LIMITTOSEEDS

        public static final java.lang.String NODE_LIMITTOSEEDS
        Limit to seeds. When the value attribute is true, only seed domains will be permitted.
        See Also:
        Constant Field Values
      • NODE_FORCEINCLUSION

        public static final java.lang.String NODE_FORCEINCLUSION
        Force the inclusion of redirections. When the value attribute is true, redirected URLs will be included.
        See Also:
        Constant Field Values
      • NODE_URLSPEC

        public static final java.lang.String NODE_URLSPEC
        Canonicalization rule. Attributes are regexp, description, reorder, javasessionremoval, aspsessionremoval, phpsessionremoval, bvsessionremoval
        See Also:
        Constant Field Values
      • NODE_ACCESS

        public static final java.lang.String NODE_ACCESS
        Forced ACL access token node. Attribute is "token".
        See Also:
        Constant Field Values
      • NODE_EXCLUDEHEADER

        public static final java.lang.String NODE_EXCLUDEHEADER
        Exclude header node. The value of this node lists a single header (in lower case) that should be excluded from the document metadata
        See Also:
        Constant Field Values
      • ATTR_REGEXP

        public static final java.lang.String ATTR_REGEXP
        regexp attribute
        See Also:
        Constant Field Values
      • ATTR_DESCRIPTION

        public static final java.lang.String ATTR_DESCRIPTION
        description attribute
        See Also:
        Constant Field Values
      • ATTR_REORDER

        public static final java.lang.String ATTR_REORDER
        reorder attribute
        See Also:
        Constant Field Values
      • ATTR_JAVASESSIONREMOVAL

        public static final java.lang.String ATTR_JAVASESSIONREMOVAL
        javasessionremoval attribute
        See Also:
        Constant Field Values
      • ATTR_ASPSESSIONREMOVAL

        public static final java.lang.String ATTR_ASPSESSIONREMOVAL
        aspsessionremoval attribute
        See Also:
        Constant Field Values
      • ATTR_PHPSESSIONREMOVAL

        public static final java.lang.String ATTR_PHPSESSIONREMOVAL
        phpsessionremoval attribute
        See Also:
        Constant Field Values
      • ATTR_BVSESSIONREMOVAL

        public static final java.lang.String ATTR_BVSESSIONREMOVAL
        bvsessionremoval attribute
        See Also:
        Constant Field Values
      • ATTR_LOWERCASE

        public static final java.lang.String ATTR_LOWERCASE
        map to lower case
        See Also:
        Constant Field Values
      • ATTR_NAME

        public static final java.lang.String ATTR_NAME
        name attribute
        See Also:
        Constant Field Values
      • ATTR_TOKEN

        public static final java.lang.String ATTR_TOKEN
        token attribute
        See Also:
        Constant Field Values
      • ATTRVALUE_YES

        public static final java.lang.String ATTRVALUE_YES
        Value yes
        See Also:
        Constant Field Values
      • ATTRVALUE_NO

        public static final java.lang.String ATTRVALUE_NO
        Value no
        See Also:
        Constant Field Values
      • ATTRVALUE_FALSE

        public static final java.lang.String ATTRVALUE_FALSE
        Value false
        See Also:
        Constant Field Values
      • ATTRVALUE_TRUE

        public static final java.lang.String ATTRVALUE_TRUE
        Value true
        See Also:
        Constant Field Values
      • ATTR_MATCH

        public static final java.lang.String ATTR_MATCH
        Match attribute
        See Also:
        Constant Field Values
    • Constructor Detail

      • WebcrawlerConfig

        public WebcrawlerConfig()