package com.norconex.collector.http.crawler;

import com.norconex.collector.core.checksum.IMetadataChecksummer;
import com.norconex.collector.core.crawler.AbstractCrawlerConfig;
import com.norconex.collector.http.checksum.impl.LastModifiedMetadataChecksummer;
import com.norconex.collector.http.client.IHttpClientFactory;
import com.norconex.collector.http.client.impl.GenericHttpClientFactory;
import com.norconex.collector.http.delay.IDelayResolver;
import com.norconex.collector.http.delay.impl.GenericDelayResolver;
import com.norconex.collector.http.fetch.IHttpDocumentFetcher;
import com.norconex.collector.http.fetch.IHttpMetadataFetcher;
import com.norconex.collector.http.fetch.impl.GenericDocumentFetcher;
import com.norconex.collector.http.processor.IHttpDocumentProcessor;
import com.norconex.collector.http.recrawl.IRecrawlableResolver;
import com.norconex.collector.http.recrawl.impl.GenericRecrawlableResolver;
import com.norconex.collector.http.redirect.IRedirectURLProvider;
import com.norconex.collector.http.redirect.impl.GenericRedirectURLProvider;
import com.norconex.collector.http.robot.IRobotsMetaProvider;
import com.norconex.collector.http.robot.IRobotsTxtProvider;
import com.norconex.collector.http.robot.impl.StandardRobotsMetaProvider;
import com.norconex.collector.http.robot.impl.StandardRobotsTxtProvider;
import com.norconex.collector.http.sitemap.ISitemapResolverFactory;
import com.norconex.collector.http.sitemap.impl.StandardSitemapResolverFactory;
import com.norconex.collector.http.url.ICanonicalLinkDetector;
import com.norconex.collector.http.url.ILinkExtractor;
import com.norconex.collector.http.url.IURLNormalizer;
import com.norconex.collector.http.url.impl.GenericCanonicalLinkDetector;
import com.norconex.collector.http.url.impl.GenericLinkExtractor;
import com.norconex.collector.http.url.impl.GenericURLNormalizer;
import com.norconex.commons.lang.config.XMLConfigurationUtil;
import com.norconex.commons.lang.xml.EnhancedXMLStreamWriter;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import javax.xml.stream.XMLStreamException;
import org.apache.commons.configuration.HierarchicalConfiguration;
import org.apache.commons.configuration.XMLConfiguration;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

/* loaded from: input_file:com/norconex/collector/http/crawler/HttpCrawlerConfig.class */
/**
 * Configuration holder for an HTTP crawler.
 *
 * <p>Stores the crawler start references (URLs, URL files, sitemaps and
 * start URL providers), a set of boolean switches, and the pluggable
 * components used while crawling (URL normalizer, delay resolver, HTTP
 * client factory, fetchers, link extractors, robots and sitemap support,
 * checksummer, recrawl resolver and document processors). The class knows
 * how to write these settings to XML and how to read them back.</p>
 *
 * <p>Array-typed properties are defensively copied on the way in and on the
 * way out, so callers never share the internal arrays with this object.</p>
 */
public class HttpCrawlerConfig extends AbstractCrawlerConfig {

    private static final Logger LOG = LogManager.getLogger(HttpCrawlerConfig.class);

    private String[] startURLs;
    private String[] startURLsFiles;
    private String[] startSitemapURLs;
    private IStartURLsProvider[] startURLsProviders;
    private boolean ignoreRobotsTxt;
    private boolean ignoreRobotsMeta;
    private boolean ignoreSitemap;
    private boolean keepDownloads;
    private boolean ignoreCanonicalLinks;
    private boolean keepOutOfScopeLinks;
    private boolean skipMetaFetcherOnBadStatus;
    private boolean linkExtractorQuitAtDepth;
    private String userAgent;
    private IHttpMetadataFetcher metadataFetcher;
    private IHttpDocumentProcessor[] preImportProcessors;
    private IHttpDocumentProcessor[] postImportProcessors;
    // Defaults to -1; presumably meaning "no depth limit" -- confirm against the crawler.
    private int maxDepth = -1;
    private URLCrawlScopeStrategy urlCrawlScopeStrategy = new URLCrawlScopeStrategy();
    private IURLNormalizer urlNormalizer = new GenericURLNormalizer();
    private IDelayResolver delayResolver = new GenericDelayResolver();
    private IHttpClientFactory httpClientFactory = new GenericHttpClientFactory();
    private IHttpDocumentFetcher documentFetcher = new GenericDocumentFetcher();
    private ICanonicalLinkDetector canonicalLinkDetector = new GenericCanonicalLinkDetector();
    private IRedirectURLProvider redirectURLProvider = new GenericRedirectURLProvider();
    private ILinkExtractor[] linkExtractors = {new GenericLinkExtractor()};
    private IRobotsTxtProvider robotsTxtProvider = new StandardRobotsTxtProvider();
    private IRobotsMetaProvider robotsMetaProvider = new StandardRobotsMetaProvider();
    private ISitemapResolverFactory sitemapResolverFactory = new StandardSitemapResolverFactory();
    private IMetadataChecksummer metadataChecksummer = new LastModifiedMetadataChecksummer();
    private IRecrawlableResolver recrawlableResolver = new GenericRecrawlableResolver();

    //--- Start references -----------------------------------------------------

    /**
     * Gets the start URLs.
     * @return a copy of the start URLs, or {@code null} if none were set
     */
    public String[] getStartURLs() {
        return ArrayUtils.clone(this.startURLs);
    }

    /**
     * Sets the start URLs. The supplied array is copied.
     * @param startURLs start URLs
     */
    public void setStartURLs(String... startURLs) {
        this.startURLs = ArrayUtils.clone(startURLs);
    }

    /**
     * Gets the entries written and read as {@code urlsFile} elements.
     * @return a copy of the start URL files, or {@code null} if none were set
     */
    public String[] getStartURLsFiles() {
        // Copy on read, consistent with getStartURLs().
        return ArrayUtils.clone(this.startURLsFiles);
    }

    /**
     * Sets the start URL files. The supplied array is copied.
     * @param startURLsFiles start URL files
     */
    public void setStartURLsFiles(String... startURLsFiles) {
        this.startURLsFiles = ArrayUtils.clone(startURLsFiles);
    }

    /**
     * Gets the sitemap URLs used as start references.
     * @return a copy of the start sitemap URLs, or {@code null} if none were set
     */
    public String[] getStartSitemapURLs() {
        // Copy on read, consistent with getStartURLs().
        return ArrayUtils.clone(this.startSitemapURLs);
    }

    /**
     * Sets the sitemap URLs used as start references. The supplied array
     * is copied.
     * @param startSitemapURLs start sitemap URLs
     */
    public void setStartSitemapURLs(String... startSitemapURLs) {
        this.startSitemapURLs = ArrayUtils.clone(startSitemapURLs);
    }

    /**
     * Gets the start URL providers.
     * @return a copy of the providers array, or {@code null} if none were set
     */
    public IStartURLsProvider[] getStartURLsProviders() {
        // Copy on read, consistent with the other array properties.
        return ArrayUtils.clone(this.startURLsProviders);
    }

    /**
     * Sets the start URL providers. The supplied array is copied.
     * @param startURLsProviders start URL providers
     */
    public void setStartURLsProviders(IStartURLsProvider... startURLsProviders) {
        // Copy on write, consistent with the other array properties.
        this.startURLsProviders = ArrayUtils.clone(startURLsProviders);
    }

    //--- Simple settings and components ---------------------------------------

    /**
     * Sets the maximum crawl depth.
     * @param maxDepth maximum depth
     */
    public void setMaxDepth(int maxDepth) {
        this.maxDepth = maxDepth;
    }

    /**
     * Gets the maximum crawl depth (initially {@code -1}).
     * @return maximum depth
     */
    public int getMaxDepth() {
        return this.maxDepth;
    }

    /**
     * Gets the HTTP client factory.
     * @return HTTP client factory
     */
    public IHttpClientFactory getHttpClientFactory() {
        return this.httpClientFactory;
    }

    /**
     * Sets the HTTP client factory.
     * @param httpClientFactory HTTP client factory
     */
    public void setHttpClientFactory(IHttpClientFactory httpClientFactory) {
        this.httpClientFactory = httpClientFactory;
    }

    /**
     * Gets the document fetcher.
     * @return document fetcher
     */
    public IHttpDocumentFetcher getDocumentFetcher() {
        return this.documentFetcher;
    }

    /**
     * Sets the document fetcher.
     * @param documentFetcher document fetcher
     */
    public void setDocumentFetcher(IHttpDocumentFetcher documentFetcher) {
        this.documentFetcher = documentFetcher;
    }

    /**
     * Gets the metadata fetcher (no default is assigned by this class).
     * @return metadata fetcher, possibly {@code null}
     */
    public IHttpMetadataFetcher getMetadataFetcher() {
        return this.metadataFetcher;
    }

    /**
     * Sets the metadata fetcher.
     * @param metadataFetcher metadata fetcher
     */
    public void setMetadataFetcher(IHttpMetadataFetcher metadataFetcher) {
        this.metadataFetcher = metadataFetcher;
    }

    /**
     * Gets the canonical link detector.
     * @return canonical link detector
     */
    public ICanonicalLinkDetector getCanonicalLinkDetector() {
        return this.canonicalLinkDetector;
    }

    /**
     * Sets the canonical link detector.
     * @param canonicalLinkDetector canonical link detector
     */
    public void setCanonicalLinkDetector(ICanonicalLinkDetector canonicalLinkDetector) {
        this.canonicalLinkDetector = canonicalLinkDetector;
    }

    /**
     * Gets the link extractors.
     * @return a copy of the link extractors array
     */
    public ILinkExtractor[] getLinkExtractors() {
        return ArrayUtils.clone(this.linkExtractors);
    }

    /**
     * Sets the link extractors. The supplied array is copied.
     * @param linkExtractors link extractors
     */
    public void setLinkExtractors(ILinkExtractor... linkExtractors) {
        this.linkExtractors = ArrayUtils.clone(linkExtractors);
    }

    /**
     * Gets the robots.txt provider.
     * @return robots.txt provider
     */
    public IRobotsTxtProvider getRobotsTxtProvider() {
        return this.robotsTxtProvider;
    }

    /**
     * Sets the robots.txt provider.
     * @param robotsTxtProvider robots.txt provider
     */
    public void setRobotsTxtProvider(IRobotsTxtProvider robotsTxtProvider) {
        this.robotsTxtProvider = robotsTxtProvider;
    }

    /**
     * Gets the URL normalizer.
     * @return URL normalizer
     */
    public IURLNormalizer getUrlNormalizer() {
        return this.urlNormalizer;
    }

    /**
     * Sets the URL normalizer.
     * @param urlNormalizer URL normalizer
     */
    public void setUrlNormalizer(IURLNormalizer urlNormalizer) {
        this.urlNormalizer = urlNormalizer;
    }

    /**
     * Gets the delay resolver.
     * @return delay resolver
     */
    public IDelayResolver getDelayResolver() {
        return this.delayResolver;
    }

    /**
     * Sets the delay resolver.
     * @param delayResolver delay resolver
     */
    public void setDelayResolver(IDelayResolver delayResolver) {
        this.delayResolver = delayResolver;
    }

    /**
     * Gets the pre-import document processors.
     * @return a copy of the processors array, or {@code null} if none were set
     */
    public IHttpDocumentProcessor[] getPreImportProcessors() {
        return ArrayUtils.clone(this.preImportProcessors);
    }

    /**
     * Sets the pre-import document processors. The supplied array is copied.
     * @param preImportProcessors pre-import processors
     */
    public void setPreImportProcessors(IHttpDocumentProcessor... preImportProcessors) {
        this.preImportProcessors = ArrayUtils.clone(preImportProcessors);
    }

    /**
     * Gets the post-import document processors.
     * @return a copy of the processors array, or {@code null} if none were set
     */
    public IHttpDocumentProcessor[] getPostImportProcessors() {
        return ArrayUtils.clone(this.postImportProcessors);
    }

    /**
     * Sets the post-import document processors. The supplied array is copied.
     * @param postImportProcessors post-import processors
     */
    public void setPostImportProcessors(IHttpDocumentProcessor... postImportProcessors) {
        this.postImportProcessors = ArrayUtils.clone(postImportProcessors);
    }

    /**
     * Gets the flag stored as the {@code ignore} attribute of
     * {@code robotsTxt}.
     * @return {@code true} if robots.txt is flagged as ignored
     */
    public boolean isIgnoreRobotsTxt() {
        return this.ignoreRobotsTxt;
    }

    /**
     * Sets the robots.txt ignore flag.
     * @param ignoreRobotsTxt {@code true} to flag robots.txt as ignored
     */
    public void setIgnoreRobotsTxt(boolean ignoreRobotsTxt) {
        this.ignoreRobotsTxt = ignoreRobotsTxt;
    }

    /**
     * Gets the {@code keepDownloads} flag.
     * @return {@code true} if set
     */
    public boolean isKeepDownloads() {
        return this.keepDownloads;
    }

    /**
     * Sets the {@code keepDownloads} flag.
     * @param keepDownloads flag value
     */
    public void setKeepDownloads(boolean keepDownloads) {
        this.keepDownloads = keepDownloads;
    }

    /**
     * Gets the {@code linkExtractorQuitAtDepth} flag.
     * @return {@code true} if set
     */
    public boolean isLinkExtractorQuitAtDepth() {
        return this.linkExtractorQuitAtDepth;
    }

    /**
     * Sets the {@code linkExtractorQuitAtDepth} flag.
     * @param linkExtractorQuitAtDepth flag value
     */
    public void setLinkExtractorQuitAtDepth(boolean linkExtractorQuitAtDepth) {
        this.linkExtractorQuitAtDepth = linkExtractorQuitAtDepth;
    }

    /**
     * Gets the {@code keepOutOfScopeLinks} flag.
     * @return {@code true} if set
     */
    public boolean isKeepOutOfScopeLinks() {
        return this.keepOutOfScopeLinks;
    }

    /**
     * Sets the {@code keepOutOfScopeLinks} flag.
     * @param keepOutOfScopeLinks flag value
     */
    public void setKeepOutOfScopeLinks(boolean keepOutOfScopeLinks) {
        this.keepOutOfScopeLinks = keepOutOfScopeLinks;
    }

    /**
     * Gets the metadata checksummer.
     * @return metadata checksummer
     */
    public IMetadataChecksummer getMetadataChecksummer() {
        return this.metadataChecksummer;
    }

    /**
     * Sets the metadata checksummer.
     * @param metadataChecksummer metadata checksummer
     */
    public void setMetadataChecksummer(IMetadataChecksummer metadataChecksummer) {
        this.metadataChecksummer = metadataChecksummer;
    }

    /**
     * Gets the flag stored as the {@code ignore} attribute of
     * {@code robotsMeta}.
     * @return {@code true} if robots meta is flagged as ignored
     */
    public boolean isIgnoreRobotsMeta() {
        return this.ignoreRobotsMeta;
    }

    /**
     * Sets the robots meta ignore flag.
     * @param ignoreRobotsMeta {@code true} to flag robots meta as ignored
     */
    public void setIgnoreRobotsMeta(boolean ignoreRobotsMeta) {
        this.ignoreRobotsMeta = ignoreRobotsMeta;
    }

    /**
     * Gets the robots meta provider.
     * @return robots meta provider
     */
    public IRobotsMetaProvider getRobotsMetaProvider() {
        return this.robotsMetaProvider;
    }

    /**
     * Sets the robots meta provider.
     * @param robotsMetaProvider robots meta provider
     */
    public void setRobotsMetaProvider(IRobotsMetaProvider robotsMetaProvider) {
        this.robotsMetaProvider = robotsMetaProvider;
    }

    /**
     * Gets the flag stored as the {@code ignore} attribute of
     * {@code sitemapResolverFactory}.
     * @return {@code true} if sitemaps are flagged as ignored
     */
    public boolean isIgnoreSitemap() {
        return this.ignoreSitemap;
    }

    /**
     * Sets the sitemap ignore flag.
     * @param ignoreSitemap {@code true} to flag sitemaps as ignored
     */
    public void setIgnoreSitemap(boolean ignoreSitemap) {
        this.ignoreSitemap = ignoreSitemap;
    }

    /**
     * Gets the sitemap resolver factory.
     * @return sitemap resolver factory
     */
    public ISitemapResolverFactory getSitemapResolverFactory() {
        return this.sitemapResolverFactory;
    }

    /**
     * Sets the sitemap resolver factory.
     * @param sitemapResolverFactory sitemap resolver factory
     */
    public void setSitemapResolverFactory(ISitemapResolverFactory sitemapResolverFactory) {
        this.sitemapResolverFactory = sitemapResolverFactory;
    }

    /**
     * Gets the user agent string.
     * @return user agent, possibly {@code null}
     */
    public String getUserAgent() {
        return this.userAgent;
    }

    /**
     * Sets the user agent string.
     * @param userAgent user agent
     */
    public void setUserAgent(String userAgent) {
        this.userAgent = userAgent;
    }

    /**
     * Gets the canonical links ignore flag.
     * @return {@code true} if canonical links are flagged as ignored
     */
    public boolean isIgnoreCanonicalLinks() {
        return this.ignoreCanonicalLinks;
    }

    /**
     * Sets the canonical links ignore flag.
     * @param ignoreCanonicalLinks {@code true} to flag canonical links as ignored
     */
    public void setIgnoreCanonicalLinks(boolean ignoreCanonicalLinks) {
        this.ignoreCanonicalLinks = ignoreCanonicalLinks;
    }

    /**
     * Gets the URL crawl scope strategy, which carries the
     * stay-on-protocol/domain/port and include-subdomains settings.
     * @return URL crawl scope strategy
     */
    public URLCrawlScopeStrategy getURLCrawlScopeStrategy() {
        return this.urlCrawlScopeStrategy;
    }

    /**
     * Sets the URL crawl scope strategy.
     * @param urlCrawlScopeStrategy URL crawl scope strategy
     */
    public void setUrlCrawlScopeStrategy(URLCrawlScopeStrategy urlCrawlScopeStrategy) {
        this.urlCrawlScopeStrategy = urlCrawlScopeStrategy;
    }

    /**
     * Gets the redirect URL provider.
     * @return redirect URL provider
     */
    public IRedirectURLProvider getRedirectURLProvider() {
        return this.redirectURLProvider;
    }

    /**
     * Sets the redirect URL provider.
     * @param redirectURLProvider redirect URL provider
     */
    public void setRedirectURLProvider(IRedirectURLProvider redirectURLProvider) {
        this.redirectURLProvider = redirectURLProvider;
    }

    /**
     * Gets the recrawlable resolver.
     * @return recrawlable resolver
     */
    public IRecrawlableResolver getRecrawlableResolver() {
        return this.recrawlableResolver;
    }

    /**
     * Sets the recrawlable resolver.
     * @param recrawlableResolver recrawlable resolver
     */
    public void setRecrawlableResolver(IRecrawlableResolver recrawlableResolver) {
        this.recrawlableResolver = recrawlableResolver;
    }

    /**
     * Gets the flag stored as the {@code skipOnBadStatus} attribute of
     * {@code metadataFetcher}.
     * @return {@code true} if set
     */
    public boolean isSkipMetaFetcherOnBadStatus() {
        return this.skipMetaFetcherOnBadStatus;
    }

    /**
     * Sets the {@code skipOnBadStatus} flag of the metadata fetcher.
     * @param skipMetaFetcherOnBadStatus flag value
     */
    public void setSkipMetaFetcherOnBadStatus(boolean skipMetaFetcherOnBadStatus) {
        this.skipMetaFetcherOnBadStatus = skipMetaFetcherOnBadStatus;
    }

    //--- XML persistence ------------------------------------------------------

    /**
     * Writes the HTTP-specific crawler settings as XML.
     *
     * <p>Two writers target the same underlying stream: an
     * {@link EnhancedXMLStreamWriter} for simple elements and the raw
     * {@code writer} (through the inherited {@code writeObject} and
     * {@code writeArray} helpers) for components. Each is flushed before
     * switching to the other so the output stays in order.</p>
     *
     * @param writer destination writer
     * @throws IOException if writing fails, including when the XML stream
     *         writer reports an {@link XMLStreamException}
     */
    protected void saveCrawlerConfigToXML(Writer writer) throws IOException {
        try {
            writer.flush();
            EnhancedXMLStreamWriter xml = new EnhancedXMLStreamWriter(writer);

            // Simple settings.
            xml.writeElementString("userAgent", getUserAgent());
            xml.writeElementInteger("maxDepth", Integer.valueOf(getMaxDepth()));
            xml.writeElementBoolean("keepDownloads", Boolean.valueOf(isKeepDownloads()));
            xml.writeElementBoolean("keepOutOfScopeLinks", Boolean.valueOf(isKeepOutOfScopeLinks()));
            xml.writeElementBoolean("linkExtractorQuitAtDepth", Boolean.valueOf(isLinkExtractorQuitAtDepth()));

            // <startURLs>: scope attributes first, then the start references.
            xml.writeStartElement("startURLs");
            xml.writeAttributeBoolean("stayOnProtocol", Boolean.valueOf(this.urlCrawlScopeStrategy.isStayOnProtocol()));
            xml.writeAttributeBoolean("stayOnDomain", Boolean.valueOf(this.urlCrawlScopeStrategy.isStayOnDomain()));
            xml.writeAttributeBoolean("includeSubdomains", Boolean.valueOf(this.urlCrawlScopeStrategy.isIncludeSubdomains()));
            xml.writeAttributeBoolean("stayOnPort", Boolean.valueOf(this.urlCrawlScopeStrategy.isStayOnPort()));
            writeStringElements(xml, "url", getStartURLs());
            writeStringElements(xml, "urlsFile", getStartURLsFiles());
            writeStringElements(xml, "sitemap", getStartSitemapURLs());
            xml.flush();
            IStartURLsProvider[] providers = getStartURLsProviders();
            if (providers != null) {
                for (IStartURLsProvider provider : providers) {
                    writeObject(writer, "provider", provider);
                }
            }
            writer.flush();
            xml.writeEndElement();
            xml.flush();

            // Pluggable components.
            writeObject(writer, "urlNormalizer", getUrlNormalizer());
            writeObject(writer, "delay", getDelayResolver());
            writeObject(writer, "httpClientFactory", getHttpClientFactory());
            writeObject(writer, "robotsTxt", getRobotsTxtProvider(), isIgnoreRobotsTxt());
            writeObject(writer, "sitemapResolverFactory", getSitemapResolverFactory(), isIgnoreSitemap());
            // The ignore flag is written so that it survives a save/load
            // round trip: loading reads "canonicalLinkDetector[@ignore]".
            writeObject(writer, "canonicalLinkDetector", getCanonicalLinkDetector(), isIgnoreCanonicalLinks());
            writeObject(writer, "redirectURLProvider", getRedirectURLProvider());
            writeObject(writer, "recrawlableResolver", getRecrawlableResolver());
            writer.flush();
            xml.flush();

            // The metadata fetcher is rendered to a buffer first so that the
            // skipOnBadStatus attribute can be injected into its opening tag.
            StringWriter fetcherXml = new StringWriter();
            writeObject(fetcherXml, "metadataFetcher", getMetadataFetcher());
            writer.write(fetcherXml.toString().replaceFirst(
                    "^(<metadataFetcher)",
                    "$1 skipOnBadStatus=\"" + isSkipMetaFetcherOnBadStatus() + "\""));
            writer.flush();

            writeObject(writer, "metadataChecksummer", getMetadataChecksummer());
            writeObject(writer, "documentFetcher", getDocumentFetcher());
            writeObject(writer, "robotsMeta", getRobotsMetaProvider(), isIgnoreRobotsMeta());
            writeArray(writer, "linkExtractors", "extractor", getLinkExtractors());
            writeArray(writer, "preImportProcessors", "processor", getPreImportProcessors());
            writeArray(writer, "postImportProcessors", "processor", getPostImportProcessors());
        } catch (XMLStreamException e) {
            throw new IOException("Could not write to XML config: " + getId(), e);
        }
    }

    /**
     * Writes one string element per array entry; does nothing when the
     * array is {@code null}.
     */
    private static void writeStringElements(
            EnhancedXMLStreamWriter xml, String elementName, String[] values)
            throws XMLStreamException {
        if (values == null) {
            return;
        }
        for (String value : values) {
            xml.writeElementString(elementName, value);
        }
    }

    /**
     * Loads the HTTP-specific crawler settings from XML. Any setting absent
     * from the XML keeps its current value, since every lookup passes the
     * current value as its default.
     *
     * @param xMLConfiguration XML configuration to read from
     */
    protected void loadCrawlerConfigFromXML(XMLConfiguration xMLConfiguration) {
        loadSimpleSettings(xMLConfiguration);

        setHttpClientFactory((IHttpClientFactory) XMLConfigurationUtil.newInstance(
                xMLConfiguration, "httpClientFactory", getHttpClientFactory()));

        setRobotsTxtProvider((IRobotsTxtProvider) XMLConfigurationUtil.newInstance(
                xMLConfiguration, "robotsTxt", getRobotsTxtProvider()));
        setIgnoreRobotsTxt(xMLConfiguration.getBoolean(
                "robotsTxt[@ignore]", isIgnoreRobotsTxt()));

        // Sitemap resolver, with fallback on the deprecated <sitemap> tag.
        ISitemapResolverFactory sitemapFactory =
                (ISitemapResolverFactory) XMLConfigurationUtil.newInstance(
                        xMLConfiguration, "sitemapResolverFactory", getSitemapResolverFactory());
        setIgnoreSitemap(xMLConfiguration.getBoolean(
                "sitemapResolverFactory[@ignore]", isIgnoreSitemap()));
        List legacySitemapNodes = xMLConfiguration.configurationsAt("sitemap");
        if (sitemapFactory == null
                && legacySitemapNodes != null
                && !legacySitemapNodes.isEmpty()
                && xMLConfiguration.configurationAt("sitemap") != null) {
            LOG.warn("The <sitemap ...> tag used as a crawler setting is deprecated, use <sitemapResolverFactory...> instead. The <sitemap> tag can now be used as a start URL.");
            sitemapFactory = (ISitemapResolverFactory) XMLConfigurationUtil.newInstance(
                    xMLConfiguration, "sitemap");
            setIgnoreSitemap(xMLConfiguration.getBoolean(
                    "sitemap[@ignore]", isIgnoreSitemap()));
        }
        if (sitemapFactory == null) {
            sitemapFactory = getSitemapResolverFactory();
        }
        setSitemapResolverFactory(sitemapFactory);

        setCanonicalLinkDetector((ICanonicalLinkDetector) XMLConfigurationUtil.newInstance(
                xMLConfiguration, "canonicalLinkDetector", getCanonicalLinkDetector()));
        setIgnoreCanonicalLinks(xMLConfiguration.getBoolean(
                "canonicalLinkDetector[@ignore]", isIgnoreCanonicalLinks()));

        setRedirectURLProvider((IRedirectURLProvider) XMLConfigurationUtil.newInstance(
                xMLConfiguration, "redirectURLProvider", getRedirectURLProvider()));
        setRecrawlableResolver((IRecrawlableResolver) XMLConfigurationUtil.newInstance(
                xMLConfiguration, "recrawlableResolver", getRecrawlableResolver()));

        setSkipMetaFetcherOnBadStatus(xMLConfiguration.getBoolean(
                "metadataFetcher[@skipOnBadStatus]", isSkipMetaFetcherOnBadStatus()));
        setMetadataFetcher((IHttpMetadataFetcher) XMLConfigurationUtil.newInstance(
                xMLConfiguration, "metadataFetcher", getMetadataFetcher()));
        setMetadataChecksummer((IMetadataChecksummer) XMLConfigurationUtil.newInstance(
                xMLConfiguration, "metadataChecksummer", getMetadataChecksummer()));
        setDocumentFetcher((IHttpDocumentFetcher) XMLConfigurationUtil.newInstance(
                xMLConfiguration, "documentFetcher", getDocumentFetcher()));

        setRobotsMetaProvider((IRobotsMetaProvider) XMLConfigurationUtil.newInstance(
                xMLConfiguration, "robotsMeta", getRobotsMetaProvider()));
        setIgnoreRobotsMeta(xMLConfiguration.getBoolean(
                "robotsMeta[@ignore]", isIgnoreRobotsMeta()));

        setLinkExtractors((ILinkExtractor[]) defaultIfEmpty(
                loadLinkExtractors(xMLConfiguration, "linkExtractors.extractor"),
                getLinkExtractors()));
        setPreImportProcessors((IHttpDocumentProcessor[]) defaultIfEmpty(
                loadProcessors(xMLConfiguration, "preImportProcessors.processor"),
                getPreImportProcessors()));
        setPostImportProcessors((IHttpDocumentProcessor[]) defaultIfEmpty(
                loadProcessors(xMLConfiguration, "postImportProcessors.processor"),
                getPostImportProcessors()));
    }

    /**
     * Loads scalar settings, the URL normalizer, the delay resolver, the
     * crawl scope attributes and the start references.
     */
    private void loadSimpleSettings(XMLConfiguration xMLConfiguration) {
        setUserAgent(xMLConfiguration.getString("userAgent", getUserAgent()));
        setUrlNormalizer((IURLNormalizer) XMLConfigurationUtil.newInstance(
                xMLConfiguration, "urlNormalizer", getUrlNormalizer()));
        setDelayResolver((IDelayResolver) XMLConfigurationUtil.newInstance(
                xMLConfiguration, "delay", getDelayResolver()));
        setMaxDepth(xMLConfiguration.getInt("maxDepth", getMaxDepth()));
        setKeepDownloads(xMLConfiguration.getBoolean(
                "keepDownloads", isKeepDownloads()));
        setKeepOutOfScopeLinks(xMLConfiguration.getBoolean(
                "keepOutOfScopeLinks", isKeepOutOfScopeLinks()));
        // This value must go to its own property; storing it in
        // keepOutOfScopeLinks would overwrite the flag loaded just above.
        setLinkExtractorQuitAtDepth(xMLConfiguration.getBoolean(
                "linkExtractorQuitAtDepth", isLinkExtractorQuitAtDepth()));
        setIgnoreCanonicalLinks(xMLConfiguration.getBoolean(
                "ignoreCanonicalLinks", isIgnoreCanonicalLinks()));

        // Crawl scope, stored as attributes of <startURLs>.
        this.urlCrawlScopeStrategy.setStayOnProtocol(xMLConfiguration.getBoolean(
                "startURLs[@stayOnProtocol]", this.urlCrawlScopeStrategy.isStayOnProtocol()));
        this.urlCrawlScopeStrategy.setStayOnDomain(xMLConfiguration.getBoolean(
                "startURLs[@stayOnDomain]", this.urlCrawlScopeStrategy.isStayOnDomain()));
        this.urlCrawlScopeStrategy.setIncludeSubdomains(xMLConfiguration.getBoolean(
                "startURLs[@includeSubdomains]", this.urlCrawlScopeStrategy.isIncludeSubdomains()));
        this.urlCrawlScopeStrategy.setStayOnPort(xMLConfiguration.getBoolean(
                "startURLs[@stayOnPort]", this.urlCrawlScopeStrategy.isStayOnPort()));

        // Start references; existing values are kept when the XML has none.
        setStartURLs((String[]) defaultIfEmpty(
                xMLConfiguration.getStringArray("startURLs.url"), getStartURLs()));
        setStartURLsFiles((String[]) defaultIfEmpty(
                xMLConfiguration.getStringArray("startURLs.urlsFile"), getStartURLsFiles()));
        setStartSitemapURLs((String[]) defaultIfEmpty(
                xMLConfiguration.getStringArray("startURLs.sitemap"), getStartSitemapURLs()));
        setStartURLsProviders((IStartURLsProvider[]) defaultIfEmpty(
                loadStartURLsProviders(xMLConfiguration), getStartURLsProviders()));
    }

    /**
     * Instantiates every {@code startURLs.provider} entry found in the XML.
     * @return the loaded providers, possibly empty but never {@code null}
     */
    private IStartURLsProvider[] loadStartURLsProviders(XMLConfiguration xMLConfiguration) {
        List<IStartURLsProvider> providers = new ArrayList<IStartURLsProvider>();
        for (Object node : xMLConfiguration.configurationsAt("startURLs.provider")) {
            IStartURLsProvider provider = (IStartURLsProvider)
                    XMLConfigurationUtil.newInstance((HierarchicalConfiguration) node);
            providers.add(provider);
            LOG.info("Start URLs provider loaded: " + provider);
        }
        return providers.toArray(new IStartURLsProvider[0]);
    }

    /**
     * Instantiates every document processor found under the given key.
     * @return the loaded processors, possibly empty but never {@code null}
     */
    private IHttpDocumentProcessor[] loadProcessors(XMLConfiguration xMLConfiguration, String xmlPath) {
        List<IHttpDocumentProcessor> processors = new ArrayList<IHttpDocumentProcessor>();
        for (Object node : xMLConfiguration.configurationsAt(xmlPath)) {
            IHttpDocumentProcessor processor = (IHttpDocumentProcessor)
                    XMLConfigurationUtil.newInstance((HierarchicalConfiguration) node);
            processors.add(processor);
            LOG.info("HTTP document processor loaded: " + processor);
        }
        return processors.toArray(new IHttpDocumentProcessor[0]);
    }

    /**
     * Instantiates every link extractor found under the given key, passing
     * a new {@link GenericLinkExtractor} as the default instance for each
     * entry. {@code null} results are skipped.
     * @return the loaded extractors, possibly empty but never {@code null}
     */
    private ILinkExtractor[] loadLinkExtractors(XMLConfiguration xMLConfiguration, String xmlPath) {
        List<ILinkExtractor> extractors = new ArrayList<ILinkExtractor>();
        for (Object node : xMLConfiguration.configurationsAt(xmlPath)) {
            ILinkExtractor extractor = (ILinkExtractor) XMLConfigurationUtil.newInstance(
                    (HierarchicalConfiguration) node, new GenericLinkExtractor());
            if (extractor != null) {
                extractors.add(extractor);
                LOG.info("Link extractor loaded: " + extractor);
            }
        }
        return extractors.toArray(new ILinkExtractor[0]);
    }

    //--- Object contract ------------------------------------------------------

    /**
     * Compares this configuration with another one, field by field, after
     * delegating to the superclass comparison.
     * @param obj object to compare with
     * @return {@code true} if {@code obj} is an equal {@code HttpCrawlerConfig}
     */
    public boolean equals(Object obj) {
        if (!(obj instanceof HttpCrawlerConfig)) {
            return false;
        }
        HttpCrawlerConfig other = (HttpCrawlerConfig) obj;
        return new EqualsBuilder()
                .appendSuper(super.equals(other))
                .append(this.maxDepth, other.maxDepth)
                .append(this.startURLs, other.startURLs)
                .append(this.startURLsFiles, other.startURLsFiles)
                .append(this.startSitemapURLs, other.startSitemapURLs)
                .append(this.startURLsProviders, other.startURLsProviders)
                .append(this.ignoreRobotsTxt, other.ignoreRobotsTxt)
                .append(this.ignoreRobotsMeta, other.ignoreRobotsMeta)
                .append(this.ignoreSitemap, other.ignoreSitemap)
                .append(this.keepDownloads, other.keepDownloads)
                .append(this.keepOutOfScopeLinks, other.keepOutOfScopeLinks)
                .append(this.linkExtractorQuitAtDepth, other.linkExtractorQuitAtDepth)
                .append(this.ignoreCanonicalLinks, other.ignoreCanonicalLinks)
                .append(this.skipMetaFetcherOnBadStatus, other.skipMetaFetcherOnBadStatus)
                .append(this.userAgent, other.userAgent)
                .append(this.urlCrawlScopeStrategy, other.urlCrawlScopeStrategy)
                .append(this.urlNormalizer, other.urlNormalizer)
                .append(this.delayResolver, other.delayResolver)
                .append(this.httpClientFactory, other.httpClientFactory)
                .append(this.documentFetcher, other.documentFetcher)
                .append(this.canonicalLinkDetector, other.canonicalLinkDetector)
                .append(this.redirectURLProvider, other.redirectURLProvider)
                .append(this.recrawlableResolver, other.recrawlableResolver)
                .append(this.metadataFetcher, other.metadataFetcher)
                .append(this.linkExtractors, other.linkExtractors)
                .append(this.robotsTxtProvider, other.robotsTxtProvider)
                .append(this.robotsMetaProvider, other.robotsMetaProvider)
                .append(this.sitemapResolverFactory, other.sitemapResolverFactory)
                .append(this.metadataChecksummer, other.metadataChecksummer)
                .append(this.preImportProcessors, other.preImportProcessors)
                .append(this.postImportProcessors, other.postImportProcessors)
                .isEquals();
    }

    /**
     * Computes a hash code from the superclass hash and the same fields
     * used by {@link #equals(Object)}.
     * @return hash code
     */
    public int hashCode() {
        return new HashCodeBuilder()
                .appendSuper(super.hashCode())
                .append(this.maxDepth)
                .append(this.startURLs)
                .append(this.startURLsFiles)
                .append(this.startSitemapURLs)
                .append(this.startURLsProviders)
                .append(this.ignoreRobotsTxt)
                .append(this.ignoreRobotsMeta)
                .append(this.ignoreSitemap)
                .append(this.keepDownloads)
                .append(this.keepOutOfScopeLinks)
                .append(this.linkExtractorQuitAtDepth)
                .append(this.ignoreCanonicalLinks)
                .append(this.skipMetaFetcherOnBadStatus)
                .append(this.userAgent)
                .append(this.urlCrawlScopeStrategy)
                .append(this.urlNormalizer)
                .append(this.delayResolver)
                .append(this.httpClientFactory)
                .append(this.documentFetcher)
                .append(this.canonicalLinkDetector)
                .append(this.redirectURLProvider)
                .append(this.recrawlableResolver)
                .append(this.metadataFetcher)
                .append(this.linkExtractors)
                .append(this.robotsTxtProvider)
                .append(this.robotsMetaProvider)
                .append(this.sitemapResolverFactory)
                .append(this.metadataChecksummer)
                .append(this.preImportProcessors)
                .append(this.postImportProcessors)
                .toHashCode();
    }

    /**
     * Builds a short-prefix string representation listing the superclass
     * representation followed by every field of this class.
     * @return string representation
     */
    public String toString() {
        return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
                .appendSuper(super.toString())
                .append("maxDepth", this.maxDepth)
                .append("startURLs", this.startURLs)
                .append("startURLsFiles", this.startURLsFiles)
                .append("startSitemapURLs", this.startSitemapURLs)
                .append("startURLsProviders", this.startURLsProviders)
                .append("ignoreRobotsTxt", this.ignoreRobotsTxt)
                .append("ignoreRobotsMeta", this.ignoreRobotsMeta)
                .append("ignoreSitemap", this.ignoreSitemap)
                .append("keepDownloads", this.keepDownloads)
                .append("keepOutOfScopeLinks", this.keepOutOfScopeLinks)
                .append("linkExtractorQuitAtDepth", this.linkExtractorQuitAtDepth)
                .append("ignoreCanonicalLinks", this.ignoreCanonicalLinks)
                .append("skipMetaFetcherOnBadStatus", this.skipMetaFetcherOnBadStatus)
                .append("userAgent", this.userAgent)
                .append("urlCrawlScopeStrategy", this.urlCrawlScopeStrategy)
                .append("urlNormalizer", this.urlNormalizer)
                .append("delayResolver", this.delayResolver)
                .append("httpClientFactory", this.httpClientFactory)
                .append("documentFetcher", this.documentFetcher)
                .append("canonicalLinkDetector", this.canonicalLinkDetector)
                .append("redirectURLProvider", this.redirectURLProvider)
                .append("recrawlableResolver", this.recrawlableResolver)
                .append("metadataFetcher", this.metadataFetcher)
                .append("linkExtractors", this.linkExtractors)
                .append("robotsTxtProvider", this.robotsTxtProvider)
                .append("robotsMetaProvider", this.robotsMetaProvider)
                .append("sitemapResolverFactory", this.sitemapResolverFactory)
                .append("metadataChecksummer", this.metadataChecksummer)
                .append("preImportProcessors", this.preImportProcessors)
                .append("postImportProcessors", this.postImportProcessors)
                .toString();
    }
}
