package com.norconex.collector.http.pipeline.queue;

import com.norconex.collector.core.pipeline.BasePipelineContext;
import com.norconex.collector.core.pipeline.queue.QueueReferenceStage;
import com.norconex.collector.core.pipeline.queue.ReferenceFiltersStage;
import com.norconex.collector.http.crawler.HttpCrawlerEvent;
import com.norconex.collector.http.data.HttpCrawlData;
import com.norconex.collector.http.data.HttpCrawlState;
import com.norconex.collector.http.robot.RobotsTxt;
import com.norconex.collector.http.sitemap.SitemapURLAdder;
import com.norconex.commons.lang.pipeline.Pipeline;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

/**
 * Pipeline that assembles, in a fixed order, the stages a candidate URL is
 * put through: depth validation, reference filters, robots.txt filters,
 * URL normalization, sitemap resolution and, last, reference queueing.
 *
 * <p>NOTE(review): this source is decompiler output; the {@code m36…},
 * {@code m37…} and {@code m38…} accessor names are decompiler-generated
 * aliases and must be kept as-is.</p>
 */
public final class HttpQueuePipeline extends Pipeline<BasePipelineContext> {

    private static final Logger LOG = LogManager.getLogger(HttpQueuePipeline.class);

    /**
     * Creates the pipeline and registers its six stages. The registration
     * order below is significant to preserve; presumably it is also the
     * execution order (confirm against {@code Pipeline}).
     */
    public HttpQueuePipeline() {
        addStage(new DepthValidationStage());
        addStage(new ReferenceFiltersStage());
        addStage(new RobotsTxtFiltersStage());
        addStage(new URLNormalizerStage());
        addStage(new SitemapStage());
        addStage(new QueueReferenceStage());
    }

    /**
     * Looks up the robots.txt information applicable to the reference held
     * by the given context.
     *
     * <p>NOTE(review): the decompiler reports that this method was
     * originally package-private.</p>
     *
     * @param httpQueuePipelineContext context supplying the configuration,
     *        HTTP client and crawl data
     * @return {@code null} when the configuration says robots.txt is to be
     *         ignored; otherwise whatever the configured robots.txt
     *         provider returns for the current reference and user agent
     */
    public static RobotsTxt getRobotsTxt(HttpQueuePipelineContext httpQueuePipelineContext) {
        if (!httpQueuePipelineContext.m37getConfig().isIgnoreRobotsTxt()) {
            final String reference = httpQueuePipelineContext.m36getCrawlData().getReference();
            final String userAgent = httpQueuePipelineContext.m37getConfig().getUserAgent();
            return httpQueuePipelineContext.m37getConfig().getRobotsTxtProvider().getRobotsTxt(
                    httpQueuePipelineContext.getHttpClient(), reference, userAgent);
        }
        return null;
    }

    /**
     * Rejects crawl data whose depth exceeds the configured maximum depth.
     * A maximum depth of {@code -1} disables the check.
     */
    private static class DepthValidationStage extends AbstractQueueStage {
        private DepthValidationStage() {
        }

        /**
         * @param ctx the queue pipeline context
         * @return {@code true} when there is no depth limit or the depth is
         *         within it; {@code false} after flagging the crawl data as
         *         {@code TOO_DEEP} and firing a {@code REJECTED_TOO_DEEP}
         *         crawler event
         */
        @Override
        public boolean executeStage(HttpQueuePipelineContext ctx) {
            final int maxDepth = ctx.m37getConfig().getMaxDepth();
            if (maxDepth == -1) {
                // -1 means "no limit".
                return true;
            }

            final HttpCrawlData crawlData = ctx.m36getCrawlData();
            final int depth = crawlData.getDepth();
            if (depth <= maxDepth) {
                return true;
            }

            // Only build the message when it will actually be logged.
            if (LOG.isDebugEnabled()) {
                LOG.debug("URL too deep to process (" + depth + "): " + crawlData.getReference());
            }
            crawlData.setState(HttpCrawlState.TOO_DEEP);
            ctx.m38getCrawler().fireCrawlerEvent(
                    HttpCrawlerEvent.REJECTED_TOO_DEEP, crawlData, Integer.valueOf(depth));
            return false;
        }
    }

    /**
     * Replaces the crawl data reference with its normalized form when a URL
     * normalizer is configured.
     */
    private static class URLNormalizerStage extends AbstractQueueStage {
        private URLNormalizerStage() {
        }

        /**
         * @param ctx the queue pipeline context
         * @return {@code false} when the normalizer returns {@code null}
         *         (the crawl data is then flagged {@code REJECTED});
         *         {@code true} otherwise, including when no normalizer is
         *         configured
         */
        @Override
        public boolean executeStage(HttpQueuePipelineContext ctx) {
            if (ctx.m37getConfig().getUrlNormalizer() != null) {
                final HttpCrawlData crawlData = ctx.m36getCrawlData();
                final String normalized = ctx.m37getConfig().getUrlNormalizer()
                        .normalizeURL(crawlData.getReference());
                if (normalized == null) {
                    crawlData.setState(HttpCrawlState.REJECTED);
                    return false;
                }
                crawlData.setReference(normalized);
            }
            return true;
        }
    }

    /**
     * Asks the sitemap resolver to resolve the sitemaps for the URL root of
     * the current crawl data, unless sitemaps are ignored by configuration
     * or no resolver is available. This stage never rejects.
     */
    private static class SitemapStage extends AbstractQueueStage {
        private SitemapStage() {
        }

        /**
         * @param ctx the queue pipeline context
         * @return always {@code true}
         */
        @Override
        public boolean executeStage(final HttpQueuePipelineContext ctx) {
            final boolean sitemapEnabled =
                    !ctx.m37getConfig().isIgnoreSitemap() && ctx.getSitemapResolver() != null;
            if (sitemapEnabled) {
                final String urlRoot = ctx.m36getCrawlData().getUrlRoot();

                // Sitemap locations come from robots.txt when one is
                // available; otherwise null is handed to the resolver.
                final RobotsTxt robotsTxt = HttpQueuePipeline.getRobotsTxt(ctx);
                final String[] robotsSitemaps =
                        (robotsTxt == null) ? null : robotsTxt.getSitemapLocations();

                // Each crawl data handed to this adder is run through a
                // brand new queue pipeline, sharing this context's crawler
                // and crawl data store.
                final SitemapURLAdder urlAdder = new SitemapURLAdder() {
                    @Override
                    public void add(HttpCrawlData sitemapCrawlData) {
                        final HttpQueuePipelineContext sitemapContext = new HttpQueuePipelineContext(
                                ctx.m38getCrawler(), ctx.getCrawlDataStore(), sitemapCrawlData);
                        new HttpQueuePipeline().execute(sitemapContext);
                    }
                };

                ctx.getSitemapResolver().resolveSitemaps(
                        ctx.getHttpClient(), urlRoot, robotsSitemaps, urlAdder, false);
            }
            return true;
        }
    }
}
