package com.norconex.collector.http.pipeline.importer;

import com.norconex.collector.core.CollectorException;
import com.norconex.collector.core.data.CrawlState;
import com.norconex.collector.core.pipeline.importer.DocumentFiltersStage;
import com.norconex.collector.core.pipeline.importer.ImportModuleStage;
import com.norconex.collector.core.pipeline.importer.ImporterPipelineContext;
import com.norconex.collector.core.pipeline.importer.ImporterPipelineUtil;
import com.norconex.collector.core.pipeline.importer.SaveDocumentStage;
import com.norconex.collector.http.client.impl.GenericHttpClientFactory;
import com.norconex.collector.http.crawler.HttpCrawlerEvent;
import com.norconex.collector.http.delay.IDelayResolver;
import com.norconex.collector.http.processor.IHttpDocumentProcessor;
import com.norconex.commons.lang.pipeline.Pipeline;
import java.io.IOException;
import java.io.Reader;
import org.apache.commons.collections4.queue.CircularFifoQueue;

/* loaded from: input_file:com/norconex/collector/http/pipeline/importer/HttpImporterPipeline.class */
public class HttpImporterPipeline extends Pipeline<ImporterPipelineContext> {
    /**
     * Fixed-capacity FIFO queue of strings, shared statically across the JVM.
     * It is not read or written anywhere in this class.
     *
     * <p>NOTE(review): the capacity is taken from
     * {@code GenericHttpClientFactory.DEFAULT_MAX_IDLE_TIME}, an idle-time
     * constant unrelated to queue sizing. This looks like a decompiler
     * constant-substitution artifact (same numeric value, wrong symbol) --
     * confirm the intended capacity against the original sources.</p>
     *
     * <p>NOTE(review): presumably holds references of already-validated
     * redirect targets, judging by the name only -- verify against the code
     * that populates it.</p>
     */
    public static final CircularFifoQueue<String> GOOD_REDIRECTS = new CircularFifoQueue<>(GenericHttpClientFactory.DEFAULT_MAX_IDLE_TIME);

    /* loaded from: input_file:com/norconex/collector/http/pipeline/importer/HttpImporterPipeline$DelayResolverStage.class */
    /**
     * Stage that invokes the configured {@link IDelayResolver}, if any,
     * for the reference currently being processed. Never stops the pipeline.
     */
    private static class DelayResolverStage extends AbstractImporterStage {
        private DelayResolverStage() {
        }

        /**
         * Calls {@code delay(...)} on the configured delay resolver. When
         * robots.txt is ignored by configuration, {@code null} is passed in
         * place of the robots.txt instance; otherwise the instance obtained
         * from the configured robots.txt provider is passed.
         *
         * @param httpImporterPipelineContext the pipeline context
         * @return always {@code true}
         */
        @Override
        public boolean executeStage(HttpImporterPipelineContext httpImporterPipelineContext) {
            final IDelayResolver resolver =
                    httpImporterPipelineContext.m29getConfig().getDelayResolver();
            if (resolver != null) {
                if (httpImporterPipelineContext.m29getConfig().isIgnoreRobotsTxt()) {
                    // No robots.txt to consult: delay on the reference alone.
                    resolver.delay(
                            null,
                            httpImporterPipelineContext.m28getCrawlData().getReference());
                } else {
                    resolver.delay(
                            httpImporterPipelineContext.m29getConfig().getRobotsTxtProvider().getRobotsTxt(
                                    httpImporterPipelineContext.getHttpClient(),
                                    httpImporterPipelineContext.m28getCrawlData().getReference(),
                                    httpImporterPipelineContext.m29getConfig().getUserAgent()),
                            httpImporterPipelineContext.m28getCrawlData().getReference());
                }
            }
            return true;
        }
    }

    /* loaded from: input_file:com/norconex/collector/http/pipeline/importer/HttpImporterPipeline$DocumentCanonicalStage.class */
    /**
     * Stage that delegates canonical resolution to
     * {@code HttpImporterPipelineUtil.resolveCanonical}, passing
     * {@code false} as the second argument.
     */
    private static class DocumentCanonicalStage extends AbstractImporterStage {
        private DocumentCanonicalStage() {
        }

        /**
         * @param httpImporterPipelineContext the pipeline context
         * @return whatever {@code resolveCanonical} returns
         */
        @Override
        public boolean executeStage(HttpImporterPipelineContext httpImporterPipelineContext) {
            // The metadata-based canonical stages pass true here; this one passes false.
            final boolean secondArgument = false;
            final boolean keepGoing = HttpImporterPipelineUtil.resolveCanonical(
                    httpImporterPipelineContext, secondArgument);
            return keepGoing;
        }
    }

    /* loaded from: input_file:com/norconex/collector/http/pipeline/importer/HttpImporterPipeline$DocumentPreProcessingStage.class */
    /**
     * Stage that runs every configured pre-import
     * {@link IHttpDocumentProcessor} against the current document and fires
     * a crawler event after each one. Never stops the pipeline.
     */
    private static class DocumentPreProcessingStage extends AbstractImporterStage {
        private DocumentPreProcessingStage() {
        }

        /**
         * @param httpImporterPipelineContext the pipeline context
         * @return always {@code true}
         */
        @Override
        public boolean executeStage(HttpImporterPipelineContext httpImporterPipelineContext) {
            if (httpImporterPipelineContext.m29getConfig().getPreImportProcessors() != null) {
                for (IHttpDocumentProcessor preProcessor
                        : httpImporterPipelineContext.m29getConfig().getPreImportProcessors()) {
                    preProcessor.processDocument(
                            httpImporterPipelineContext.getHttpClient(),
                            httpImporterPipelineContext.m27getDocument());
                    // One event per processor, with the processor as the event subject.
                    httpImporterPipelineContext.m30getCrawler().fireCrawlerEvent(
                            "DOCUMENT_PREIMPORTED",
                            httpImporterPipelineContext.m28getCrawlData(),
                            preProcessor);
                }
            }
            return true;
        }
    }

    /* loaded from: input_file:com/norconex/collector/http/pipeline/importer/HttpImporterPipeline$MetadataCanonicalGETStage.class */
    /**
     * Stage that unconditionally delegates canonical resolution to
     * {@code HttpImporterPipelineUtil.resolveCanonical}, passing
     * {@code true} as the second argument.
     */
    private static class MetadataCanonicalGETStage extends AbstractImporterStage {
        private MetadataCanonicalGETStage() {
        }

        /**
         * @param httpImporterPipelineContext the pipeline context
         * @return whatever {@code resolveCanonical} returns
         */
        @Override
        public boolean executeStage(HttpImporterPipelineContext httpImporterPipelineContext) {
            final boolean secondArgument = true;
            final boolean keepGoing = HttpImporterPipelineUtil.resolveCanonical(
                    httpImporterPipelineContext, secondArgument);
            return keepGoing;
        }
    }

    /* loaded from: input_file:com/norconex/collector/http/pipeline/importer/HttpImporterPipeline$MetadataCanonicalHEADStage.class */
    /**
     * Stage that delegates canonical resolution to
     * {@code HttpImporterPipelineUtil.resolveCanonical} (second argument
     * {@code true}), but only when the context reports a successful HTTP HEAD.
     */
    private static class MetadataCanonicalHEADStage extends AbstractImporterStage {
        private MetadataCanonicalHEADStage() {
        }

        /**
         * @param httpImporterPipelineContext the pipeline context
         * @return {@code true} when the HTTP HEAD was not successful;
         *         otherwise whatever {@code resolveCanonical} returns
         */
        @Override
        public boolean executeStage(HttpImporterPipelineContext httpImporterPipelineContext) {
            // Short-circuit: resolveCanonical is only invoked after a successful HEAD.
            return !httpImporterPipelineContext.isHttpHeadSuccessful()
                    || HttpImporterPipelineUtil.resolveCanonical(httpImporterPipelineContext, true);
        }
    }

    /* loaded from: input_file:com/norconex/collector/http/pipeline/importer/HttpImporterPipeline$MetadataFiltersGETStage.class */
    /**
     * Stage that applies the header rejection check when the context does
     * NOT report a successful HTTP HEAD. The HEAD-successful case is handled
     * by {@code MetadataFiltersHEADStage}.
     */
    private static class MetadataFiltersGETStage extends AbstractImporterStage {
        private MetadataFiltersGETStage() {
        }

        /**
         * @param httpImporterPipelineContext the pipeline context
         * @return {@code false} (after marking the crawl data
         *         {@code REJECTED}) when the HEAD was not successful and the
         *         headers are rejected; {@code true} otherwise
         */
        @Override
        public boolean executeStage(HttpImporterPipelineContext httpImporterPipelineContext) {
            // The rejection check is only evaluated when no successful HEAD occurred.
            if (!httpImporterPipelineContext.isHttpHeadSuccessful()
                    && ImporterPipelineUtil.isHeadersRejected(httpImporterPipelineContext)) {
                httpImporterPipelineContext.m28getCrawlData().setState(CrawlState.REJECTED);
                return false;
            }
            return true;
        }
    }

    /* loaded from: input_file:com/norconex/collector/http/pipeline/importer/HttpImporterPipeline$MetadataFiltersHEADStage.class */
    /**
     * Stage that applies the header rejection check when the context
     * reports a successful HTTP HEAD. The opposite case is handled by
     * {@code MetadataFiltersGETStage}.
     */
    private static class MetadataFiltersHEADStage extends AbstractImporterStage {
        private MetadataFiltersHEADStage() {
        }

        /**
         * @param httpImporterPipelineContext the pipeline context
         * @return {@code false} (after marking the crawl data
         *         {@code REJECTED}) when the HEAD was successful and the
         *         headers are rejected; {@code true} otherwise
         */
        @Override
        public boolean executeStage(HttpImporterPipelineContext httpImporterPipelineContext) {
            // The rejection check is only evaluated after a successful HEAD.
            if (httpImporterPipelineContext.isHttpHeadSuccessful()
                    && ImporterPipelineUtil.isHeadersRejected(httpImporterPipelineContext)) {
                httpImporterPipelineContext.m28getCrawlData().setState(CrawlState.REJECTED);
                return false;
            }
            return true;
        }
    }

    /* loaded from: input_file:com/norconex/collector/http/pipeline/importer/HttpImporterPipeline$RobotsMetaCreateStage.class */
    /**
     * Stage that builds the robots meta information for the current document
     * from its content and metadata, stores it on the context, and fires a
     * {@code CREATED_ROBOTS_META} crawler event. Skipped entirely when robots
     * meta is ignored by configuration. Never stops the pipeline.
     */
    private static class RobotsMetaCreateStage extends AbstractImporterStage {
        private RobotsMetaCreateStage() {
        }

        /**
         * @param httpImporterPipelineContext the pipeline context
         * @return always {@code true}
         * @throws CollectorException if reading the content or closing the
         *         reader fails with an {@link IOException}
         */
        @Override
        public boolean executeStage(HttpImporterPipelineContext httpImporterPipelineContext) {
            if (httpImporterPipelineContext.m29getConfig().isIgnoreRobotsMeta()) {
                return true;
            }
            // try-with-resources guarantees the content reader is closed even
            // when the robots meta provider throws; previously the reader was
            // only closed on the success path and leaked on failure.
            try (Reader contentReader = httpImporterPipelineContext.getContentReader()) {
                httpImporterPipelineContext.setRobotsMeta(
                        httpImporterPipelineContext.m29getConfig().getRobotsMetaProvider().getRobotsMeta(
                                contentReader,
                                httpImporterPipelineContext.m28getCrawlData().getReference(),
                                httpImporterPipelineContext.m27getDocument().getContentType(),
                                httpImporterPipelineContext.getMetadata()));
            } catch (IOException e) {
                throw new CollectorException("Cannot create RobotsMeta for : " + httpImporterPipelineContext.m28getCrawlData().getReference(), e);
            }
            // Fired only after the reader has been closed successfully,
            // matching the original ordering (close, then event).
            httpImporterPipelineContext.m30getCrawler().fireCrawlerEvent(
                    HttpCrawlerEvent.CREATED_ROBOTS_META,
                    httpImporterPipelineContext.m28getCrawlData(),
                    httpImporterPipelineContext.getRobotsMeta());
            return true;
        }
    }

    /* loaded from: input_file:com/norconex/collector/http/pipeline/importer/HttpImporterPipeline$RobotsMetaNoIndexStage.class */
    /**
     * Stage that rejects the current document when its robots meta
     * information carries a "noindex" directive, unless robots meta is
     * ignored by configuration or no robots meta is set on the context.
     */
    private static class RobotsMetaNoIndexStage extends AbstractImporterStage {
        private RobotsMetaNoIndexStage() {
        }

        /**
         * @param httpImporterPipelineContext the pipeline context
         * @return {@code true} to continue; {@code false} after firing a
         *         {@code REJECTED_ROBOTS_META_NOINDEX} event and marking the
         *         crawl data {@code REJECTED}
         */
        @Override
        public boolean executeStage(HttpImporterPipelineContext httpImporterPipelineContext) {
            if (httpImporterPipelineContext.m29getConfig().isIgnoreRobotsMeta()) {
                return true;
            }
            if (httpImporterPipelineContext.getRobotsMeta() == null) {
                return true;
            }
            if (!httpImporterPipelineContext.getRobotsMeta().isNoindex()) {
                return true;
            }
            // "noindex" applies: notify listeners first, then flag the crawl data.
            httpImporterPipelineContext.m30getCrawler().fireCrawlerEvent(
                    HttpCrawlerEvent.REJECTED_ROBOTS_META_NOINDEX,
                    httpImporterPipelineContext.m28getCrawlData(),
                    httpImporterPipelineContext.getRobotsMeta());
            httpImporterPipelineContext.m28getCrawlData().setState(CrawlState.REJECTED);
            return false;
        }
    }

    public HttpImporterPipeline(boolean z, boolean z2, boolean z3) {
        if (!z2) {
            addStage(new RecrawlableResolverStage());
        }
        addStage(new DelayResolverStage());
        addStage(new MetadataFetcherStage());
        addStage(new MetadataFiltersHEADStage());
        addStage(new MetadataCanonicalHEADStage());
        addStage(new MetadataChecksumStage(true));
        addStage(new DocumentFetcherStage());
        if (z) {
            addStage(new SaveDocumentStage());
        }
        addStage(new MetadataCanonicalGETStage());
        addStage(new DocumentCanonicalStage());
        addStage(new RobotsMetaCreateStage());
        addStage(new LinkExtractorStage(z3));
        addStage(new RobotsMetaNoIndexStage());
        addStage(new MetadataFiltersGETStage());
        addStage(new MetadataChecksumStage(false));
        addStage(new DocumentFiltersStage());
        addStage(new DocumentPreProcessingStage());
        addStage(new ImportModuleStage());
    }
}
