package com.norconex.collector.http.pipeline.importer;

import com.norconex.collector.core.data.CrawlState;
import com.norconex.collector.http.data.HttpCrawlData;
import com.norconex.collector.http.data.HttpCrawlState;
import com.norconex.collector.http.fetch.HttpFetchResponse;
import com.norconex.collector.http.redirect.RedirectStrategyWrapper;
import com.norconex.importer.doc.ContentTypeDetector;
import com.norconex.importer.util.CharsetUtil;
import java.io.IOException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Date;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

/* loaded from: input_file:com/norconex/collector/http/pipeline/importer/DocumentFetcherStage.class */
/**
 * Importer pipeline stage that fetches a document through the configured
 * document fetcher, records the resulting content type and crawl state on
 * the crawl data, and fires the matching crawler event.
 */
class DocumentFetcherStage extends AbstractImporterStage {
    private static final Logger LOG = LogManager.getLogger(DocumentFetcherStage.class);

    /**
     * Fetches the document held by the pipeline context and updates its
     * crawl data accordingly.
     *
     * @param httpImporterPipelineContext pipeline context giving access to
     *        the crawl data, configuration, HTTP client and document
     * @return {@code true} only when no redirect URL was reported and the
     *         fetch response carries a good crawl state; {@code false} when
     *         a redirect was queued or the state is not good
     */
    @Override
    public boolean executeStage(HttpImporterPipelineContext httpImporterPipelineContext) {
        HttpCrawlData crawlData = httpImporterPipelineContext.m28getCrawlData();
        HttpFetchResponse response = httpImporterPipelineContext.m29getConfig().getDocumentFetcher()
                .fetchDocument(
                        httpImporterPipelineContext.getHttpClient(),
                        httpImporterPipelineContext.m27getDocument());
        crawlData.setCrawlDate(new Date());

        try {
            HttpImporterPipelineUtil.enhanceHTTPHeaders(
                    httpImporterPipelineContext.m27getDocument().m12getMetadata());
        } catch (UnsupportedCharsetException e) {
            LOG.warn("Unsupported character encoding \"" + e.getCharsetName()
                    + "\" defined in \"Content-Type\" HTTP response header. Detection will be attempted instead for \""
                    + httpImporterPipelineContext.m27getDocument().getReference() + "\".");
            detectContentTypeAndCharset(httpImporterPipelineContext);
        }

        HttpImporterPipelineUtil.applyMetadataToDocument(httpImporterPipelineContext.m27getDocument());
        crawlData.setContentType(httpImporterPipelineContext.m27getDocument().getContentType());

        // A non-blank redirect URL short-circuits the stage: the target is
        // queued and neither the crawl state nor any event is set here.
        String redirectURL = RedirectStrategyWrapper.getRedirectURL();
        if (StringUtils.isNotBlank(redirectURL)) {
            HttpImporterPipelineUtil.queueRedirectURL(httpImporterPipelineContext, response, redirectURL);
            return false;
        }

        CrawlState state = response.getCrawlState();
        crawlData.setState(state);
        return fireFetchEvent(httpImporterPipelineContext, crawlData, response, state);
    }

    /**
     * Detects the content type and character encoding from the document
     * content and stores them under the {@code collector.content-type} and
     * {@code collector.content-encoding} metadata keys. Nothing is stored
     * if either detection fails with an {@link IOException}; the failure is
     * logged as a warning together with its cause.
     */
    private static void detectContentTypeAndCharset(HttpImporterPipelineContext httpImporterPipelineContext) {
        try {
            String contentType = new ContentTypeDetector()
                    .detect(httpImporterPipelineContext.getContent()).toString();
            // The cast selects the String overload for the null second argument.
            String charset = CharsetUtil.detectCharset(
                    httpImporterPipelineContext.getContent(), (String) null);
            httpImporterPipelineContext.getMetadata().setString(
                    "collector.content-type", new String[]{contentType});
            httpImporterPipelineContext.getMetadata().setString(
                    "collector.content-encoding", new String[]{charset});
            LOG.info("Detected '" + contentType + "' and '" + charset
                    + "' as the content type and character encoding for \""
                    + httpImporterPipelineContext.m27getDocument().getReference() + "\".");
        } catch (IOException e) {
            // Best effort only: keep going, but log the cause so the
            // failure can be diagnosed.
            LOG.warn("Could not detect content type and character encoding from content for \""
                    + httpImporterPipelineContext.m27getDocument().getReference() + "\".", e);
        }
    }

    /**
     * Fires the crawler event matching the given crawl state:
     * {@code DOCUMENT_FETCHED} for a good state, {@code REJECTED_NOTFOUND}
     * for {@link HttpCrawlState#NOT_FOUND}, and {@code REJECTED_BAD_STATUS}
     * otherwise.
     *
     * @return {@code true} if the state is a good state
     */
    private static boolean fireFetchEvent(
            HttpImporterPipelineContext httpImporterPipelineContext,
            HttpCrawlData crawlData,
            HttpFetchResponse response,
            CrawlState state) {
        if (state.isGoodState()) {
            httpImporterPipelineContext.fireCrawlerEvent("DOCUMENT_FETCHED", crawlData, response);
            return true;
        }
        String eventType;
        if (state.isOneOf(new CrawlState[]{HttpCrawlState.NOT_FOUND})) {
            eventType = "REJECTED_NOTFOUND";
        } else {
            eventType = "REJECTED_BAD_STATUS";
        }
        httpImporterPipelineContext.fireCrawlerEvent(eventType, crawlData, response);
        return false;
    }
}
