package com.norconex.collector.http.fetch.impl;

import com.norconex.collector.core.CollectorException;
import com.norconex.collector.core.data.CrawlState;
import com.norconex.collector.http.data.HttpCrawlState;
import com.norconex.collector.http.doc.HttpDocument;
import com.norconex.collector.http.fetch.HttpFetchResponse;
import com.norconex.collector.http.fetch.IHttpDocumentFetcher;
import com.norconex.commons.lang.config.IXMLConfigurable;
import com.norconex.commons.lang.config.XMLConfigurationUtil;
import com.norconex.commons.lang.file.ContentType;
import com.norconex.commons.lang.url.HttpURL;
import com.norconex.commons.lang.xml.EnhancedXMLStreamWriter;
import com.norconex.importer.doc.ContentTypeDetector;
import com.norconex.importer.util.CharsetUtil;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.Writer;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import javax.xml.stream.XMLStreamException;
import org.apache.commons.configuration.XMLConfiguration;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.NullOutputStream;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.client.AuthCache;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.impl.client.BasicAuthCache;
import org.apache.http.protocol.HttpContext;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

/* loaded from: input_file:com/norconex/collector/http/fetch/impl/GenericDocumentFetcher.class */
/**
 * Document fetcher that issues an HTTP GET for a document reference and
 * stores the response headers and body on the {@link HttpDocument}.
 *
 * <p>Responses are classified by status code: codes listed in
 * {@code validStatusCodes} are accepted, codes listed in
 * {@code notFoundStatusCodes} are reported as not found, and everything else
 * is reported as a bad status. Optionally the content type and character
 * encoding are detected from the downloaded content.</p>
 *
 * <p>NOTE(review): {@code userToken} is read and written by
 * {@link #fetchDocument(HttpClient, HttpDocument)} without synchronization;
 * confirm whether a single instance is shared between crawler threads.</p>
 */
public class GenericDocumentFetcher implements IHttpDocumentFetcher, IXMLConfigurable {
    private static final Logger LOG = LogManager.getLogger(GenericDocumentFetcher.class);

    /** Status codes for which the response is accepted as a fetched document. */
    private int[] validStatusCodes;
    /** Status codes reported as {@link HttpCrawlState#NOT_FOUND}. */
    private int[] notFoundStatusCodes;
    /** Optional prefix prepended to each response header name stored as metadata. */
    private String headersPrefix;
    private boolean detectContentType;
    private boolean detectCharset;
    private final ContentTypeDetector contentTypeDetector;
    /** Authentication cache set on the context of every request made by this instance. */
    private final AuthCache authCache;
    /** User token taken from the context of the last accepted response; set on later request contexts. */
    private Object userToken;

    /**
     * Creates a fetcher using
     * {@link GenericMetadataFetcher#DEFAULT_VALID_STATUS_CODES} as valid codes.
     */
    public GenericDocumentFetcher() {
        this(GenericMetadataFetcher.DEFAULT_VALID_STATUS_CODES);
    }

    /**
     * Creates a fetcher accepting the given status codes. Not-found codes are
     * initialized to
     * {@link GenericMetadataFetcher#DEFAULT_NOT_FOUND_STATUS_CODES}.
     *
     * @param iArr status codes to treat as valid
     */
    public GenericDocumentFetcher(int[] iArr) {
        this.notFoundStatusCodes = GenericMetadataFetcher.DEFAULT_NOT_FOUND_STATUS_CODES;
        this.contentTypeDetector = new ContentTypeDetector();
        this.authCache = new BasicAuthCache();
        setValidStatusCodes(iArr);
    }

    /**
     * @return a copy of the valid status codes
     */
    public int[] getValidStatusCodes() {
        return ArrayUtils.clone(this.validStatusCodes);
    }

    /**
     * @param iArr status codes to treat as valid (copied)
     */
    public final void setValidStatusCodes(int... iArr) {
        this.validStatusCodes = ArrayUtils.clone(iArr);
    }

    /**
     * @return a copy of the "not found" status codes
     */
    public int[] getNotFoundStatusCodes() {
        return ArrayUtils.clone(this.notFoundStatusCodes);
    }

    /**
     * @param iArr status codes to treat as "not found" (copied)
     */
    public final void setNotFoundStatusCodes(int... iArr) {
        this.notFoundStatusCodes = ArrayUtils.clone(iArr);
    }

    /**
     * @return the prefix prepended to stored header names, possibly {@code null}
     */
    public String getHeadersPrefix() {
        return this.headersPrefix;
    }

    /**
     * @param str prefix to prepend to stored header names; blank means none
     */
    public void setHeadersPrefix(String str) {
        this.headersPrefix = str;
    }

    /**
     * @return whether the content type is detected from the fetched content
     */
    public boolean isDetectContentType() {
        return this.detectContentType;
    }

    /**
     * @param z {@code true} to detect the content type from the fetched content
     */
    public void setDetectContentType(boolean z) {
        this.detectContentType = z;
    }

    /**
     * @return whether the character encoding is detected from the fetched content
     */
    public boolean isDetectCharset() {
        return this.detectCharset;
    }

    /**
     * @param z {@code true} to detect the character encoding from the fetched content
     */
    public void setDetectCharset(boolean z) {
        this.detectCharset = z;
    }

    /**
     * Fetches the document with an HTTP GET and classifies the response.
     *
     * <p>For a valid status code, the response headers are added to the
     * document metadata, the body is read fully into the document content,
     * and optional detection is performed. For any other status code the
     * body is consumed and discarded (logged first when TRACE is enabled).
     * The connection is released whether the fetch succeeds or fails.</p>
     *
     * @param httpClient client used to execute the request
     * @param httpDocument document to fetch; receives headers and content
     * @return the crawl state, status code and reason phrase of the response
     * @throws CollectorException wrapping any exception raised while fetching
     */
    @Override // com.norconex.collector.http.fetch.IHttpDocumentFetcher
    public HttpFetchResponse fetchDocument(HttpClient httpClient, HttpDocument httpDocument) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Fetching document: " + httpDocument.getReference());
        }
        HttpRequestBase request = null;
        try {
            request = createUriRequest(httpDocument);
            HttpClientContext context = HttpClientContext.create();
            context.setAuthCache(this.authCache);
            if (this.userToken != null) {
                context.setUserToken(this.userToken);
            }
            HttpResponse response = httpClient.execute(request, context);
            int statusCode = response.getStatusLine().getStatusCode();
            String reasonPhrase = response.getStatusLine().getReasonPhrase();
            // Body-less responses have no entity; represent that as a null stream.
            InputStream content = response.getEntity() == null
                    ? null : response.getEntity().getContent();

            if (ArrayUtils.contains(this.validStatusCodes, statusCode)) {
                copyHeaders(response, httpDocument);
                if (content == null) {
                    content = IOUtils.toInputStream("", StandardCharsets.UTF_8);
                }
                httpDocument.setContent(httpDocument.getContent().newInputStream(content));
                // Read the stream to its end so the whole body is pulled from the connection.
                IOUtils.copy(httpDocument.getContent(), new NullOutputStream());
                this.userToken = context.getUserToken();
                performDetection(httpDocument);
                return new HttpFetchResponse(HttpCrawlState.NEW, statusCode, reasonPhrase);
            }

            consumeRejectedContent(content);
            if (ArrayUtils.contains(this.notFoundStatusCodes, statusCode)) {
                return new HttpFetchResponse(HttpCrawlState.NOT_FOUND, statusCode, reasonPhrase);
            }
            LOG.debug("Unsupported HTTP Response: " + response.getStatusLine());
            return new HttpFetchResponse(CrawlState.BAD_STATUS, statusCode, reasonPhrase);
        } catch (Exception e) {
            // Stack trace only when DEBUG is enabled; the message is always logged at INFO.
            if (LOG.isDebugEnabled()) {
                LOG.info("Cannot fetch document: " + httpDocument.getReference() + " (" + e.getMessage() + ")", e);
            } else {
                LOG.info("Cannot fetch document: " + httpDocument.getReference() + " (" + e.getMessage() + ")");
            }
            throw new CollectorException(e);
        } finally {
            // Release on every path, including failures.
            if (request != null) {
                request.releaseConnection();
            }
        }
    }

    /**
     * Adds each response header to the document metadata, prefixed with
     * {@code headersPrefix} when that is not blank. A header is skipped when
     * the metadata already holds a value under the same name.
     */
    private void copyHeaders(HttpResponse response, HttpDocument httpDocument) {
        for (Header header : response.getAllHeaders()) {
            String name = header.getName();
            if (StringUtils.isNotBlank(this.headersPrefix)) {
                name = this.headersPrefix + name;
            }
            if (httpDocument.m12getMetadata().getString(name) == null) {
                httpDocument.m12getMetadata().addString(name, new String[]{header.getValue()});
            }
        }
    }

    /**
     * Reads the body of a rejected response to its end and closes it. The
     * body is logged at TRACE level when enabled, otherwise discarded.
     *
     * @param content response body, or {@code null} when there is none
     */
    private void consumeRejectedContent(InputStream content) throws IOException {
        if (content == null) {
            return;
        }
        try {
            if (LOG.isTraceEnabled()) {
                LOG.trace("Rejected response content: " + IOUtils.toString(content, StandardCharsets.UTF_8));
            } else {
                IOUtils.copy(content, new NullOutputStream());
            }
        } finally {
            IOUtils.closeQuietly(content);
        }
    }

    /**
     * Runs the enabled detections on the document content and stores results
     * under {@code collector.content-type} and
     * {@code collector.content-encoding}.
     */
    private void performDetection(HttpDocument httpDocument) throws IOException {
        if (this.detectContentType) {
            ContentType detected = this.contentTypeDetector.detect(
                    httpDocument.getContent(), httpDocument.getReference());
            if (detected != null) {
                httpDocument.m12getMetadata().setString("collector.content-type", new String[]{detected.toString()});
            }
        }
        if (this.detectCharset) {
            String charset = CharsetUtil.detectCharset(httpDocument.getContent());
            if (StringUtils.isNotBlank(charset)) {
                httpDocument.m12getMetadata().setString("collector.content-encoding", new String[]{charset});
            }
        }
    }

    /**
     * Creates the GET request for the document reference.
     *
     * @param httpDocument document whose reference is converted to a URI
     * @return a new {@link HttpGet} for that URI
     */
    protected HttpRequestBase createUriRequest(HttpDocument httpDocument) {
        URI uri = HttpURL.toURI(httpDocument.getReference());
        if (LOG.isDebugEnabled()) {
            LOG.debug("Encoded URI: " + uri);
        }
        return new HttpGet(uri);
    }

    /**
     * Loads this fetcher's settings from XML. Settings absent from the XML
     * keep their current value, except {@code headersPrefix}, which is set to
     * whatever the configuration returns for it.
     *
     * @param reader XML source
     * @throws NumberFormatException if a status code is not an integer
     */
    public void loadFromXML(Reader reader) {
        XMLConfiguration xml = XMLConfigurationUtil.newXMLConfiguration(reader);
        setValidStatusCodes(parseStatusCodes(
                xml.getString("validStatusCodes"), this.validStatusCodes));
        setNotFoundStatusCodes(parseStatusCodes(
                xml.getString("notFoundStatusCodes"), this.notFoundStatusCodes));
        setHeadersPrefix(xml.getString("headersPrefix"));
        setDetectContentType(xml.getBoolean("[@detectContentType]", isDetectContentType()));
        setDetectCharset(xml.getBoolean("[@detectCharset]", isDetectCharset()));
    }

    /**
     * Parses a comma-separated list of status codes, ignoring whitespace
     * around each code.
     *
     * @param csv comma-separated codes; may be blank
     * @param defaults value returned when {@code csv} is blank
     * @return the parsed codes, or {@code defaults}
     */
    private static int[] parseStatusCodes(String csv, int[] defaults) {
        if (StringUtils.isBlank(csv)) {
            return defaults;
        }
        String[] tokens = csv.split(",");
        int[] codes = new int[tokens.length];
        for (int i = 0; i < tokens.length; i++) {
            codes[i] = Integer.parseInt(tokens[i].trim());
        }
        return codes;
    }

    /**
     * Writes this fetcher's settings as a {@code documentFetcher} XML element.
     *
     * @param writer destination
     * @throws IOException if the XML cannot be written
     */
    public void saveToXML(Writer writer) throws IOException {
        try {
            EnhancedXMLStreamWriter xml = new EnhancedXMLStreamWriter(writer);
            xml.writeStartElement("documentFetcher");
            xml.writeAttribute("class", getClass().getCanonicalName());
            xml.writeAttributeBoolean("detectContentType", Boolean.valueOf(isDetectContentType()));
            xml.writeAttributeBoolean("detectCharset", Boolean.valueOf(isDetectCharset()));
            xml.writeElementString("validStatusCodes", StringUtils.join(this.validStatusCodes, ','));
            xml.writeElementString("notFoundStatusCodes", StringUtils.join(this.notFoundStatusCodes, ','));
            xml.writeElementString("headersPrefix", this.headersPrefix);
            xml.writeEndElement();
            xml.flush();
        } catch (XMLStreamException e) {
            throw new IOException("Cannot save as XML.", e);
        }
    }

    /**
     * Compares the configurable settings only; the auth cache, content type
     * detector and user token do not take part.
     */
    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof GenericDocumentFetcher)) {
            return false;
        }
        GenericDocumentFetcher other = (GenericDocumentFetcher) obj;
        return new EqualsBuilder()
                .append(this.validStatusCodes, other.validStatusCodes)
                .append(this.notFoundStatusCodes, other.notFoundStatusCodes)
                .append(this.headersPrefix, other.headersPrefix)
                .append(this.detectContentType, other.detectContentType)
                .append(this.detectCharset, other.detectCharset)
                .isEquals();
    }

    @Override
    public int hashCode() {
        return new HashCodeBuilder()
                .append(this.validStatusCodes)
                .append(this.notFoundStatusCodes)
                .append(this.headersPrefix)
                .append(this.detectContentType)
                .append(this.detectCharset)
                .toHashCode();
    }

    @Override
    public String toString() {
        return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
                .append("validStatusCodes", this.validStatusCodes)
                .append("notFoundStatusCodes", this.notFoundStatusCodes)
                .append("headersPrefix", this.headersPrefix)
                .append("detectContentType", this.detectContentType)
                .append("detectCharset", this.detectCharset)
                .toString();
    }
}
