package com.norconex.collector.http.robot.impl;

import com.norconex.collector.http.robot.IRobotsMetaProvider;
import com.norconex.collector.http.robot.RobotsMeta;
import com.norconex.commons.lang.config.IXMLConfigurable;
import com.norconex.commons.lang.config.XMLConfigurationUtil;
import com.norconex.commons.lang.file.ContentType;
import com.norconex.commons.lang.io.TextReader;
import com.norconex.commons.lang.map.Properties;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

/* loaded from: input_file:com/norconex/collector/http/robot/impl/StandardRobotsMetaProvider.class */
/**
 * Robots meta provider that looks for robots directives in two places:
 * <ol>
 *   <li>an HTML {@code <meta name="robots" content="...">} tag, searched only
 *       when the content type equals {@link ContentType#HTML};</li>
 *   <li>the {@code X-Robots-Tag} entry of the supplied properties, optionally
 *       prefixed with {@link #getHeadersPrefix()}, used when no meta tag was
 *       found.</li>
 * </ol>
 * Only the {@code noindex} and {@code nofollow} directives are recognized
 * (case-insensitively, comma-separated).
 *
 * <p>XML configuration written by {@link #saveToXML(Writer)}:</p>
 * <pre>
 * &lt;robotsMeta class="(this class name)"&gt;
 *   &lt;headersPrefix&gt;(optional prefix)&lt;/headersPrefix&gt;
 * &lt;/robotsMeta&gt;
 * </pre>
 */
public class StandardRobotsMetaProvider implements IRobotsMetaProvider, IXMLConfigurable {
    private static final Logger LOG = LogManager.getLogger(StandardRobotsMetaProvider.class);

    /** Flags shared by every pattern below (numerically identical to the former literal 34). */
    private static final int PATTERN_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.DOTALL;

    /** Name of the property holding robots directives (before any prefix is applied). */
    private static final String ROBOTS_HEADER = "X-Robots-Tag";

    /** Matches a whole {@code <meta ... name="robots" ...>} tag. */
    private static final Pattern META_ROBOTS_PATTERN = Pattern.compile("<\\s*META[^>]*?NAME\\s*=\\s*[\"']{0,1}\\s*robots\\s*[\"']{0,1}\\s*[^>]*?>", PATTERN_FLAGS);

    /** Captures, as group 1, the value of the {@code content} attribute inside a meta tag. */
    private static final Pattern META_CONTENT_PATTERN = Pattern.compile("\\s*CONTENT\\s*=\\s*[\"']{0,1}([\\s\\w,]+)[\"']{0,1}\\s*[^>]*?>", PATTERN_FLAGS);

    /** Matches the closing {@code </head>} tag. */
    private static final Pattern HEAD_PATTERN = Pattern.compile("<\\s*/\\s*HEAD\\s*>", PATTERN_FLAGS);

    /** Matches an HTML comment, including multi-line ones. */
    private static final Pattern COMMENT_PATTERN = Pattern.compile("<!--.*?-->", PATTERN_FLAGS);

    /** Optional prefix prepended to {@code X-Robots-Tag} when looking it up in the properties. */
    private String headersPrefix;

    /**
     * Resolves the robots directives applying to a document.
     *
     * @param reader       document content; only read when {@code contentType} is HTML
     * @param str          document reference, used in log messages only
     * @param contentType  content type of the document, may be {@code null}
     * @param properties   properties searched for the {@code X-Robots-Tag} entry
     *                     when no meta tag is found; may be {@code null}
     * @return the robots directives found, or {@code null} when none were found
     * @throws IOException if the document content cannot be read
     */
    @Override // com.norconex.collector.http.robot.IRobotsMetaProvider
    public RobotsMeta getRobotsMeta(Reader reader, String str, ContentType contentType, Properties properties) throws IOException {
        RobotsMeta robotsMeta = null;
        if (isMetaSupportingContentType(contentType)) {
            robotsMeta = findInDocument(reader, str);
        }
        if (robotsMeta == null) {
            robotsMeta = findInHeaders(properties, str);
        }
        if (robotsMeta == null && LOG.isDebugEnabled()) {
            LOG.debug("No meta robots found for: " + str);
        }
        return robotsMeta;
    }

    /**
     * Gets the prefix prepended to {@code X-Robots-Tag} when reading the properties.
     *
     * @return the prefix, or {@code null} when none is set
     */
    public String getHeadersPrefix() {
        return this.headersPrefix;
    }

    /**
     * Sets the prefix prepended to {@code X-Robots-Tag} when reading the properties.
     *
     * @param str the prefix; a blank or {@code null} value means no prefix
     */
    public void setHeadersPrefix(String str) {
        this.headersPrefix = str;
    }

    /** Tells whether the content type may carry an HTML robots meta tag. */
    private boolean isMetaSupportingContentType(ContentType contentType) {
        return contentType != null && contentType.equals(ContentType.HTML);
    }

    /**
     * Scans the document text, portion by portion, for a robots meta tag.
     * Scanning stops at the first robots meta tag found, or at the first
     * portion containing the closing head tag. The text reader is always
     * closed, even when reading fails.
     */
    private RobotsMeta findInDocument(Reader reader, String reference) throws IOException {
        TextReader textReader = new TextReader(reader);
        try {
            String text;
            while ((text = textReader.readText()) != null) {
                // Strip comments first so commented-out tags are ignored.
                String withoutComments = COMMENT_PATTERN.matcher(text).replaceAll("");
                String robotsContent = findInContent(withoutComments);
                if (robotsContent != null) {
                    RobotsMeta robotsMeta = buildMeta(robotsContent);
                    if (LOG.isDebugEnabled() && robotsMeta != null) {
                        LOG.debug("Meta robots \"" + robotsContent + "\" found in HTML meta tag for: " + reference);
                    }
                    // First tag wins: do not let later content override it.
                    return robotsMeta;
                }
                if (isEndOfHead(withoutComments)) {
                    // Past the head section: no point reading the rest of the document.
                    return null;
                }
            }
            return null;
        } finally {
            textReader.close();
        }
    }

    /** Builds the robots directives from the (optionally prefixed) {@code X-Robots-Tag} property. */
    private RobotsMeta findInHeaders(Properties properties, String str) {
        if (properties == null) {
            return null;
        }
        String name = StringUtils.isNotBlank(this.headersPrefix) ? this.headersPrefix + ROBOTS_HEADER : ROBOTS_HEADER;
        String value = properties.getString(name);
        RobotsMeta robotsMeta = buildMeta(value);
        if (LOG.isDebugEnabled() && robotsMeta != null) {
            LOG.debug("Meta robots \"" + value + "\" found in HTTP header for: " + str);
        }
        return robotsMeta;
    }

    /**
     * Parses a comma-separated directive list.
     *
     * @return {@code null} for a blank value, otherwise a {@link RobotsMeta}
     *         flagging the presence of {@code nofollow} and {@code noindex}
     */
    private RobotsMeta buildMeta(String str) {
        if (StringUtils.isBlank(str)) {
            return null;
        }
        boolean noindex = false;
        boolean nofollow = false;
        for (String token : StringUtils.split(str, ',')) {
            String directive = token.trim();
            if (directive.equalsIgnoreCase("noindex")) {
                noindex = true;
            }
            if (directive.equalsIgnoreCase("nofollow")) {
                nofollow = true;
            }
        }
        // Argument order kept from the original call: nofollow flag first, then noindex.
        return new RobotsMeta(nofollow, noindex);
    }

    /**
     * Returns the non-blank {@code content} attribute value of the first
     * robots meta tag having one, or {@code null} when there is none.
     */
    private String findInContent(String str) {
        Matcher metaMatcher = META_ROBOTS_PATTERN.matcher(str);
        while (metaMatcher.find()) {
            Matcher contentMatcher = META_CONTENT_PATTERN.matcher(metaMatcher.group());
            if (contentMatcher.find()) {
                String content = contentMatcher.group(1);
                if (StringUtils.isNotBlank(content)) {
                    return content;
                }
            }
        }
        return null;
    }

    /** Tells whether the text contains a closing {@code </head>} tag anywhere. */
    private boolean isEndOfHead(String str) {
        // find(), not matches(): the tag is normally surrounded by other markup.
        return HEAD_PATTERN.matcher(str).find();
    }

    /**
     * Loads the {@code headersPrefix} setting from XML; it is set to
     * {@code null} when the configuration does not define it.
     *
     * @param reader XML configuration source
     * @throws IOException if the configuration cannot be read
     */
    public void loadFromXML(Reader reader) throws IOException {
        setHeadersPrefix(XMLConfigurationUtil.newXMLConfiguration(reader).getString("headersPrefix", (String) null));
    }

    /**
     * Writes this provider as a {@code robotsMeta} XML element carrying a
     * {@code class} attribute and, when a prefix is set, a nested
     * {@code headersPrefix} element.
     *
     * @param writer destination of the XML
     * @throws IOException if the XML cannot be written
     */
    public void saveToXML(Writer writer) throws IOException {
        try {
            XMLStreamWriter xml = XMLOutputFactory.newInstance().createXMLStreamWriter(writer);
            xml.writeStartElement("robotsMeta");
            xml.writeAttribute("class", getClass().getCanonicalName());
            if (this.headersPrefix != null) {
                xml.writeStartElement("headersPrefix");
                xml.writeCharacters(this.headersPrefix);
                xml.writeEndElement();
            }
            xml.writeEndElement();
            xml.flush();
            xml.close();
        } catch (XMLStreamException e) {
            throw new IOException("Cannot save as XML.", e);
        }
    }

    /** Two providers are equal when their headers prefixes are equal. */
    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof StandardRobotsMetaProvider)) {
            return false;
        }
        StandardRobotsMetaProvider other = (StandardRobotsMetaProvider) obj;
        return new EqualsBuilder().append(this.headersPrefix, other.headersPrefix).isEquals();
    }

    @Override
    public int hashCode() {
        return new HashCodeBuilder().append(this.headersPrefix).toHashCode();
    }

    @Override
    public String toString() {
        return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE).append("headersPrefix", this.headersPrefix).toString();
    }
}
