package com.norconex.collector.http.recrawl.impl;

import com.norconex.collector.http.recrawl.IRecrawlableResolver;
import com.norconex.collector.http.recrawl.PreviousCrawlData;
import com.norconex.collector.http.sitemap.SitemapChangeFrequency;
import com.norconex.commons.lang.config.IXMLConfigurable;
import com.norconex.commons.lang.config.XMLConfigurationUtil;
import com.norconex.commons.lang.time.DurationParser;
import com.norconex.commons.lang.time.DurationUtil;
import com.norconex.commons.lang.xml.EnhancedXMLStreamWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import javax.xml.stream.XMLStreamException;
import org.apache.commons.configuration.HierarchicalConfiguration;
import org.apache.commons.configuration.XMLConfiguration;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.joda.time.DateTime;
import org.joda.time.Duration;

/* loaded from: input_file:com/norconex/collector/http/recrawl/impl/GenericRecrawlableResolver.class */
public class GenericRecrawlableResolver implements IRecrawlableResolver, IXMLConfigurable {
    private static final Logger LOG = LogManager.getLogger(GenericRecrawlableResolver.class);
    private SitemapSupport sitemapSupport = SitemapSupport.FIRST;
    private final List<MinFrequency> minFrequencies = new ArrayList();

    /* loaded from: input_file:com/norconex/collector/http/recrawl/impl/GenericRecrawlableResolver$MinFrequency.class */
    /**
     * A minimum re-crawl frequency rule: a regular expression plus the
     * frequency to apply to documents matching it.
     * <p>
     * The enclosing resolver uses {@code applyTo} to select which document
     * attribute the pattern is tested against ({@code "reference"} or
     * {@code "contentType"}, a blank value being treated as
     * {@code "reference"}), and {@code value} as the frequency itself
     * (a sitemap change frequency name, a number of milliseconds, or a
     * textual duration).
     * </p>
     * <p>
     * The compiled pattern is cached lazily. Every method that reads or
     * invalidates that cache is synchronized on this instance so a stale
     * compiled pattern cannot survive a change of pattern or case sensitivity.
     * </p>
     */
    public static class MinFrequency {
        private String applyTo;
        private String value;
        private String pattern;
        /** Lazily compiled form of {@link #pattern}; {@code null} until first needed. */
        private Pattern cachedPattern;
        private boolean caseSensitive;

        /** Creates an empty rule; properties are expected to be set afterwards. */
        public MinFrequency() {
        }

        /**
         * Creates a rule.
         * @param str the attribute the pattern applies to
         * @param str2 the minimum frequency value
         * @param str3 the regular expression to match
         */
        public MinFrequency(String str, String str2, String str3) {
            this.applyTo = str;
            this.value = str2;
            setPattern(str3);
        }

        /**
         * Gets the attribute the pattern applies to.
         * @return the attribute name, possibly {@code null}
         */
        public String getApplyTo() {
            return this.applyTo;
        }

        /**
         * Sets the attribute the pattern applies to.
         * @param str the attribute name
         */
        public void setApplyTo(String str) {
            this.applyTo = str;
        }

        /**
         * Gets the minimum frequency value.
         * @return the raw frequency value, possibly {@code null}
         */
        public String getValue() {
            return this.value;
        }

        /**
         * Sets the minimum frequency value.
         * @param str the raw frequency value
         */
        public void setValue(String str) {
            this.value = str;
        }

        /**
         * Gets the regular expression source.
         * @return the regular expression, possibly {@code null}
         */
        public String getPattern() {
            return this.pattern;
        }

        /**
         * Sets the regular expression and discards any previously compiled
         * pattern.
         * @param str the regular expression
         */
        public synchronized void setPattern(String str) {
            this.pattern = str;
            this.cachedPattern = null;
        }

        /**
         * Gets whether matching is case sensitive.
         * @return {@code true} if case sensitive
         */
        public boolean isCaseSensitive() {
            return this.caseSensitive;
        }

        /**
         * Sets whether matching is case sensitive and discards any previously
         * compiled pattern, since the compile flags depend on this setting.
         * @param z {@code true} for case sensitive matching
         */
        public synchronized void setCaseSensitive(boolean z) {
            this.caseSensitive = z;
            this.cachedPattern = null;
        }

        /**
         * Gets the compiled pattern, compiling and caching it on first use.
         * The pattern is always compiled with {@link Pattern#DOTALL}; when
         * matching is not case sensitive, {@link Pattern#CASE_INSENSITIVE}
         * and {@link Pattern#UNICODE_CASE} are added.
         * @return the compiled pattern, or {@code null} if no pattern is set
         * @throws java.util.regex.PatternSyntaxException if the expression
         *         is not a valid regular expression
         */
        public synchronized Pattern getCachedPattern() {
            if (this.cachedPattern == null && this.pattern != null) {
                int flags = Pattern.DOTALL;
                if (!this.caseSensitive) {
                    flags |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
                }
                this.cachedPattern = Pattern.compile(this.pattern, flags);
            }
            return this.cachedPattern;
        }

        @Override
        public String toString() {
            return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
                    .append("applyTo", this.applyTo)
                    .append("value", this.value)
                    .append("pattern", this.pattern)
                    .append("caseSensitive", this.caseSensitive)
                    .toString();
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof MinFrequency)) {
                return false;
            }
            MinFrequency other = (MinFrequency) obj;
            // The cached pattern is derived state and deliberately excluded.
            return new EqualsBuilder()
                    .append(this.applyTo, other.applyTo)
                    .append(this.value, other.value)
                    .append(this.pattern, other.pattern)
                    .append(this.caseSensitive, other.caseSensitive)
                    .isEquals();
        }

        @Override
        public int hashCode() {
            return new HashCodeBuilder()
                    .append(this.applyTo)
                    .append(this.value)
                    .append(this.pattern)
                    .append(this.caseSensitive)
                    .toHashCode();
        }
    }

    /* loaded from: input_file:com/norconex/collector/http/recrawl/impl/GenericRecrawlableResolver$SitemapSupport.class */
    /**
     * How sitemap directives (change frequency, last modified date) are
     * ordered relative to the configured minimum frequencies when deciding
     * whether a document should be re-crawled.
     */
    public enum SitemapSupport {
        FIRST,
        LAST,
        NEVER;

        /**
         * Resolves a constant from its textual form, ignoring case.
         * @param str the textual value
         * @return the matching constant, or {@code null} if the text is
         *         blank or matches no constant
         */
        public static SitemapSupport getSitemapSupport(String str) {
            if (!StringUtils.isBlank(str)) {
                for (SitemapSupport candidate : SitemapSupport.values()) {
                    if (str.equalsIgnoreCase(candidate.toString())) {
                        return candidate;
                    }
                }
            }
            return null;
        }
    }

    /**
     * Gets the configured sitemap support strategy.
     * @return the sitemap support, possibly {@code null}
     */
    public SitemapSupport getSitemapSupport() {
        return sitemapSupport;
    }

    /**
     * Sets the sitemap support strategy. When {@code null},
     * {@code isRecrawlable} falls back to {@link SitemapSupport#FIRST}.
     * @param sitemapSupport the sitemap support
     */
    public void setSitemapSupport(SitemapSupport sitemapSupport) {
        final SitemapSupport requested = sitemapSupport;
        this.sitemapSupport = requested;
    }

    /**
     * Gets the configured minimum frequencies as a new array; changes to
     * the returned array do not affect this resolver's list.
     * @return the minimum frequencies, never {@code null}
     */
    public MinFrequency[] getMinFrequencies() {
        final MinFrequency[] emptyTemplate = new MinFrequency[0];
        return this.minFrequencies.toArray(emptyTemplate);
    }

    /**
     * Replaces all configured minimum frequencies with the supplied ones.
     * @param minFrequencyArr the new minimum frequencies; a {@code null}
     *        array is treated as empty and simply clears the current ones
     */
    public void setMinFrequencies(MinFrequency... minFrequencyArr) {
        this.minFrequencies.clear();
        // A null varargs array (e.g. an explicit "(MinFrequency[]) null")
        // used to clear the list and then fail with a NullPointerException.
        if (minFrequencyArr != null) {
            this.minFrequencies.addAll(Arrays.asList(minFrequencyArr));
        }
    }

    /**
     * Decides whether a previously crawled document should be crawled again.
     * <ol>
     *   <li>A document without a previous crawl date is always
     *       re-crawlable.</li>
     *   <li>With {@link SitemapSupport#FIRST} (also used when no support is
     *       set), sitemap directives decide whenever the document has
     *       any.</li>
     *   <li>Otherwise the first matching minimum frequency decides.</li>
     *   <li>With {@link SitemapSupport#LAST}, sitemap directives decide when
     *       no minimum frequency matched.</li>
     *   <li>If nothing applies, the document is re-crawlable.</li>
     * </ol>
     * @param previousCrawlData data gathered the last time the document
     *        was crawled
     * @return {@code true} if the document should be re-crawled
     */
    @Override // com.norconex.collector.http.recrawl.IRecrawlableResolver
    public boolean isRecrawlable(PreviousCrawlData previousCrawlData) {
        if (previousCrawlData.getCrawlDate() == null) {
            return true;
        }

        final SitemapSupport support =
                this.sitemapSupport != null ? this.sitemapSupport : SitemapSupport.FIRST;
        final boolean sitemapHasDirectives =
                hasSitemapFrequency(previousCrawlData) || hasSitemapLastModified(previousCrawlData);

        if (sitemapHasDirectives && support == SitemapSupport.FIRST) {
            return isRecrawlableFromSitemap(previousCrawlData);
        }

        final MinFrequency match = getMatchingMinFrequency(previousCrawlData);
        if (match != null) {
            return isRecrawlableFromMinFrequencies(match, previousCrawlData);
        }

        if (sitemapHasDirectives && support == SitemapSupport.LAST) {
            return isRecrawlableFromSitemap(previousCrawlData);
        }
        return true;
    }

    /**
     * Finds the first configured minimum frequency whose pattern fully
     * matches the document attribute it applies to.
     * <p>
     * Entries lacking a pattern or a value are skipped with a warning. A
     * blank {@code applyTo} is treated as {@code "reference"}. An entry is
     * also skipped when the attribute it targets (reference or content
     * type) is {@code null} for this document, rather than failing with a
     * {@link NullPointerException}.
     * </p>
     * @param previousCrawlData data from the previous crawl of the document
     * @return the first matching minimum frequency, or {@code null} if none
     */
    private MinFrequency getMatchingMinFrequency(PreviousCrawlData previousCrawlData) {
        for (MinFrequency minFrequency : this.minFrequencies) {
            if (minFrequency.pattern == null || minFrequency.value == null) {
                LOG.warn("Value or pattern missing in minimum frequency.");
                continue;
            }

            String applyTo = minFrequency.getApplyTo();
            if (StringUtils.isBlank(applyTo)) {
                applyTo = "reference";
            }

            // Resolve the text to test; null means "nothing to match against".
            String target = null;
            if ("reference".equalsIgnoreCase(applyTo)) {
                target = previousCrawlData.getReference();
            } else if ("contentType".equalsIgnoreCase(applyTo)) {
                Object contentType = previousCrawlData.getContentType();
                if (contentType != null) {
                    target = contentType.toString();
                }
            }

            if (target != null && minFrequency.getCachedPattern().matcher(target).matches()) {
                return minFrequency;
            }
        }
        return null;
    }

    /**
     * Tells whether the previous crawl recorded a non-blank sitemap change
     * frequency for the document.
     * @param previousCrawlData data from the previous crawl
     * @return {@code true} if a change frequency is present
     */
    private boolean hasSitemapFrequency(PreviousCrawlData previousCrawlData) {
        final boolean missing = StringUtils.isBlank(previousCrawlData.getSitemapChangeFreq());
        return !missing;
    }

    /**
     * Tells whether the previous crawl recorded a strictly positive sitemap
     * last modified value for the document.
     * @param previousCrawlData data from the previous crawl
     * @return {@code true} if a usable last modified value is present
     */
    private boolean hasSitemapLastModified(PreviousCrawlData previousCrawlData) {
        if (previousCrawlData.getSitemapLastMod() == null) {
            return false;
        }
        return previousCrawlData.getSitemapLastMod().longValue() > 0;
    }

    /**
     * Applies a matching minimum frequency to decide on re-crawling.
     * <p>
     * A blank value means re-crawlable. A value naming a sitemap change
     * frequency is delegated to
     * {@code isRecrawlableFromFrequency}. Any other value is read as a
     * delay: all-digit values are converted with
     * {@code NumberUtils.toLong}, anything else with
     * {@code DurationParser.parse}. The document is re-crawlable when the
     * last crawl date plus that delay is strictly before now.
     * </p>
     * @param minFrequency the matching minimum frequency
     * @param previousCrawlData data from the previous crawl
     * @return {@code true} if the document should be re-crawled
     */
    private boolean isRecrawlableFromMinFrequencies(MinFrequency minFrequency, PreviousCrawlData previousCrawlData) {
        final String rawValue = minFrequency.getValue();
        if (StringUtils.isBlank(rawValue)) {
            return true;
        }

        final SitemapChangeFrequency namedFrequency = SitemapChangeFrequency.getChangeFrequency(rawValue);
        if (namedFrequency != null) {
            return isRecrawlableFromFrequency(namedFrequency, previousCrawlData, "custom");
        }

        final long requiredDelay;
        if (NumberUtils.isDigits(rawValue)) {
            requiredDelay = NumberUtils.toLong(rawValue);
        } else {
            requiredDelay = DurationParser.parse(rawValue);
        }

        final DateTime lastCrawled = new DateTime(previousCrawlData.getCrawlDate());
        final DateTime earliestRecrawl = lastCrawled.plus(requiredDelay);
        final DateTime now = DateTime.now();
        final boolean recrawlable = earliestRecrawl.isBefore(now);

        if (LOG.isDebugEnabled()) {
            final String template = recrawlable
                    ? "Recrawlable according to custom directive (required elasped time '%s' < actual elasped time '%s' since '%s'): %s"
                    : "Not recrawlable according to custom directive (required elasped time '%s' >= actual elasped time '%s' since '%s'): %s";
            LOG.debug(String.format(template,
                    formatDuration(requiredDelay),
                    formatDuration(lastCrawled, now),
                    formatDate(lastCrawled),
                    previousCrawlData.getReference()));
        }
        return recrawlable;
    }

    /**
     * Decides on re-crawling from sitemap directives.
     * <p>
     * When a sitemap last modified value is available it takes precedence:
     * the document is re-crawlable only if that date is strictly after the
     * last crawl date. Otherwise the sitemap change frequency is used.
     * </p>
     * @param previousCrawlData data from the previous crawl
     * @return {@code true} if the document should be re-crawled
     */
    private boolean isRecrawlableFromSitemap(PreviousCrawlData previousCrawlData) {
        if (!hasSitemapLastModified(previousCrawlData)) {
            return isRecrawlableFromFrequency(
                    SitemapChangeFrequency.getChangeFrequency(previousCrawlData.getSitemapChangeFreq()),
                    previousCrawlData, "Sitemap");
        }

        final DateTime lastModified = new DateTime(previousCrawlData.getSitemapLastMod());
        final DateTime lastCrawled = new DateTime(previousCrawlData.getCrawlDate());
        final boolean recrawlable = lastModified.isAfter(lastCrawled);

        // All message building is guarded so nothing is formatted or
        // concatenated per document when debug logging is off.
        if (LOG.isDebugEnabled()) {
            LOG.debug("Sitemap last modified date is " + lastModified + " for: " + previousCrawlData.getReference());
            final String template = recrawlable
                    ? "Recrawlable according to sitemap directive (last modified '%s' > last crawled '%s'): %s"
                    : "Not recrawlable according to sitemap directive (last modified '%s' <= last crawled '%s'): %s";
            LOG.debug(String.format(template,
                    formatDate(lastModified),
                    formatDate(lastCrawled),
                    previousCrawlData.getReference()));
        }
        return recrawlable;
    }

    /**
     * Decides on re-crawling from a change frequency.
     * <p>
     * A {@code null} frequency and {@code ALWAYS} mean re-crawlable;
     * {@code NEVER} means not re-crawlable. For hourly, daily, weekly,
     * monthly and yearly frequencies the document is re-crawlable when the
     * last crawl date plus one such period is strictly before now. Any
     * other frequency adds no delay to the last crawl date.
     * </p>
     * @param sitemapChangeFrequency the change frequency, possibly {@code null}
     * @param previousCrawlData data from the previous crawl
     * @param str label naming the origin of the directive, used in log
     *        messages only
     * @return {@code true} if the document should be re-crawled
     */
    private boolean isRecrawlableFromFrequency(SitemapChangeFrequency sitemapChangeFrequency, PreviousCrawlData previousCrawlData, String str) {
        if (sitemapChangeFrequency == null) {
            return true;
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("The " + str + " change frequency is " + sitemapChangeFrequency + " for: " + previousCrawlData.getReference());
        }
        if (sitemapChangeFrequency == SitemapChangeFrequency.ALWAYS) {
            return true;
        }
        if (sitemapChangeFrequency == SitemapChangeFrequency.NEVER) {
            return false;
        }

        final DateTime lastCrawled = new DateTime(previousCrawlData.getCrawlDate());
        final DateTime earliestRecrawl;
        if (sitemapChangeFrequency == SitemapChangeFrequency.HOURLY) {
            earliestRecrawl = lastCrawled.plusHours(1);
        } else if (sitemapChangeFrequency == SitemapChangeFrequency.DAILY) {
            earliestRecrawl = lastCrawled.plusDays(1);
        } else if (sitemapChangeFrequency == SitemapChangeFrequency.WEEKLY) {
            earliestRecrawl = lastCrawled.plusWeeks(1);
        } else if (sitemapChangeFrequency == SitemapChangeFrequency.MONTHLY) {
            earliestRecrawl = lastCrawled.plusMonths(1);
        } else if (sitemapChangeFrequency == SitemapChangeFrequency.YEARLY) {
            earliestRecrawl = lastCrawled.plusYears(1);
        } else {
            // No period known for this frequency: no delay is added.
            earliestRecrawl = lastCrawled;
        }

        final DateTime now = DateTime.now();
        final boolean recrawlable = earliestRecrawl.isBefore(now);

        if (LOG.isDebugEnabled()) {
            final String template = recrawlable
                    ? "Recrawlable according to %s directive (required elasped time '%s' < actual elasped time '%s' since '%s'): %s"
                    : "Not recrawlable according to %s directive (required elasped time '%s' >= actual elasped time '%s' since '%s'): %s";
            LOG.debug(String.format(template,
                    str,
                    formatDuration(lastCrawled, earliestRecrawl),
                    formatDuration(lastCrawled, now),
                    formatDate(lastCrawled),
                    previousCrawlData.getReference()));
        }
        return recrawlable;
    }

    /**
     * Formats a date for log messages using the
     * {@code yyyy-MM-dd'T'HH:mm:ss} pattern.
     * @param dateTime the date to format
     * @return the formatted date
     */
    private String formatDate(DateTime dateTime) {
        final String logDatePattern = "yyyy-MM-dd'T'HH:mm:ss";
        return dateTime.toString(logDatePattern);
    }

    /**
     * Formats the time elapsed between two dates for log messages.
     * @param dateTime the start date
     * @param dateTime2 the end date
     * @return the formatted duration
     */
    private String formatDuration(DateTime dateTime, DateTime dateTime2) {
        final Duration elapsed = new Duration(dateTime, dateTime2);
        return formatDuration(elapsed);
    }

    /**
     * Formats a duration for log messages.
     * @param duration the duration to format
     * @return the formatted duration
     */
    private String formatDuration(Duration duration) {
        final long millis = duration.getMillis();
        return formatDuration(millis);
    }

    /**
     * Formats a duration for log messages by delegating to
     * {@code DurationUtil.formatShort} with the English locale.
     * @param j the duration value to format
     * @return the formatted duration
     */
    private String formatDuration(long j) {
        final Locale logLocale = Locale.ENGLISH;
        return DurationUtil.formatShort(logLocale, j);
    }

    /**
     * Loads this resolver's configuration from XML.
     * <p>
     * Reads the optional {@code sitemapSupport} attribute and every
     * {@code minFrequency} element (attributes {@code applyTo},
     * {@code caseSensitive} and {@code value}; the element text is the
     * pattern). Minimum frequencies found replace any previously set.
     * </p>
     * <p>
     * An unrecognized {@code sitemapSupport} value is logged and the
     * default, {@link SitemapSupport#FIRST}, is applied, as the warning
     * states. A blank or missing attribute leaves the current value
     * untouched.
     * </p>
     * @param reader the XML source
     * @throws IOException if the configuration cannot be read
     */
    public void loadFromXML(Reader reader) throws IOException {
        XMLConfiguration xml = XMLConfigurationUtil.newXMLConfiguration(reader);

        String supportValue = xml.getString("[@sitemapSupport]");
        if (StringUtils.isNotBlank(supportValue)) {
            SitemapSupport support = SitemapSupport.getSitemapSupport(supportValue);
            if (support == null) {
                LOG.warn("Unsupported sitemap support value: \"" + supportValue + "\". Will use default.");
                // Store the actual default instead of null so getters and
                // saved XML agree with the logged message.
                support = SitemapSupport.FIRST;
            }
            setSitemapSupport(support);
        }

        List<HierarchicalConfiguration> nodes = xml.configurationsAt("minFrequency");
        List<MinFrequency> loaded = new ArrayList<MinFrequency>(nodes.size());
        for (HierarchicalConfiguration node : nodes) {
            MinFrequency minFrequency = new MinFrequency();
            minFrequency.setApplyTo(node.getString("[@applyTo]"));
            minFrequency.setCaseSensitive(node.getBoolean("[@caseSensitive]", false));
            minFrequency.setValue(node.getString("[@value]"));
            // The element's own text content holds the pattern.
            minFrequency.setPattern(node.getString(""));
            loaded.add(minFrequency);
        }
        setMinFrequencies(loaded.toArray(new MinFrequency[0]));
    }

    /**
     * Saves this resolver's configuration as XML.
     * <p>
     * Writes a {@code recrawlableResolver} element carrying the class name,
     * the sitemap support (when set) and one {@code minFrequency} child per
     * configured minimum frequency.
     * </p>
     * @param writer the destination
     * @throws IOException if the XML cannot be written
     */
    public void saveToXML(Writer writer) throws IOException {
        try {
            EnhancedXMLStreamWriter xml = new EnhancedXMLStreamWriter(writer);
            xml.writeStartElement("recrawlableResolver");
            xml.writeAttribute("class", getClass().getCanonicalName());

            SitemapSupport support = getSitemapSupport();
            if (support != null) {
                // Lower-case with a fixed locale: the default locale could
                // alter the output (e.g. Turkish maps 'I' to a dotless 'i').
                xml.writeAttribute("sitemapSupport", support.toString().toLowerCase(Locale.ROOT));
            }

            for (MinFrequency minFrequency : this.minFrequencies) {
                xml.writeStartElement("minFrequency");
                xml.writeAttributeString("applyTo", minFrequency.getApplyTo());
                xml.writeAttributeString("value", minFrequency.getValue());
                xml.writeAttributeBoolean("caseSensitive", Boolean.valueOf(minFrequency.isCaseSensitive()));
                xml.writeCharacters(minFrequency.getPattern());
                xml.writeEndElement();
            }

            xml.writeEndElement();
            xml.flush();
            xml.close();
        } catch (XMLStreamException e) {
            throw new IOException("Cannot save as XML.", e);
        }
    }

    /**
     * Describes this resolver using its sitemap support and minimum
     * frequencies.
     * @return a short-prefix style string representation
     */
    @Override
    public String toString() {
        final ToStringBuilder description = new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE);
        description.append("sitemapSupport", this.sitemapSupport);
        description.append("minFrequencies", this.minFrequencies);
        return description.toString();
    }

    /**
     * Compares this resolver with another object. Two resolvers are equal
     * when their sitemap support and minimum frequencies are equal.
     * @param obj the object to compare with
     * @return {@code true} if both are equal
     */
    @Override
    public boolean equals(Object obj) {
        if (obj instanceof GenericRecrawlableResolver) {
            final GenericRecrawlableResolver other = (GenericRecrawlableResolver) obj;
            final EqualsBuilder comparison = new EqualsBuilder();
            comparison.append(this.sitemapSupport, other.sitemapSupport);
            comparison.append(this.minFrequencies, other.minFrequencies);
            return comparison.isEquals();
        }
        return false;
    }

    /**
     * Computes a hash code from the sitemap support and minimum
     * frequencies, the same fields {@code equals} compares.
     * @return the hash code
     */
    @Override
    public int hashCode() {
        final HashCodeBuilder hash = new HashCodeBuilder();
        hash.append(this.sitemapSupport);
        hash.append(this.minFrequencies);
        return hash.toHashCode();
    }
}
