package com.norconex.collector.http.url.impl;

import com.norconex.collector.core.CollectorException;
import com.norconex.collector.http.url.ILinkExtractor;
import com.norconex.collector.http.url.Link;
import com.norconex.commons.lang.config.IXMLConfigurable;
import com.norconex.commons.lang.config.XMLConfigurationUtil;
import com.norconex.commons.lang.file.ContentType;
import com.norconex.commons.lang.xml.EnhancedXMLStreamWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.Writer;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;
import javax.xml.stream.XMLStreamException;
import org.apache.commons.configuration.XMLConfiguration;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

/* loaded from: input_file:com/norconex/collector/http/url/impl/XMLFeedLinkExtractor.class */
/**
 * Link extractor for XML feeds. Every {@code link} element found in the
 * parsed document contributes up to two links: one from its {@code href}
 * attribute (when not blank) and one from its text content (when not blank
 * once trimmed).
 *
 * <p>Whether a document is handled is decided by {@link #accepts}, using two
 * optional regular expressions: one matched against the document reference
 * and one matched against its content type.</p>
 */
public class XMLFeedLinkExtractor implements ILinkExtractor, IXMLConfigurable {
    /**
     * Default content-type regular expression: matches {@code application/xml},
     * {@code application/rss+xml}, {@code application/rdf+xml},
     * {@code application/atom+xml} and {@code text/xml}.
     */
    public static final String DEFAULT_CONTENT_TYPE_PATTERN = "application/(rss\\+|rdf\\+|atom\\+){0,1}xml|text/xml";

    /** Local name of the element links are extracted from. */
    private static final String LINK_ELEMENT = "link";

    /** Standard SAX2 feature controlling inclusion of external general entities. */
    private static final String FEATURE_EXTERNAL_GENERAL_ENTITIES =
            "http://xml.org/sax/features/external-general-entities";

    /** Standard SAX2 feature controlling inclusion of external parameter entities. */
    private static final String FEATURE_EXTERNAL_PARAMETER_ENTITIES =
            "http://xml.org/sax/features/external-parameter-entities";

    /** Xerces-specific feature controlling loading of the external DTD subset. */
    private static final String FEATURE_LOAD_EXTERNAL_DTD =
            "http://apache.org/xml/features/nonvalidating/load-external-dtd";

    private String applyToContentTypePattern = DEFAULT_CONTENT_TYPE_PATTERN;
    private String applyToReferencePattern;

    /**
     * SAX handler collecting links from {@code link} elements into the set
     * supplied at construction time. Declared static because it needs nothing
     * from the enclosing extractor instance.
     */
    private static final class FeedHandler extends DefaultHandler {
        private final String referer;
        private final Set<Link> links;
        private boolean inLink = false;
        // SAX may deliver the text of one element over several characters()
        // calls, so the pieces are buffered until the element ends.
        private final StringBuilder linkText = new StringBuilder();

        /**
         * @param referer value set as the referrer of every extracted link
         * @param links   set receiving the extracted links
         */
        public FeedHandler(String referer, Set<Link> links) {
            this.referer = referer;
            this.links = links;
        }

        /**
         * On a {@code link} element (case-insensitive local name), starts
         * buffering text and records the {@code href} attribute, if not blank.
         */
        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            if (!LINK_ELEMENT.equalsIgnoreCase(localName)) {
                return;
            }
            this.inLink = true;
            String href = attributes.getValue("href");
            if (StringUtils.isNotBlank(href)) {
                addLink(href);
            }
        }

        /** Buffers character data, but only while inside a {@code link} element. */
        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (this.inLink && length > 0) {
                this.linkText.append(ch, start, length);
            }
        }

        /**
         * On the end of a {@code link} element, records its trimmed text as a
         * link (when not empty) and resets the buffering state.
         */
        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void endElement(String uri, String localName, String qName) throws SAXException {
            // Must use the same case-insensitive test as startElement();
            // otherwise an element such as <LINK> would turn buffering on
            // without ever turning it off again.
            if (!LINK_ELEMENT.equalsIgnoreCase(localName)) {
                return;
            }
            // Trim so indentation/new lines around the URL are not part of it
            // and whitespace-only content does not produce a link.
            String url = this.linkText.toString().trim();
            this.linkText.setLength(0);
            this.inLink = false;
            if (!url.isEmpty()) {
                addLink(url);
            }
        }

        /** Creates a link for the given URL, sets its referrer and stores it. */
        private void addLink(String url) {
            Link link = new Link(url);
            link.setReferrer(this.referer);
            this.links.add(link);
        }
    }

    /**
     * Parses the given stream as XML and returns the links found in its
     * {@code link} elements.
     *
     * @param inputStream the XML feed content
     * @param str         the document reference; set as the referrer of each
     *                    extracted link and used in the error message
     * @param contentType not used by this implementation
     * @return the extracted links (possibly empty, never {@code null})
     * @throws IOException        if reading the stream fails
     * @throws CollectorException if the parser cannot be configured or the
     *                            content cannot be parsed
     */
    @Override // com.norconex.collector.http.url.ILinkExtractor
    public Set<Link> extractLinks(InputStream inputStream, String str, ContentType contentType) throws IOException {
        Set<Link> links = new HashSet<Link>();
        try {
            XMLReader xmlReader = createSecureXMLReader();
            FeedHandler handler = new FeedHandler(str, links);
            xmlReader.setContentHandler(handler);
            xmlReader.setErrorHandler(handler);
            xmlReader.parse(new InputSource(inputStream));
            return links;
        } catch (SAXException e) {
            throw new CollectorException("Could not parse XML Feed: " + str, e);
        }
    }

    /**
     * Creates an XML reader that does not resolve external entities. Feeds
     * are fetched from remote sites, so their content is untrusted and must
     * not be able to make the parser read local files or call other URLs.
     */
    private static XMLReader createSecureXMLReader() throws SAXException {
        XMLReader xmlReader = XMLReaderFactory.createXMLReader();
        xmlReader.setFeature(FEATURE_EXTERNAL_GENERAL_ENTITIES, false);
        xmlReader.setFeature(FEATURE_EXTERNAL_PARAMETER_ENTITIES, false);
        try {
            xmlReader.setFeature(FEATURE_LOAD_EXTERNAL_DTD, false);
        } catch (SAXException ignored) {
            // Parser-specific feature: a parser that does not know it is
            // still covered by the two standard features set above.
        }
        return xmlReader;
    }

    /**
     * Tells whether this extractor applies to a document. A blank pattern
     * accepts everything; a non-blank pattern must match its value entirely.
     * A {@code null} value never matches a non-blank pattern.
     *
     * @param str         the document reference, matched against
     *                    {@link #getApplyToReferencePattern()}
     * @param contentType the document content type, whose string form is
     *                    matched against {@link #getApplyToContentTypePattern()}
     * @return {@code true} if both checks pass
     */
    @Override // com.norconex.collector.http.url.ILinkExtractor
    public boolean accepts(String str, ContentType contentType) {
        if (StringUtils.isNotBlank(this.applyToReferencePattern)
                && (str == null || !Pattern.matches(this.applyToReferencePattern, str))) {
            return false;
        }
        if (StringUtils.isNotBlank(this.applyToContentTypePattern)
                && (contentType == null
                        || !Pattern.matches(this.applyToContentTypePattern, contentType.toString()))) {
            return false;
        }
        return true;
    }

    /**
     * @return the regular expression content types must match, or a blank
     *         value when content types are not restricted
     */
    public String getApplyToContentTypePattern() {
        return this.applyToContentTypePattern;
    }

    /**
     * @param str the regular expression content types must match; blank or
     *            {@code null} to accept any content type
     */
    public void setApplyToContentTypePattern(String str) {
        this.applyToContentTypePattern = str;
    }

    /**
     * @return the regular expression references must match, or a blank
     *         value when references are not restricted
     */
    public String getApplyToReferencePattern() {
        return this.applyToReferencePattern;
    }

    /**
     * @param str the regular expression references must match; blank or
     *            {@code null} to accept any reference
     */
    public void setApplyToReferencePattern(String str) {
        this.applyToReferencePattern = str;
    }

    /**
     * Loads both patterns from XML. The current value of each pattern is
     * passed as the default for its key.
     *
     * @param reader the XML configuration source
     */
    public void loadFromXML(Reader reader) {
        XMLConfiguration xml = XMLConfigurationUtil.newXMLConfiguration(reader);
        setApplyToContentTypePattern(
                xml.getString("applyToContentTypePattern", getApplyToContentTypePattern()));
        setApplyToReferencePattern(
                xml.getString("applyToReferencePattern", getApplyToReferencePattern()));
    }

    /**
     * Writes this extractor as an {@code extractor} element carrying the
     * class name and both patterns.
     *
     * @param writer where to write the XML
     * @throws IOException if the XML cannot be written
     */
    public void saveToXML(Writer writer) throws IOException {
        try {
            EnhancedXMLStreamWriter xml = new EnhancedXMLStreamWriter(writer);
            xml.writeStartElement("extractor");
            xml.writeAttribute("class", getClass().getCanonicalName());
            xml.writeElementString("applyToContentTypePattern", getApplyToContentTypePattern());
            xml.writeElementString("applyToReferencePattern", getApplyToReferencePattern());
            xml.writeEndElement();
            xml.flush();
            xml.close();
        } catch (XMLStreamException e) {
            throw new IOException("Cannot save as XML.", e);
        }
    }

    @Override
    public String toString() {
        return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
                .append("applyToContentTypePattern", this.applyToContentTypePattern)
                .append("applyToReferencePattern", this.applyToReferencePattern)
                .toString();
    }

    /** Two extractors are equal when both of their patterns are equal. */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (!(obj instanceof XMLFeedLinkExtractor)) {
            return false;
        }
        XMLFeedLinkExtractor other = (XMLFeedLinkExtractor) obj;
        return new EqualsBuilder()
                .append(this.applyToContentTypePattern, other.applyToContentTypePattern)
                .append(this.applyToReferencePattern, other.applyToReferencePattern)
                .isEquals();
    }

    @Override
    public int hashCode() {
        return new HashCodeBuilder()
                .append(this.applyToContentTypePattern)
                .append(this.applyToReferencePattern)
                .toHashCode();
    }
}
