package com.norconex.collector.http.url.impl;

import com.norconex.collector.http.url.Link;
import com.norconex.commons.lang.config.XMLConfigurationUtil;
import com.norconex.commons.lang.file.ContentType;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.junit.Assert;
import org.junit.Test;

/* loaded from: input_file:com/norconex/collector/http/url/impl/RegexLinkExtractorTest.class */
/**
 * Tests for {@link RegexLinkExtractor}: link extraction with patterns
 * registered in code, link extraction with a configuration loaded from XML,
 * and an XML write/read round trip of a configured extractor.
 */
public class RegexLinkExtractorTest {

    /** Base URL used to build the reference of the document under test. */
    private static final String BASE_URL = "http://www.example.com/test/";

    /** Reference (URL) passed to the extractor for the sample document. */
    private static final String DOC_URL =
            BASE_URL + "RegexLinkExtractorTest.html";

    /** Classpath resource holding the sample text the links are extracted from. */
    private static final String INPUT_RESOURCE = "RegexLinkExtractorTest.txt";

    /**
     * Registers three patterns in code and checks that exactly the expected
     * URLs are extracted from the sample text resource.
     *
     * @throws IOException if reading or closing the sample resource fails
     */
    @Test
    public void testLinkExtraction() throws IOException {
        RegexLinkExtractor extractor = new RegexLinkExtractor();
        extractor.addPattern("\\[\\s*(.*?)\\s*\\]", "$1");
        extractor.addPattern("<link>\\s*(.*?)\\s*</link>", "$1");
        extractor.addPattern(
                "<a href=\"javascript:;\"[^>]*?id=\"p_(\\d+)\">",
                "/page?id=$1");

        String[] expectedUrls = {
            "http://www.example.com/page1.html",
            "http://www.example.com/page2.html",
            "http://www.example.com/page3.html",
            "http://www.example.com/page4.html",
            BASE_URL + "page5.html",
            "http://www.example.com/page?id=12345",
            "http://www.example.com/page?id=67890"
        };

        Set<Link> links;
        // try-with-resources closes the stream even when extraction throws.
        try (InputStream input = openResource(INPUT_RESOURCE)) {
            links = extractor.extractLinks(input, DOC_URL, ContentType.TEXT);
        }
        assertExpectedLinks(expectedUrls, links);
    }

    /**
     * Loads the extractor configuration from the
     * {@code <SimpleClassName>.cfg.xml} classpath resource and checks that
     * exactly the two {@code /page?id=...} URLs are extracted from the
     * sample text resource.
     *
     * @throws IOException if reading or closing a test resource fails
     */
    @Test
    public void testJSLinkFromXML() throws IOException {
        RegexLinkExtractor extractor = new RegexLinkExtractor();

        String configResource = getClass().getSimpleName() + ".cfg.xml";
        // NOTE(review): decodes the config as UTF-8 (the XML default) rather
        // than the platform charset - confirm the fixture is UTF-8 encoded.
        try (InputStreamReader reader = new InputStreamReader(
                openResource(configResource), StandardCharsets.UTF_8)) {
            extractor.loadFromXML(reader);
        }

        String[] expectedUrls = {
            "http://www.example.com/page?id=12345",
            "http://www.example.com/page?id=67890"
        };

        Set<Link> links;
        try (InputStream input = openResource(INPUT_RESOURCE)) {
            links = extractor.extractLinks(input, DOC_URL, ContentType.TEXT);
        }
        assertExpectedLinks(expectedUrls, links);
    }

    /**
     * Configures an extractor with patterns and settings, then delegates to
     * {@link XMLConfigurationUtil#assertWriteRead} to verify the XML
     * write/read round trip.
     *
     * @throws IOException if the write/read assertion fails on I/O
     */
    @Test
    public void testGenericWriteRead() throws IOException {
        RegexLinkExtractor extractor = new RegexLinkExtractor();
        extractor.addPattern("\\[(.*?)\\]", "$1");
        extractor.addPattern("<link>.*?</link>", "$1");
        extractor.setApplyToContentTypePattern("ct");
        extractor.setApplyToReferencePattern("ref");
        extractor.setCharset("charset");
        extractor.setMaxURLLength(12345);
        System.out.println("Writing/Reading this: " + extractor);
        XMLConfigurationUtil.assertWriteRead(extractor);
    }

    /**
     * Opens a classpath resource relative to this test class, failing the
     * test with a descriptive message when the resource is missing instead
     * of letting a {@code null} stream cause an obscure error later.
     *
     * @param name resource name, resolved via {@link Class#getResourceAsStream}
     * @return the opened stream, never {@code null}; the caller must close it
     */
    private InputStream openResource(String name) {
        InputStream stream = getClass().getResourceAsStream(name);
        Assert.assertNotNull(
                "Test resource not found on classpath: " + name, stream);
        return stream;
    }

    /**
     * Asserts that every expected URL is present among the extracted links
     * and that no additional links were extracted.
     *
     * @param expectedUrls URLs that must each match one extracted link
     * @param links links returned by the extractor
     */
    private void assertExpectedLinks(String[] expectedUrls, Set<Link> links) {
        for (String expectedUrl : expectedUrls) {
            Assert.assertTrue(
                    "Could not find expected URL: " + expectedUrl,
                    contains(links, expectedUrl));
        }
        Assert.assertEquals(
                "Invalid number of links extracted.",
                expectedUrls.length, links.size());
    }

    /**
     * Tells whether any link in the set has the given URL.
     *
     * @param set links to search
     * @param str URL to look for, compared with {@link String#equals}
     * @return {@code true} if at least one link's URL equals {@code str}
     */
    private boolean contains(Set<Link> set, String str) {
        for (Link link : set) {
            if (str.equals(link.getUrl())) {
                return true;
            }
        }
        return false;
    }
}
