package com.norconex.collector.http.robot.impl;

import com.norconex.collector.core.filter.IReferenceFilter;
import com.norconex.collector.core.filter.impl.RegexReferenceFilter;
import com.norconex.collector.http.robot.IRobotsTxtFilter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.junit.Assert;
import org.junit.Test;

/* loaded from: input_file:com/norconex/collector/http/robot/impl/StandardRobotsTxtProviderTest.class */
/**
 * Tests for {@link StandardRobotsTxtProvider#parseRobotsTxt}: feeds in-memory
 * robots.txt content to the parser and checks the filters it returns.
 */
public class StandardRobotsTxtProviderTest {

    /** User agent the tests crawl as. */
    private static final String AGENT = "mister-crawler";

    /** URL handed to the parser when a test does not care about it. */
    private static final String DEFAULT_URL =
            "http://www.test.com/some/fake/url.html";

    /** Prefix expected at the start of every filter's {@code toString()}. */
    private static final String RULE_PREFIX = "Robots.txt -> ";

    /** Wildcard-agent fixture mixing comments, Disallow and Allow lines. */
    private static final String COMMENTED_ROBOTS_TXT =
            "# robots.txt\n"
            + "User-agent: *\n"
            + "Disallow: /some/fake/ # Spiders, keep out! \n"
            + "Disallow: /spidertrap/\n"
            + "Allow: /open/\n"
            + " Allow : / \n";

    /**
     * Checks which rules are returned for {@code mister-crawler} given
     * various orderings of agent sections, extra whitespace, inline comments
     * and empty {@code Disallow} values.
     */
    @Test
    public void testGetRobotsTxt() throws IOException {
        // Agent-specific section sits between the wildcard and another agent.
        assertStartsWith(RULE_PREFIX + "Disallow: /bathroom/", parseRobotRule(AGENT,
                "User-agent: *\n"
                + "Disallow: /dontgo/there/\n"
                + "User-agent: mister-crawler\n"
                + "Disallow: /bathroom/\n"
                + "User-agent: miss-crawler\n"
                + "Disallow: /tvremote/\n")[1]);

        // Same idea with padding around field names and values.
        assertStartsWith(RULE_PREFIX + "Disallow: /bathroom/", parseRobotRule(AGENT,
                " User-agent : mister-crawler \n"
                + "  Disallow : /bathroom/ \n"
                + "   User-agent : * \n"
                + "    Disallow : /dontgo/there/ \n")[0]);

        // No section names our agent: the wildcard rule is expected.
        assertStartsWith(RULE_PREFIX + "Disallow: /dontgo/there/", parseRobotRule(AGENT,
                "User-agent: miss-crawler\n"
                + "Disallow: /tvremote/\n"
                + "User-agent: *\n"
                + "Disallow: /dontgo/there/\n")[0]);

        // Agent-specific section comes last.
        assertStartsWith(RULE_PREFIX + "Disallow: /bathroom/", parseRobotRule(AGENT,
                "User-agent: miss-crawler\n"
                + "Disallow: /tvremote/\n"
                + "User-agent: *\n"
                + "Disallow: /dontgo/there/\n"
                + "User-agent: mister-crawler\n"
                + "Disallow: /bathroom/\n")[1]);

        // Parse the commented fixture once and inspect every returned rule.
        IRobotsTxtFilter[] commented =
                parseRobotRule(AGENT, COMMENTED_ROBOTS_TXT);
        assertStartsWith(RULE_PREFIX + "Disallow: /some/fake/", commented[0]);
        assertStartsWith(RULE_PREFIX + "Disallow: /spidertrap/", commented[1]);
        assertStartsWith(RULE_PREFIX + "Allow: /open/", commented[2]);
        // Four directives in the fixture, but only three filters are expected.
        Assert.assertEquals(3L, commented.length);

        // An empty Disallow value (with or without a comment) yields no filter.
        Assert.assertTrue(ArrayUtils.isEmpty(
                parseRobotRule(AGENT, "User-agent: *\n\nDisallow: \n\n")));
        Assert.assertTrue(ArrayUtils.isEmpty(
                parseRobotRule(AGENT, "User-agent: *\n\nDisallow: # allow all\n\n")));
    }

    /** Checks the regex produced for a rule containing a {@code *} wildcard. */
    @Test
    public void testWildcardPattern() throws IOException {
        IReferenceFilter rule = parseRobotRule(AGENT,
                "User-agent: *\n\nDisallow: /testing/*/wildcards\n")[0];
        assertMatch("http://www.test.com/testing/some/random/path/wildcards", rule);
        assertMatch("http://www.test.com/testing/some/random/path/wildcards/test", rule);
        assertNoMatch("http://www.test.com/testing/wildcards", rule);
        assertNoMatch("http://www.test.com/wildcards", rule);
    }

    /** Checks the regex produced for a rule ending with a {@code $} anchor. */
    @Test
    public void testStringEndPattern() throws IOException {
        IReferenceFilter rule = parseRobotRule(AGENT,
                "User-agent: *\n\nDisallow: /testing/anchors$\n")[0];
        assertMatch("http://www.test.com/testing/anchors", rule);
        assertMatch("http://www.test.com/testing/anchors/", rule);
        assertNoMatch("http://www.test.com/testing/anchors/test", rule);
        assertNoMatch("http://www.test.com/randomly/testing/anchors", rule);
    }

    /**
     * Checks that {@code .} and {@code ?} in a rule path are matched
     * literally rather than as regex metacharacters.
     */
    @Test
    public void testRegexEscape() throws IOException {
        IReferenceFilter rule = parseRobotRule(AGENT,
                "User-agent: *\n\nDisallow: /testing/reg.ex/escape?\n")[0];
        assertMatch("http://www.test.com/testing/reg.ex/escape?", rule);
        assertMatch("http://www.test.com/testing/reg.ex/escape?test", rule);
        assertNoMatch("http://www.test.com/testing/reggex/escape?", rule);
        assertNoMatch("http://www.test.com/testing/reggex/escape?test", rule);
        assertNoMatch("http://www.test.com/testing/reg*ex/escape?", rule);
        assertNoMatch("http://www.test.com/testing/reg*ex/escape?test", rule);
    }

    /**
     * Asserts that the filter's {@code toString()} begins with the given text.
     *
     * @param expectedPrefix text the string form must start with
     * @param filter filter under inspection
     */
    private void assertStartsWith(String expectedPrefix, IReferenceFilter filter) {
        String actualPrefix = StringUtils.substring(
                filter.toString(), 0, expectedPrefix.length());
        Assert.assertEquals(expectedPrefix, actualPrefix);
    }

    /**
     * Asserts whether the URL fully matches the filter's regular expression.
     *
     * @param url URL to test
     * @param filter filter to test against; must be a {@link RegexReferenceFilter}
     * @param expected {@code true} if the URL should match, {@code false} if not
     */
    private void assertMatch(String url, IReferenceFilter filter, boolean expected) {
        String regex = ((RegexReferenceFilter) filter).getRegex();
        // Name both the URL and the regex so a failure is self-explanatory.
        Assert.assertEquals(
                "URL \"" + url + "\" against regex \"" + regex + "\"",
                Boolean.valueOf(expected),
                Boolean.valueOf(url.matches(regex)));
    }

    /** Asserts that the URL matches the filter's regular expression. */
    private void assertMatch(String url, IReferenceFilter filter) {
        assertMatch(url, filter, true);
    }

    /** Asserts that the URL does not match the filter's regular expression. */
    private void assertNoMatch(String url, IReferenceFilter filter) {
        assertMatch(url, filter, false);
    }

    /**
     * Parses robots.txt content and returns the filters reported by the parser.
     *
     * @param agent user agent the rules are resolved for
     * @param content raw robots.txt text
     * @param url URL passed to the parser along with the content
     * @return the filters obtained from the parsed result
     * @throws IOException if the parser fails to read the content
     */
    private IRobotsTxtFilter[] parseRobotRule(String agent, String content, String url)
            throws IOException {
        StandardRobotsTxtProvider provider = new StandardRobotsTxtProvider();
        // Forward the requested agent; it used to be ignored in favour of a
        // hard-coded "mister-crawler" literal.
        return provider.parseRobotsTxt(
                IOUtils.toInputStream(content, StandardCharsets.UTF_8), url, agent)
                .getFilters();
    }

    /**
     * Parses robots.txt content using the default test URL.
     *
     * @param agent user agent the rules are resolved for
     * @param content raw robots.txt text
     * @return the filters obtained from the parsed result
     * @throws IOException if the parser fails to read the content
     */
    private IRobotsTxtFilter[] parseRobotRule(String agent, String content)
            throws IOException {
        return parseRobotRule(agent, content, DEFAULT_URL);
    }
}
