package com.norconex.collector.http.url.impl;

import com.norconex.collector.http.doc.HttpMetadata;
import com.norconex.commons.lang.config.XMLConfigurationUtil;
import com.norconex.commons.lang.file.ContentType;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.junit.Assert;
import org.junit.Test;

/* loaded from: input_file:com/norconex/collector/http/url/impl/GenericCanonicalLinkDetectorTest.class */
/**
 * Tests for {@link GenericCanonicalLinkDetector}: canonical URL detection
 * from the HTTP "Link" header, detection from HTML content, and the
 * configuration write/read round trip.
 */
public class GenericCanonicalLinkDetectorTest {

    /** Reference (source document URL) used by the metadata and content tests. */
    private static final String DOC_URL = "http://www.example.com/file.pdf";

    /** Canonical URL most test cases expect the detector to return. */
    private static final String CANONICAL_URL = "http://www.example.com/canonical.pdf";

    /**
     * Wraps HTML in an input stream using an explicit UTF-8 encoding, so the
     * bytes handed to the detector do not depend on the platform default
     * charset.
     *
     * @param html the HTML markup to expose as a stream
     * @return a stream over the UTF-8 bytes of {@code html}
     */
    private static ByteArrayInputStream htmlStream(String html) {
        return new ByteArrayInputStream(html.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Builds a minimal HTML page whose {@code <head>} holds a canonical
     * {@code <link>} with the given href. The line breaks inside the tag and
     * before the href value are deliberate parts of the fixture.
     *
     * @param href raw href attribute value, inserted as-is
     * @return the HTML page markup
     */
    private static String pageWithCanonicalLinkInHead(String href) {
        return "<html><head><title>Test</title>\n"
                + "<link rel=\"canonical\"\n href=\"\n" + href + "\" />\n"
                + "</head><body>Nothing of interest in body</body></html>";
    }

    /**
     * A single "Link" header value carrying several comma-separated links
     * must yield the one marked {@code rel="canonical"}, even when the URL
     * itself contains commas and semicolons.
     */
    @Test
    public void testMultipleLinkValueFromMetadata() throws IOException {
        GenericCanonicalLinkDetector detector = new GenericCanonicalLinkDetector();
        HttpMetadata metadata = new HttpMetadata(DOC_URL);
        String linkHeader =
                "<http://www.example.com/images/logo.png>; rel=\"image_src\","
                + "<http://www.example.com/cano,ni;cal.pdf>; rel=\"canonical\","
                + "<http://www.example.com/short/1234>; rel=\"shortlink\","
                + "<http://www.example.com/cano,ni;cal.pdf>; rel=\"hreflang_en\"";
        metadata.setString("Link", new String[]{linkHeader});

        Assert.assertEquals("Invalid absolute canonical URL",
                "http://www.example.com/cano,ni;cal.pdf",
                detector.detectFromMetadata(DOC_URL, metadata));
    }

    /**
     * Both an absolute and a relative canonical URL in the "Link" header
     * must resolve to the same absolute canonical URL.
     */
    @Test
    public void testDetectFromMetadata() throws IOException {
        GenericCanonicalLinkDetector detector = new GenericCanonicalLinkDetector();
        HttpMetadata metadata = new HttpMetadata(DOC_URL);

        // Absolute URL in the header.
        metadata.setString("Link", new String[]{
                "<http://www.example.com/canonical.pdf> rel=\"canonical\""});
        Assert.assertEquals("Invalid absolute canonical URL", CANONICAL_URL,
                detector.detectFromMetadata(DOC_URL, metadata));

        // Relative URL: expected to be resolved against the document URL.
        metadata.setString("Link", new String[]{"</canonical.pdf> rel=\"canonical\""});
        Assert.assertEquals("Invalid relative canonical URL", CANONICAL_URL,
                detector.detectFromMetadata(DOC_URL, metadata));
    }

    /**
     * A canonical declaration inside {@code <head>} must be detected, while
     * a canonical {@code <link>} placed in {@code <body>} must be ignored.
     */
    @Test
    public void testDetectFromContent() throws IOException {
        GenericCanonicalLinkDetector detector = new GenericCanonicalLinkDetector();

        // <meta property="canonical"> variant inside <head>.
        String metaInHead = "<html><head><title>Test</title>\n"
                + "<meta property=\"canonical\"\n href=\"\n" + CANONICAL_URL + "\" />\n"
                + "</head><body>Nothing of interest in body</body></html>";
        Assert.assertEquals("Invalid <link> form <head>", CANONICAL_URL,
                detector.detectFromContent(DOC_URL, htmlStream(metaInHead), ContentType.HTML));

        // Regular <link rel="canonical"> inside <head>.
        Assert.assertEquals("Invalid <link> form <head>", CANONICAL_URL,
                detector.detectFromContent(DOC_URL,
                        htmlStream(pageWithCanonicalLinkInHead(CANONICAL_URL)),
                        ContentType.HTML));

        // Same <link>, but located in <body>: no canonical URL is expected.
        String linkInBody = "<html><head><title>Test</title>\n"
                + "</head><body>\n"
                + "<link rel=\"canonical\"\n href=\"\n" + CANONICAL_URL + "\" />\n"
                + "</body></html>";
        Assert.assertNull("Canonical link should be null.",
                detector.detectFromContent(DOC_URL, htmlStream(linkInBody), ContentType.HTML));
    }

    /**
     * An href written with HTML character references must be returned in its
     * unescaped form.
     */
    @Test
    public void testEscapedCanonicalUrl() throws IOException {
        String html = pageWithCanonicalLinkInHead(
                "https&#x3a;&#x2f;&#x2f;test&#x2e;kaffe&#x2e;se&#x2f;web");

        Assert.assertEquals("https://test.kaffe.se/web",
                new GenericCanonicalLinkDetector().detectFromContent(
                        "http://www.test.te.com/web", htmlStream(html), ContentType.HTML));
    }

    /**
     * A single quote inside a double-quoted href must not cut the detected
     * URL short.
     */
    @Test
    public void testMixedQuoteCanonicalUrl() throws IOException {
        String html = pageWithCanonicalLinkInHead("http://www.example.com/blah'blah.html");

        Assert.assertEquals("http://www.example.com/blah'blah.html",
                new GenericCanonicalLinkDetector().detectFromContent(
                        "http://www.test.te.com/web", htmlStream(html), ContentType.HTML));
    }

    /**
     * A detector configured with custom content types is passed to
     * {@code XMLConfigurationUtil.assertWriteRead} for a write/read
     * round-trip check.
     */
    @Test
    public void testWriteRead() throws IOException {
        GenericCanonicalLinkDetector detector = new GenericCanonicalLinkDetector();
        detector.setContentTypes(new ContentType[]{ContentType.HTML, ContentType.TEXT});
        System.out.println("Writing/Reading this: " + detector);
        XMLConfigurationUtil.assertWriteRead(detector);
    }
}
