package com.norconex.collector.http.pipeline.queue;

import com.norconex.collector.http.crawler.HttpCrawlerEvent;
import com.norconex.collector.http.data.HttpCrawlState;
import com.norconex.collector.http.robot.IRobotsTxtFilter;
import com.norconex.collector.http.robot.RobotsTxt;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

/* loaded from: input_file:com/norconex/collector/http/pipeline/queue/RobotsTxtFiltersStage.class */
/**
 * Queue pipeline stage that rejects a reference when a robots.txt
 * "disallow" filter rejects it and no overriding "allow" filter accepts it.
 * The stage is a no-op (always passes) when the crawler configuration
 * reports that robots.txt must be ignored.
 */
class RobotsTxtFiltersStage extends AbstractQueueStage {
    private static final Logger LOG = LogManager.getLogger(RobotsTxtFiltersStage.class);

    /**
     * Applies the robots.txt filters to the reference held by the context.
     *
     * <p>On rejection the crawl data state is set to
     * {@link HttpCrawlState#REJECTED}, a
     * {@link HttpCrawlerEvent#REJECTED_ROBOTS_TXT} event is fired with the
     * rejecting filter as its subject, and a debug message is logged.
     *
     * @param httpQueuePipelineContext the queue pipeline context
     * @return {@code true} if robots.txt is ignored or no filter rejects the
     *         reference; {@code false} if the reference was rejected
     */
    @Override
    public boolean executeStage(HttpQueuePipelineContext httpQueuePipelineContext) {
        if (httpQueuePipelineContext.m37getConfig().isIgnoreRobotsTxt()) {
            return true;
        }
        IRobotsTxtFilter rejectingFilter = findRejectingRobotsFilter(httpQueuePipelineContext);
        if (rejectingFilter == null) {
            return true;
        }
        httpQueuePipelineContext.m36getCrawlData().setState(HttpCrawlState.REJECTED);
        httpQueuePipelineContext.fireCrawlerEvent(
                HttpCrawlerEvent.REJECTED_ROBOTS_TXT,
                httpQueuePipelineContext.m36getCrawlData(),
                rejectingFilter);
        // Guarded so the message is only concatenated when it will be logged.
        if (LOG.isDebugEnabled()) {
            LOG.debug("REJECTED by robots.txt. . Reference="
                    + httpQueuePipelineContext.m36getCrawlData().getReference()
                    + " Filter=" + rejectingFilter);
        }
        return false;
    }

    /**
     * Finds the first disallow filter that rejects the context's reference
     * and is not overridden by an allow filter.
     *
     * @param httpQueuePipelineContext the queue pipeline context
     * @return the rejecting disallow filter, or {@code null} when there is no
     *         robots.txt, no disallow filters, or no effective rejection
     */
    private IRobotsTxtFilter findRejectingRobotsFilter(HttpQueuePipelineContext httpQueuePipelineContext) {
        RobotsTxt robotsTxt = HttpQueuePipeline.getRobotsTxt(httpQueuePipelineContext);
        if (robotsTxt == null) {
            return null;
        }
        IRobotsTxtFilter[] disallowFilters = robotsTxt.getDisallowFilters();
        if (disallowFilters == null) {
            return null;
        }
        String reference = httpQueuePipelineContext.m36getCrawlData().getReference();
        IRobotsTxtFilter[] allowFilters = robotsTxt.getAllowFilters();
        for (IRobotsTxtFilter disallowFilter : disallowFilters) {
            if (!disallowFilter.acceptReference(reference)
                    && !isOverriddenByAllow(disallowFilter, allowFilters, reference)) {
                return disallowFilter;
            }
        }
        return null;
    }

    /**
     * Tells whether an allow filter takes precedence over a rejecting
     * disallow filter: it must have a strictly longer path than the disallow
     * filter and accept the reference.
     *
     * @param disallowFilter the disallow filter that rejected the reference
     * @param allowFilters the allow filters; {@code null} is treated the same
     *        as an empty array (nothing can override)
     * @param reference the reference being evaluated
     * @return {@code true} if at least one allow filter overrides the
     *         disallow filter
     */
    private boolean isOverriddenByAllow(
            IRobotsTxtFilter disallowFilter, IRobotsTxtFilter[] allowFilters, String reference) {
        if (allowFilters == null) {
            return false;
        }
        for (IRobotsTxtFilter allowFilter : allowFilters) {
            if (allowFilter.getPath().length() > disallowFilter.getPath().length()
                    && allowFilter.acceptReference(reference)) {
                return true;
            }
        }
        return false;
    }
}
