package com.casm.acled.crawler.scraper;

import com.casm.acled.crawler.Crawl;
import com.casm.acled.crawler.scraper.dates.ExcludingCustomDateMetadataFilter;
import com.casm.acled.dao.entities.ArticleDAO;
import com.casm.acled.dao.entities.SourceListDAO;
import com.casm.acled.entities.EntityVersions;
import com.casm.acled.entities.article.Article;
import com.casm.acled.entities.source.Source;
import com.norconex.collector.http.HttpCollector;
import com.norconex.collector.http.doc.HttpDocument;
import com.norconex.collector.http.doc.HttpMetadata;
import com.norconex.collector.http.processor.IHttpDocumentProcessor;
import com.norconex.jef4.status.JobState;
import java.time.LocalDate;
import java.util.function.Supplier;
import org.apache.http.client.HttpClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/casm/acled/crawler/scraper/ACLEDImporter.class */
/**
 * Norconex {@link IHttpDocumentProcessor} that converts the scraper metadata attached to a
 * crawled document into an {@link Article} and stores it through the {@link ArticleDAO}.
 *
 * <p>A document is imported only when it has not been flagged as previously scraped and both
 * the {@link ScraperFields#KEYWORD_PASSED} and {@link ScraperFields#DATE_PASSED} metadata
 * flags are true. Optionally, an article limit can be configured with
 * {@link #setMaxArticles(Integer)}; once the limit is reached the document is skipped and the
 * collector obtained from the configured supplier is asked to stop.
 */
public class ACLEDImporter implements IHttpDocumentProcessor {
    protected static final Logger logger = LoggerFactory.getLogger(ACLEDImporter.class);

    /** Metadata key whose value, parsed as a boolean, marks a document as already scraped. */
    private static final String PREVIOUSLY_SCRAPED_KEY = "scraped.previously";

    /** Metadata key read as an int and stored in the article's CRAWL_DEPTH field. */
    private static final String CRAWL_DEPTH_KEY = "collector.depth";

    private final ArticleDAO articleDAO;
    private final Source source;
    // NOTE(review): sourceListDAO and sourceRequired are stored but never read in this class;
    // confirm whether a "source may be absent" code path was intended (source.id() is
    // dereferenced unconditionally in processDocument).
    private final SourceListDAO sourceListDAO;
    private final boolean sourceRequired;
    private Supplier<HttpCollector> collectorSupplier;
    private Integer maxArticles = null;

    /**
     * Creates an importer bound to a single source.
     *
     * @param articleDAO    DAO used to count existing articles and to create new ones
     * @param source        source whose id is written to every imported article
     * @param sourceListDAO stored as-is; not used by this class
     * @param z             "source required" flag; stored as-is, not used by this class
     */
    public ACLEDImporter(ArticleDAO articleDAO, Source source, SourceListDAO sourceListDAO, boolean z) {
        this.articleDAO = articleDAO;
        this.source = source;
        this.sourceListDAO = sourceListDAO;
        this.sourceRequired = z;
    }

    /**
     * Sets the supplier used to look up the collector that should be stopped once the
     * article limit is reached.
     *
     * @param supplier supplier of the collector to stop
     */
    public void setCollectorSupplier(Supplier<HttpCollector> supplier) {
        this.collectorSupplier = supplier;
    }

    /**
     * Sets the article limit. {@code null} and negative values are ignored, leaving any
     * previously configured limit in place.
     *
     * @param num maximum number of articles, or {@code null}/negative to leave unchanged
     */
    public void setMaxArticles(Integer num) {
        if (num == null || num.intValue() < 0) {
            return;
        }
        this.maxArticles = num;
    }

    /**
     * Tells whether the document carries a "previously scraped" flag that parses to true.
     * A missing flag counts as not previously scraped.
     */
    private boolean previouslyScraped(HttpDocument httpDocument) {
        // parseBoolean treats null as false, so no separate null check is needed.
        return Boolean.parseBoolean(httpDocument.getMetadata().getString(PREVIOUSLY_SCRAPED_KEY));
    }

    /**
     * Stops the given collector if it is currently running.
     *
     * @param httpCollector collector to stop; {@code null} is tolerated and only logged
     */
    private synchronized void stop(HttpCollector httpCollector) {
        if (httpCollector == null) {
            logger.warn("Article limit reached but no collector is available to stop.");
            return;
        }
        // Check and stop the same instance rather than resolving the supplier twice.
        if (httpCollector.getState().isOneOf(new JobState[]{JobState.RUNNING})) {
            httpCollector.stop();
        }
    }

    /**
     * Checks the configured article limit against the number of entries returned by
     * {@code articleDAO.bySource(source)} and requests a collector stop when it is reached.
     *
     * @param source source whose articles are counted
     * @return {@code true} if the limit is set and has been reached, {@code false} otherwise
     */
    private boolean stopAfterNArticlesFromSource(Source source) {
        if (this.maxArticles == null
                || this.articleDAO.bySource(source).size() < this.maxArticles.intValue()) {
            return false;
        }
        // The supplier is optional configuration; without it we can still skip the document.
        Supplier<HttpCollector> supplier = this.collectorSupplier;
        stop(supplier == null ? null : supplier.get());
        return true;
    }

    /**
     * Builds an article from the document's scraper metadata. Title, keyword highlight and
     * date are only set when the corresponding metadata value is present.
     *
     * @param httpDocument document being imported; its reference becomes the article URL
     * @param metadata     the document's metadata
     * @return the populated article, without the source id
     */
    private Article buildArticle(HttpDocument httpDocument, HttpMetadata metadata) {
        String title = metadata.getString(ScraperFields.SCRAPED_TITLE);
        String keywordHighlight = metadata.getString(ScraperFields.KEYWORD_HIGHLIGHT);
        String standardDate = metadata.getString(ScraperFields.STANDARD_DATE);

        Article article = EntityVersions.get(Article.class).current();
        if (title != null) {
            article = (Article) article.put("TITLE", title);
        }
        article = (Article) article
                .put("TEXT", metadata.getString(ScraperFields.SCRAPED_ARTICLE))
                .put("SCRAPE_DATE", metadata.getString(ScraperFields.SCRAPED_DATE))
                .put("URL", httpDocument.getReference());
        if (keywordHighlight != null) {
            article = (Article) article.put("SCRAPE_KEYWORD_HIGHLIGHT", keywordHighlight);
        }
        if (standardDate != null) {
            article = (Article) article.put(
                    "DATE", ExcludingCustomDateMetadataFilter.toDate(standardDate).toLocalDate());
        }
        return (Article) article
                .put("CRAWL_DEPTH", Integer.valueOf(metadata.getInt(CRAWL_DEPTH_KEY)))
                .put("CRAWL_DATE", LocalDate.now());
    }

    /**
     * Imports the document as an article when it passed the keyword and date filters, has not
     * been scraped before, and the article limit (if any) has not been reached.
     *
     * @param httpClient   HTTP client supplied by the collector; not used
     * @param httpDocument document to import
     */
    @Override
    public void processDocument(HttpClient httpClient, HttpDocument httpDocument) {
        if (previouslyScraped(httpDocument)) {
            return;
        }
        HttpMetadata metadata = httpDocument.getMetadata();
        if (!metadata.getBoolean(ScraperFields.KEYWORD_PASSED)
                || !metadata.getBoolean(ScraperFields.DATE_PASSED)) {
            return;
        }
        // Check the limit before building the article so discarded documents cost no extra work.
        if (stopAfterNArticlesFromSource(this.source)) {
            return;
        }
        Article article = buildArticle(httpDocument, metadata);
        this.articleDAO.create(article.put(Crawl.SOURCE_ID, Integer.valueOf(this.source.id())));
    }
}
