package ac.uk.susx.jack.tag.runner;

import ac.uk.susx.jack.tag.cluster.LDAClustering;
import ac.uk.susx.jack.tag.data.TFIDF;
import com.beust.jcommander.internal.Maps;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.spark.ml.clustering.LDAModel;
import org.apache.spark.ml.feature.CountVectorizer;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;

/* loaded from: input_file:ac/uk/susx/jack/tag/runner/LDARunner.class */
/**
 * Static entry points for fitting an LDA topic model on a directory/file of text
 * and for printing the average topic distribution of a data set under a fitted model.
 *
 * <p>Both methods write their diagnostics to standard output.
 */
public class LDARunner {
    /**
     * Position, within each row produced by {@code LDAModel.transform}, of the column
     * that {@link #predict} reads as the per-document topic vector.
     * NOTE(review): presumably this is the model's topic-distribution output column for
     * frames built by {@link #train}; confirm if the input schema ever changes.
     */
    private static final int TOPIC_DISTRIBUTION_COLUMN = 4;

    /**
     * Reads the {@code "txt"} data at {@code str}, tokenises it, turns the tokens into
     * term-count feature vectors and fits an LDA model on them.
     *
     * <p>Along the way the first 20 feature rows, the model's log likelihood and log
     * perplexity on the training frame, and the top 20 terms of every topic are printed.
     *
     * @param str path handed to {@code TFIDF.readData}
     * @param i   second argument of {@code LDAClustering.train} (presumably the number of
     *            topics; verify against that class)
     * @param i2  fifth argument of {@code LDAClustering.train} (presumably the iteration
     *            limit; verify against that class)
     * @param d   third argument of {@code LDAClustering.train}
     * @param d2  fourth argument of {@code LDAClustering.train}
     * @return the feature frame paired with the fitted model, or {@code null} if reading
     *         the input failed with an {@link IOException} (the stack trace is printed)
     */
    public static CompositeModel<LDAModel> train(String str, int i, int i2, double d, double d2) {
        File file = new File(str);
        TFIDF tfidf = new TFIDF();
        LDAClustering lDAClustering = new LDAClustering();
        try {
            DataFrame tokenised = tfidf.tokenise(
                    tfidf.basicDataFrame(tfidf.readData(file, "txt"), TFIDF.basicSchema()),
                    "document",
                    "text");
            // Terms must occur at least twice in a document to be counted for it.
            DataFrame features = new CountVectorizer()
                    .setMinTF(2.0d)
                    .setInputCol("text")
                    .setOutputCol("features")
                    .fit(tokenised)
                    .transform(tokenised);
            features.show(20);

            LDAModel model = lDAClustering.train(features, i, d, d2, i2);
            System.out.println("log likelihood: " + model.logLikelihood(features));
            System.out.println("perplexity: " + model.logPerplexity(features));
            model.describeTopics(20).show(false);
            return new CompositeModel<>(features, model);
        } catch (IOException e) {
            // Kept as a null return so existing callers that check for null keep working.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Applies {@code lDAModel} to {@code dataFrame}, collects the result to the driver
     * and prints, for every topic index, the mean of that topic's weight over all rows.
     *
     * <p>Nothing is printed when the transformed frame contains no rows.
     *
     * @param lDAModel  fitted model used to transform the data
     * @param dataFrame data to transform; after transformation, column
     *                  {@value #TOPIC_DISTRIBUTION_COLUMN} of every row must hold a
     *                  {@link Vector}
     */
    public static void predict(LDAModel lDAModel, DataFrame dataFrame) {
        List<Row> rows = lDAModel.transform(dataFrame).collectAsList();
        if (rows.isEmpty()) {
            // No rows means no topic vector to size the totals from and nothing to average.
            return;
        }

        int topicCount = ((Vector) rows.get(0).get(TOPIC_DISTRIBUTION_COLUMN)).size();
        double[] totals = new double[topicCount];
        for (Row row : rows) {
            // Materialise the vector once per row rather than once per element.
            double[] weights = ((Vector) row.get(TOPIC_DISTRIBUTION_COLUMN)).toArray();
            for (int topic = 0; topic < weights.length; topic++) {
                totals[topic] += weights[topic];
            }
        }

        for (int topic = 0; topic < totals.length; topic++) {
            System.out.println("Topic: " + topic + " Ave. Topic Dist: " + (totals[topic] / rows.size()));
            System.out.println();
        }
    }
}
