package ac.uk.susx.jack.tag.data;

import ac.uk.susx.jack.tag.context.Context;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.ml.feature.HashingTF;
import org.apache.spark.ml.feature.IDF;
import org.apache.spark.ml.feature.Tokenizer;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;

/* loaded from: input_file:ac/uk/susx/jack/tag/data/TFIDF.class */
/**
 * Builds Spark ML stages for a TF-IDF pipeline and applies them to an RDD of rows.
 *
 * <p>The pipeline tokenises the {@code "document"} column, hashes the resulting
 * {@code "tokens"} into a {@code "features"} column, and finally rescales those
 * into an {@code "idfFeatures"} column.
 */
public class TFIDF extends AbstractDataReader {

    /**
     * Creates a {@link HashingTF} stage wired to the given columns.
     *
     * @param str  name of the column the stage reads from
     * @param str2 name of the column the stage writes to
     * @param i    value handed to {@code setNumFeatures}
     * @return the configured {@link HashingTF} stage
     */
    public static HashingTF tf(String str, String str2, int i) {
        HashingTF hashingStage = new HashingTF();
        hashingStage.setInputCol(str);
        hashingStage.setOutputCol(str2);
        hashingStage.setNumFeatures(i);
        return hashingStage;
    }

    /**
     * Creates an {@link IDF} estimator wired to the given columns.
     *
     * @param str  name of the column the estimator reads from
     * @param str2 name of the column the fitted model writes to
     * @return the configured {@link IDF} estimator
     */
    public static IDF idf(String str, String str2) {
        IDF idfStage = new IDF();
        idfStage.setInputCol(str);
        idfStage.setOutputCol(str2);
        return idfStage;
    }

    /**
     * Runs the tokenise / hash / IDF pipeline over the supplied rows.
     *
     * <p>The rows are turned into a {@link DataFrame} using the schema returned by
     * {@code basicSchema()} (defined outside this class; presumably it supplies the
     * {@code "document"} column the tokenizer reads - verify against the parent class).
     *
     * @param javaRDD rows to convert into a {@link DataFrame}
     * @param i       number of features passed to the hashing stage
     * @return the hashed frame after the fitted IDF model has been applied to it
     */
    public DataFrame tfIdf(JavaRDD<Row> javaRDD, int i) {
        // Stages are constructed before the DataFrame, in the same order as always.
        HashingTF hashingStage = tf("tokens", "features", i);

        Tokenizer tokenizer = new Tokenizer();
        tokenizer.setInputCol("document");
        tokenizer.setOutputCol("tokens");

        DataFrame documents = Context.sqlContext().createDataFrame(javaRDD, basicSchema());
        DataFrame tokenized = tokenizer.transform(documents);
        DataFrame hashed = hashingStage.transform(tokenized);

        // The IDF model is fitted on the hashed frame and then applied to that same frame.
        IDF idfStage = idf("features", "idfFeatures");
        return idfStage.fit(hashed).transform(hashed);
    }
}
