package net.doo.datamining.preprocessing;

import com.beust.jcommander.ParametersDelegate;
import com.google.common.base.MoreObjects;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.UnmodifiableIterator;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import net.doo.datamining.io.BinaryChunk;
import net.doo.datamining.io.IO;
import net.doo.datamining.language.LanguageClassifier;
import net.doo.datamining.util.Pair;
import org.apache.log4j.Logger;

/* loaded from: classes2.dex */
/**
 * Bundles the three preprocessing components (string filter, bag-of-words factory and
 * dictionary factory) and offers helpers that turn files on disk into bags of words and
 * word vectors.
 *
 * <p>Each component is exposed as a JCommander {@code @ParametersDelegate}, so its own
 * annotated parameters are picked up when an instance of this class is handed to JCommander.
 */
public class PreprocessingConfiguration {
    private static final Logger log = Logger.getLogger(PreprocessingConfiguration.class);

    /** Filter that is passed to the bag-of-words factory for every file that is read. */
    @ParametersDelegate
    public final StringFilter filter = new StringFilter();

    /** Factory used to turn a single file into a {@link BagOfWords}. */
    @ParametersDelegate
    public final BagOfWordsFactory bowFactory = new BagOfWordsFactory();

    /** Factory used to turn a {@link BagOfWords} into a {@link WordVector}. */
    @ParametersDelegate
    public final DictionaryFactory dictionaryFactory = new DictionaryFactory();

    /**
     * Restores the state of all three components from the given chunk.
     *
     * <p>The components are read in the order filter, bag-of-words factory, dictionary factory.
     * NOTE(review): presumably this order has to match the order in which the chunk was
     * written - confirm against the serializing side.
     *
     * @param binaryChunk the chunk to read from
     * @return this instance, to allow call chaining
     * @throws IOException if one of the components fails to read from the chunk
     */
    public PreprocessingConfiguration fromChunk(BinaryChunk binaryChunk) throws IOException {
        this.filter.fromChunk(binaryChunk);
        this.bowFactory.fromChunk(binaryChunk);
        this.dictionaryFactory.fromChunk(binaryChunk);
        return this;
    }

    /**
     * Reads every text file below the given roots and converts it into a word vector using
     * the supplied dictionary.
     *
     * <p>Vectors are normalized when {@code dictionaryFactory.isNormalizeWordVector()} is true.
     *
     * @param collection files or directories to read, see {@link #readBagsOfWords(Collection)}
     * @param dictionary dictionary handed to {@code dictionaryFactory.buildWordVector}
     * @return one word vector per bag of words, in the order the bags were read
     * @throws IOException if reading the files fails
     */
    public ImmutableList<WordVector> getClassificationData(Collection<File> collection, Dictionary dictionary) throws IOException {
        ImmutableList.Builder<WordVector> vectors = ImmutableList.builder();
        for (BagOfWords bag : readBagsOfWords(collection)) {
            WordVector vector = this.dictionaryFactory.buildWordVector(bag, dictionary);
            if (this.dictionaryFactory.isNormalizeWordVector()) {
                vector = vector.normalize();
            }
            vectors.add(vector);
        }
        return vectors.build();
    }

    /**
     * Walks every given root with {@code IO.listFilesRecursive} and preprocesses each entry
     * that {@code IO.mightBeText} accepts into a {@link BagOfWords}.
     *
     * <p>Every file is labelled via {@link #labelFor(File, File)}: files that are the root
     * itself or sit directly inside it get {@link LanguageClassifier#UNDEFINED_LANGUAGE},
     * all others get the name of the first directory below the root.
     *
     * @param collection files or directories to read
     * @return the bags of words of all accepted files, in traversal order
     * @throws IOException if listing or reading a file fails
     */
    public ImmutableList<BagOfWords> readBagsOfWords(Collection<File> collection) throws IOException {
        ImmutableList.Builder<BagOfWords> bags = ImmutableList.builder();
        for (File root : collection) {
            for (Pair<File, String> entry : IO.listFilesRecursive(root)) {
                File file = entry.fst;
                // The label is derived before the text check, exactly as before, so a
                // malformed path surfaces regardless of the file's content.
                String label = labelFor(root, file);
                if (IO.mightBeText(file)) {
                    // The "/" below is part of the name string handed to the factory and is
                    // deliberately not the platform separator.
                    String name = entry.snd + "/" + file.getName();
                    bags.add(this.bowFactory.preprocess(Pair.of(label, ""), file, name, this.filter, true));
                }
            }
        }
        return bags.build();
    }

    /**
     * Derives the label of {@code file} from its position relative to {@code root}.
     *
     * @param root the root that was traversed
     * @param file an entry found while traversing {@code root}
     * @return the name of the first directory below {@code root} on the way to {@code file},
     *         or {@link LanguageClassifier#UNDEFINED_LANGUAGE} when the file is the root
     *         itself, lies directly inside it, or no such directory can be determined
     */
    private static String labelFor(File root, File file) {
        // root.equals(...) is null-safe, getParentFile() may return null.
        if (file.equals(root) || root.equals(file.getParentFile())) {
            return LanguageClassifier.UNDEFINED_LANGUAGE;
        }
        String relative = file.getAbsolutePath().substring(root.getAbsolutePath().length());
        // The relative path normally starts with a separator; it does not when the root's
        // absolute path already ends with one (e.g. a filesystem root).
        int start = (!relative.isEmpty() && relative.charAt(0) == File.separatorChar) ? 1 : 0;
        int end = relative.indexOf(File.separatorChar, start);
        if (end < 0) {
            return LanguageClassifier.UNDEFINED_LANGUAGE;
        }
        return relative.substring(start, end);
    }

    /** Returns a string listing the filter, the bag-of-words factory and the dictionary factory. */
    @Override
    public String toString() {
        return MoreObjects.toStringHelper(this).addValue(this.filter).addValue(this.bowFactory).addValue(this.dictionaryFactory).toString();
    }
}
