/*
 * Decompiled with CFR 0.152.
 */
package eu.europeana.patternanalysis;

import eu.europeana.metis.schema.convert.RdfConversionUtils;
import eu.europeana.metis.schema.convert.SerializationException;
import eu.europeana.metis.schema.jibx.AboutType;
import eu.europeana.metis.schema.jibx.EuropeanaType;
import eu.europeana.metis.schema.jibx.LiteralType;
import eu.europeana.metis.schema.jibx.ProxyType;
import eu.europeana.metis.schema.jibx.RDF;
import eu.europeana.metis.schema.jibx.ResourceOrLiteralType;
import eu.europeana.patternanalysis.view.ProblemOccurrence;
import eu.europeana.patternanalysis.view.ProblemPattern;
import eu.europeana.patternanalysis.view.ProblemPatternAnalysis;
import eu.europeana.patternanalysis.view.ProblemPatternDescription;
import eu.europeana.patternanalysis.view.RecordAnalysis;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.similarity.LongestCommonSubsequence;

/**
 * Inspects the titles, descriptions and identifiers found in the provider proxies of a record
 * and reports which of the record-level problem patterns (P2, P3, P5, P6, P7, P9, P12) occur.
 *
 * <p>The analyzer keeps no state between calls.
 */
public class ProblemPatternAnalyzer {
    /** Titles of this length or shorter are reported as P6. */
    private static final int MIN_TITLE_LENGTH = 2;
    /** Titles longer than this are reported as P12. */
    private static final int MAX_TITLE_LENGTH = 70;
    /** Non-blank descriptions of this length or shorter are reported as P9. */
    private static final int MIN_DESCRIPTION_LENGTH = 50;
    /** Titles with more than this many unrecognizable characters are reported as P5. */
    private static final int UNRECOGNIZABLE_CHARACTERS_THRESHOLD = 5;
    /** Minimum ratio (LCS length / shorter text length) for a title and description to count as near-identical. */
    private static final double LCS_CALCULATION_THRESHOLD = 0.9;
    /** Maximum length difference between a title and a description for the P3 comparison. */
    private static final int TITLE_DESCRIPTION_LENGTH_DISTANCE = 20;
    /** Maximum width of an element value when it is included in a report. */
    private static final int DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT = 50;
    /** Matches any single character that is not alphabetic, not a digit and not a space. */
    private static final String UNRECOGNIZABLE_CHARACTERS_REGEX = "[^\\p{IsAlphabetic}\\p{IsDigit} ]";
    private static final Pattern UNRECOGNIZABLE_CHARACTERS_PATTERN = Pattern.compile(UNRECOGNIZABLE_CHARACTERS_REGEX);

    /** Contains only P1, which is not evaluated by {@link #analyzeRecord(RDF)}. */
    public static final Set<ProblemPatternDescription.ProblemPatternId> globalProblemPatterns = Collections.unmodifiableSet(EnumSet.of(ProblemPatternDescription.ProblemPatternId.P1));
    /** Contains every problem pattern id except P1. */
    public static final Set<ProblemPatternDescription.ProblemPatternId> nonGlobalProblemPatterns = Collections.unmodifiableSet(EnumSet.complementOf(EnumSet.of(ProblemPatternDescription.ProblemPatternId.P1)));

    /**
     * Converts the given string to an {@link RDF} object and analyzes it.
     *
     * @param rdfString the serialized record
     * @return the analysis of the record
     * @throws SerializationException propagated from {@link RdfConversionUtils#convertStringToRdf}
     */
    public ProblemPatternAnalysis analyzeRecord(String rdfString) throws SerializationException {
        return analyzeRecord(new RdfConversionUtils().convertStringToRdf(rdfString));
    }

    /**
     * Analyzes the provider proxies of the given record.
     *
     * @param rdf the record to analyze
     * @return the analysis, carrying the about value of the first non-null provided CHO (or
     *     {@code null} if there is none), the detected problem patterns and the distinct titles
     */
    public ProblemPatternAnalysis analyzeRecord(RDF rdf) {
        final List<EuropeanaType.Choice> choices = getProviderProxies(rdf).stream()
                .map(EuropeanaType::getChoiceList)
                .filter(Objects::nonNull)
                .flatMap(Collection::stream)
                .toList();

        final List<String> titles = getChoicesInStringList(choices, EuropeanaType.Choice::ifTitle, EuropeanaType.Choice::getTitle, LiteralType::getString);
        final List<String> descriptions = getChoicesInStringList(choices, EuropeanaType.Choice::ifDescription, EuropeanaType.Choice::getDescription, ResourceOrLiteralType::getString);
        final List<String> identifiers = getChoicesInStringList(choices, EuropeanaType.Choice::ifIdentifier, EuropeanaType.Choice::getIdentifier, LiteralType::getString);

        // Guard against a missing provided-CHO list the same way the proxy list is guarded.
        final String rdfAbout = Optional.ofNullable(rdf.getProvidedCHOList()).stream()
                .flatMap(Collection::stream)
                .filter(Objects::nonNull)
                .findFirst()
                .map(AboutType::getAbout)
                .orElse(null);

        final ArrayList<ProblemPattern> problemPatterns = computeProblemPatterns(rdfAbout, titles, descriptions, identifiers);
        return new ProblemPatternAnalysis(rdfAbout, problemPatterns, Set.copyOf(titles));
    }

    /**
     * Extracts the string values of all choices accepted by the predicate.
     *
     * <p>Null choices, null getter results and null strings are dropped, so the returned list
     * never contains {@code null}; the downstream checks and {@link Set#copyOf} rely on that.
     */
    private <T> List<String> getChoicesInStringList(List<EuropeanaType.Choice> choices, Predicate<EuropeanaType.Choice> choicePredicate, Function<EuropeanaType.Choice, T> choiceGetter, Function<T, String> getString) {
        return choices.stream()
                .filter(Objects::nonNull)
                .filter(choicePredicate)
                .map(choiceGetter)
                .filter(Objects::nonNull)
                .map(getString)
                .filter(Objects::nonNull)
                .toList();
    }

    /**
     * Runs every record-level check and collects a {@link ProblemPattern} for each check that
     * produced at least one occurrence.
     */
    // NOTE(review): the concrete ArrayList return type is kept because the ProblemPatternAnalysis
    // constructor signature is not visible from this file.
    private ArrayList<ProblemPattern> computeProblemPatterns(String rdfAbout, List<String> titles, List<String> descriptions, List<String> identifiers) {
        final ArrayList<ProblemPattern> problemPatterns = new ArrayList<>();
        constructProblemPattern(rdfAbout, ProblemPatternDescription.P2, checkP2(titles, descriptions)).ifPresent(problemPatterns::add);
        constructProblemPattern(rdfAbout, ProblemPatternDescription.P3, checkP3(titles, descriptions)).ifPresent(problemPatterns::add);
        constructProblemPattern(rdfAbout, ProblemPatternDescription.P5, checkP5(titles, identifiers)).ifPresent(problemPatterns::add);
        constructProblemPattern(rdfAbout, ProblemPatternDescription.P6, checkP6(titles)).ifPresent(problemPatterns::add);
        constructProblemPattern(rdfAbout, ProblemPatternDescription.P7, checkP7(descriptions)).ifPresent(problemPatterns::add);
        constructProblemPattern(rdfAbout, ProblemPatternDescription.P9, checkP9(descriptions)).ifPresent(problemPatterns::add);
        constructProblemPattern(rdfAbout, ProblemPatternDescription.P12, checkP12(titles)).ifPresent(problemPatterns::add);
        return problemPatterns;
    }

    /** A proxy counts as a provider proxy when it has no europeanaProxy element or that element is false. */
    private static boolean isProviderProxy(ProxyType proxy) {
        return proxy.getEuropeanaProxy() == null || BooleanUtils.isFalse(proxy.getEuropeanaProxy().isEuropeanaProxy());
    }

    /** Returns the non-null provider proxies of the record; empty when the record has no proxy list. */
    private List<ProxyType> getProviderProxies(RDF rdf) {
        return Optional.ofNullable(rdf.getProxyList()).stream()
                .flatMap(Collection::stream)
                .filter(Objects::nonNull)
                .filter(ProblemPatternAnalyzer::isProviderProxy)
                .toList();
    }

    /**
     * Abbreviates an element value for inclusion in a report.
     *
     * @param element the value to abbreviate
     * @return the result of {@link StringUtils#abbreviate(String, int)} with a maximum width of
     *     {@value #DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT}
     */
    public String abbreviateElement(String element) {
        return StringUtils.abbreviate(element, DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT);
    }

    /**
     * P2: reports every value that occurs both as a title and as a description, ignoring case.
     * The reported value is the lower-cased form.
     */
    private List<ProblemOccurrence> checkP2(List<String> titles, List<String> descriptions) {
        // Locale.ROOT keeps the comparison and the reported value independent of the JVM default locale.
        final Set<String> uniqueTitles = titles.stream()
                .map(title -> title.toLowerCase(Locale.ROOT))
                .collect(Collectors.toSet());
        final Set<String> uniqueDescriptions = descriptions.stream()
                .map(description -> description.toLowerCase(Locale.ROOT))
                .collect(Collectors.toSet());

        final Set<String> equalTitlesAndDescriptions = new HashSet<>(uniqueTitles);
        equalTitlesAndDescriptions.retainAll(uniqueDescriptions);
        return equalTitlesAndDescriptions.stream()
                .map(value -> new ProblemOccurrence(abbreviateElement(value)))
                .toList();
    }

    /**
     * Returns the non-blank descriptions that are near-identical to, but not equal (ignoring
     * case) to, the given title.
     *
     * <p>The title is expected to be non-blank so that the LCS ratio never divides by zero.
     */
    private List<String> nearIdenticalDescriptions(String title, List<String> descriptions) {
        final LongestCommonSubsequence longestCommonSubsequence = new LongestCommonSubsequence();
        final Predicate<String> distancePredicate =
                description -> Math.abs(title.length() - description.length()) <= TITLE_DESCRIPTION_LENGTH_DISTANCE;
        final Predicate<String> lcsPredicate =
                description -> lcsRatio(longestCommonSubsequence, title, description) >= LCS_CALCULATION_THRESHOLD;

        return descriptions.stream()
                .filter(StringUtils::isNotBlank)
                .filter(Predicate.not(title::equalsIgnoreCase))
                // The cheap length check goes first so the quadratic LCS only runs on plausible candidates.
                .filter(distancePredicate.and(lcsPredicate))
                .toList();
    }

    /** Length of the longest common subsequence divided by the length of the shorter text. */
    private static double lcsRatio(LongestCommonSubsequence longestCommonSubsequence, String title, String description) {
        final int commonLength = longestCommonSubsequence.apply(title, description);
        final int shorterLength = Math.min(title.length(), description.length());
        return (double) commonLength / shorterLength;
    }

    /** P3: reports each pair of a non-blank title and a description that is near-identical to it. */
    private List<ProblemOccurrence> checkP3(List<String> titles, List<String> descriptions) {
        final Map<String, List<String>> nearIdenticalTitleDescriptionsMap = titles.stream()
                .filter(StringUtils::isNotBlank)
                // Duplicate titles would yield the same descriptions; skip recomputing them.
                .distinct()
                .collect(Collectors.toMap(
                        Function.identity(),
                        title -> nearIdenticalDescriptions(title, descriptions),
                        (first, second) -> first));

        return nearIdenticalTitleDescriptionsMap.entrySet().stream()
                .flatMap(entry -> entry.getValue().stream()
                        .map(description -> new ProblemOccurrence(String.format("%s <--> %s",
                                abbreviateElement(entry.getKey()), abbreviateElement(description)))))
                .toList();
    }

    /**
     * P5: reports titles that contain more unrecognizable characters than the threshold or that
     * contain one of the record's identifiers.
     */
    private List<ProblemOccurrence> checkP5(List<String> titles, List<String> identifiers) {
        // A blank identifier would be "contained" in (nearly) every title, so it is ignored.
        final List<String> usableIdentifiers = identifiers.stream().filter(StringUtils::isNotBlank).toList();
        final Predicate<String> moreThanThresholdUnrecognizableCharacters =
                title -> UNRECOGNIZABLE_CHARACTERS_PATTERN.matcher(title).results().count() > UNRECOGNIZABLE_CHARACTERS_THRESHOLD;
        final Predicate<String> containsIdentifier =
                title -> usableIdentifiers.stream().anyMatch(title::contains);

        return titles.stream()
                .filter(moreThanThresholdUnrecognizableCharacters.or(containsIdentifier))
                .map(title -> new ProblemOccurrence(abbreviateElement(title)))
                .toList();
    }

    /** P6: reports titles whose length does not exceed {@link #MIN_TITLE_LENGTH}. */
    private List<ProblemOccurrence> checkP6(List<String> titles) {
        return titles.stream()
                .filter(title -> title.length() <= MIN_TITLE_LENGTH)
                .map(title -> new ProblemOccurrence(abbreviateElement(title)))
                .toList();
    }

    /** P7: reports a single occurrence when there are no descriptions or all of them are blank. */
    private List<ProblemOccurrence> checkP7(List<String> descriptions) {
        if (CollectionUtils.isEmpty(descriptions) || descriptions.stream().allMatch(StringUtils::isBlank)) {
            return List.of(new ProblemOccurrence(abbreviateElement("Missing description fields")));
        }
        return Collections.emptyList();
    }

    /** P9: reports non-blank descriptions whose length does not exceed {@link #MIN_DESCRIPTION_LENGTH}. */
    private List<ProblemOccurrence> checkP9(List<String> descriptions) {
        return descriptions.stream()
                .filter(StringUtils::isNotBlank)
                .filter(description -> description.length() <= MIN_DESCRIPTION_LENGTH)
                .map(description -> new ProblemOccurrence(abbreviateElement(description)))
                .toList();
    }

    /** P12: reports titles longer than {@link #MAX_TITLE_LENGTH}. */
    private List<ProblemOccurrence> checkP12(List<String> titles) {
        return titles.stream()
                .filter(title -> title.length() > MAX_TITLE_LENGTH)
                .map(title -> new ProblemOccurrence(abbreviateElement(title)))
                .toList();
    }

    /**
     * Wraps the occurrences of one check into a {@link ProblemPattern} with a record count of 1.
     *
     * @return the pattern, or an empty optional when there are no occurrences
     */
    private Optional<ProblemPattern> constructProblemPattern(String recordId, ProblemPatternDescription problemPatternDescription, List<ProblemOccurrence> problemOccurrences) {
        if (CollectionUtils.isEmpty(problemOccurrences)) {
            return Optional.empty();
        }
        final RecordAnalysis recordAnalysis = new RecordAnalysis(recordId, problemOccurrences);
        return Optional.of(new ProblemPattern(problemPatternDescription, 1, List.of(recordAnalysis)));
    }
}

