from typing import List, Optional

from collections import Counter

from nltk import word_tokenize

from apadata.strategies.score_strategy import ScoreStrategyEnum
from apadata.strategies.score_strategy_factory import ScoreStrategyFactory
from apadata.text_processors import LangDetectTextProcessor
from apadata.text_processors.evaluators.evaluator import Evaluator


def get_all_other_keywords_counter_mapping(used_languages, keyword_list):
    """Build a Counter of accepted keywords and their individual tokens.

    A keyword is accepted when the language detected for it is contained in
    ``used_languages``. For every accepted keyword, both the full keyword
    string and each token produced by ``word_tokenize`` are collected.

    Note that the collected strings are deduplicated with ``set`` before
    they are handed to ``Counter``, so every entry in the result has a
    count of exactly 1.

    Args:
        used_languages: Container of language codes to accept.
        keyword_list: Iterable of keyword strings to filter and tokenize.

    Returns:
        A ``Counter`` keyed by the accepted keywords and their tokens.
    """
    collected = []
    for candidate in keyword_list:
        detected_language = LangDetectTextProcessor(text=candidate).detect()
        if detected_language not in used_languages:
            # Keywords in other languages contribute nothing.
            continue
        # Keep the whole keyword first, followed by its tokens.
        collected.extend([candidate, *word_tokenize(candidate)])

    unique_entries = set(collected)
    return Counter(unique_entries)


class ElasticsearchEvaluator(Evaluator):
    """Evaluator that rates how relevant a keyword is, using results and
    features computed from elasticsearch.

    The score of a keyword is derived from how its tokens relate to a
    reference collection of keywords supplied at construction time."""

    def __init__(
        self,
        strategy: ScoreStrategyEnum = ScoreStrategyEnum(ScoreStrategyEnum.HIGHEST),
        all_keywords_list: Optional[List[str]] = None,
    ):
        """Prepare the reference keyword counts used by ``evaluate``.

        Args:
            strategy: Scoring strategy used to combine the per-token
                frequencies into a single value.
            all_keywords_list: Reference keywords; treated as an empty list
                when ``None``.
        """
        reference_keywords = [] if all_keywords_list is None else all_keywords_list

        self.strategy: ScoreStrategyEnum = strategy
        # Only keywords detected as one of these languages are counted.
        self.used_languages = ["en"]
        self.all_other_keywords = get_all_other_keywords_counter_mapping(
            self.used_languages, keyword_list=reference_keywords
        )

        # Normalisation divisor; falls back to 1 when no keyword was kept.
        if self.all_other_keywords:
            self.highest_frequency = max(self.all_other_keywords.values())
        else:
            self.highest_frequency = 1

    def evaluate(self, keyword: str) -> float:
        """Score ``keyword`` from the normalised frequencies of its tokens.

        Each token found in the reference counts contributes its count
        divided by ``highest_frequency``; unknown tokens contribute 0. The
        configured strategy then reduces these values to one number.

        Args:
            keyword: The keyword to evaluate.

        Returns:
            The strategy's result converted to ``float``.
        """
        relative_frequencies = []
        for token in word_tokenize(keyword):
            if token in self.all_other_keywords.keys():
                share = self.all_other_keywords[token] / self.highest_frequency
            else:
                share = 0
            relative_frequencies.append(share)

        scorer = ScoreStrategyFactory.create(self.strategy)
        combined = scorer.calculate(relative_frequencies)
        return float(combined)
