from typing import Any

from statistics import mean

from nltk.tokenize import word_tokenize
from wordfreq import word_frequency  # noqa pylint: disable=import-error

from apadata.text_processors.evaluators.evaluator import Evaluator


class LanguageFrequencyEvaluator(Evaluator):
    """Evaluates the frequency of a keyword in a language.

    The keyword is split into tokens with NLTK's ``word_tokenize``; each
    token is looked up with ``wordfreq.word_frequency`` for the configured
    language, and the arithmetic mean of the per-token values is the score.
    """

    def __init__(self, language: str):
        """Store the language used for the frequency lookups.

        Args:
            language: Language identifier passed unchanged to
                ``word_frequency`` (presumably a code such as ``"en"`` --
                TODO confirm against the wordfreq documentation).
        """
        self.language = language

    def evaluate(self, keyword: str, **kwargs: Any) -> float:
        """Return the mean frequency of the tokens that make up ``keyword``.

        Args:
            keyword: Text to score; it may consist of several words.
            **kwargs: Accepted but not used by this evaluator.

        Returns:
            The mean of the per-token frequencies, or ``0.0`` when the
            keyword yields no tokens (e.g. an empty string).
        """
        keyword_tokens = word_tokenize(keyword)
        if not keyword_tokens:
            # statistics.mean() raises StatisticsError on an empty sequence,
            # so a keyword without any token is scored as "never seen".
            return 0.0
        freqs = [
            float(word_frequency(token, self.language)) for token in keyword_tokens
        ]
        # FIXME: this is a bit costly in time
        return mean(freqs)
