from typing import Set

import inflect
from nltk import WordNetLemmatizer

from apadata.models import Keyword
from apadata.text_processors.keywords.lemma_finder import LemmaFinder
from apadata.text_processors.text_embedder_processor import TextEmbedderProcessor
from apadata.utils import flatten


class LemmaFinderEnricher(LemmaFinder):
    """
    Expands the processed text into its inflected forms and makes sure every
    form is stored as a ``Keyword``.

    The forms are produced with ``inflect``: plural, present participle,
    plural verb and plural noun. Each form that has no ``Keyword`` row with
    that name yet is created together with its flattened text embedding.
    """

    # Not referenced inside this class; presumably consumed by LemmaFinder
    # or by external callers -- verify before removing.
    lemmatizer = WordNetLemmatizer()

    def __init__(self, text: str):
        super().__init__(text)

    def process(self) -> Set[str]:
        """
        Build the inflected forms of ``self.text`` and persist the new ones.

        Returns:
            The set of inflected forms (duplicates between the four
            inflections collapse into a single entry).
        """
        # The parent step still runs, but its return value is discarded.
        super().process()

        engine = inflect.engine()
        inflections = (
            engine.plural,
            engine.present_participle,
            engine.plural_verb,
            engine.plural_noun,
        )
        inflected_forms = {inflection(self.text) for inflection in inflections}

        for form in inflected_forms:
            # Skip forms that are already known keywords.
            if Keyword.objects.filter(name=form).exists():
                continue
            embedding = list(flatten(TextEmbedderProcessor(form).process()))
            Keyword.objects.create(name=form, embedding=embedding)

        return inflected_forms
