from django.db import models
from nltk import WordNetLemmatizer

from apadata.models import Keyword
from apadata.text_processors.keywords.lemma_finder import LemmaFinder


class LemmaFinderReducer(LemmaFinder):
    """
    Keeps a single keyword per lemma by discarding the others that share it.

    For the ``Keyword`` whose ``name`` equals the processed text, the lemma is
    computed and stored if it is missing. Every other ``Keyword`` with the same
    lemma (compared case-insensitively) is then flagged as discarded with the
    reason ``"lemma_finder"``.
    """

    # Lemmatizer used by ``process`` to compute a missing lemma.
    lemmatizer = WordNetLemmatizer()

    def __init__(self, text: str):
        """Store ``text`` through the parent ``LemmaFinder`` initializer."""
        super().__init__(text)

    def process(self) -> str:
        """
        Discard the keywords that share a lemma with the current text.

        Nothing is discarded when no ``Keyword`` is named after the text, when
        that keyword is itself already discarded, or when its lemma is empty.

        Returns:
            The text held by this processor, as ``str``. This method itself
            does not modify it.
        """
        # The parent step runs first; its return value is not used here.
        super().process()
        text = str(self.text)

        try:
            current = Keyword.objects.get(name=self.text)
        except (models.ObjectDoesNotExist, Keyword.DoesNotExist):
            # Unknown keyword: there is nothing to reduce.
            return text

        if current.discarded:
            # A discarded keyword must not cause its siblings to be discarded.
            return text

        if not current.lemma_form:
            # Compute the lemma lazily and persist it for later runs.
            current.lemma_form = str(self.lemmatizer.lemmatize(self.text))
            current.save()

        if not current.lemma_form:
            # An empty lemma would match every keyword whose lemma is blank
            # and wrongly discard all of them.
            return text

        duplicates = Keyword.objects.filter(
            lemma_form__iexact=current.lemma_form
        ).exclude(id=current.id)
        # Saved one by one so that each instance goes through ``save()``.
        for duplicate in duplicates:
            duplicate.discarded = True
            duplicate.discarded_reason = "lemma_finder"
            duplicate.save()

        return text
