from typing import Optional, Set

from django.db import models
from django.db.models import Q

from apadata.models import Keyword, SimilarKeywords
from apadata.text_processors.keywords.similarity_finder import SimilarityFinder
from apadata.text_processors.text_embedder_processor import TextEmbedderProcessor
from apadata.utils import flatten


class SimilarityFinderEnricher(SimilarityFinder):
    """
    Find keywords similar to a text and persist them.

    The text is looked up (or created) as a ``Keyword``, its embedding is
    used to query the vector database, and every match whose score falls in
    ``[similarity_threshold, 1)`` is stored as a ``Keyword`` and linked to
    the source keyword through ``SimilarKeywords``.
    """

    def __init__(self, text: str, similarity_threshold: float = 0.75, top_k: int = 10):
        """
        :param text: keyword text to find similar keywords for.
        :param similarity_threshold: minimum (inclusive) match score to keep.
        :param top_k: number of matches requested from the vector database.
        """
        super().__init__(text)
        self.similarity_threshold = similarity_threshold
        self.top_k = top_k

    def process(self) -> Optional[Set[str]]:
        """
        Look up similar keywords, persist them and return their names.

        :return: the set of match ids (used as keyword names) that passed the
            score filter; an empty set when the source keyword is discarded.
        """
        super().process()
        try:
            keyword_obj = Keyword.objects.get(name=self.text)
        except (models.ObjectDoesNotExist, Keyword.DoesNotExist):
            # Compute the embedding once and reuse it for both the new row
            # and the vector query below.
            embedding = TextEmbedderProcessor(self.text).process()
            keyword_obj = Keyword.objects.create(
                name=self.text,
                embedding=list(flatten(embedding)),
            )
        else:
            if keyword_obj.discarded:
                return set()
            embedding = keyword_obj.embedding
            # Explicit emptiness check: plain truthiness is ambiguous for
            # array-like embeddings (e.g. numpy arrays raise ValueError).
            if embedding is None or len(embedding) == 0:
                embedding = TextEmbedderProcessor(self.text).process()

        result = TextEmbedderProcessor.get_vector_db().query(
            embedding=list(flatten(embedding)), top_k=self.top_k
        )
        # Keep matches within the score window; a score of 1 is excluded.
        # dict.fromkeys drops repeated ids while preserving match order, so
        # no keyword is created twice.
        similar_names = list(
            dict.fromkeys(
                match["id"]
                for match in result["matches"]
                if self.similarity_threshold <= match["score"] < 1
            )
        )
        if not similar_names:
            return set()

        similar_keywords = list(Keyword.objects.filter(name__in=similar_names))
        persisted_names = {keyword.name for keyword in similar_keywords}
        missing_names = [
            name for name in similar_names if name not in persisted_names
        ]

        if missing_names:
            Keyword.objects.bulk_create(
                [
                    Keyword(
                        name=name,
                        # Flattened like the source keyword's embedding above.
                        embedding=list(
                            flatten(TextEmbedderProcessor(name).process())
                        ),
                    )
                    for name in missing_names
                ]
            )
            # Re-read so every keyword carries a primary key: bulk_create
            # does not populate it on all database backends.
            similar_keywords = list(
                Keyword.objects.filter(name__in=similar_names)
            )

        # Only link pairs that are not linked yet, so repeated runs do not
        # insert duplicate SimilarKeywords rows.
        # NOTE(review): assumes keyword2 is a ForeignKey to Keyword's primary
        # key, so keyword2_id is comparable with Keyword.pk - confirm.
        already_linked = set(
            SimilarKeywords.objects.filter(
                Q(keyword1=keyword_obj) & Q(keyword2__in=similar_keywords)
            ).values_list("keyword2_id", flat=True)
        )
        SimilarKeywords.objects.bulk_create(
            [
                SimilarKeywords(keyword1=keyword_obj, keyword2=similar_keyword)
                for similar_keyword in similar_keywords
                if similar_keyword.pk not in already_linked
            ]
        )

        return set(similar_names)
