from typing import List

from apadata.models import Keyword
from apadata.pipelines.keywords_remover_pipeline import KeywordsRemoverPipeline
from apadata.pipelines.pipeline_context import PipelineContext
from apadata.text_processors.text_embedder_processor import TextEmbedderProcessor
from apadata.utils import flatten


def keywords_remover_task(*, keywords: List[str]) -> List[str]:
    """Register unseen keywords, then run the remover pipeline on active ones.

    Every name in ``keywords`` that has no matching ``Keyword`` row is
    inserted (together with an embedding computed from the name) in a single
    ``bulk_create`` call. Afterwards a ``KeywordsRemoverPipeline`` is run once
    for each requested keyword whose ``discarded`` flag is ``False``.

    Args:
        keywords: Keyword names to process (keyword-only argument).

    Returns:
        The names of the requested keywords that matched ``discarded=False``
        when they were queried.

    NOTE(review): the returned names come from the rows loaded for the
    pipeline loop, so any change the pipeline makes to ``discarded``
    would presumably not be reflected here -- confirm this is intended.
    """
    stored_records = Keyword.objects.filter(name__in=keywords)
    requested_names = set(keywords)

    # Names that already have a row must not be inserted a second time.
    stored_names = set()
    for record in stored_records:
        stored_names.add(record.name)

    # Build one unsaved Keyword (name + flattened embedding) per new name.
    records_to_create = []
    for name in requested_names - stored_names:
        embedding = list(flatten(TextEmbedderProcessor(name).process()))
        records_to_create.append(Keyword(name=name, embedding=embedding))
    Keyword.objects.bulk_create(records_to_create)

    active_records = Keyword.objects.filter(name__in=keywords, discarded=False)

    # Run the pipeline once per active keyword, remembering each name.
    processed_names = []
    for record in active_records:
        pipeline_context = PipelineContext(payload={"keyword": record.name})
        KeywordsRemoverPipeline(context=pipeline_context).run()
        processed_names.append(record.name)

    return processed_names
