from typing import Dict, List, Optional

from django.db import models

from apadata.models import DomainTargetIndustries, Industry
from apadata.models.domain import Domain
from apadata.pipelines.pipeline_context import PipelineContext
from apadata.pipelines.target_industries_extraction_pipeline import (
    TargetIndustriesExtractionPipeline,
)


def get_persisted_results(domain_object: Domain) -> List[str]:
    """Return the target-industry values already stored for ``domain_object``.

    Args:
        domain_object: The ``Domain`` row whose ``DomainTargetIndustries``
            links are looked up.

    Returns:
        The ``value`` of every industry linked to the domain, in whatever
        order the database returns the links. An empty list is returned when
        no link exists or when a linked industry cannot be resolved.
    """
    # select_related fetches each linked industry in the same query, so
    # reading ``.value`` below does not trigger one extra query per row.
    links = DomainTargetIndustries.objects.filter(
        domain=domain_object
    ).select_related("target_industries")
    try:
        # An empty queryset naturally produces an empty list.
        return [link.target_industries.value for link in links]
    except models.ObjectDoesNotExist:
        # filter() itself never raises this; the guard only covers a link
        # whose related industry row is missing. Treat that as "nothing
        # persisted" so the caller recomputes instead of failing.
        return []


def target_industries_task(
    *, domain: str, skip: Optional[List[str]] = None
) -> List[str]:
    """Return the target industries for ``domain``, computing them if needed.

    The domain is first passed through ``Domain.tld_extract`` and its
    ``Domain`` row is fetched or created. If target industries are already
    stored for that row they are returned as-is. Otherwise the
    ``TargetIndustriesExtractionPipeline`` is run, its ``target_industries``
    result is lower-cased and de-duplicated, stored, and returned.

    Args:
        domain: Domain to analyse, in any form ``Domain.tld_extract`` accepts.
        skip: Optional list added to the pipeline context under the ``"skip"``
            key; ``None`` or an empty list adds nothing.

    Returns:
        The stored industry values when they already exist, otherwise the
        freshly extracted industries as a sorted list of unique lower-case
        strings. When the pipeline yields no industries nothing is stored,
        so the next call runs the pipeline again.

    Raises:
        KeyError: If the pipeline result has no ``"target_industries"`` entry.
    """
    domain = Domain.tld_extract(domain)
    # get_or_create avoids the race in a separate get-then-create sequence.
    domain_object, _ = Domain.objects.get_or_create(domain=domain)

    # Check whether the result is already stored in the database.
    persisted_results = get_persisted_results(domain_object)
    if persisted_results:
        return persisted_results

    # Run the pipeline and obtain the result.
    payload = {"domain": domain, "size": 512}
    context = PipelineContext(payload=payload)
    if skip:
        context.add("skip", skip)
    context_result = TargetIndustriesExtractionPipeline(context=context).run()
    result: Dict[str, List[str]] = context_result.result

    # Lower-case and de-duplicate; sorting keeps the output deterministic
    # instead of depending on set iteration order.
    target_industries = sorted(
        {industry_name.lower() for industry_name in result["target_industries"]}
    )

    # Persist the result in the database.
    for target_industry in target_industries:
        industry_object, _ = Industry.objects.get_or_create(
            value=target_industry
        )
        DomainTargetIndustries.objects.get_or_create(
            domain=domain_object,
            target_industries=industry_object,
        )

    return target_industries
