import spacy

from apadata.spacy import SpacyConfiguration
from apadata.spacy.constants import LangCode, SpacyGenre, SpacyPipeline

from ..spacy_text_processor import SpacyTextProcessor


def test_spacy_text_processor_en():
    """Exercise SpacyTextProcessor on a short English sentence.

    Builds a configuration from the CORE_WEB genre, the SM_PIPELINE type and
    the "en" language code, then checks that:

    * ``obtain_doc()`` returns a spaCy ``Doc`` holding exactly one entity,
      "Apple", labelled "ORG";
    * ``process()`` returns a ``(doc, document_entities)`` pair whose second
      element has six items.
    """
    # Keyword arguments are listed genre -> type -> language so the helper
    # objects are still constructed in that order.
    config = SpacyConfiguration(
        pipeline_genre=SpacyGenre(SpacyGenre.CORE_WEB),
        pipeline_type=SpacyPipeline(SpacyPipeline.SM_PIPELINE),
        lang_code=LangCode("en"),
    )
    processor = SpacyTextProcessor(text="He works for Apple", spacy_config=config)

    # --- obtain_doc(): raw spaCy document and its named entities ---
    parsed = processor.obtain_doc()
    assert isinstance(parsed, spacy.tokens.doc.Doc)
    assert len(parsed.ents) == 1

    only_entity = parsed.ents[0]
    assert only_entity.text == "Apple"
    assert only_entity.label_ == "ORG"

    # --- process(): document plus the collected document entities ---
    processed_doc, entities = processor.process()
    assert isinstance(processed_doc, spacy.tokens.doc.Doc)
    assert len(entities) == 6


def test_spacy_text_processor_de():
    """Exercise SpacyTextProcessor on a short German sentence.

    Builds a configuration from the "de" language code, the CORE_NEWS genre
    and the SM_PIPELINE type, then checks that:

    * ``obtain_doc()`` returns a spaCy ``Doc`` holding exactly one entity,
      "Apple Gmbh", labelled "ORG";
    * ``process()`` returns a ``(doc, document_entities)`` pair whose second
      element has six items.
    """
    # Keyword arguments are listed language -> genre -> type so the helper
    # objects are still constructed in that order.
    config = SpacyConfiguration(
        lang_code=LangCode("de"),
        pipeline_genre=SpacyGenre(SpacyGenre.CORE_NEWS),
        pipeline_type=SpacyPipeline(SpacyPipeline.SM_PIPELINE),
    )
    processor = SpacyTextProcessor(
        text="Er arbeitet bei Apple Gmbh", spacy_config=config
    )

    # --- obtain_doc(): raw spaCy document and its named entities ---
    parsed = processor.obtain_doc()
    assert isinstance(parsed, spacy.tokens.doc.Doc)
    assert len(parsed.ents) == 1

    only_entity = parsed.ents[0]
    assert only_entity.text == "Apple Gmbh"
    assert only_entity.label_ == "ORG"

    # --- process(): document plus the collected document entities ---
    processed_doc, entities = processor.process()
    assert isinstance(processed_doc, spacy.tokens.doc.Doc)
    assert len(entities) == 6
