import pytest
import spacy

from apadata.spacy.constants import (
    LangCode,
    SpacyComponent,
    SpacyGenre,
    SpacyPipeline,
    SpacyWordRootOption,
)
from apadata.spacy.spacy import Spacy
from apadata.utils import timing

from ..spacy_configuration import SpacyConfiguration


@timing
def test_spacy():
    """Check that ``Spacy`` starts up for several ``SpacyConfiguration`` variants.

    The test runs in three stages:

    1. Build six configurations for the English / core-web / small pipeline
       (the bare one plus one per optional keyword) and, for each, assert
       that the resolved pipeline name is ``"en_core_web_sm"`` and that the
       launcher exposes a non-``None`` ``nlp`` object.
    2. Run a short text through the launcher built last (the
       ``word_root_pipeline`` variant) and inspect the returned document
       and its entities.
    3. Expect an ``OSError`` when the same pipeline genre/type is requested
       for the ``"de"`` language code.
    """
    english = LangCode("en")
    genre = SpacyGenre(SpacyGenre.CORE_WEB)
    size = SpacyPipeline(SpacyPipeline.SM_PIPELINE)
    root_option = SpacyWordRootOption(SpacyWordRootOption.LEMMATIZER)

    # Keyword arguments common to every English configuration below.
    shared = {"lang_code": english, "pipeline_type": size, "pipeline_genre": genre}

    # One entry per configuration; each holds only the extra keyword(s)
    # layered on top of ``shared``. Order matters: the launcher created for
    # the final entry is reused for the entity check further down.
    variants = (
        {},
        {"disable": [SpacyComponent(SpacyComponent.TOK2VEC)]},
        {"use_lookup_lemmatizer": True},
        {"use_senter_over_parser": True},
        {"use_default_over_trainable": True},
        {"word_root_pipeline": root_option},
    )

    launcher = None
    for extra in variants:
        config = SpacyConfiguration(**shared, **extra)
        launcher = Spacy(config)
        assert config.pipeline_name == "en_core_web_sm"
        assert launcher.nlp is not None

    sample = (
        "She studied computer science at University of Bucharest. "
        "She is a software engineer. "
        "She works for Google in Bucharest"
    )
    parsed = launcher.nlp(sample)

    assert isinstance(parsed, spacy.tokens.doc.Doc)

    assert len(parsed.ents) == 2

    # NOTE(review): the label is only checked for entities whose text matches
    # one of these names; if none match, this loop asserts nothing.
    for entity in parsed.ents:
        if entity.text in ("University of Bucharest", "Google"):
            assert entity.label_ == "ORG"

    # NOTE(review): all three constructions sit inside the ``raises`` block,
    # so an OSError from any of them satisfies the expectation.
    with pytest.raises(OSError):
        german_config = SpacyConfiguration(
            lang_code=LangCode("de"), pipeline_type=size, pipeline_genre=genre
        )
        Spacy(german_config)
