from apadata.utils.enum import StrEnum


class SpacyComponent(StrEnum):
    """
    Names of spaCy pipeline components.

    Each member's value is the lowercase component name as a plain string
    (for example ``"tok2vec"`` or ``"ner"``).

    NOTE(review): only the six components below are listed. spaCy also ships
    other built-in components (e.g. ``senter``, ``morphologizer``), so confirm
    whether this enum is meant to be exhaustive.
    """

    TOK2VEC = "tok2vec"
    TAGGER = "tagger"
    PARSER = "parser"
    ATTRIBUTE_RULER = "attribute_ruler"
    LEMMATIZER = "lemmatizer"
    NER = "ner"


class SpacyWordRootOption(StrEnum):
    """
    Available ways of reducing a word to its root form.

    Two options exist: ``LEMMATIZER`` and ``STEMMER``.

    NOTE(review): the code that acts on this choice is not in this module;
    check the consumer for what each option actually runs.
    """

    LEMMATIZER = "lemmatizer"
    STEMMER = "stemmer"


class SpacyPipeline(StrEnum):
    """
    Types of spacy pipelines
    """

    SM_PIPELINE: str = "sm"
    MD_PIPELINE: str = "md"
    LG_PIPELINE: str = "lg"
    TRF_PIPELINE: str = "trf"


class SpacyGenre(StrEnum):
    """
    Kind of data a spaCy pipeline was trained on.

    NOTE(review): each value looks like the middle ``<type>_<genre>`` segment
    of a spaCy package name (e.g. ``core_web`` in ``en_core_web_sm``) --
    confirm against the code that builds the package name.
    """

    CORE_NEWS = "core_news"
    DEP_NEWS = "dep_news"
    CORE_WEB = "core_web"
    SENT_UD = "sent_ud"


class LangCode(StrEnum):
    """
    Language codes, keyed by the English name of the language.

    Every value is a two-letter lowercase code (e.g. ``"en"``, ``"sr"``).
    ``MULTILINGUAL`` uses ``"xx"``, which is not tied to a single language.

    NOTE(review): the codes appear to follow ISO 639-1 (plus spaCy's ``"xx"``
    convention) -- confirm before relying on that elsewhere.
    """

    SERBIAN = "sr"
    CROATIAN = "hr"
    SLOVENIAN = "sl"
    POLISH = "pl"
    SLOVAK = "sk"
    CZECH = "cs"
    UKRAINIAN = "uk"
    RUSSIAN = "ru"
    MACEDONIAN = "mk"
    BULGARIAN = "bg"
    CATALAN = "ca"
    SPANISH = "es"
    PORTUGUESE = "pt"
    ITALIAN = "it"
    ENGLISH = "en"
    AFRIKAANS = "af"
    DUTCH = "nl"
    FRENCH = "fr"
    ALBANIAN = "sq"
    BASQUE = "eu"
    ROMANIAN = "ro"
    TAGALOG = "tl"
    VIETNAMESE = "vi"
    ESTONIAN = "et"
    ARMENIAN = "hy"
    LATVIAN = "lv"
    GERMAN = "de"
    TURKISH = "tr"
    KOREAN = "ko"
    LITHUANIAN = "lt"
    HUNGARIAN = "hu"
    LUXEMBOURGISH = "lb"
    FINNISH = "fi"
    YORUBA = "yo"
    IRISH = "ga"
    HINDI = "hi"
    SINHALA = "si"
    MARATHI = "mr"
    GUJARATI = "gu"
    SANSKRIT = "sa"
    BENGALI = "bn"
    KANNADA = "kn"
    TELUGU = "te"
    TAMIL = "ta"
    DANISH = "da"
    SWEDISH = "sv"
    ICELANDIC = "is"
    INDONESIAN = "id"
    PERSIAN = "fa"
    URDU = "ur"
    ARABIC = "ar"
    HEBREW = "he"
    NORWEGIAN_BOKMAL = "nb"
    TATAR = "tt"
    JAPANESE = "ja"
    THAI = "th"
    NEPALI = "ne"
    CHINESE = "zh"
    MULTILINGUAL = "xx"
    GREEK = "el"


# Pipeline-variant groups shared by the table below. They are tuples so that
# each table entry unpacks them into its own, independent list object.
_SM_TO_LG = (
    SpacyPipeline.SM_PIPELINE,
    SpacyPipeline.MD_PIPELINE,
    SpacyPipeline.LG_PIPELINE,
)
_SM_TO_TRF = (*_SM_TO_LG, SpacyPipeline.TRF_PIPELINE)

# language -> genre -> pipeline variants listed for that language and genre.
# Languages missing from this table have no entry at all (a plain lookup
# raises KeyError).
# NOTE(review): presumably the three parts are combined into a spaCy package
# name such as "en_core_web_sm" -- confirm against the consumer of this table.
LANG_CODE_TO_PIPELINE = {
    LangCode.CATALAN: {SpacyGenre.CORE_NEWS: [*_SM_TO_TRF]},
    LangCode.CHINESE: {SpacyGenre.CORE_WEB: [*_SM_TO_TRF]},
    LangCode.DANISH: {SpacyGenre.CORE_NEWS: [*_SM_TO_TRF]},
    LangCode.DUTCH: {SpacyGenre.CORE_NEWS: [*_SM_TO_LG]},
    LangCode.ENGLISH: {SpacyGenre.CORE_WEB: [*_SM_TO_TRF]},
    LangCode.FINNISH: {SpacyGenre.CORE_NEWS: [*_SM_TO_LG]},
    # For French, German and Spanish the transformer variant is listed under
    # DEP_NEWS rather than CORE_NEWS.
    LangCode.FRENCH: {
        SpacyGenre.CORE_NEWS: [*_SM_TO_LG],
        SpacyGenre.DEP_NEWS: [SpacyPipeline.TRF_PIPELINE],
    },
    LangCode.GERMAN: {
        SpacyGenre.CORE_NEWS: [*_SM_TO_LG],
        SpacyGenre.DEP_NEWS: [SpacyPipeline.TRF_PIPELINE],
    },
    LangCode.GREEK: {SpacyGenre.CORE_NEWS: [*_SM_TO_LG]},
    LangCode.ITALIAN: {SpacyGenre.CORE_NEWS: [*_SM_TO_LG]},
    LangCode.JAPANESE: {SpacyGenre.CORE_NEWS: [*_SM_TO_TRF]},
    LangCode.KOREAN: {SpacyGenre.CORE_NEWS: [*_SM_TO_LG]},
    LangCode.LITHUANIAN: {SpacyGenre.CORE_NEWS: [*_SM_TO_LG]},
    LangCode.MACEDONIAN: {SpacyGenre.CORE_NEWS: [*_SM_TO_LG]},
    LangCode.MULTILINGUAL: {SpacyGenre.SENT_UD: [SpacyPipeline.SM_PIPELINE]},
    LangCode.NORWEGIAN_BOKMAL: {SpacyGenre.CORE_NEWS: [*_SM_TO_LG]},
    LangCode.POLISH: {SpacyGenre.CORE_NEWS: [*_SM_TO_LG]},
    LangCode.PORTUGUESE: {SpacyGenre.CORE_NEWS: [*_SM_TO_LG]},
    LangCode.ROMANIAN: {SpacyGenre.CORE_NEWS: [*_SM_TO_LG]},
    LangCode.RUSSIAN: {SpacyGenre.CORE_NEWS: [*_SM_TO_LG]},
    LangCode.SPANISH: {
        SpacyGenre.CORE_NEWS: [*_SM_TO_LG],
        SpacyGenre.DEP_NEWS: [SpacyPipeline.TRF_PIPELINE],
    },
    LangCode.SWEDISH: {SpacyGenre.CORE_NEWS: [*_SM_TO_LG]},
}

# Delimiter strings built around characters that rarely occur in ordinary text.
# NOTE(review): their consumers are not visible in this module -- presumably
# they join/split several contents and several URLs inside one text blob;
# confirm against the callers.

# A section sign ("§") with one newline before and after it.
CONTENTS_SEPARATOR = "\n§\n"
# A plus-minus sign ("±") with two newlines before and after it.
URLS_SEPARATOR = "\n\n±\n\n"
