from typing import List

from unittest.mock import patch

from apadata.text_processors.target_industries_extractor import (
    TargetIndustriesExtractor,
)


@patch("apadata.api.clearbit.clearbit_api.ClearbitAPI.suggest")
@patch("apadata.api.clearbit.clearbit_api.ClearbitAPI.industry")
def test_target_industries_api_extractor(mock_industry, mock_suggest):
    """Check API-based industry extraction with the Clearbit calls mocked.

    ``ClearbitAPI.suggest`` and ``ClearbitAPI.industry`` are replaced by
    mocks returning canned values, and the extractor is expected to report
    exactly the industry handed back by the mocked ``industry`` call.
    """
    # Stacked ``patch`` decorators inject their mocks bottom-up, which is why
    # the ``industry`` mock is the first parameter and ``suggest`` the second.
    mock_suggest.return_value = ["kpmg.us"]
    mock_industry.return_value = "Consulting"

    expected = ["Consulting"]
    predicted = TargetIndustriesExtractor().extract_target_industries_api(
        company_names=["KPMG"]
    )

    # Both sides are sorted so the comparison does not depend on ordering.
    assert sorted(predicted) == sorted(expected)


def mock_web_content():
    """Return a fixed English paragraph about Accenture as sample web content.

    The paragraph is built from adjacent string literals, which Python joins
    with no separator. Each fragment that ends between two words therefore
    carries its own trailing space so the words do not run together
    (e.g. "company based" rather than "companybased").

    Returns:
        str: The assembled sample text.
    """
    return (
        "Accenture plc is an Irish-American[5][6] professional services company "
        "based in Dublin, specializing in information technology (IT) services and "
        "consulting. A Fortune Global 500 company,[7] it reported revenues of "
        "$61.6 billion in 2022.[2] Accenture's current clients include 91 of the "
        "Fortune Global 100 and more than three-quarters of the Fortune Global 500"
        # No space before the citation marker: it attaches to the preceding
        # token, like the other "[n]" markers in this text.
        "[8] As of 2022, Accenture is considered the largest consulting firm in the "
        "world by number of employees.[8][9]"
    )


def test_target_industries_mentions_extractor():
    """Check mention-based industry extraction on the sample web content.

    Feeds the text from ``mock_web_content()`` to
    ``extract_target_industries_mentions`` with language ``"en"`` and expects
    the result to contain exactly "Services" and "Software & Internet".
    """
    expected: List[str] = ["Services", "Software & Internet"]
    sample_text = mock_web_content()

    industries_extractor = TargetIndustriesExtractor()
    predicted = industries_extractor.extract_target_industries_mentions(
        web_content=sample_text,
        language="en",
    )

    # Both sides are sorted so the comparison does not depend on ordering.
    assert sorted(predicted) == sorted(expected)
