2 changed files with 75 additions and 67 deletions
@ -0,0 +1,75 @@ |
|||
#!/usr/bin/env python |
|||
|
|||
import os |
|||
import asyncio as aio |
|||
import requests |
|||
import pandas as pd |
|||
from iso639 import languages |
|||
import aiohttp |
|||
from asyncio_pool import AioPool |
|||
|
|||
# Base URL of the language-prediction API; the functions in this script append
# the "/api/language/predict" path to it.
API_URL = "http://localhost:8000"
|||
|
|||
|
|||
def getSupportedLanguages():
    """Fetch the languages the prediction API reports as supported.

    Sends a GET request to ``API_URL + "/api/language/predict"`` and returns
    the value stored under the ``"supported_languages"`` key of the JSON
    response.

    Raises:
        requests.HTTPError: if the API answers with an error status code.
        requests.Timeout: if the API does not answer within the timeout.
    """
    # Without an explicit timeout ``requests`` waits indefinitely on an
    # unresponsive server.
    response = requests.get(API_URL + "/api/language/predict", timeout=30)
    # Surface HTTP errors directly instead of failing later on a missing key
    # or an undecodable body.
    response.raise_for_status()
    return response.json()["supported_languages"]
|||
|
|||
|
|||
def getTestData():
    """Load the language-identification test set, limited to supported languages.

    The CSV is read from ``./test.csv`` when that file exists; otherwise it is
    read from the Hugging Face URL below and written to ``./test.csv`` so
    later runs can reuse it.

    After loading, the ``labels`` and ``text`` columns are renamed to
    ``language`` and ``Text``, every ``language`` value is replaced by the
    ``name`` of its entry in ``iso639``'s ``languages.part1`` table, and rows
    whose language is not in the list returned by ``getSupportedLanguages()``
    are dropped.

    Returns:
        pandas.DataFrame: the remaining rows, with ``language`` and ``Text``
        columns.
    """
    cacheFile = "./test.csv"
    testDataUrl = "https://huggingface.co/datasets/papluca/language-identification/raw/main/test.csv"

    # Query the API first so an unreachable server fails before any download.
    supportedLangs = getSupportedLanguages()

    if os.path.exists(cacheFile):
        data = pd.read_csv(cacheFile)
    else:
        data = pd.read_csv(testDataUrl)
        # index=False keeps the cache shaped like the download; otherwise the
        # row index is stored and comes back as an extra "Unnamed: 0" column.
        data.to_csv(cacheFile, index=False)

    data = data.rename(columns={"labels": "language", "text": "Text"})
    data["language"] = data["language"].apply(lambda code: languages.part1[code].name)
    return data[data["language"].isin(supportedLangs)]
|||
|
|||
|
|||
async def task(row):
    """POST one test sentence to the prediction API and return its reply.

    Args:
        row: an ``(index, record)`` pair; ``record["Text"]`` is sent as the
            ``text`` field of the JSON request body.

    Returns:
        list: ``[json_body, record]`` -- the decoded JSON response together
        with the record that produced it.
    """
    # Only the record half of the pair is needed; the index is not used.
    record = row[1]
    url = API_URL + "/api/language/predict"
    payload = {"text": record["Text"]}

    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload) as response:
            return [await response.json(), record]
|||
|
|||
|
|||
async def evaluateAccuracy():
    """Run every test sentence through the API and report accuracy per language.

    Loads the test data with ``getTestData()``, sends each row through
    ``task`` using an ``AioPool`` of size 20, and compares the ``"lang"``
    field of each response with the row's ``language`` value.

    Returns:
        pandas.DataFrame: one row per language with ``total``, ``failed`` and
        ``success_percent`` columns, sorted by ``success_percent`` in
        descending order. The frame is printed before being returned.

    Raises:
        Exception: any exception that the pool hands back as the result of a
        request is re-raised here.
    """
    data = getTestData()
    pool = AioPool(20)

    results = await pool.map(task, data.iterrows())
    print("completed %d requests " % len(data))

    stats = {}
    for outcome in results:
        # A failed request may come back as an exception object instead of a
        # [response, row] pair; re-raise it rather than failing on unpacking.
        if isinstance(outcome, Exception):
            raise outcome
        resp_body, row = outcome
        lang = row["language"]
        tally = stats.setdefault(lang, {"total": 0, "failed": 0})
        tally["total"] += 1
        if resp_body["lang"] != lang:
            tally["failed"] += 1

    # Naming the rows explicitly before transposing guarantees the "total" and
    # "failed" columns exist even when no results were collected.
    stats = pd.DataFrame(stats, index=["total", "failed"]).T
    stats["success_percent"] = (1 - (stats["failed"] / stats["total"])) * 100.0
    stats.sort_values("success_percent", ascending=False, inplace=True)
    print(stats)
    return stats
|||
|
|||
|
|||
if __name__ == "__main__":
    # asyncio.run() creates a new event loop, runs the evaluation to
    # completion and always closes the loop afterwards.
    aio.run(evaluateAccuracy())
Loading…
Reference in new issue