2 changed files with 75 additions and 67 deletions
@ -0,0 +1,75 @@ |
|||||
|
#!/usr/bin/env python |
||||
|
|
||||
|
import os |
||||
|
import asyncio as aio |
||||
|
import requests |
||||
|
import pandas as pd |
||||
|
from iso639 import languages |
||||
|
import aiohttp |
||||
|
from asyncio_pool import AioPool |
||||
|
|
||||
|
# Base URL of the language-prediction service under test. Defaults to the
# local development server; set LANGUAGE_API_URL to benchmark another
# deployment. A trailing slash is dropped because callers append paths that
# start with "/".
API_URL = os.environ.get("LANGUAGE_API_URL", "http://localhost:8000").rstrip("/")
||||
|
|
||||
|
|
||||
|
def getSupportedLanguages():
    """Fetch the languages the prediction API reports as supported.

    Sends a GET to ``API_URL + "/api/language/predict"`` and returns the
    value stored under the ``"supported_languages"`` key of the JSON reply.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
        requests.Timeout: if the API does not answer within 30 seconds.
        KeyError: if the reply has no ``"supported_languages"`` entry.
    """
    # requests waits indefinitely by default; bound the wait so an
    # unresponsive server cannot hang the whole benchmark.
    response = requests.get(API_URL + "/api/language/predict", timeout=30)
    # Surface an error status here rather than as an opaque KeyError or
    # JSON decoding failure on the error body.
    response.raise_for_status()
    return response.json()["supported_languages"]
||||
|
|
||||
|
|
||||
|
def getTestData():
    """Load the test set and keep only rows in a supported language.

    The CSV is read from ``./test.csv`` when that file exists; otherwise it
    is downloaded from the Hugging Face dataset URL and cached there. The
    ``labels`` column is renamed to ``language`` and ``text`` to ``Text``,
    and each language code is replaced by the name iso639 gives for it in
    its ``part1`` table (presumably ISO 639-1 codes; an unknown code raises
    KeyError).

    Returns:
        pandas.DataFrame: rows whose language name is listed by
        ``getSupportedLanguages()``.
    """
    cacheFile = "./test.csv"
    testDataUrl = "https://huggingface.co/datasets/papluca/language-identification/raw/main/test.csv"

    supportedLangs = getSupportedLanguages()

    if os.path.exists(cacheFile):
        data = pd.read_csv(cacheFile)
    else:
        data = pd.read_csv(testDataUrl)
        # index=False keeps the cache identical to the download; writing the
        # index would add an "Unnamed: 0" column on every cached read.
        data.to_csv(cacheFile, index=False)

    data = data.rename(columns={"labels": "language", "text": "Text"})
    data["language"] = data["language"].apply(lambda code: languages.part1[code].name)
    return data[data["language"].isin(supportedLangs)]
||||
|
|
||||
|
|
||||
|
async def task(row):
    """POST one test sentence to the prediction endpoint.

    Args:
        row: an ``(index, record)`` pair as produced by
            ``DataFrame.iterrows()``; only the record's ``"Text"`` field is
            sent to the API.

    Returns:
        list: ``[json_body, record]`` - the decoded JSON reply together
        with the record it was requested for, so the caller can compare the
        prediction against the record's expected language.
    """
    # The index in row[0] is not needed; only the record itself is used.
    record = row[1]
    url = API_URL + "/api/language/predict"
    body = {"text": record["Text"]}

    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=body) as r:
            json_body = await r.json()
    return [json_body, record]
||||
|
|
||||
|
|
||||
|
async def evaluateAccuracy():
    """Run every test sentence through the API and report per-language accuracy.

    Requests are issued through an ``AioPool`` of size 20. For each answered
    request the ``"lang"`` field of the reply is compared with the record's
    expected language, and totals and mismatches are tallied per language.

    Returns:
        pandas.DataFrame: one row per language with ``total``, ``failed``
        and ``success_percent`` columns, sorted by ``success_percent`` in
        descending order. The frame is printed before it is returned and is
        empty when no request produced a result.
    """
    data = getTestData()
    pool = AioPool(20)

    results = await pool.map(task, data.iterrows())
    print("completed %d requests " % len(data))

    stats = {}
    errors = []
    for result in results:
        # NOTE(review): asyncio_pool's default result getter appears to hand
        # back the exception object of a failed task instead of raising it -
        # confirm. Such entries cannot be unpacked, so set them aside.
        if isinstance(result, BaseException):
            errors.append(result)
            continue

        resp_body, row = result
        lang = row["language"]
        counts = stats.setdefault(lang, {"total": 0, "failed": 0})
        if resp_body["lang"] != lang:
            counts["failed"] += 1
        counts["total"] += 1

    if errors:
        print(
            "%d requests raised an error and were skipped; first: %r"
            % (len(errors), errors[0])
        )

    # Explicit columns keep the frame well-formed when nothing was tallied.
    stats = pd.DataFrame.from_dict(stats, orient="index", columns=["total", "failed"])
    stats["success_percent"] = (1 - (stats["failed"] / stats["total"])) * 100.0
    stats.sort_values("success_percent", ascending=False, inplace=True)
    print(stats)
    return stats
||||
|
|
||||
|
|
||||
|
if __name__ == "__main__":
    # Run the benchmark on a dedicated event loop and always close it, so
    # the loop's resources are released even if the evaluation fails.
    loop = aio.new_event_loop()
    try:
        loop.run_until_complete(evaluateAccuracy())
    finally:
        loop.close()
Loading…
Reference in new issue