# testcase_Paraminer/testcase_paraminer/main.py
#
# Last recorded change: 2024-07-09 06:50:23 +03:00
import asyncio
import time
from pathlib import Path
from . import parser, writer
async def parser_task(file: Path):
    """Read one HTML file and return the data extracted from it.

    The file is loaded with ``parser.read_html`` and the loaded document is
    passed straight to ``parser.parse_html``; whatever that call returns is
    handed back to the caller unchanged.
    """
    document = await parser.read_html(file)
    return await parser.parse_html(document)
async def run_parse():
    """Parse every ``*.html`` file in the data folder and write one .docx each.

    Input files are read from ``<cwd>/testcase_paraminer/data`` and results
    are written to ``<cwd>/testcase_paraminer/output``.  One asyncio task per
    input file is created up front; the tasks are then awaited in sorted
    file order and each result is passed to ``writer.write_docx``.

    A failure while parsing or writing one file is reported on stdout
    together with the file name and does not stop the remaining files.
    Progress and the total elapsed time are printed as well.
    """
    start_time = time.perf_counter()

    # Paths are resolved against the current working directory, so the
    # script has to be started from the project root.
    base_path = Path.cwd() / "testcase_paraminer"
    data_path = base_path / "data"
    output_path = base_path / "output"

    # NOTE(review): it is not visible here whether writer.write_docx creates
    # a missing target folder, so make sure it exists before writing.
    output_path.mkdir(parents=True, exist_ok=True)

    files = sorted(data_path.glob("*.html"))
    # Keep each source path next to its task so the output name can be
    # derived from the path itself instead of re-parsing the task name.
    tasks = [
        (file, asyncio.create_task(parser_task(file), name=file.name))
        for file in files
    ]
    print(f" --- Total tasks: {len(tasks)} --- Starting")

    for file, task in tasks:
        try:
            task_result = await task
            # ``stem`` removes only the final suffix, so "a.b.html" becomes
            # "a.b.docx" and cannot collide with "a.c.html".
            task_status = await writer.write_docx(
                filename=output_path / f"{file.stem}.docx",
                data=task_result,
            )
        except Exception as e:
            # Best-effort batch: report which file failed and carry on.
            print(f" --- {task.get_name()} --- Failed: {e!r}")
        else:
            print(f" --- {task.get_name()} --- {task_status}")

    print(f" --- Elapsed time: {time.perf_counter() - start_time:.1f} seconds --- ")
if __name__ == "__main__":
    # Script entry point: build the top-level coroutine and let asyncio.run
    # drive it to completion.
    entry_coroutine = run_parse()
    asyncio.run(entry_coroutine)