"""Batch-convert HTML files to .docx using the package's parser and writer."""

import asyncio
import time
from pathlib import Path

from . import parser, writer


async def parser_task(file: Path):
    """Read one HTML file and return the data parsed from it.

    Args:
        file: Path of the HTML file to process.

    Returns:
        Whatever ``parser.parse_html`` produces for the document returned by
        ``parser.read_html(file)``.
    """
    soup = await parser.read_html(file)
    return await parser.parse_html(soup)


async def run_parse() -> None:
    """Parse every ``*.html`` file in the data directory and write a .docx for each.

    Input files are read from ``<cwd>/testcase_paraminer/data`` and results are
    written to ``<cwd>/testcase_paraminer/output``. One asyncio task per input
    file is created up front; the results are then awaited and written in
    sorted file-name order. A failure for one file is reported and does not
    stop the remaining files from being processed.
    """
    start_time = time.perf_counter()

    base_path = Path.cwd() / "testcase_paraminer"
    data_path = base_path / "data"
    output_path = base_path / "output"

    files = sorted(data_path.glob("*.html"))
    # Create all tasks before awaiting any of them so parsing can overlap.
    tasks = [
        asyncio.create_task(parser_task(file), name=str(file.name))
        for file in files
    ]
    print(f" --- Total tasks: {len(tasks)} --- Starting")

    for file, task in zip(files, tasks):
        # ``stem`` drops only the final suffix, so "a.b.html" becomes
        # "a.b.docx" rather than colliding with other "a.*.html" inputs.
        target = output_path / f"{file.stem}.docx"
        try:
            task_result = await task
            task_status = await writer.write_docx(
                filename=target,
                data=task_result,
            )
        except Exception as e:
            # Report which file failed and keep going with the rest.
            print(f" --- {task.get_name()} --- failed: {e!r}")
        else:
            print(f" --- {task.get_name()} --- {task_status}")

    elapsed = time.perf_counter() - start_time
    print(f" --- Elapsed time: {elapsed:.1f} seconds --- ")


if __name__ == "__main__":
    asyncio.run(run_parse())