42 lines
1.1 KiB
Python
42 lines
1.1 KiB
Python
|
import asyncio
|
||
|
import time
|
||
|
from pathlib import Path
|
||
|
|
||
|
from . import parser, writer
|
||
|
|
||
|
|
||
|
async def parser_task(file: Path):
    """Read one HTML file and return its parsed data.

    The file is loaded with ``parser.read_html`` and the loaded document is
    then passed to ``parser.parse_html``; whatever that call produces is
    returned unchanged.

    Args:
        file: Path of the HTML file to process.

    Returns:
        The awaited result of ``parser.parse_html``.
    """
    document = await parser.read_html(file)
    return await parser.parse_html(document)
|
||
|
|
||
|
|
||
|
async def run_parse():
    """Parse every HTML file in the data directory and write one .docx each.

    Collects ``*.html`` files from ``<cwd>/testcase_paraminer/data``, creates
    one ``parser_task`` asyncio task per file, then awaits the tasks in sorted
    filename order and passes each result to ``writer.write_docx`` with the
    target ``<cwd>/testcase_paraminer/output/<stem>.docx``.

    A failure while parsing or writing a single file is reported on stdout
    together with the file name and does not stop the remaining files.
    Progress lines and the total elapsed time are printed to stdout.

    Returns:
        None.
    """
    start_time = time.perf_counter()
    base_path = Path.cwd() / "testcase_paraminer"
    data_path = base_path / "data"
    output_path = base_path / "output"

    # Create every task up front so the parses can overlap; the loop below
    # only consumes the results in a stable (sorted) order.
    tasks = [
        asyncio.create_task(parser_task(file), name=file.name)
        for file in sorted(data_path.glob("*.html"))
    ]

    print(f" --- Total tasks: {len(tasks)} --- Starting")
    for task in tasks:
        task_name = task.get_name()
        # Path.stem strips only the final suffix, so "a.b.html" maps to
        # "a.b.docx" rather than colliding with "a.html" on "a.docx".
        docx_name = f"{Path(task_name).stem}.docx"
        try:
            task_result = await task
            task_status = await writer.write_docx(
                filename=output_path / docx_name,
                data=task_result,
            )
            print(f" --- {task_name} --- {task_status}")
        except Exception as e:
            # Best-effort batch: report which file failed and keep going.
            print(f" --- {task_name} --- FAILED: {type(e).__name__}: {e}")

    print(f" --- Elapsed time: {time.perf_counter() - start_time:.1f} seconds --- ")
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
    # Script entry point: drive the whole parse/write batch on a fresh event
    # loop. This module uses a relative import (``from . import parser,
    # writer``), so it has to be started as part of its package with
    # ``python -m <package>.<module>`` rather than as a bare file path.
    asyncio.run(run_parse())
|