testcase
This commit is contained in:
6
testcase_paraminer/__main__.py
Normal file
6
testcase_paraminer/__main__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
import asyncio
|
||||
|
||||
from .main import run_parse
|
||||
|
||||
if __name__ == "__main__":
    # Package entry point (this file is ``testcase_paraminer/__main__.py``):
    # build the pipeline coroutine and drive it to completion on a new event loop.
    pipeline = run_parse()
    asyncio.run(pipeline)
|
41
testcase_paraminer/main.py
Normal file
41
testcase_paraminer/main.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import asyncio
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from . import parser, writer
|
||||
|
||||
|
||||
async def parser_task(file: Path):
    """Read one HTML file and return the data parsed from it.

    Args:
        file: Path of the HTML file handed to ``parser.read_html``.

    Returns:
        Whatever ``parser.parse_html`` produces for the element that
        ``parser.read_html`` extracted from the file.
    """
    return await parser.parse_html(await parser.read_html(file))
|
||||
|
||||
|
||||
async def run_parse():
    """Parse every ``*.html`` file in the data directory and write one ``.docx`` per file.

    Input files are read from ``<cwd>/testcase_paraminer/data`` and results are
    written to ``<cwd>/testcase_paraminer/output``; the output directory is
    created when it does not exist yet. One parsing task is scheduled per input
    file, then the tasks are awaited in sorted file-name order and each result
    is passed to ``writer.write_docx``. A line with the task name and its
    status is printed for every file, followed by the total elapsed time.

    A failure in one file is reported and does not stop the remaining files.
    """
    start_time = time.perf_counter()
    package_dir = Path.cwd() / "testcase_paraminer"
    data_path = package_dir / "data"
    output_path = package_dir / "output"

    # The writer saves straight into this directory, so make sure it exists
    # instead of letting every single save fail.
    output_path.mkdir(parents=True, exist_ok=True)

    tasks = [
        asyncio.create_task(parser_task(file), name=file.name)
        for file in sorted(data_path.glob("*.html"))
    ]

    print(f" --- Total tasks: {len(tasks)} --- Starting")
    for task in tasks:
        name = task.get_name()
        # Computed outside the f-string: reusing the same quote character
        # inside an f-string expression is a SyntaxError before Python 3.12.
        base_name = name.split(".")[0]
        try:
            task_result = await task
            task_status = await writer.write_docx(
                filename=output_path / f"{base_name}.docx",
                data=task_result,
            )
            print(f" --- {name} --- {task_status}")
        except Exception as e:
            # Top-level boundary: keep processing the other files, but say
            # which file the error belongs to.
            print(f" --- {name} --- {e}")

    print(f" --- Elapsed time: {time.perf_counter() - start_time:.1f} seconds --- ")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this module directly: create the pipeline coroutine and
    # execute it on a fresh event loop.
    pipeline = run_parse()
    asyncio.run(pipeline)
|
BIN
testcase_paraminer/output/data1.docx
Normal file
BIN
testcase_paraminer/output/data1.docx
Normal file
Binary file not shown.
BIN
testcase_paraminer/output/data2.docx
Normal file
BIN
testcase_paraminer/output/data2.docx
Normal file
Binary file not shown.
BIN
testcase_paraminer/output/data3.docx
Normal file
BIN
testcase_paraminer/output/data3.docx
Normal file
Binary file not shown.
89
testcase_paraminer/parser.py
Normal file
89
testcase_paraminer/parser.py
Normal file
@@ -0,0 +1,89 @@
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import aiofiles
|
||||
from bs4 import BeautifulSoup, NavigableString, Tag
|
||||
|
||||
|
||||
async def read_html(filename: Path) -> Tag | NavigableString | None:
    """Load an HTML file and return its ``<div id="premain">`` element.

    Args:
        filename: Path of the HTML file to read (opened in text mode).

    Returns:
        The result of searching the parsed document for a ``div`` whose ``id``
        is ``premain``; ``None`` when there is no such element.
    """
    # NOTE(review): no explicit encoding is passed, so the platform default is
    # used - confirm that it matches the encoding of the input files.
    async with aiofiles.open(filename, mode="r") as handle:
        markup = await handle.read()

    document = BeautifulSoup(markup, features="html.parser")
    return document.find("div", attrs={"id": "premain"})
|
||||
|
||||
|
||||
async def parse_html(data: Tag | NavigableString | None) -> dict[Any, Any]:
    """Extract the coefficients and tables from *data* and post-process them.

    Args:
        data: The element returned by ``read_html`` (may be ``None``).

    Returns:
        The dictionary returned by ``modify_data`` for an input with two keys:
        ``"f_x"`` (from ``parse_x_coefficents``) and ``"tables"`` (from
        ``parse_tables``).
    """
    coefficients = await parse_x_coefficents(data)
    tables = await parse_tables(data)
    return await modify_data({"f_x": coefficients, "tables": tables})
|
||||
|
||||
|
||||
async def parse_x_coefficents(data: Tag | NavigableString | None) -> list[Any]:
    """Collect coefficient strings from the siblings after the "Симплекс-метод" heading.

    Args:
        data: Element to search; ``None`` yields an empty list.

    Returns:
        A list of strings. The first entry is the last whitespace-separated
        token of the sixth sibling after the ``<b>`` heading, minus its final
        character. Each further entry comes from a following sibling whose
        text ends in ``"x"``: that text without the trailing ``"x"`` and
        without any ``"+"``. Collection stops at the sibling whose text is the
        constraints sentence, or when the siblings run out.
    """
    if data is None:
        return []

    coefficients = []
    node = data.find("b", string="Симплекс-метод")

    # Walk up to six siblings forward, stopping early if the chain ends.
    # presumably this skips the markup between the heading and the objective
    # function - verify against the input pages.
    hops_left = 6
    while hops_left and node is not None:
        node = node.next_sibling
        hops_left -= 1

    if node is not None:
        last_token = node.text.split()[-1]
        # Drop the token's final character to keep only the leading part.
        coefficients = [last_token[:-1]]
        node = node.next_sibling

    stop_text = " при следующих условиях-ограничений."
    while not (node is None or node.text == stop_text):
        text = node.text
        if text.endswith("x"):
            coefficients.append(text[:-1].replace("+", ""))
        node = node.next_sibling

    return coefficients
|
||||
|
||||
|
||||
async def parse_tables(data: Tag | NavigableString | None) -> dict[int, list[Any]]:
    """Extract the cell texts of the relevant ``table-bordered`` tables.

    A table is kept when at least one of its ``<td>`` cells has a ``bgcolor``
    attribute, or when it is the last matching table. Kept tables are keyed
    by consecutive integers starting at 0.

    Args:
        data: Element to search; ``None`` yields an empty dict.

    Returns:
        Mapping of table number to a list of rows. Each row is a list whose
        items are either the cell text, or a ``(text, bgcolor)`` tuple for
        cells that carry a ``bgcolor`` attribute.
    """
    if data is None:
        return {}

    found = data.find_all("table", {"class": "table-bordered"})
    last_position = len(found) - 1
    kept = {}

    for position, html_table in enumerate(found):
        has_marked_cell = False
        grid = []
        for tr in html_table.find_all("tr"):
            cells = []
            for td in tr.find_all("td"):
                if not td.has_attr("bgcolor"):
                    cells.append(td.text)
                    continue
                has_marked_cell = True
                cells.append((td.text, td["bgcolor"]))
            grid.append(cells)

        if has_marked_cell or position == last_position:
            # The next free key equals the number of tables kept so far.
            kept[len(kept)] = grid

    return kept
|
||||
|
||||
|
||||
def _objective_cost(label: Any, cost_row: list[Any]) -> Any:
    """Return the ``cost_row`` entry for a label of the form ``x<k>``, else ``""``."""
    if isinstance(label, tuple):
        # Cells carrying a ``bgcolor`` are stored as ``(text, bgcolor)`` pairs.
        label = label[0]
    if not isinstance(label, str) or not label.startswith("x"):
        return ""
    try:
        variable = int(label[1:])
    except ValueError:
        # Starts with "x" but has no numeric index: not a variable label.
        return ""
    # The first three slots of the cost row are the "", "C", "-" headers, so
    # variable k lives at index k + 2.
    return cost_row[variable + 2]


async def modify_data(data: dict[Any, Any]) -> dict[Any, Any]:
    """Prepend a coefficient row and a coefficient column to every table, in place.

    For each table in ``data["tables"]``:

    * a new first row ``["", "C", "-", *data["f_x"]]`` is inserted, padded
      with ``"0"`` until it is one cell longer than the table's original
      first row;
    * every original row gets one new leading cell: the entry of that new
      first row selected by the row's first cell when it looks like ``x<k>``
      (index ``k + 2``), otherwise an empty string.

    Empty tables are left untouched, empty rows receive ``""``, and a first
    cell stored as a ``(text, bgcolor)`` tuple is matched by its text.

    Args:
        data: Dictionary with the keys ``"f_x"`` (list of coefficients) and
            ``"tables"`` (mapping of table number to list of rows).

    Returns:
        The same ``data`` object, with its tables modified in place.

    Raises:
        IndexError: If a row label refers to a variable index beyond the end
            of the coefficient row.
    """
    for table in data["tables"].values():
        if not table:
            continue

        cost_row = ["", "C", "-", *data["f_x"]]
        # Pad to one cell more than the original header; a negative count
        # multiplies out to an empty list, so longer rows are kept as they are.
        cost_row.extend(["0"] * (len(table[0]) + 1 - len(cost_row)))

        original_rows = list(table)
        table.insert(0, cost_row)

        for row in original_rows:
            label = row[0] if row else ""
            row.insert(0, _objective_cost(label, cost_row))

    return data
|
26
testcase_paraminer/writer.py
Normal file
26
testcase_paraminer/writer.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from docx import Document
|
||||
from docx.shared import RGBColor
|
||||
|
||||
|
||||
async def write_docx(filename: Path, data: dict[str, Any]) -> str:
    """Write every table in ``data["tables"]`` to a Word document.

    Each table is preceded by a ``Table <key>`` paragraph. Cells given as
    tuples are written using their first item and coloured red; all other
    cells are written as text. The grid is sized by the widest row, so rows
    of different lengths fit. A table without any cells gets only its
    heading paragraph.

    Args:
        filename: Destination path of the ``.docx`` file.
        data: Dictionary whose ``"tables"`` entry maps a table key to a list
            of rows (lists of cells).

    Returns:
        ``"Ok"`` when the document was saved, otherwise the message of the
        exception that interrupted writing.
    """
    # NOTE(review): the document calls below are not awaited, so they run
    # synchronously inside this coroutine.
    doc = Document()
    try:
        for idx, rows in data["tables"].items():
            doc.add_paragraph(f"Table {idx}")

            width = max((len(row) for row in rows), default=0)
            if not width:
                # Nothing to lay out; keep going with the remaining tables.
                continue

            grid = doc.add_table(rows=len(rows), cols=width)
            for i, row in enumerate(rows):
                for j, value in enumerate(row):
                    cell = grid.cell(i, j)
                    if isinstance(value, tuple):
                        # Tuple cells carry their text in the first item and
                        # are highlighted in red.
                        cell.text = str(value[0])
                        cell.paragraphs[0].runs[0].font.color.rgb = RGBColor(255, 0, 0)
                    else:
                        cell.text = value if isinstance(value, str) else str(value)

        doc.save(str(filename))
        return "Ok"
    except Exception as e:
        # Callers expect a status string, not an exception.
        return str(e)
|
Reference in New Issue
Block a user