testcase_Paraminer/testcase_paraminer/parser.py

90 lines
2.6 KiB
Python
Raw Permalink Normal View History

2024-07-09 06:50:23 +03:00
from pathlib import Path
from typing import Any
import aiofiles
from bs4 import BeautifulSoup, NavigableString, Tag
async def read_html(filename: Path) -> Tag | NavigableString | None:
    """Load an HTML file and return its ``<div id="premain">`` element.

    The file is read asynchronously in text mode, parsed with the
    ``html.parser`` backend, and searched for the first ``div`` whose ``id``
    attribute equals ``premain``.

    Args:
        filename: Path of the HTML file to read.

    Returns:
        Whatever ``BeautifulSoup.find`` yields for that query; ``None`` when
        no such element exists.
    """
    async with aiofiles.open(filename, "r") as source:
        markup = await source.read()
    document = BeautifulSoup(markup, "html.parser")
    return document.find("div", {"id": "premain"})
async def parse_html(data: Tag | NavigableString | None) -> dict[Any, Any]:
    """Extract the coefficient list and the tables, then post-process them.

    Args:
        data: Node to parse (as returned by ``read_html``); may be ``None``.

    Returns:
        The dictionary returned by ``modify_data`` for a mapping with the
        keys ``"f_x"`` (output of ``parse_x_coefficents``) and ``"tables"``
        (output of ``parse_tables``).
    """
    # The coefficients are parsed before the tables, as in a step-by-step build.
    coefficients = await parse_x_coefficents(data)
    tables = await parse_tables(data)
    parsed = {"f_x": coefficients, "tables": tables}
    return await modify_data(parsed)
async def parse_x_coefficents(data: Tag | NavigableString | None) -> list[Any]:
    """Collect the text fragments that precede each ``x`` after the heading.

    The search starts at the ``<b>`` element whose string is the
    simplex-method heading and moves six siblings forward.  The last
    whitespace-separated token of that node's text, minus its final
    character, becomes the first entry.  Every following sibling whose text
    ends with ``"x"`` contributes its text without that trailing ``"x"`` and
    with all ``"+"`` characters removed.  Scanning stops at the sibling whose
    text is the closing "constraints" phrase, or when the siblings run out.

    NOTE(review): the entries are presumably the objective-function
    coefficients as strings -- confirm against the scraped page.

    Args:
        data: Node to search; ``None`` yields an empty list.

    Returns:
        The collected strings, or an empty list when ``data`` is ``None`` or
        fewer than six siblings follow the heading.
    """
    coefficients: list[Any] = []
    if data is None:
        return coefficients

    node = data.find("b", string="Симплекс-метод")
    # Walk six siblings forward; give up as soon as the chain ends.
    for _ in range(6):
        if node is None:
            break
        node = node.next_sibling
    if node is None:
        return coefficients

    # First entry: last token of the node's text without its final character.
    last_token = node.text.split()[-1]
    coefficients.append(last_token[:-1])

    node = node.next_sibling
    while node is not None:
        text = node.text
        if text == " при следующих условиях-ограничений.":
            break
        if text.endswith("x"):
            coefficients.append(text[:-1].replace("+", ""))
        node = node.next_sibling
    return coefficients
async def parse_tables(data: Tag | NavigableString | None) -> dict[int, list[Any]]:
    """Extract the cell texts of the relevant ``table-bordered`` tables.

    Each ``<table class="table-bordered">`` becomes a list of rows, and each
    row a list of its ``<td>`` cells.  A cell carrying a ``bgcolor``
    attribute is stored as a ``(text, bgcolor)`` tuple; any other cell is
    stored as its text alone.  A table is kept only if it contains at least
    one ``bgcolor`` cell or if it is the last table found.

    Args:
        data: Node to search; ``None`` yields an empty dict.

    Returns:
        The kept tables keyed by consecutive integers starting at 0, in
        document order.
    """
    if data is None:
        return {}

    all_tables = data.find_all("table", {"class": "table-bordered"})
    last_position = len(all_tables) - 1
    kept = []
    for position, table in enumerate(all_tables):
        has_highlight = False
        parsed_rows = []
        for tr in table.find_all("tr"):
            cells = []
            for td in tr.find_all("td"):
                if td.has_attr("bgcolor"):
                    has_highlight = True
                    cells.append((td.text, td["bgcolor"]))
                else:
                    cells.append(td.text)
            parsed_rows.append(cells)
        # The final table is always kept, highlighted or not.
        if has_highlight or position == last_position:
            kept.append(parsed_rows)
    return dict(enumerate(kept))
async def modify_data(data: dict[Any, Any]) -> dict[Any, Any]:
    """Add a coefficient header row and a coefficient column to every table.

    For each table in ``data["tables"]``:

    * a header row ``["", "C", "-", *data["f_x"]]`` is built and padded with
      ``"0"`` until it is one cell longer than the table's first row (every
      row gains one leading cell below);
    * every existing row gets a new leading cell: when the row's first cell
      is a label of the form ``x<N>``, the header entry at index ``N + 2``
      is used, otherwise an empty string;
    * the header row is inserted as the table's first row.

    Rows without cells and tables without rows are handled, and a first cell
    stored as a ``(text, bgcolor)`` tuple is matched on its text.

    Args:
        data: Mapping with ``"f_x"`` (a list of strings) and ``"tables"``
            (a mapping whose values are lists of rows).  The tables are
            modified in place.

    Returns:
        The same ``data`` object.

    Raises:
        ValueError: If a label starts with ``"x"`` but the remainder is not
            an integer.
        IndexError: If a label refers to a position outside the header row.
    """
    coefficients = data["f_x"]
    for table in data["tables"].values():
        header = ["", "C", "-", *coefficients]
        # A table may have no rows at all; treat its width as zero.
        width = len(table[0]) if table else 0
        # A non-positive repeat count yields no padding.
        header.extend(["0"] * (width + 1 - len(header)))

        for row in table:
            # A row may hold no <td> cells (e.g. a header-only <tr>).
            label = row[0] if row else ""
            # Highlighted cells are stored as (text, bgcolor) tuples.
            if isinstance(label, tuple):
                label = label[0]
            if label.startswith("x"):
                row.insert(0, header[int(label[1:]) + 2])
            else:
                row.insert(0, "")

        table.insert(0, header)
    return data