"""Extract objective-function coefficients and tables from a saved HTML page."""

from pathlib import Path
from typing import Any

import aiofiles
from bs4 import BeautifulSoup, NavigableString, Tag


async def read_html(filename: Path) -> Tag | NavigableString | None:
    """Read *filename* and return its ``<div id="premain">`` element.

    Returns ``None`` when the document contains no such element.
    """
    # NOTE(review): no explicit encoding is passed, so the platform default is
    # used. The parser below searches for Cyrillic markers, so confirm the
    # default matches the encoding of the input files.
    async with aiofiles.open(filename, "r") as f:
        html = await f.read()
    # Parsing only needs the in-memory string, so the file is closed first.
    soup = BeautifulSoup(html, "html.parser")
    return soup.find("div", {"id": "premain"})


async def parse_html(data: Tag | NavigableString | None) -> dict[Any, Any]:
    """Build the result dictionary for the element returned by ``read_html``.

    The dictionary has two keys: ``"f_x"`` (output of
    ``parse_x_coefficents``) and ``"tables"`` (output of ``parse_tables``).
    It is passed through ``modify_data`` before being returned.
    """
    result: dict[Any, Any] = {
        "f_x": await parse_x_coefficents(data),
        "tables": await parse_tables(data),
    }
    return await modify_data(result)


async def parse_x_coefficents(data: Tag | NavigableString | None) -> list[Any]:
    """Collect the coefficient strings that follow the "Симплекс-метод" marker.

    Returns an empty list when *data* is not a ``Tag`` or when the marker (or
    the node six siblings after it) is missing.

    Raises:
        IndexError: if the node six siblings after the marker has blank text.
    """
    # Only ``Tag`` offers the tree-searching ``find``; on a ``NavigableString``
    # the name resolves to ``str.find`` and the call below would raise.
    if not isinstance(data, Tag):
        return []

    node = data.find("b", string="Симплекс-метод")
    # Advance six siblings past the marker, stopping early if the chain ends.
    for _ in range(6):
        if node is None:
            break
        node = node.next_sibling
    if node is None:
        return []

    # First entry: the last whitespace-separated token of this node's text,
    # with its final character dropped.
    f_x = [node.text.split()[-1][:-1]]
    node = node.next_sibling

    # Walk the remaining siblings until the closing phrase (or the end).
    while node is not None and node.text != " при следующих условиях-ограничений.":
        if node.text.endswith("x"):
            # Drop the trailing "x" and any "+" signs.
            f_x.append(node.text[:-1].replace("+", ""))
        node = node.next_sibling
    return f_x


async def parse_tables(data: Tag | NavigableString | None) -> dict[int, list[Any]]:
    """Extract the ``table-bordered`` tables found inside *data*.

    A table is kept when at least one of its ``<td>`` cells carries a
    ``bgcolor`` attribute, or when it is the last matching table. Kept tables
    are numbered 0, 1, 2, ... in document order.

    Each table is a list of rows and each row a list of cells. A cell is the
    cell text, or a ``(text, bgcolor)`` tuple when the ``<td>`` has a
    ``bgcolor`` attribute. A ``<tr>`` without ``<td>`` children yields an
    empty row.

    Returns an empty dict when *data* is not a ``Tag``.
    """
    # ``find_all`` exists only on ``Tag`` (see the same guard in
    # ``parse_x_coefficents``).
    if not isinstance(data, Tag):
        return {}

    tables = data.find_all("table", {"class": "table-bordered"})
    last_idx = len(tables) - 1
    result: dict[int, list[Any]] = {}
    for table_idx, table in enumerate(tables):
        has_marked_cell = False
        parsed_rows = []
        for tr in table.find_all("tr"):
            cells: list[Any] = []
            for td in tr.find_all("td"):
                if td.has_attr("bgcolor"):
                    has_marked_cell = True
                    cells.append((td.text, td["bgcolor"]))
                else:
                    cells.append(td.text)
            parsed_rows.append(cells)
        if has_marked_cell or table_idx == last_idx:
            # Keys are consecutive, so the next key equals the current size.
            result[len(result)] = parsed_rows
    return result


async def modify_data(data: dict[Any, Any]) -> dict[Any, Any]:
    """Add a header row and a leading column to every table, in place.

    For each table in ``data["tables"]``:

    * a header row ``["", "C", "-", *data["f_x"]]`` is inserted at the top,
      padded with ``"0"`` until it is one cell longer than the table's
      original first row;
    * every other row gets a new first cell: for a row whose first cell is
      ``"x<n>"`` this is the header entry at index ``n + 2``; otherwise it is
      ``""``.

    Highlighted first cells (``(text, bgcolor)`` tuples as produced by
    ``parse_tables``) are matched on their text; empty rows and empty tables
    are tolerated.

    Returns the same *data* object that was passed in.

    Raises:
        ValueError: if a first cell starts with "x" but the rest is not an
            integer.
        IndexError: if the referenced header index does not exist.
    """
    for table in data["tables"].values():
        first_row = ["", "C", "-", *data["f_x"]]
        width = len(table[0]) if table else 0
        # A non-positive count leaves the header unpadded.
        first_row.extend(["0"] * (width - len(first_row) + 1))
        table.insert(0, first_row)

        # The slice copies only the outer list; the rows themselves are the
        # same objects, so the inserts below modify the table in place.
        for row in table[1:]:
            label = row[0] if row else ""
            if isinstance(label, tuple):
                # Highlighted cell: use the text part of (text, bgcolor).
                label = label[0]
            if label.startswith("x"):
                row.insert(0, first_row[int(label[1:]) + 2])
            else:
                row.insert(0, "")
    return data