68 lines
1.4 KiB
TypeScript
68 lines
1.4 KiB
TypeScript
import * as cheerio from "cheerio";
|
|
|
|
/**
 * One parsed table row: a flat map from string keys to string values.
 *
 * `parseTableRows` fills it with normalized header text as the key and the
 * normalized cell text as the value, plus an optional `site` entry.
 */
export type ParsedRow = Record<string, string>;
|
|
|
|
/**
 * A flat list of parsed rows.
 *
 * Structurally identical to `ParsedRow[]`; not referenced by the functions
 * visible in this part of the file.
 */
export type ParsedTables = ParsedRow[];
|
|
|
|
/**
 * Collapses every run of whitespace in `value` into a single space and
 * strips leading and trailing whitespace.
 *
 * @param value - Raw text, e.g. the text content of a table cell.
 * @returns The text with single-space separators and no outer whitespace;
 *          an empty string when `value` is empty or whitespace-only.
 */
function normalizeText(value: string): string {
  // Trimming first guarantees the split yields no empty leading/trailing
  // pieces; an all-whitespace input becomes [""] and joins back to "".
  const pieces = value.trim().split(/\s+/);
  return pieces.join(" ");
}
|
|
|
|
/**
 * Parses the rows of an HTML table into header-keyed objects.
 *
 * Column names are the normalized text of every `thead th` element, in
 * document order. Each `tbody tr` becomes one `ParsedRow` whose values are the
 * normalized text of the row's `td` descendants, matched to headers by
 * position.
 *
 * Details worth knowing:
 * - Columns whose header text is empty are skipped.
 * - A row with fewer cells than headers gets `""` for the missing columns;
 *   extra cells beyond the header count are ignored.
 * - If two headers share the same text, the later column's value wins.
 * - When `site` is a non-empty string it is stored under the `site` key,
 *   overwriting any column that happens to be named `site`.
 *
 * @param tableHtml - HTML markup containing the table to parse.
 * @param site - Optional label attached to every returned row.
 * @returns One object per `tbody tr`, in document order.
 */
export function parseTableRows(tableHtml: string, site?: string): ParsedRow[] {
  const $ = cheerio.load(tableHtml);

  // Element types are inferred from cheerio's typings; no `any` annotations.
  const headers = $("thead th")
    .toArray()
    .map((element) => normalizeText($(element).text()));

  return $("tbody tr")
    .toArray()
    .map((row) => {
      const cells = $(row)
        .find("td")
        .toArray()
        .map((cell) => normalizeText($(cell).text()));

      const parsedRow: ParsedRow = {};

      headers.forEach((header, index) => {
        // Unnamed columns cannot be keyed, so they are dropped.
        if (!header) {
          return;
        }

        parsedRow[header] = cells[index] ?? "";
      });

      // Assigned after the columns so the label takes precedence over a
      // same-named header.
      if (site) {
        parsedRow.site = site;
      }

      return parsedRow;
    });
}
|
|
|
|
/**
 * Extracts the rows of every `<table name="ctm_list_tbl">` found in `html`.
 *
 * Each matching table is serialized back to markup and handed to
 * `parseTableRows`, tagged with a site label derived from its position in the
 * document: `H1` for the first table, `H2` for the second, and so on.
 *
 * @param html - Markup of the page to scan.
 * @returns The rows of all matching tables concatenated in document order;
 *          an empty array when no table matches.
 */
export function parseCtmListTable(html: string) {
  const $ = cheerio.load(html);
  const matchedTables = $("table[name='ctm_list_tbl']").toArray();

  // flatMap over an empty list already yields [], so no separate guard is
  // needed for the "no tables" case.
  return matchedTables.flatMap((tableNode, position) => {
    const markup = $.html(tableNode);

    // A table that serializes to an empty string contributes no rows.
    return markup ? parseTableRows(markup, `H${position + 1}`) : [];
  });
}
|