import * as cheerio from "cheerio";

/**
 * One parsed table row: maps a (whitespace-normalized) column header to the
 * (whitespace-normalized) text of the cell in the same column position.
 * Every value stored here is a string, including the optional `site` label.
 */
export type ParsedRow = Record<string, string>;

/** A flat list of parsed rows, possibly collected from several tables. */
export type ParsedTables = ParsedRow[];

/** Collapses every run of whitespace to a single space and trims both ends. */
function normalizeText(value: string): string {
  return value.replace(/\s+/g, " ").trim();
}

/**
 * Parses the rows of an HTML table into header-keyed records.
 *
 * Headers are read from `thead th`; data rows from `tbody tr` / `td`.
 * Cells are matched to headers by position.
 *
 * @param tableHtml - HTML markup containing the table to parse.
 * @param site - Optional label; when truthy it is stored on every row under
 *   the `site` key (overwriting a column whose header is literally "site").
 * @returns One record per `tbody tr`, in document order.
 */
export function parseTableRows(tableHtml: string, site?: string): ParsedRow[] {
  const $ = cheerio.load(tableHtml);

  const headers: string[] = $("thead th")
    .toArray()
    .map((th) => normalizeText($(th).text()));

  return $("tbody tr")
    .toArray()
    .map((tr) => {
      const cells: string[] = $(tr)
        .find("td")
        .toArray()
        .map((td) => normalizeText($(td).text()));

      const parsedRow: ParsedRow = {};
      headers.forEach((header, index) => {
        // Columns with an empty header have no usable key, so they are skipped.
        if (!header) {
          return;
        }
        // A row with fewer cells than headers gets "" for the missing columns;
        // cells beyond the last header are dropped.
        parsedRow[header] = cells[index] ?? "";
      });

      if (site) {
        parsedRow.site = site;
      }
      return parsedRow;
    });
}

/**
 * Parses every `<table name="ctm_list_tbl">` found in `html` and returns all
 * of their rows as a single flat list.
 *
 * Each table's rows are tagged with a `site` label derived from the table's
 * position in the document: `H1` for the first match, `H2` for the second, etc.
 *
 * @param html - HTML markup to search for matching tables.
 * @returns The concatenated rows of all matching tables; `[]` when none match.
 */
export function parseCtmListTable(html: string): ParsedRow[] {
  const $ = cheerio.load(html);
  const tables = $("table[name='ctm_list_tbl']").toArray();

  // flatMap over an empty list already yields [], so no separate empty guard.
  return tables.flatMap((table, index) => {
    const site = `H${index + 1}`;
    // Serialize the table element so it can be parsed on its own.
    const tableHtml = $.html(table);
    if (!tableHtml) {
      return [];
    }
    return parseTableRows(tableHtml, site);
  });
}