import axios, {
  type AxiosRequestConfig,
  type AxiosResponse,
  type RawAxiosResponseHeaders,
} from "axios";
import * as cheerio from "cheerio";
import { createHash } from "node:crypto";
import { mkdir, writeFile } from "node:fs/promises";
import path from "node:path";

/** Sites that are exported when no BIZMAX_BASE_URLS / BIZMAX_BASE_URL override is set. */
const DEFAULT_BASE_URLS = ["https://hyun.bizmax.net", "https://hyun2.bizmax.net"];

/** Comma-separated list of site origins to export; blank entries are dropped. */
const BASE_URLS = (
  process.env.BIZMAX_BASE_URLS ??
  process.env.BIZMAX_BASE_URL ??
  DEFAULT_BASE_URLS.join(",")
)
  .split(",")
  .map((value) => value.trim())
  .filter(Boolean);

const LOGIN_ID = process.env.BIZMAX_ID;
const LOGIN_PASSWORD = process.env.BIZMAX_PASSWORD;
const OUTPUT_DIR = process.env.BIZMAX_OUTPUT_DIR ?? path.resolve(process.cwd(), "output");
const CUSTOMER_LIST_PATH = "/customer/tpl/customer_list.php";
const RANGE_START = process.env.BIZMAX_RANGE_START ?? "2016-01-01";
const BATCH_MONTHS = Number(process.env.BIZMAX_BATCH_MONTHS ?? "3");

/** Content type used for every form POST issued by this script. */
const FORM_CONTENT_TYPE = "application/x-www-form-urlencoded; charset=UTF-8";

/** Fields read from the JSON body returned by the login endpoint. */
type LoginResponse = {
  ret_code?: number;
  ret_msg?: string;
  login_succ?: string | number;
  u_sms_a?: string;
  f_url?: string;
  lt?: string;
};

/** Inclusive date range, both ends formatted as YYYY-MM-DD. */
type DateRange = { start: string; end: string };

/** Everything `exportRange` needs to know about the site currently being exported. */
type SiteSession = {
  jar: SimpleCookieJar;
  baseUrl: string;
  siteSlug: string;
  mainPageUrl: string;
  baseSearchFields: Record<string, string>;
};

/**
 * Minimal name/value cookie store.
 *
 * Only the leading `name=value` pair of each Set-Cookie line is kept; attributes
 * such as Path, Domain or Expires are ignored, and a later cookie with the same
 * name overwrites the earlier one.
 */
class SimpleCookieJar {
  private readonly cookies = new Map<string, string>();

  /** Records the `name=value` pair of every Set-Cookie line; malformed lines are skipped. */
  addFromHeaders(setCookie: string[] = []): void {
    for (const cookieLine of setCookie) {
      const [pair] = cookieLine.split(";", 1);
      if (!pair) {
        continue;
      }

      const eqIndex = pair.indexOf("=");
      if (eqIndex <= 0) {
        // No "=" at all, or an empty cookie name.
        continue;
      }

      const name = pair.slice(0, eqIndex).trim();
      const value = pair.slice(eqIndex + 1).trim();
      this.cookies.set(name, value);
    }
  }

  /** Serializes the stored cookies as a `Cookie` request header value ("" when empty). */
  toHeader(): string {
    return Array.from(this.cookies.entries())
      .map(([name, value]) => `${name}=${value}`)
      .join("; ");
  }
}

/** Returns the lowercase hex MD5 digest of `value`. */
function md5(value: string): string {
  return createHash("md5").update(value).digest("hex");
}

/**
 * Returns `value`, or throws when it is undefined or empty.
 *
 * @param name - Environment variable name, used only in the error message.
 * @throws Error when the variable is missing or empty.
 */
function getRequiredEnv(name: string, value: string | undefined): string {
  if (!value) {
    throw new Error(`Missing required environment variable: ${name}`);
  }
  return value;
}

/** Extracts the string entries of the `set-cookie` response header, or [] if it is not an array. */
function getSetCookieHeaders(
  headers: RawAxiosResponseHeaders | Record<string, unknown>,
): string[] {
  const headerValue: unknown = headers["set-cookie"];
  if (Array.isArray(headerValue)) {
    return headerValue.filter((item): item is string => typeof item === "string");
  }
  return [];
}

/**
 * Performs one HTTP request with the jar's cookies attached and stores any
 * cookies set by the response back into the jar.
 *
 * Redirects are not followed (`maxRedirects: 0`) and axios never rejects on
 * status; instead this function throws for any status >= 400.
 *
 * @typeParam T - Expected type of `response.data`; not validated at runtime.
 * @throws Error when the response status is 400 or above.
 */
async function request<T>(
  jar: SimpleCookieJar,
  config: AxiosRequestConfig,
): Promise<AxiosResponse<T>> {
  const headers: Record<string, string> = {
    "User-Agent":
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
    ...(config.headers as Record<string, string> | undefined),
  };

  const cookieHeader = jar.toHeader();
  if (cookieHeader) {
    headers.Cookie = cookieHeader;
  }

  const response = await axios<T>({
    ...config,
    headers,
    validateStatus: () => true,
    maxRedirects: 0,
  });

  // Cookies are captured before the status check so that error responses still update the jar.
  jar.addFromHeaders(getSetCookieHeaders(response.headers));

  if (response.status >= 400) {
    throw new Error(
      `Request failed: ${config.method ?? "GET"} ${config.url} -> ${response.status}`,
    );
  }

  return response;
}

/** Resolves `urlOrPath` against `baseUrl` and returns the absolute URL string. */
function absoluteUrl(baseUrl: string, urlOrPath: string): string {
  return new URL(urlOrPath, baseUrl).toString();
}

/**
 * Reads the `app_name` and `auth_step` inputs from the login page,
 * defaulting to "Chrome" and "1" when an input is absent.
 */
function parseLoginPage(html: string): { appName: string; authStep: string } {
  const $ = cheerio.load(html);
  const appName = $('input[name="app_name"]').val()?.toString() ?? "Chrome";
  const authStep = $('input[name="auth_step"]').val()?.toString() ?? "1";
  return { appName, authStep };
}

/**
 * Extracts the submit target and input values of `form[name="excel_form"]`.
 *
 * When the form has no (or an empty) `action`, the target falls back to
 * `/customer/tpl/<ctm_tpl_excel attribute>`, or `customer_excel.php` when
 * that attribute is missing too.
 *
 * @throws Error when the form is not present in `html`.
 */
function parseExcelForm(html: string): { action: string; fields: Record<string, string> } {
  const $ = cheerio.load(html);
  const form = $('form[name="excel_form"]');
  if (!form.length) {
    throw new Error("excel_form was not found in the customer list page");
  }

  // `||` (not `??`) on purpose: an empty action attribute must also trigger the fallback.
  const action =
    form.attr("action") || `/customer/tpl/${form.attr("ctm_tpl_excel") || "customer_excel.php"}`;

  const fields: Record<string, string> = {};
  form.find("input[name]").each((_, element) => {
    const input = $(element);
    const name = input.attr("name");
    if (!name) {
      return;
    }
    fields[name] = input.val()?.toString() ?? "";
  });

  return { action, fields };
}

/**
 * Determines how many rows the customer list reports.
 *
 * Looks first at the `ctm_list_total_cnt` input of the excel form, then of the
 * search form, and finally at a "Total : N건" marker in the page text.
 * Returns 0 when none of these yields a number.
 */
function parseTotalCount(html: string): number {
  const $ = cheerio.load(html);
  const explicitValue =
    $('form[name="excel_form"] input[name="ctm_list_total_cnt"]').val()?.toString() ??
    $('form[name="search_form"] input[name="ctm_list_total_cnt"]').val()?.toString();

  if (explicitValue) {
    const parsed = Number(explicitValue.replace(/,/g, ""));
    if (!Number.isNaN(parsed)) {
      return parsed;
    }
  }

  // NOTE(review): the character class lists ":" twice; one of them may have been
  // meant to be a full-width colon - confirm against the real page markup.
  const digits = $.text().match(/Total\s*[::]\s*([\d,]+)건/i)?.[1];
  if (digits !== undefined) {
    const parsed = Number(digits.replace(/,/g, ""));
    if (!Number.isNaN(parsed)) {
      return parsed;
    }
  }

  return 0;
}

/**
 * Collects the current values of `form[name="search_form"]`.
 *
 * Unchecked checkboxes/radios are omitted; a `<select>` contributes its
 * `option[selected]` value. When several controls share a name, the last one wins.
 *
 * @throws Error when the form is not present in `html`.
 */
function parseSearchForm(html: string): Record<string, string> {
  const $ = cheerio.load(html);
  const form = $('form[name="search_form"]');
  if (!form.length) {
    throw new Error("search_form was not found in the customer list page");
  }

  const fields: Record<string, string> = {};
  form.find("input[name], select[name], textarea[name]").each((_, element) => {
    const input = $(element);
    const name = input.attr("name");
    if (!name) {
      return;
    }

    const tagName = element.tagName.toLowerCase();
    const type = (input.attr("type") ?? "").toLowerCase();

    if (type === "checkbox" || type === "radio") {
      if (!input.is(":checked")) {
        return;
      }
      fields[name] = input.val()?.toString() ?? "on";
      return;
    }

    if (tagName === "select") {
      fields[name] =
        input.find("option[selected]").val()?.toString() ?? input.val()?.toString() ?? "";
      return;
    }

    fields[name] = input.val()?.toString() ?? "";
  });

  return fields;
}

/** Converts a flat field map into URL-encoded form parameters. */
function buildFormBody(fields: Record<string, string>): URLSearchParams {
  const params = new URLSearchParams();
  for (const [name, value] of Object.entries(fields)) {
    params.set(name, value);
  }
  return params;
}

/**
 * Extracts the file name from a Content-Disposition header value.
 *
 * `filename*=UTF-8''...` takes precedence over plain `filename=`. Both forms are
 * percent-decoded when possible; a value that is not valid percent-encoding is
 * returned as-is. An unquoted `filename=` value ends at the next `;`.
 *
 * The result is NOT sanitized - callers must not use it as a path directly.
 *
 * @returns The decoded name, or null when the header is absent or has no file name.
 */
function getFileNameFromDisposition(contentDisposition: string | undefined): string | null {
  if (!contentDisposition) {
    return null;
  }

  const decodeFileName = (value: string): string => {
    try {
      return decodeURIComponent(value);
    } catch {
      return value;
    }
  };

  const extendedValue = contentDisposition.match(/filename\*=UTF-8''([^;]+)/i)?.[1];
  if (extendedValue !== undefined) {
    return decodeFileName(extendedValue.trim());
  }

  // Quoted values may contain ";", unquoted values stop at the next parameter.
  const basicMatch = contentDisposition.match(/filename\s*=\s*(?:"([^"]+)"|([^";]+))/i);
  const basicValue = (basicMatch?.[1] ?? basicMatch?.[2])?.trim();
  if (basicValue) {
    return decodeFileName(basicValue);
  }

  return null;
}

/**
 * Reduces a server-supplied file name to a bare file name that is safe to join
 * onto a directory: everything up to the last "/" or "\" is dropped, and the
 * names "", "." and ".." are rejected.
 *
 * @returns The bare file name, or "" when nothing usable remains.
 */
function sanitizeFileName(name: string): string {
  const lastSegment = (name.split(/[\\/]/).pop() ?? "").trim();
  return lastSegment === "." || lastSegment === ".." ? "" : lastSegment;
}

/** Formats the UTC calendar date of `date` as YYYY-MM-DD. */
function toYmd(date: Date): string {
  return date.toISOString().slice(0, 10);
}

/**
 * Returns midnight UTC of the same day-of-month `months` later.
 * Day numbers that do not exist in the target month roll over into the
 * following month (standard `Date.UTC` normalization).
 */
function addMonths(date: Date, months: number): Date {
  return new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth() + months, date.getUTCDate()));
}

/** Returns `date` shifted by `days` multiples of 24 hours (may be negative). */
function addDays(date: Date, days: number): Date {
  return new Date(date.getTime() + days * 24 * 60 * 60 * 1000);
}

/** Returns the earlier of the two dates (`left` on a tie). */
function minDate(left: Date, right: Date): Date {
  return left.getTime() <= right.getTime() ? left : right;
}

/**
 * Splits the period from `startYmd` through today (UTC) into consecutive,
 * non-overlapping ranges of `batchMonths` months each; the last range is
 * clipped to today. Returns [] when `startYmd` lies in the future.
 *
 * @param startYmd - First day to cover, as YYYY-MM-DD.
 * @param batchMonths - Positive integer number of months per range.
 * @throws Error when `startYmd` is not a valid date or `batchMonths` is not a positive integer.
 */
function buildQuarterRanges(startYmd: string, batchMonths: number): DateRange[] {
  const startDate = new Date(`${startYmd}T00:00:00Z`);
  const today = new Date();
  const endDate = new Date(
    Date.UTC(today.getUTCFullYear(), today.getUTCMonth(), today.getUTCDate()),
  );

  if (Number.isNaN(startDate.getTime())) {
    throw new Error(`Invalid BIZMAX_RANGE_START: ${startYmd}`);
  }
  if (!Number.isInteger(batchMonths) || batchMonths <= 0) {
    throw new Error(`Invalid BIZMAX_BATCH_MONTHS: ${batchMonths}`);
  }

  const ranges: DateRange[] = [];
  let rangeStart = startDate;
  while (rangeStart.getTime() <= endDate.getTime()) {
    const nextRangeStart = addMonths(rangeStart, batchMonths);
    // Each range ends the day before the next one starts, so ranges stay contiguous.
    const rangeEnd = minDate(addDays(nextRangeStart, -1), endDate);
    ranges.push({ start: toYmd(rangeStart), end: toYmd(rangeEnd) });
    rangeStart = nextRangeStart;
  }

  return ranges;
}

/** Headers shared by the XHR-style form POSTs against the customer list page. */
function xhrFormHeaders(baseUrl: string, referer: string): Record<string, string> {
  return {
    "Content-Type": FORM_CONTENT_TYPE,
    Referer: referer,
    Origin: baseUrl,
    "X-Requested-With": "XMLHttpRequest",
  };
}

/**
 * Logs in to one site and opens the landing page the server points to.
 *
 * Fetches the login page for its hidden inputs, posts the id and the MD5 hash of
 * the password to `/login_pcs_jr.php`, then GETs the returned `f_url`.
 *
 * @returns The absolute URL of the post-login main page.
 * @throws Error when the account requires SMS verification or the login is rejected.
 */
async function loginToSite(
  jar: SimpleCookieJar,
  baseUrl: string,
  loginId: string,
  loginPassword: string,
): Promise<string> {
  const loginPageResponse = await request<string>(jar, {
    method: "GET",
    url: absoluteUrl(baseUrl, "/"),
    responseType: "text",
  });
  const { appName, authStep } = parseLoginPage(loginPageResponse.data);

  const loginBody = new URLSearchParams({
    from: "pc",
    nhoj: loginId,
    mluap: md5(loginPassword),
    app_name: appName,
    captcha_str: "",
    auth_step: authStep,
    force_sms: "n",
  });

  const loginResponse = await request<LoginResponse>(jar, {
    method: "POST",
    url: absoluteUrl(baseUrl, "/login_pcs_jr.php"),
    data: loginBody.toString(),
    responseType: "json",
    headers: {
      "Content-Type": FORM_CONTENT_TYPE,
      Accept: "application/json, text/javascript, */*; q=0.01",
      Referer: absoluteUrl(baseUrl, "/"),
      Origin: baseUrl,
      "X-Requested-With": "XMLHttpRequest",
    },
  });

  const loginData = loginResponse.data;
  if (loginData.u_sms_a === "y") {
    throw new Error(
      `[${baseUrl}] This account requires SMS verification. Axios-only automation cannot finish that step.`,
    );
  }
  if (`${loginData.login_succ ?? ""}` !== "1" || !loginData.f_url) {
    throw new Error(`[${baseUrl}] Login failed: ${loginData.ret_msg ?? "unknown error"}`);
  }

  const mainPageUrl = absoluteUrl(baseUrl, loginData.f_url);
  await request<string>(jar, {
    method: "GET",
    url: mainPageUrl,
    responseType: "text",
    headers: {
      Referer: absoluteUrl(baseUrl, "/"),
    },
  });

  return mainPageUrl;
}

/** Loads the unfiltered customer list page and returns its search form values. */
async function fetchBaseSearchFields(
  jar: SimpleCookieJar,
  baseUrl: string,
  mainPageUrl: string,
): Promise<Record<string, string>> {
  const customerListResponse = await request<string>(jar, {
    method: "POST",
    url: absoluteUrl(baseUrl, CUSTOMER_LIST_PATH),
    data: new URLSearchParams({
      _ihr: "n",
      _ihl: "n",
      _content_only: "n",
      site_gubun: "pc",
    }).toString(),
    responseType: "text",
    headers: xhrFormHeaders(baseUrl, mainPageUrl),
  });

  return parseSearchForm(customerListResponse.data);
}

/**
 * Searches the customer list for one date range and, unless it is empty,
 * downloads the Excel export into OUTPUT_DIR.
 *
 * Prints either a SKIP line (zero rows) or the written file path to stdout.
 */
async function exportRange(session: SiteSession, range: DateRange): Promise<void> {
  const { jar, baseUrl, siteSlug, mainPageUrl, baseSearchFields } = session;

  const searchFields: Record<string, string> = {
    ...baseSearchFields,
    _ihr: "n",
    _ihl: "n",
    _content_only: "n",
    site_gubun: "pc",
    mode: "search",
    page: "",
    use_date_1: "a.reg_date",
    use_date_1_src: "a.reg_date",
    use_date_1_dsp: "- 접수일자",
    start_date_1: range.start,
    end_date_1: range.end,
  };

  const rangedCustomerListResponse = await request<string>(jar, {
    method: "POST",
    url: absoluteUrl(baseUrl, CUSTOMER_LIST_PATH),
    data: buildFormBody(searchFields).toString(),
    responseType: "text",
    headers: xhrFormHeaders(baseUrl, mainPageUrl),
  });

  const totalCount = parseTotalCount(rangedCustomerListResponse.data);
  if (totalCount === 0) {
    process.stdout.write(`[${siteSlug}] SKIP ${range.start}~${range.end} (0건)\n`);
    return;
  }

  const { action, fields } = parseExcelForm(rangedCustomerListResponse.data);
  fields.e_type = "1";

  const excelResponse = await request<ArrayBuffer>(jar, {
    method: "POST",
    url: absoluteUrl(baseUrl, action),
    data: buildFormBody(fields).toString(),
    responseType: "arraybuffer",
    headers: {
      "Content-Type": FORM_CONTENT_TYPE,
      Referer: absoluteUrl(baseUrl, CUSTOMER_LIST_PATH),
      Origin: baseUrl,
    },
  });

  const dispositionHeader: unknown = excelResponse.headers["content-disposition"];
  const contentDisposition = typeof dispositionHeader === "string" ? dispositionHeader : undefined;

  // The header is server-controlled: strip any directory components before it
  // gets anywhere near path.join, and fall back to a generated name if nothing is left.
  const headerFileName = sanitizeFileName(getFileNameFromDisposition(contentDisposition) ?? "");
  const fileName =
    headerFileName !== ""
      ? headerFileName
      : `bizmax-customer-list-${range.start}-${range.end}.xlsx`;

  const ext = path.extname(fileName);
  const baseName = ext ? fileName.slice(0, -ext.length) : fileName;
  const rangedFileName = `${siteSlug}_${baseName}_${range.start}_${range.end}${ext || ".xlsx"}`;
  const outputPath = path.join(OUTPUT_DIR, rangedFileName);

  await writeFile(outputPath, Buffer.from(excelResponse.data));
  process.stdout.write(`${outputPath}\n`);
}

/**
 * Entry point: for every configured site, logs in with BIZMAX_ID / BIZMAX_PASSWORD
 * and exports the customer list for each date range since BIZMAX_RANGE_START.
 *
 * Sites and ranges are processed strictly one after another on a single session;
 * the first failure aborts the whole run.
 *
 * @throws Error when credentials are missing, configuration is invalid, or any request fails.
 */
async function main(): Promise<void> {
  const loginId = getRequiredEnv("BIZMAX_ID", LOGIN_ID);
  const loginPassword = getRequiredEnv("BIZMAX_PASSWORD", LOGIN_PASSWORD);

  await mkdir(OUTPUT_DIR, { recursive: true });
  const ranges = buildQuarterRanges(RANGE_START, BATCH_MONTHS);

  for (const baseUrl of BASE_URLS) {
    const jar = new SimpleCookieJar();
    const siteSlug = new URL(baseUrl).hostname.replace(/\./g, "_");

    const mainPageUrl = await loginToSite(jar, baseUrl, loginId, loginPassword);
    const baseSearchFields = await fetchBaseSearchFields(jar, baseUrl, mainPageUrl);
    const session: SiteSession = { jar, baseUrl, siteSlug, mainPageUrl, baseSearchFields };

    for (const range of ranges) {
      await exportRange(session, range);
    }
  }
}

main().catch((error: unknown) => {
  const message = error instanceof Error ? error.message : String(error);
  process.stderr.write(`${message}\n`);
  process.exitCode = 1;
});