Files
bizmax_downloader/bizmax-download-excel.ts
2026-04-13 11:25:40 +09:00

452 lines
13 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import axios, {
type AxiosRequestConfig,
type AxiosResponse,
type RawAxiosResponseHeaders,
} from "axios";
import * as cheerio from "cheerio";
import { createHash } from "node:crypto";
import { mkdir, writeFile } from "node:fs/promises";
import path from "node:path";
/** Sites visited when neither BIZMAX_BASE_URLS nor BIZMAX_BASE_URL is set. */
const DEFAULT_BASE_URLS = ["https://hyun.bizmax.net", "https://hyun2.bizmax.net"];

// Comma-separated site list; BIZMAX_BASE_URLS wins over BIZMAX_BASE_URL.
const rawBaseUrls =
  process.env.BIZMAX_BASE_URLS ?? process.env.BIZMAX_BASE_URL ?? DEFAULT_BASE_URLS.join(",");

/** Base URLs to process, trimmed, with empty entries dropped. */
const BASE_URLS = rawBaseUrls
  .split(",")
  .map((entry) => entry.trim())
  .filter((entry) => entry.length > 0);

/** Credentials; presence is checked inside main() rather than at import time. */
const LOGIN_ID = process.env.BIZMAX_ID;
const LOGIN_PASSWORD = process.env.BIZMAX_PASSWORD;

/** Directory that receives the downloaded files (defaults to ./output under the cwd). */
const OUTPUT_DIR = process.env.BIZMAX_OUTPUT_DIR ?? path.resolve("output");

/** Path of the customer list page, relative to each base URL. */
const CUSTOMER_LIST_PATH = "/customer/tpl/customer_list.php";

/** First day (YYYY-MM-DD) of the overall export window. */
const RANGE_START = process.env.BIZMAX_RANGE_START ?? "2016-01-01";

/** Length of each export batch in months; validated later by buildQuarterRanges. */
const batchMonthsText = process.env.BIZMAX_BATCH_MONTHS ?? "3";
const BATCH_MONTHS = Number(batchMonthsText);
/**
 * JSON body returned by the login endpoint.
 *
 * Every field is optional because the payload is not validated at runtime;
 * only the fields read by main() are described in detail.
 */
interface LoginResponse {
  /** Compared (as a string) against "1" to decide whether login succeeded. */
  login_succ?: string | number;
  /** URL to open after a successful login. */
  f_url?: string;
  /** "y" is treated as "SMS verification required", which this script cannot complete. */
  u_sms_a?: string;
  /** Message included in the error thrown when login fails. */
  ret_msg?: string;
  /** Not read by this script. */
  ret_code?: number;
  /** Not read by this script. */
  lt?: string;
}
/**
 * Minimal in-memory cookie store.
 *
 * Only the leading `name=value` pair of each Set-Cookie line is kept; attributes
 * such as Path, Domain and Expires are ignored, so cookies are never scoped or
 * expired. A later cookie with the same name replaces the earlier value.
 */
class SimpleCookieJar {
  private readonly cookies = new Map<string, string>();

  /**
   * Records the cookies found in a list of raw Set-Cookie header lines.
   *
   * @param setCookie Raw header lines; lines without a `name=value` pair are skipped.
   */
  addFromHeaders(setCookie: string[] = []): void {
    setCookie.forEach((headerLine) => {
      // Everything after the first ";" is cookie attributes, not the value.
      const semicolonAt = headerLine.indexOf(";");
      const nameValue = semicolonAt === -1 ? headerLine : headerLine.slice(0, semicolonAt);

      // Require an "=" that is not the very first character.
      const separatorAt = nameValue.indexOf("=");
      if (separatorAt < 1) {
        return;
      }

      const cookieName = nameValue.slice(0, separatorAt).trim();
      const cookieValue = nameValue.slice(separatorAt + 1).trim();
      this.cookies.set(cookieName, cookieValue);
    });
  }

  /**
   * Serializes the stored cookies as a Cookie request header value.
   *
   * @returns `name=value` pairs joined by "; ", or an empty string when the jar is empty.
   */
  toHeader(): string {
    const parts: string[] = [];
    this.cookies.forEach((cookieValue, cookieName) => {
      parts.push(`${cookieName}=${cookieValue}`);
    });
    return parts.join("; ");
  }
}
/**
 * Computes the MD5 digest of a string.
 *
 * @param value Text to hash.
 * @returns The digest as a lowercase hexadecimal string.
 */
function md5(value: string): string {
  const hasher = createHash("md5");
  hasher.update(value);
  return hasher.digest("hex");
}
/**
 * Ensures a configuration value read from the environment is present.
 *
 * @param name Environment variable name, used only in the error message.
 * @param value The value that was read (possibly undefined or empty).
 * @returns The value unchanged when it is a non-empty string.
 * @throws Error when the value is undefined or an empty string.
 */
function getRequiredEnv(name: string, value: string | undefined): string {
  if (value) {
    return value;
  }
  throw new Error(`Missing required environment variable: ${name}`);
}
/**
 * Extracts the Set-Cookie lines from a response header bag.
 *
 * @param headers Response headers keyed by lowercase header name.
 * @returns The string entries of the `set-cookie` array; an empty array when the
 *   header is missing or is not an array.
 */
function getSetCookieHeaders(headers: RawAxiosResponseHeaders | Record<string, unknown>): string[] {
  const rawValue: unknown = headers["set-cookie"];
  if (!Array.isArray(rawValue)) {
    return [];
  }

  const lines: string[] = [];
  for (const entry of rawValue) {
    if (typeof entry === "string") {
      lines.push(entry);
    }
  }
  return lines;
}
/**
 * Sends one HTTP request through axios while maintaining the cookie jar.
 *
 * The jar's cookies are attached as the Cookie header (overriding any Cookie
 * supplied by the caller), and any Set-Cookie lines on the response are stored
 * back into the jar before the status is checked.
 *
 * `validateStatus` and `maxRedirects` are forced regardless of `config`: axios
 * never rejects because of the status code, and redirects are not followed, so
 * a 3xx response is returned to the caller as-is.
 *
 * @param jar Cookie store shared by all requests of one session.
 * @param config Axios request options; `headers` are merged over a default User-Agent.
 * @returns The axios response when the status is below 400.
 * @throws Error when the response status is 400 or higher.
 */
async function request<T = unknown>(
  jar: SimpleCookieJar,
  config: AxiosRequestConfig,
): Promise<AxiosResponse<T>> {
  const callerHeaders = config.headers as Record<string, string> | undefined;
  const outgoingHeaders: Record<string, string> = {
    "User-Agent":
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
    ...callerHeaders,
  };

  const storedCookies = jar.toHeader();
  if (storedCookies) {
    outgoingHeaders.Cookie = storedCookies;
  }

  const finalConfig: AxiosRequestConfig = {
    ...config,
    headers: outgoingHeaders,
    validateStatus: () => true,
    maxRedirects: 0,
  };
  const response = await axios<T>(finalConfig);

  // Capture cookies even from error responses, before deciding to throw.
  jar.addFromHeaders(getSetCookieHeaders(response.headers));

  if (response.status >= 400) {
    throw new Error(`Request failed: ${config.method ?? "GET"} ${config.url} -> ${response.status}`);
  }
  return response;
}
/**
 * Resolves a path or URL against a base URL.
 *
 * @param baseUrl Absolute URL used as the resolution base.
 * @param urlOrPath Relative path, or an absolute URL (which is returned normalized).
 * @returns The resolved absolute URL as a string.
 * @throws TypeError when the inputs cannot be parsed as a URL.
 */
function absoluteUrl(baseUrl: string, urlOrPath: string): string {
  const resolved = new URL(urlOrPath, baseUrl);
  return resolved.href;
}
/**
 * Reads the form values the login request echoes back from the login page.
 *
 * @param html Markup of the login page.
 * @returns `appName` from the `app_name` input (default "Chrome") and `authStep`
 *   from the `auth_step` input (default "1"); the defaults apply only when the
 *   input has no value at all.
 */
function parseLoginPage(html: string): { appName: string; authStep: string } {
  const $ = cheerio.load(html);

  const readInput = (inputName: string, fallback: string): string => {
    const rawValue = $(`input[name="${inputName}"]`).val();
    return rawValue?.toString() ?? fallback;
  };

  return {
    appName: readInput("app_name", "Chrome"),
    authStep: readInput("auth_step", "1"),
  };
}
/**
 * Extracts the Excel export form from the customer list page.
 *
 * The submit target is the form's `action` attribute when it is non-empty.
 * Otherwise it is built from the form's `ctm_tpl_excel` attribute (default
 * "customer_excel.php") under `/customer/tpl/`. That fallback always produces
 * a non-empty path, so no separate "action missing" check is needed.
 *
 * @param html Markup of the customer list page.
 * @returns The submit path/URL and the value of every named `<input>` in the form.
 * @throws Error when no `form[name="excel_form"]` exists in the markup.
 */
function parseExcelForm(html: string): { action: string; fields: Record<string, string> } {
  const $ = cheerio.load(html);
  const form = $('form[name="excel_form"]');
  if (form.length === 0) {
    throw new Error("excel_form was not found in the customer list page");
  }

  // `||` (not `??`) on purpose: an empty attribute must also trigger the fallback.
  const templateName = form.attr("ctm_tpl_excel") || "customer_excel.php";
  const action = form.attr("action") || `/customer/tpl/${templateName}`;

  const fields: Record<string, string> = {};
  for (const element of form.find("input[name]").toArray()) {
    const input = $(element);
    const name = input.attr("name");
    if (!name) {
      // `input[name=""]` matches the selector but cannot be submitted.
      continue;
    }
    fields[name] = input.val()?.toString() ?? "";
  }

  return { action, fields };
}
/**
 * Determines how many rows the current customer list search matched.
 *
 * Sources are tried in order: the `ctm_list_total_cnt` input of `excel_form`,
 * the same input of `search_form`, then a "Total : N건" label anywhere in the
 * page text ("건" is the Korean counter for items). Thousands separators are
 * stripped before parsing.
 *
 * @param html Markup of the customer list page.
 * @returns The parsed count, or 0 when no source yields a number.
 */
function parseTotalCount(html: string): number {
  const $ = cheerio.load(html);
  const toCount = (digits: string): number => Number(digits.replace(/,/g, ""));

  const hiddenValue =
    $('form[name="excel_form"] input[name="ctm_list_total_cnt"]').val()?.toString() ??
    $('form[name="search_form"] input[name="ctm_list_total_cnt"]').val()?.toString();
  if (hiddenValue) {
    const fromInput = toCount(hiddenValue);
    if (!Number.isNaN(fromInput)) {
      return fromInput;
    }
  }

  // Fall back to the visible label only when the hidden inputs were unusable.
  const labelMatch = /Total\s*[:]\s*([\d,]+)건/i.exec($.text());
  if (labelMatch) {
    const fromLabel = toCount(labelMatch[1]);
    if (!Number.isNaN(fromLabel)) {
      return fromLabel;
    }
  }

  return 0;
}
/**
 * Collects the current values of the customer list search form.
 *
 * Controls are visited in document order, so when several share a name the
 * last one wins. Unchecked checkboxes and radios are omitted; checked ones
 * default to "on" when they carry no value. For a `<select>`, an option with
 * the `selected` attribute takes precedence over the element's own value.
 *
 * @param html Markup of the customer list page.
 * @returns A name-to-value map of the form's inputs, selects and textareas.
 * @throws Error when no `form[name="search_form"]` exists in the markup.
 */
function parseSearchForm(html: string): Record<string, string> {
  const $ = cheerio.load(html);
  const form = $('form[name="search_form"]');
  if (form.length === 0) {
    throw new Error("search_form was not found in the customer list page");
  }

  const fields: Record<string, string> = {};
  const controls = form.find("input[name], select[name], textarea[name]").toArray();

  for (const element of controls) {
    const control = $(element);
    const name = control.attr("name");
    if (!name) {
      continue;
    }

    const controlType = (control.attr("type") ?? "").toLowerCase();
    const isToggle = controlType === "checkbox" || controlType === "radio";

    if (isToggle) {
      if (control.is(":checked")) {
        fields[name] = control.val()?.toString() ?? "on";
      }
      continue;
    }

    if (element.tagName.toLowerCase() === "select") {
      const selectedOption = control.find("option[selected]").val()?.toString();
      fields[name] = selectedOption ?? control.val()?.toString() ?? "";
      continue;
    }

    fields[name] = control.val()?.toString() ?? "";
  }

  return fields;
}
/**
 * Converts a field map into URL-encoded form parameters.
 *
 * @param fields Form field names mapped to their values.
 * @returns Parameters in the same order as the object's own keys.
 */
function buildFormBody(fields: Record<string, string>): URLSearchParams {
  // Object keys are unique, so constructing from entries matches set-per-key.
  return new URLSearchParams(Object.entries(fields));
}
/**
 * Extracts a safe file name from a Content-Disposition header value.
 *
 * Candidates are tried in order: the extended `filename*=UTF-8''…` form, a
 * quoted `filename="…"`, then an unquoted `filename=…` token. An unquoted
 * token ends at the next ";" so that following parameters (for example
 * `; size=123`) are not swallowed into the name.
 *
 * Each candidate is percent-decoded (kept verbatim when decoding fails),
 * trimmed, and reduced to its final path segment. The header comes from the
 * server and the result is used to build a local path, so directory components
 * such as `../` are discarded.
 *
 * @param contentDisposition Raw header value, or undefined when absent.
 * @returns The file name, or null when the header is missing or holds no usable name.
 */
function getFileNameFromDisposition(contentDisposition: string | undefined): string | null {
  if (!contentDisposition) {
    return null;
  }

  const decodeFileName = (value: string): string => {
    try {
      return decodeURIComponent(value);
    } catch {
      // Malformed percent-escapes (e.g. a literal "%"): use the raw text.
      return value;
    }
  };

  // Keep only the last path segment; reject names that are empty or pure dot-segments.
  const toLeafName = (value: string): string | null => {
    const leaf = value.split(/[\\/]/).pop()?.trim() ?? "";
    return leaf === "" || leaf === "." || leaf === ".." ? null : leaf;
  };

  const patterns = [
    /filename\*\s*=\s*UTF-8''([^;]+)/i,
    /filename\s*=\s*"([^"]+)"/i,
    /filename\s*=\s*"?([^";]+)/i,
  ];

  for (const pattern of patterns) {
    const rawName = pattern.exec(contentDisposition)?.[1];
    if (rawName === undefined) {
      continue;
    }
    const fileName = toLeafName(decodeFileName(rawName.trim()));
    if (fileName !== null) {
      return fileName;
    }
  }

  return null;
}
/**
 * Formats a date as its UTC calendar day.
 *
 * @param date The instant to format.
 * @returns The first ten characters of the ISO-8601 string (YYYY-MM-DD for four-digit years).
 * @throws RangeError when the date is invalid.
 */
function toYmd(date: Date): string {
  const isoText = date.toISOString();
  return isoText.substring(0, 10);
}
/**
 * Returns the UTC midnight that lies `months` calendar months after `date`.
 *
 * When the target month is shorter than the source day-of-month, the result is
 * clamped to the target month's last day (Jan 31 + 1 month gives Feb 28/29)
 * instead of overflowing into the following month. The time of day of `date`
 * is discarded.
 *
 * @param date Starting date; only its UTC year, month and day are used.
 * @param months Number of months to add; may be negative.
 * @returns A new Date at 00:00:00 UTC on the resulting day.
 */
function addMonths(date: Date, months: number): Date {
  const year = date.getUTCFullYear();
  const targetMonth = Math.trunc(date.getUTCMonth() + months);

  // Day 0 of the month after the target is the target month's last day.
  const lastDayOfTargetMonth = new Date(Date.UTC(year, targetMonth + 1, 0)).getUTCDate();
  const day = Math.min(date.getUTCDate(), lastDayOfTargetMonth);

  return new Date(Date.UTC(year, targetMonth, day));
}
/**
 * Shifts a date by a number of 24-hour days.
 *
 * @param date Starting instant (not modified).
 * @param days Number of days to add; negative values move backwards.
 * @returns A new Date offset by `days` × 24 hours.
 */
function addDays(date: Date, days: number): Date {
  const offsetMs = days * 24 * 60 * 60 * 1000;
  return new Date(date.getTime() + offsetMs);
}
/**
 * Picks the earlier of two dates.
 *
 * @param left First candidate; returned when the two instants are equal.
 * @param right Second candidate.
 * @returns The same Date object that was passed in (no copy is made).
 */
function minDate(left: Date, right: Date): Date {
  if (left.getTime() <= right.getTime()) {
    return left;
  }
  return right;
}
/**
 * Splits the period from `startYmd` through today (UTC) into consecutive,
 * non-overlapping date ranges of `batchMonths` months each.
 *
 * Each range ends the day before the next one starts; the final range is cut
 * off at today's UTC date. Despite the name, the batch length is not limited
 * to quarters.
 *
 * @param startYmd First day of the first range, as YYYY-MM-DD.
 * @param batchMonths Length of each range in months; must be a positive integer.
 * @returns Inclusive `{ start, end }` pairs formatted YYYY-MM-DD; empty when
 *   `startYmd` is after today.
 * @throws Error when `startYmd` is not a valid date or `batchMonths` is not a positive integer.
 */
function buildQuarterRanges(startYmd: string, batchMonths: number): Array<{ start: string; end: string }> {
  const startDate = new Date(`${startYmd}T00:00:00Z`);
  const now = new Date();
  // Truncate "now" to midnight UTC so comparisons are made on whole days.
  const endDate = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate()));

  if (Number.isNaN(startDate.getTime())) {
    throw new Error(`Invalid BIZMAX_RANGE_START: ${startYmd}`);
  }
  if (!Number.isInteger(batchMonths) || batchMonths <= 0) {
    throw new Error(`Invalid BIZMAX_BATCH_MONTHS: ${batchMonths}`);
  }

  const ranges: Array<{ start: string; end: string }> = [];
  for (let cursor = startDate; cursor.getTime() <= endDate.getTime(); ) {
    const nextCursor = addMonths(cursor, batchMonths);
    const lastDay = minDate(addDays(nextCursor, -1), endDate);
    ranges.push({ start: toYmd(cursor), end: toYmd(lastDay) });
    cursor = nextCursor;
  }
  return ranges;
}
/** Everything needed to issue authenticated requests against one site. */
type SiteSession = {
  baseUrl: string;
  siteSlug: string;
  jar: SimpleCookieJar;
  mainPageUrl: string;
};

/** Content-Type sent with every form-encoded POST in this script. */
const FORM_CONTENT_TYPE = "application/x-www-form-urlencoded; charset=UTF-8";

/** Logs in to one site, opens the post-login page, and returns that page's URL. */
async function logIn(
  jar: SimpleCookieJar,
  baseUrl: string,
  loginId: string,
  loginPassword: string,
): Promise<string> {
  const rootUrl = absoluteUrl(baseUrl, "/");

  // The login page supplies form values that the login request echoes back.
  const landingResponse = await request<string>(jar, {
    method: "GET",
    url: rootUrl,
    responseType: "text",
  });
  const { appName, authStep } = parseLoginPage(landingResponse.data);

  // The password is sent as its MD5 hex digest.
  const credentials = new URLSearchParams({
    from: "pc",
    nhoj: loginId,
    mluap: md5(loginPassword),
    app_name: appName,
    captcha_str: "",
    auth_step: authStep,
    force_sms: "n",
  });
  const { data: loginData } = await request<LoginResponse>(jar, {
    method: "POST",
    url: absoluteUrl(baseUrl, "/login_pcs_jr.php"),
    data: credentials.toString(),
    responseType: "json",
    headers: {
      "Content-Type": FORM_CONTENT_TYPE,
      Accept: "application/json, text/javascript, */*; q=0.01",
      Referer: rootUrl,
      Origin: baseUrl,
      "X-Requested-With": "XMLHttpRequest",
    },
  });

  if (loginData.u_sms_a === "y") {
    throw new Error(`[${baseUrl}] This account requires SMS verification. Axios-only automation cannot finish that step.`);
  }
  const succeeded = `${loginData.login_succ ?? ""}` === "1";
  if (!succeeded || !loginData.f_url) {
    throw new Error(`[${baseUrl}] Login failed: ${loginData.ret_msg ?? "unknown error"}`);
  }

  // Visit the post-login page before requesting the customer list.
  const mainPageUrl = absoluteUrl(baseUrl, loginData.f_url);
  await request<string>(jar, {
    method: "GET",
    url: mainPageUrl,
    responseType: "text",
    headers: {
      Referer: rootUrl,
    },
  });
  return mainPageUrl;
}

/** POSTs a form body to the customer list page and returns the resulting markup. */
async function postCustomerList(session: SiteSession, formBody: string): Promise<string> {
  const response = await request<string>(session.jar, {
    method: "POST",
    url: absoluteUrl(session.baseUrl, CUSTOMER_LIST_PATH),
    data: formBody,
    responseType: "text",
    headers: {
      "Content-Type": FORM_CONTENT_TYPE,
      Referer: session.mainPageUrl,
      Origin: session.baseUrl,
      "X-Requested-With": "XMLHttpRequest",
    },
  });
  return response.data;
}

/** Searches one date range and, when it has rows, saves the Excel export to OUTPUT_DIR. */
async function exportRange(
  session: SiteSession,
  baseSearchFields: Record<string, string>,
  range: { start: string; end: string },
): Promise<void> {
  const { baseUrl, siteSlug, jar } = session;

  // Start from the form's current values, then force a search on a.reg_date for this range.
  const searchFields = {
    ...baseSearchFields,
    _ihr: "n",
    _ihl: "n",
    _content_only: "n",
    site_gubun: "pc",
    mode: "search",
    page: "",
    use_date_1: "a.reg_date",
    use_date_1_src: "a.reg_date",
    use_date_1_dsp: "- 접수일자",
    start_date_1: range.start,
    end_date_1: range.end,
  };
  const listHtml = await postCustomerList(session, buildFormBody(searchFields).toString());

  if (parseTotalCount(listHtml) === 0) {
    process.stdout.write(`[${siteSlug}] SKIP ${range.start}~${range.end} (0건)\n`);
    return;
  }

  // The result page carries the export form, pre-filled for the current search.
  const { action, fields } = parseExcelForm(listHtml);
  fields.e_type = "1";
  const excelResponse = await request<ArrayBuffer>(jar, {
    method: "POST",
    url: absoluteUrl(baseUrl, action),
    data: buildFormBody(fields).toString(),
    responseType: "arraybuffer",
    headers: {
      "Content-Type": FORM_CONTENT_TYPE,
      Referer: absoluteUrl(baseUrl, CUSTOMER_LIST_PATH),
      Origin: baseUrl,
    },
  });

  const dispositionHeader = excelResponse.headers["content-disposition"];
  const contentDisposition = typeof dispositionHeader === "string" ? dispositionHeader : undefined;
  const fileName =
    getFileNameFromDisposition(contentDisposition) ??
    `bizmax-customer-list-${range.start}-${range.end}.xlsx`;

  // Prefix the site and suffix the range so files from different batches never collide.
  const ext = path.extname(fileName);
  const baseName = ext ? fileName.slice(0, -ext.length) : fileName;
  const rangedFileName = `${siteSlug}_${baseName}_${range.start}_${range.end}${ext || ".xlsx"}`;
  const outputPath = path.join(OUTPUT_DIR, rangedFileName);

  await writeFile(outputPath, Buffer.from(excelResponse.data));
  process.stdout.write(`${outputPath}\n`);
}

/**
 * Downloads the customer list of every configured site as Excel files, one
 * file per date range.
 *
 * For each base URL a fresh cookie jar is created, the account is logged in,
 * the search form's current values are read, and every range is then searched
 * and exported in sequence. Ranges with no rows are reported on stdout and
 * skipped; each saved file's path is printed on stdout.
 *
 * @throws Error when credentials are missing, login fails, an expected form is
 *   absent, or any request returns a status of 400 or higher.
 */
async function main(): Promise<void> {
  const loginId = getRequiredEnv("BIZMAX_ID", LOGIN_ID);
  const loginPassword = getRequiredEnv("BIZMAX_PASSWORD", LOGIN_PASSWORD);
  await mkdir(OUTPUT_DIR, { recursive: true });
  const ranges = buildQuarterRanges(RANGE_START, BATCH_MONTHS);

  for (const baseUrl of BASE_URLS) {
    const jar = new SimpleCookieJar();
    const siteSlug = new URL(baseUrl).hostname.replace(/\./g, "_");
    const mainPageUrl = await logIn(jar, baseUrl, loginId, loginPassword);
    const session: SiteSession = { baseUrl, siteSlug, jar, mainPageUrl };

    // Load the list page once to learn the search form's current field values.
    const initialListHtml = await postCustomerList(
      session,
      new URLSearchParams({
        _ihr: "n",
        _ihl: "n",
        _content_only: "n",
        site_gubun: "pc",
      }).toString(),
    );
    const baseSearchFields = parseSearchForm(initialListHtml);

    // Sequential on purpose: all requests share one session and cookie jar.
    for (const range of ranges) {
      await exportRange(session, baseSearchFields, range);
    }
  }
}
/** Prints a failure's message on stderr and marks the process as failed. */
function reportFatalError(reason: unknown): void {
  const text = reason instanceof Error ? reason.message : String(reason);
  process.stderr.write(`${text}\n`);
  // Set the exit code instead of calling process.exit() so pending output can flush.
  process.exitCode = 1;
}

main().catch(reportFatalError);