This commit is contained in:
2026-04-13 11:25:40 +09:00
commit 8a24c92395
67 changed files with 276281 additions and 0 deletions

451
bizmax-download-excel.ts Normal file
View File

@@ -0,0 +1,451 @@
import axios, {
type AxiosRequestConfig,
type AxiosResponse,
type RawAxiosResponseHeaders,
} from "axios";
import * as cheerio from "cheerio";
import { createHash } from "node:crypto";
import { mkdir, writeFile } from "node:fs/promises";
import path from "node:path";
// Sites processed when neither BIZMAX_BASE_URLS nor BIZMAX_BASE_URL is set.
const DEFAULT_BASE_URLS = ["https://hyun.bizmax.net", "https://hyun2.bizmax.net"];
// Comma-separated list of site base URLs. BIZMAX_BASE_URLS takes precedence over
// BIZMAX_BASE_URL; entries are trimmed and empty entries are dropped.
const BASE_URLS = (
process.env.BIZMAX_BASE_URLS ??
process.env.BIZMAX_BASE_URL ??
DEFAULT_BASE_URLS.join(",")
)
.split(",")
.map((value) => value.trim())
.filter(Boolean);
// Login credentials. They may be undefined here; main() validates them through
// getRequiredEnv before use.
const LOGIN_ID = process.env.BIZMAX_ID;
const LOGIN_PASSWORD = process.env.BIZMAX_PASSWORD;
// Directory the downloaded files are written to; defaults to "output" under the
// current working directory.
const OUTPUT_DIR = process.env.BIZMAX_OUTPUT_DIR ?? path.resolve(process.cwd(), "output");
// Path of the customer list page, resolved against each base URL.
const CUSTOMER_LIST_PATH = "/customer/tpl/customer_list.php";
// First day of the overall span to download, as YYYY-MM-DD (parsed as UTC midnight
// by buildQuarterRanges).
const RANGE_START = process.env.BIZMAX_RANGE_START ?? "2016-01-01";
// Number of months covered by one download batch; buildQuarterRanges rejects
// anything that is not a positive integer.
const BATCH_MONTHS = Number(process.env.BIZMAX_BATCH_MONTHS ?? "3");
/**
 * JSON body returned by the login endpoint, as far as this script reads it.
 * Every field is optional because the response is not validated at runtime.
 */
type LoginResponse = {
// Not read by the code in this file.
ret_code?: number;
// Included in the error message when login fails.
ret_msg?: string;
// Login is treated as successful only when this stringifies to "1".
login_succ?: string | number;
// "y" is treated as "SMS verification required", which aborts the run.
u_sms_a?: string;
// URL (or path) of the page requested right after a successful login.
f_url?: string;
// Not read by the code in this file.
lt?: string;
};
/**
 * Minimal in-memory cookie store keyed by cookie name.
 *
 * Only the leading `name=value` pair of each `Set-Cookie` line is kept; all
 * attributes after the first `;` (path, expiry, flags, ...) are ignored, and a
 * later cookie with the same name replaces the earlier value.
 */
class SimpleCookieJar {
  private readonly cookies = new Map<string, string>();

  /**
   * Records the cookies found in a list of raw `Set-Cookie` header lines.
   *
   * Lines with no `=` in their first segment, or whose first segment starts
   * with `=`, are skipped.
   *
   * @param setCookie Raw `Set-Cookie` header lines; defaults to none.
   */
  addFromHeaders(setCookie: string[] = []): void {
    setCookie.forEach((rawLine) => {
      // Keep only the part before the first ';' (the name=value pair).
      const semicolonAt = rawLine.indexOf(";");
      const nameValue = semicolonAt === -1 ? rawLine : rawLine.slice(0, semicolonAt);

      const separatorAt = nameValue.indexOf("=");
      if (separatorAt > 0) {
        const cookieName = nameValue.slice(0, separatorAt).trim();
        const cookieValue = nameValue.slice(separatorAt + 1).trim();
        this.cookies.set(cookieName, cookieValue);
      }
    });
  }

  /**
   * Serializes the stored cookies as a `Cookie` request header value.
   *
   * @returns `name=value` pairs joined with `"; "`, or an empty string when
   *   nothing is stored.
   */
  toHeader(): string {
    const pairs: string[] = [];
    this.cookies.forEach((value, name) => {
      pairs.push(`${name}=${value}`);
    });
    return pairs.join("; ");
  }
}
/**
 * Computes the MD5 digest of a string.
 *
 * @param value Text to hash.
 * @returns The digest as a lowercase hexadecimal string.
 */
function md5(value: string): string {
  const hasher = createHash("md5");
  hasher.update(value);
  return hasher.digest("hex");
}
/**
 * Returns a configuration value that must be present.
 *
 * @param name Environment variable name, used only in the error message.
 * @param value Value read for that variable.
 * @returns `value` when it is a non-empty string.
 * @throws Error when `value` is undefined or empty.
 */
function getRequiredEnv(name: string, value: string | undefined): string {
  if (value) {
    return value;
  }
  throw new Error(`Missing required environment variable: ${name}`);
}
/**
 * Extracts the `set-cookie` lines from a response header object.
 *
 * @param headers Response headers keyed by lowercase header name.
 * @returns The string entries of the `set-cookie` array, in order; an empty
 *   array when the header is missing or is not an array.
 */
function getSetCookieHeaders(headers: RawAxiosResponseHeaders | Record<string, unknown>): string[] {
  const rawValue: unknown = headers["set-cookie"];
  if (!Array.isArray(rawValue)) {
    return [];
  }
  const cookieLines: string[] = [];
  for (const entry of rawValue) {
    // Non-string entries are dropped rather than coerced.
    if (typeof entry === "string") {
      cookieLines.push(entry);
    }
  }
  return cookieLines;
}
/**
 * Sends one HTTP request through axios using the given cookie jar.
 *
 * A browser-like `User-Agent` is applied unless the caller's headers override
 * it, the jar's cookies are sent as the `Cookie` header, and any `set-cookie`
 * lines on the response are stored back into the jar. Redirects are not
 * followed, and axios is told to accept every status so the status check
 * happens here.
 *
 * @param jar Cookie jar read before and updated after the request.
 * @param config Axios request configuration; `headers`, `validateStatus` and
 *   `maxRedirects` are overridden by this function.
 * @returns The axios response for any status below 400 (3xx included).
 * @throws Error when the response status is 400 or higher.
 */
async function request<T = unknown>(
  jar: SimpleCookieJar,
  config: AxiosRequestConfig,
): Promise<AxiosResponse<T>> {
  const callerHeaders = config.headers as Record<string, string> | undefined;
  const headers: Record<string, string> = {
    "User-Agent":
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
    ...callerHeaders,
  };

  // The jar always wins over a caller-supplied Cookie header.
  const storedCookies = jar.toHeader();
  if (storedCookies) {
    headers.Cookie = storedCookies;
  }

  const response = await axios<T>({
    ...config,
    headers,
    validateStatus: () => true,
    maxRedirects: 0,
  });

  // Cookies are captured before the status check, so error responses update the jar too.
  jar.addFromHeaders(getSetCookieHeaders(response.headers));

  const failed = response.status >= 400;
  if (!failed) {
    return response;
  }
  const verb = config.method ?? "GET";
  throw new Error(`Request failed: ${verb} ${config.url} -> ${response.status}`);
}
/**
 * Resolves a URL or path against a base URL.
 *
 * @param baseUrl Absolute base URL.
 * @param urlOrPath Absolute URL (returned normalized) or a path/relative
 *   reference resolved against `baseUrl`.
 * @returns The resolved absolute URL as a string.
 * @throws TypeError when the inputs do not form a valid URL.
 */
function absoluteUrl(baseUrl: string, urlOrPath: string): string {
  const resolved = new URL(urlOrPath, baseUrl);
  return resolved.href;
}
/**
 * Reads the `app_name` and `auth_step` input values from the login page.
 *
 * @param html Markup of the login page.
 * @returns The two values, falling back to `"Chrome"` and `"1"` respectively
 *   when the input is absent or has no value.
 */
function parseLoginPage(html: string): { appName: string; authStep: string } {
  const $ = cheerio.load(html);
  const readInput = (selector: string, fallback: string): string => {
    const raw = $(selector).val();
    return raw?.toString() ?? fallback;
  };
  return {
    appName: readInput('input[name="app_name"]', "Chrome"),
    authStep: readInput('input[name="auth_step"]', "1"),
  };
}
/**
 * Extracts the submit target and input values of the `excel_form` form.
 *
 * The target is the form's `action` attribute when it is non-empty; otherwise
 * it is built as `/customer/tpl/<ctm_tpl_excel attribute>`, with
 * `customer_excel.php` used when that attribute is missing or empty. The
 * result is therefore never empty, so no separate "action missing" check is
 * needed.
 *
 * @param html Markup of the customer list page.
 * @returns `action` (URL or path to post to) and `fields` (value of every
 *   named `<input>` in the form; an input without a value maps to `""`, and a
 *   repeated name keeps the last value).
 * @throws Error when the page has no `form[name="excel_form"]`.
 */
function parseExcelForm(html: string): { action: string; fields: Record<string, string> } {
  const $ = cheerio.load(html);
  const form = $('form[name="excel_form"]');
  if (!form.length) {
    throw new Error("excel_form was not found in the customer list page");
  }

  // `||` rather than `??` on purpose: an empty attribute must fall through too.
  const fallbackTemplate = form.attr("ctm_tpl_excel") || "customer_excel.php";
  const action = form.attr("action") || `/customer/tpl/${fallbackTemplate}`;

  const fields: Record<string, string> = {};
  form.find("input[name]").each((_, element) => {
    const input = $(element);
    const name = input.attr("name");
    // The selector requires the attribute, but it may still be empty.
    if (!name) {
      return;
    }
    fields[name] = input.val()?.toString() ?? "";
  });
  return { action, fields };
}
/**
 * Determines the total number of rows reported by the customer list page.
 *
 * Sources are tried in this order:
 *   1. the `ctm_list_total_cnt` input of `excel_form`, else of `search_form`;
 *   2. a `Total : <digits>건` label anywhere in the page text.
 * Thousands separators (commas) are removed before conversion.
 *
 * @param html Markup of the customer list page.
 * @returns The parsed count, or 0 when no source yields a number.
 */
function parseTotalCount(html: string): number {
  const $ = cheerio.load(html);
  const toCount = (digits: string): number => Number(digits.replace(/,/g, ""));

  const fieldValue =
    $('form[name="excel_form"] input[name="ctm_list_total_cnt"]').val()?.toString() ??
    $('form[name="search_form"] input[name="ctm_list_total_cnt"]').val()?.toString();
  if (fieldValue) {
    const fromField = toCount(fieldValue);
    if (!Number.isNaN(fromField)) {
      return fromField;
    }
  }

  // Fall back to the visible label when the input is missing, empty or unparsable.
  const labelMatch = /Total\s*[:]\s*([\d,]+)건/i.exec($.text());
  if (labelMatch) {
    const fromLabel = toCount(labelMatch[1]);
    if (!Number.isNaN(fromLabel)) {
      return fromLabel;
    }
  }

  return 0;
}
/**
 * Collects the current values of the `search_form` form controls.
 *
 * Rules per named control (`input`, `select`, `textarea`):
 *   - checkbox / radio: included only when checked; value defaults to `"on"`;
 *   - select: value of the first `option[selected]` match, else the select's
 *     own value, else `""`;
 *   - anything else: its value, or `""` when it has none.
 * A name that appears more than once keeps the last value written.
 *
 * @param html Markup of the customer list page.
 * @returns Map of control name to value.
 * @throws Error when the page has no `form[name="search_form"]`.
 */
function parseSearchForm(html: string): Record<string, string> {
  const $ = cheerio.load(html);
  const form = $('form[name="search_form"]');
  if (form.length === 0) {
    throw new Error("search_form was not found in the customer list page");
  }

  const collected: Record<string, string> = {};
  form.find("input[name], select[name], textarea[name]").each((_, element) => {
    const control = $(element);
    const name = control.attr("name");
    if (!name) {
      return;
    }

    const controlType = (control.attr("type") ?? "").toLowerCase();
    const isToggle = controlType === "checkbox" || controlType === "radio";

    if (isToggle) {
      // Unchecked toggles are left out entirely.
      if (control.is(":checked")) {
        collected[name] = control.val()?.toString() ?? "on";
      }
    } else if (element.tagName.toLowerCase() === "select") {
      collected[name] =
        control.find("option[selected]").val()?.toString() ?? control.val()?.toString() ?? "";
    } else {
      collected[name] = control.val()?.toString() ?? "";
    }
  });
  return collected;
}
/**
 * Converts a flat name/value map into URL-encoded form parameters.
 *
 * @param fields Form field names mapped to their values.
 * @returns A `URLSearchParams` holding one entry per field, in the map's own
 *   enumeration order.
 */
function buildFormBody(fields: Record<string, string>): URLSearchParams {
  // Object keys are unique, so building from entries never yields duplicates.
  return new URLSearchParams(Object.entries(fields));
}
/**
 * Extracts a safe file name from a `Content-Disposition` header value.
 *
 * The extended `filename*=UTF-8'<lang>'<percent-encoded>` parameter is
 * preferred; otherwise the plain `filename=` parameter is used, either quoted
 * or as an unquoted token that ends at the next `;`. The value is
 * percent-decoded when possible (left as-is when decoding fails).
 *
 * Because the header comes from the server and the result ends up in a file
 * path, only the last path segment is returned: everything up to the final
 * `/` or `\` is dropped, and `""`, `"."` and `".."` are rejected.
 *
 * @param contentDisposition Raw header value, or undefined when absent.
 * @returns The file name, or null when the header is missing or carries no
 *   usable name.
 */
function getFileNameFromDisposition(contentDisposition: string | undefined): string | null {
  if (!contentDisposition) {
    return null;
  }

  const decodeFileName = (value: string): string => {
    try {
      return decodeURIComponent(value);
    } catch {
      // Malformed percent-escapes: keep the raw text rather than fail.
      return value;
    }
  };

  // Decode first so that encoded separators (%2F, %5C) are stripped as well.
  const toSafeName = (raw: string): string | null => {
    const leaf = decodeFileName(raw.trim()).split(/[\\/]/).pop()?.trim() ?? "";
    return leaf === "" || leaf === "." || leaf === ".." ? null : leaf;
  };

  const utf8Match = contentDisposition.match(/filename\*\s*=\s*UTF-8'[^']*'([^;]+)/i);
  if (utf8Match) {
    const extendedName = toSafeName(utf8Match[1]);
    if (extendedName) {
      return extendedName;
    }
  }

  // Quoted value, or an unquoted token that stops at the next parameter.
  const basicMatch = contentDisposition.match(/(?:^|[;\s])filename\s*=\s*(?:"([^"]*)"|([^;]+))/i);
  if (basicMatch) {
    return toSafeName(basicMatch[1] ?? basicMatch[2] ?? "");
  }

  return null;
}
/**
 * Formats a date as its UTC calendar day.
 *
 * @param date Date to format.
 * @returns The first ten characters of the ISO-8601 string (`YYYY-MM-DD` for
 *   four-digit years).
 * @throws RangeError when `date` is invalid.
 */
function toYmd(date: Date): string {
  const isoText = date.toISOString();
  return isoText.substring(0, 10);
}
/**
 * Returns the UTC midnight that lies `months` calendar months from `date`.
 *
 * The day of month is kept when the target month has it and is otherwise
 * clamped to that month's last day (Jan 31 + 1 month is Feb 28/29, not early
 * March). The time of day of `date` is discarded. All arithmetic is in UTC.
 *
 * @param date Starting date; not modified.
 * @param months Number of months to add; may be negative.
 * @returns A new Date at 00:00:00.000 UTC on the resulting day.
 */
function addMonths(date: Date, months: number): Date {
  const year = date.getUTCFullYear();
  const targetMonth = date.getUTCMonth() + months;
  // Day 0 of the month after the target is the target month's last day.
  const lastDayOfTarget = new Date(Date.UTC(year, targetMonth + 1, 0)).getUTCDate();
  const day = Math.min(date.getUTCDate(), lastDayOfTarget);
  // Date.UTC normalizes month values outside 0-11 into the right year.
  return new Date(Date.UTC(year, targetMonth, day));
}
/**
 * Returns a new date shifted by a number of 24-hour days.
 *
 * @param date Starting date; not modified.
 * @param days Number of days to add; may be negative.
 * @returns A new Date exactly `days * 24h` away from `date`.
 */
function addDays(date: Date, days: number): Date {
  const offsetMs = days * 24 * 60 * 60 * 1000;
  return new Date(date.valueOf() + offsetMs);
}
/**
 * Picks the earlier of two dates.
 *
 * @param left First candidate; returned when the two are equal.
 * @param right Second candidate.
 * @returns The same object that was passed in (no copy is made).
 */
function minDate(left: Date, right: Date): Date {
  if (left.getTime() <= right.getTime()) {
    return left;
  }
  return right;
}
/**
 * Splits the span from `startYmd` through today (UTC) into consecutive,
 * non-overlapping date ranges of `batchMonths` months each.
 *
 * Each range ends the day before the next one starts; the final range is cut
 * off at today's UTC date. The result is empty when `startYmd` is in the
 * future.
 *
 * @param startYmd First day of the span, as `YYYY-MM-DD`.
 * @param batchMonths Length of one range in months; must be a positive integer.
 * @returns Ranges in chronological order, with inclusive `start` and `end`
 *   formatted as `YYYY-MM-DD`.
 * @throws Error when `startYmd` does not parse to a valid date or
 *   `batchMonths` is not a positive integer.
 */
function buildQuarterRanges(startYmd: string, batchMonths: number): Array<{ start: string; end: string }> {
  const firstDay = new Date(`${startYmd}T00:00:00Z`);
  if (Number.isNaN(firstDay.getTime())) {
    throw new Error(`Invalid BIZMAX_RANGE_START: ${startYmd}`);
  }
  const validBatch = Number.isInteger(batchMonths) && batchMonths > 0;
  if (!validBatch) {
    throw new Error(`Invalid BIZMAX_BATCH_MONTHS: ${batchMonths}`);
  }

  // Today at UTC midnight is the last day any range may cover.
  const now = new Date();
  const lastDay = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate()));

  const ranges: Array<{ start: string; end: string }> = [];
  for (let cursor = firstDay; cursor.getTime() <= lastDay.getTime(); ) {
    const following = addMonths(cursor, batchMonths);
    const closing = minDate(addDays(following, -1), lastDay);
    ranges.push({ start: toYmd(cursor), end: toYmd(closing) });
    cursor = following;
  }
  return ranges;
}
/**
 * Logs in to every configured site and downloads its customer list export,
 * one file per date range, into OUTPUT_DIR.
 *
 * For each base URL the steps are: load the login page, post the login form,
 * open the page named by the login response, load the customer list to read
 * the default search form, then for each date range run the search and, when
 * it reports at least one row, post the export form and save the response.
 * The path of every written file is printed to stdout; skipped ranges are
 * reported there as well.
 *
 * @throws Error when a credential is missing, when login fails or requires
 *   SMS verification, or when any request returns a status of 400 or higher.
 *   The first error stops the whole run, including the remaining sites.
 */
async function main(): Promise<void> {
const loginId = getRequiredEnv("BIZMAX_ID", LOGIN_ID);
const loginPassword = getRequiredEnv("BIZMAX_PASSWORD", LOGIN_PASSWORD);
await mkdir(OUTPUT_DIR, { recursive: true });
// The ranges depend only on the configuration and today's date, so they are
// computed once and reused for every site.
const ranges = buildQuarterRanges(RANGE_START, BATCH_MONTHS);
for (const baseUrl of BASE_URLS) {
// Each site gets its own cookie jar, i.e. its own session.
const jar = new SimpleCookieJar();
// Host name with dots replaced by underscores; used in file names and log lines.
const siteSlug = new URL(baseUrl).hostname.replace(/\./g, "_");
// Step 1: fetch the login page for its hidden form values (and any cookies it sets).
const loginPageResponse = await request<string>(jar, {
method: "GET",
url: absoluteUrl(baseUrl, "/"),
responseType: "text",
});
const { appName, authStep } = parseLoginPage(loginPageResponse.data);
// Step 2: post the login form. The password is sent as its MD5 hex digest.
const loginBody = new URLSearchParams({
from: "pc",
nhoj: loginId,
mluap: md5(loginPassword),
app_name: appName,
captcha_str: "",
auth_step: authStep,
force_sms: "n",
});
const loginResponse = await request<LoginResponse>(jar, {
method: "POST",
url: absoluteUrl(baseUrl, "/login_pcs_jr.php"),
data: loginBody.toString(),
responseType: "json",
headers: {
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
Accept: "application/json, text/javascript, */*; q=0.01",
Referer: absoluteUrl(baseUrl, "/"),
Origin: baseUrl,
"X-Requested-With": "XMLHttpRequest",
},
});
const loginData = loginResponse.data;
// NOTE(review): the body is used without runtime validation; this assumes the
// endpoint always answers with a JSON object — confirm.
if (loginData.u_sms_a === "y") {
throw new Error(`[${baseUrl}] This account requires SMS verification. Axios-only automation cannot finish that step.`);
}
if (`${loginData.login_succ ?? ""}` !== "1" || !loginData.f_url) {
throw new Error(`[${baseUrl}] Login failed: ${loginData.ret_msg ?? "unknown error"}`);
}
// Step 3: open the page the login response points to. Its body is not used;
// presumably the visit is needed to finish establishing the session — confirm.
const mainPageUrl = absoluteUrl(baseUrl, loginData.f_url);
await request<string>(jar, {
method: "GET",
url: mainPageUrl,
responseType: "text",
headers: {
Referer: absoluteUrl(baseUrl, "/"),
},
});
// Step 4: load the customer list once to capture the search form's default values.
const customerListResponse = await request<string>(jar, {
method: "POST",
url: absoluteUrl(baseUrl, CUSTOMER_LIST_PATH),
data: new URLSearchParams({
_ihr: "n",
_ihl: "n",
_content_only: "n",
site_gubun: "pc",
}).toString(),
responseType: "text",
headers: {
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
Referer: mainPageUrl,
Origin: baseUrl,
"X-Requested-With": "XMLHttpRequest",
},
});
const baseSearchFields = parseSearchForm(customerListResponse.data);
// Step 5: run the search and the export once per date range, sequentially.
for (const range of ranges) {
// The defaults from the page are overridden with the search mode and a date
// filter on a.reg_date covering this range.
const searchFields = {
...baseSearchFields,
_ihr: "n",
_ihl: "n",
_content_only: "n",
site_gubun: "pc",
mode: "search",
page: "",
use_date_1: "a.reg_date",
use_date_1_src: "a.reg_date",
use_date_1_dsp: "- 접수일자",
start_date_1: range.start,
end_date_1: range.end,
};
const rangedCustomerListResponse = await request<string>(jar, {
method: "POST",
url: absoluteUrl(baseUrl, CUSTOMER_LIST_PATH),
data: buildFormBody(searchFields).toString(),
responseType: "text",
headers: {
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
Referer: mainPageUrl,
Origin: baseUrl,
"X-Requested-With": "XMLHttpRequest",
},
});
// Ranges whose count is 0 are skipped without requesting the export. Note that
// parseTotalCount also returns 0 when it cannot find a count at all.
const totalCount = parseTotalCount(rangedCustomerListResponse.data);
if (totalCount === 0) {
process.stdout.write(`[${siteSlug}] SKIP ${range.start}~${range.end} (0건)\n`);
continue;
}
// The export form is read from the search result page and posted with e_type forced to "1".
const { action, fields } = parseExcelForm(rangedCustomerListResponse.data);
fields.e_type = "1";
const excelResponse = await request<ArrayBuffer>(jar, {
method: "POST",
url: absoluteUrl(baseUrl, action),
data: buildFormBody(fields).toString(),
responseType: "arraybuffer",
headers: {
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
Referer: absoluteUrl(baseUrl, CUSTOMER_LIST_PATH),
Origin: baseUrl,
},
});
// NOTE(review): request() accepts every status below 400 and does not follow
// redirects, and the body is written without checking its content type, so a
// 3xx or an HTML answer would be saved as if it were the export — confirm.
const contentDisposition =
typeof excelResponse.headers["content-disposition"] === "string"
? excelResponse.headers["content-disposition"]
: undefined;
const fileName =
getFileNameFromDisposition(contentDisposition) ??
`bizmax-customer-list-${range.start}-${range.end}.xlsx`;
// The saved name is <site>_<server name without extension>_<start>_<end><ext>,
// with ".xlsx" used when the server-provided name has no extension.
const ext = path.extname(fileName);
const baseName = ext ? fileName.slice(0, -ext.length) : fileName;
const rangedFileName = `${siteSlug}_${baseName}_${range.start}_${range.end}${ext || ".xlsx"}`;
const outputPath = path.join(OUTPUT_DIR, rangedFileName);
await writeFile(outputPath, Buffer.from(excelResponse.data));
process.stdout.write(`${outputPath}\n`);
}
}
}
// Script entry point: run main() and, on failure, print only the error message
// to stderr and mark the process as failed without forcing an immediate exit.
main().catch((error: unknown) => {
  let message: string;
  if (error instanceof Error) {
    message = error.message;
  } else {
    message = String(error);
  }
  process.stderr.write(`${message}\n`);
  process.exitCode = 1;
});