init
This commit is contained in:
451
bizmax-download-excel.ts
Normal file
451
bizmax-download-excel.ts
Normal file
@@ -0,0 +1,451 @@
|
||||
import axios, {
|
||||
type AxiosRequestConfig,
|
||||
type AxiosResponse,
|
||||
type RawAxiosResponseHeaders,
|
||||
} from "axios";
|
||||
import * as cheerio from "cheerio";
|
||||
import { createHash } from "node:crypto";
|
||||
import { mkdir, writeFile } from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
|
||||
/** Site origins processed when neither BIZMAX_BASE_URLS nor BIZMAX_BASE_URL is set. */
const DEFAULT_BASE_URLS = ["https://hyun.bizmax.net", "https://hyun2.bizmax.net"];

/**
 * Origins to process, read as a comma-separated list from BIZMAX_BASE_URLS,
 * else BIZMAX_BASE_URL, else the defaults. Entries are trimmed and blank
 * entries are dropped.
 */
const BASE_URLS = (
  process.env.BIZMAX_BASE_URLS ??
  process.env.BIZMAX_BASE_URL ??
  DEFAULT_BASE_URLS.join(",")
)
  .split(",")
  .flatMap((entry) => {
    const trimmed = entry.trim();
    return trimmed ? [trimmed] : [];
  });

/** Credentials; both may be undefined here and are checked with getRequiredEnv in main(). */
const { BIZMAX_ID: LOGIN_ID, BIZMAX_PASSWORD: LOGIN_PASSWORD } = process.env;

/** Directory that receives the downloaded files; defaults to `<cwd>/output`. */
const OUTPUT_DIR = process.env.BIZMAX_OUTPUT_DIR ?? path.resolve("output");

/** Site-relative path of the customer list page that hosts the search and excel forms. */
const CUSTOMER_LIST_PATH = "/customer/tpl/customer_list.php";

/** First day (YYYY-MM-DD) of the overall date span that gets split into batches. */
const RANGE_START = process.env.BIZMAX_RANGE_START ?? "2016-01-01";

/** Length of each batch in months; validated later by buildQuarterRanges. */
const BATCH_MONTHS = Number(process.env.BIZMAX_BATCH_MONTHS ?? "3");
|
||||
|
||||
/**
 * JSON payload returned by the login endpoint. Every field is optional because
 * the response is not validated at runtime.
 */
type LoginResponse = {
  /** Compared (after string conversion) against "1" by the caller to detect success. */
  login_succ?: string | number;
  /** Post-login URL; the caller resolves it against the site origin and fetches it. */
  f_url?: string;
  /** "y" makes the caller abort because SMS verification is required. */
  u_sms_a?: string;
  /** Message the caller embeds in the "Login failed" error. */
  ret_msg?: string;
  /** Not read anywhere in this file. */
  ret_code?: number;
  /** Not read anywhere in this file. */
  lt?: string;
};
|
||||
|
||||
/**
 * Minimal in-memory cookie store. It keeps the most recent value per cookie
 * name and renders all of them as one `Cookie` header value. Cookie attributes
 * (everything after the first `;`) are discarded, so there is no path/domain
 * scoping and no expiry handling.
 */
class SimpleCookieJar {
  private readonly cookies = new Map<string, string>();

  /**
   * Records the `name=value` pair of each `Set-Cookie` line.
   * Lines without `=`, or with `=` as the first character, are ignored.
   */
  addFromHeaders(setCookie: string[] = []): void {
    setCookie.forEach((line) => {
      // Only the text before the first ';' carries the name/value pair.
      const semicolonAt = line.indexOf(";");
      const pair = semicolonAt === -1 ? line : line.slice(0, semicolonAt);

      const separatorAt = pair.indexOf("=");
      if (separatorAt > 0) {
        const cookieName = pair.slice(0, separatorAt).trim();
        const cookieValue = pair.slice(separatorAt + 1).trim();
        this.cookies.set(cookieName, cookieValue);
      }
    });
  }

  /** Renders the stored cookies as `a=1; b=2`, or an empty string when none are stored. */
  toHeader(): string {
    const parts: string[] = [];
    for (const [cookieName, cookieValue] of this.cookies) {
      parts.push(`${cookieName}=${cookieValue}`);
    }
    return parts.join("; ");
  }
}
|
||||
|
||||
/** Returns the hexadecimal MD5 digest of `value`. */
function md5(value: string): string {
  const hasher = createHash("md5");
  hasher.update(value);
  return hasher.digest("hex");
}
|
||||
|
||||
/**
 * Returns `value` when it is a non-empty string.
 *
 * @param name Variable name, used only in the error message.
 * @param value Value that was read for that variable.
 * @throws Error when `value` is undefined or empty.
 */
function getRequiredEnv(name: string, value: string | undefined): string {
  if (value) {
    return value;
  }

  throw new Error(`Missing required environment variable: ${name}`);
}
|
||||
|
||||
/**
 * Extracts the `set-cookie` lines from a response header bag.
 * Only an array value is accepted; non-string array entries are dropped and
 * any other shape (missing, a single string, ...) yields an empty list.
 */
function getSetCookieHeaders(headers: RawAxiosResponseHeaders | Record<string, unknown>): string[] {
  const raw: unknown = headers["set-cookie"];
  if (!Array.isArray(raw)) {
    return [];
  }

  const lines: string[] = [];
  for (const entry of raw) {
    if (typeof entry === "string") {
      lines.push(entry);
    }
  }
  return lines;
}
|
||||
|
||||
/**
 * Performs one axios request on behalf of a cookie jar.
 *
 * A browser-like `User-Agent` is sent unless the caller overrides it, the jar's
 * cookies are attached as the `Cookie` header, and any `Set-Cookie` lines in the
 * response are stored back into the jar. Redirects are not followed
 * (`maxRedirects: 0`), so a 3xx response is returned to the caller as-is.
 *
 * @throws Error when the response status is 400 or above.
 */
async function request<T = unknown>(
  jar: SimpleCookieJar,
  config: AxiosRequestConfig,
): Promise<AxiosResponse<T>> {
  const headers: Record<string, string> = {
    "User-Agent":
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
  };

  // Caller-supplied headers win over the default above.
  const callerHeaders = config.headers as Record<string, string> | undefined;
  for (const [headerName, headerValue] of Object.entries(callerHeaders ?? {})) {
    headers[headerName] = headerValue;
  }

  // The jar always has the last word on the Cookie header when it holds anything.
  const cookieHeader = jar.toHeader();
  if (cookieHeader) {
    headers.Cookie = cookieHeader;
  }

  const response = await axios<T>({
    ...config,
    headers,
    // Never let axios reject on status; cookies must be captured first.
    validateStatus: () => true,
    maxRedirects: 0,
  });

  jar.addFromHeaders(getSetCookieHeaders(response.headers));

  if (response.status < 400) {
    return response;
  }

  throw new Error(`Request failed: ${config.method ?? "GET"} ${config.url} -> ${response.status}`);
}
|
||||
|
||||
/**
 * Resolves `urlOrPath` against `baseUrl` and returns the absolute URL string.
 * An already-absolute `urlOrPath` is returned (normalized) regardless of `baseUrl`.
 */
function absoluteUrl(baseUrl: string, urlOrPath: string): string {
  const resolved = new URL(urlOrPath, baseUrl);
  return resolved.href;
}
|
||||
|
||||
/**
 * Reads the hidden `app_name` and `auth_step` inputs from the login page HTML.
 * A missing input falls back to "Chrome" and "1" respectively.
 */
function parseLoginPage(html: string): { appName: string; authStep: string } {
  const $ = cheerio.load(html);

  const readInput = (inputName: string, fallback: string): string =>
    $(`input[name="${inputName}"]`).val()?.toString() ?? fallback;

  return {
    appName: readInput("app_name", "Chrome"),
    authStep: readInput("auth_step", "1"),
  };
}
|
||||
|
||||
/**
 * Extracts the submit target and the named `<input>` values of the
 * `excel_form` form from the customer list HTML.
 *
 * The target is the form's `action` attribute; when that is missing or empty
 * it is built as `/customer/tpl/<ctm_tpl_excel>`, where `ctm_tpl_excel` is read
 * from the form element's attribute of that name and defaults to
 * `customer_excel.php`. The result is therefore never empty, which is why no
 * separate "action missing" check is needed.
 *
 * Only `<input name=...>` descendants are collected (no select/textarea), and
 * a later input with the same name overwrites an earlier one.
 *
 * @throws Error when the page contains no `excel_form` form.
 */
function parseExcelForm(html: string): { action: string; fields: Record<string, string> } {
  const $ = cheerio.load(html);
  const form = $('form[name="excel_form"]');
  if (!form.length) {
    throw new Error("excel_form was not found in the customer list page");
  }

  // `||` rather than `??` on purpose: an empty attribute must also trigger the fallback.
  const action =
    form.attr("action") || `/customer/tpl/${form.attr("ctm_tpl_excel") || "customer_excel.php"}`;

  const fields: Record<string, string> = {};
  form.find("input[name]").each((_, element) => {
    const input = $(element);
    const name = input.attr("name");
    if (!name) {
      return;
    }

    fields[name] = input.val()?.toString() ?? "";
  });

  return { action, fields };
}
|
||||
|
||||
/**
 * Determines how many rows the customer list page reports.
 *
 * Sources, in order:
 *  1. the hidden `ctm_list_total_cnt` input of `excel_form`, else of
 *     `search_form` (an empty value in the first form no longer blocks the
 *     second one);
 *  2. a `Total : 1,234건` fragment in the page text (ASCII or full-width colon).
 *
 * Thousands separators are stripped before conversion.
 *
 * @returns The parsed count, or 0 when no source yields a number. Note that
 *   the caller cannot distinguish "no rows" from "count not found".
 */
function parseTotalCount(html: string): number {
  const $ = cheerio.load(html);

  // Shared conversion: undefined/empty or non-numeric text means "no answer".
  const parseCount = (raw: string | undefined): number | null => {
    if (!raw) {
      return null;
    }
    const parsed = Number(raw.replace(/,/g, ""));
    return Number.isNaN(parsed) ? null : parsed;
  };

  const hiddenValue =
    $('form[name="excel_form"] input[name="ctm_list_total_cnt"]').val()?.toString() ||
    $('form[name="search_form"] input[name="ctm_list_total_cnt"]').val()?.toString();
  const fromHiddenInput = parseCount(hiddenValue);
  if (fromHiddenInput !== null) {
    return fromHiddenInput;
  }

  // The original character class listed ':' twice; the second entry was
  // presumably the full-width colon, so both forms are accepted here.
  const match = /Total\s*[:：]\s*([\d,]+)건/i.exec($.text());
  return parseCount(match?.[1]) ?? 0;
}
|
||||
|
||||
/**
 * Snapshots the current values of the `search_form` form in the customer list
 * HTML as a name → value map.
 *
 * - checkbox/radio inputs contribute only when checked (value, or "on" when
 *   no value is available);
 * - selects contribute the value of the option carrying a `selected`
 *   attribute, else whatever cheerio reports for the select, else "";
 * - every other named input/textarea contributes its value, or "".
 *
 * A later control with the same name overwrites an earlier one.
 *
 * @throws Error when the page contains no `search_form` form.
 */
function parseSearchForm(html: string): Record<string, string> {
  const $ = cheerio.load(html);
  const form = $('form[name="search_form"]');
  if (form.length === 0) {
    throw new Error("search_form was not found in the customer list page");
  }

  const fields: Record<string, string> = {};
  const controls = form.find("input[name], select[name], textarea[name]").toArray();

  for (const element of controls) {
    const control = $(element);
    const name = control.attr("name");
    if (!name) {
      continue;
    }

    const tagName = element.tagName.toLowerCase();
    const kind = (control.attr("type") ?? "").toLowerCase();
    const isToggle = kind === "checkbox" || kind === "radio";

    // Unchecked toggles are not submitted at all.
    if (isToggle && !control.is(":checked")) {
      continue;
    }

    const currentValue = control.val()?.toString();

    if (isToggle) {
      fields[name] = currentValue ?? "on";
    } else if (tagName === "select") {
      fields[name] = control.find("option[selected]").val()?.toString() ?? currentValue ?? "";
    } else {
      fields[name] = currentValue ?? "";
    }
  }

  return fields;
}
|
||||
|
||||
/**
 * Converts a name → value map into URL-encoded form parameters, one entry per
 * key in the map's enumeration order.
 */
function buildFormBody(fields: Record<string, string>): URLSearchParams {
  return new URLSearchParams(fields);
}
|
||||
|
||||
/**
 * Extracts a safe file name from a `Content-Disposition` header value.
 *
 * The extended `filename*=UTF-8''...` parameter is preferred; otherwise the
 * plain `filename=` parameter is used, quoted or unquoted. An unquoted value
 * ends at the next `;`, so trailing parameters are not swallowed. The value is
 * percent-decoded when possible (kept raw when decoding fails) and reduced to
 * its last path segment, so a server-supplied name can never carry directory
 * components such as `../`.
 *
 * @returns The file name, or `null` when the header is missing, has no
 *   filename parameter, or the name is empty / "." / ".." after cleaning.
 */
function getFileNameFromDisposition(contentDisposition: string | undefined): string | null {
  if (!contentDisposition) {
    return null;
  }

  const cleanFileName = (raw: string): string | null => {
    let decoded = raw.trim();
    try {
      decoded = decodeURIComponent(decoded);
    } catch {
      // Malformed percent-escapes: keep the raw text rather than failing.
    }

    // Split after decoding so encoded separators (%2F, %5C) are caught as well.
    const leaf = (decoded.split(/[\\/]/).pop() ?? "").trim();
    return leaf === "" || leaf === "." || leaf === ".." ? null : leaf;
  };

  const extendedMatch = /filename\*\s*=\s*UTF-8''([^;]+)/i.exec(contentDisposition);
  const extendedRaw = extendedMatch?.[1];
  if (extendedRaw !== undefined) {
    const fromExtended = cleanFileName(extendedRaw);
    if (fromExtended !== null) {
      return fromExtended;
    }
  }

  // Group 1: quoted value; group 2: unquoted token up to the next ';'.
  const basicMatch = /filename\s*=\s*(?:"([^"]*)"|([^;]+))/i.exec(contentDisposition);
  const basicRaw = basicMatch?.[1] ?? basicMatch?.[2];
  return basicRaw === undefined ? null : cleanFileName(basicRaw);
}
|
||||
|
||||
/** Formats a date as its UTC calendar day, i.e. the first 10 characters of its ISO-8601 string. */
function toYmd(date: Date): string {
  const iso = date.toISOString();
  return iso.substring(0, 10);
}
|
||||
|
||||
/**
 * Returns the UTC midnight date `months` calendar months away from `date`
 * (negative values go backwards).
 *
 * The day of month is clamped to the last day of the target month, so
 * Jan 31 + 1 month is Feb 28/29 rather than overflowing into March. The time
 * of day of `date` is discarded.
 */
function addMonths(date: Date, months: number): Date {
  const year = date.getUTCFullYear();
  const targetMonthIndex = date.getUTCMonth() + months;

  // Day 0 of the following month is the last day of the target month.
  const lastDayOfTargetMonth = new Date(Date.UTC(year, targetMonthIndex + 1, 0)).getUTCDate();
  const day = Math.min(date.getUTCDate(), lastDayOfTargetMonth);

  return new Date(Date.UTC(year, targetMonthIndex, day));
}
|
||||
|
||||
/** Returns a new date shifted by `days` × 24 hours (negative values go backwards). */
function addDays(date: Date, days: number): Date {
  const shiftMs = days * 24 * 60 * 60 * 1000;
  const shifted = new Date(date.getTime() + shiftMs);
  return shifted;
}
|
||||
|
||||
/** Returns the earlier of the two dates (the same object, not a copy); `left` wins a tie. */
function minDate(left: Date, right: Date): Date {
  const leftIsNotLater = left.getTime() <= right.getTime();
  if (leftIsNotLater) {
    return left;
  }
  return right;
}
|
||||
|
||||
/**
 * Splits the span from `startYmd` up to and including today's UTC calendar day
 * into consecutive, non-overlapping date ranges of `batchMonths` months each
 * (despite the name, any positive whole number of months is accepted). The
 * last range is cut off at today.
 *
 * Every boundary is computed from the original start date rather than from the
 * previous boundary, so a start on the 29th-31st does not drift to another day
 * of month for the rest of the sequence.
 *
 * NOTE(review): "today" is taken in UTC; if the remote site works in a
 * timezone ahead of UTC, the current local day may not be covered until UTC
 * catches up — confirm whether that matters.
 *
 * @param startYmd First day of the span, `YYYY-MM-DD`.
 * @param batchMonths Positive integer number of months per range.
 * @param today Reference "now"; defaults to the current time. Only its UTC
 *   calendar day is used.
 * @returns Ranges as inclusive `YYYY-MM-DD` pairs; empty when the start lies
 *   after today.
 * @throws Error when `startYmd` is not a valid date or `batchMonths` is not a
 *   positive integer.
 */
function buildQuarterRanges(
  startYmd: string,
  batchMonths: number,
  today: Date = new Date(),
): Array<{ start: string; end: string }> {
  const startDate = new Date(`${startYmd}T00:00:00Z`);
  if (Number.isNaN(startDate.getTime())) {
    throw new Error(`Invalid BIZMAX_RANGE_START: ${startYmd}`);
  }

  if (!Number.isInteger(batchMonths) || batchMonths <= 0) {
    throw new Error(`Invalid BIZMAX_BATCH_MONTHS: ${batchMonths}`);
  }

  // Truncate "now" to UTC midnight so comparisons are day-based.
  const endDate = new Date(Date.UTC(today.getUTCFullYear(), today.getUTCMonth(), today.getUTCDate()));

  const ranges: Array<{ start: string; end: string }> = [];
  let batchIndex = 0;
  let rangeStart = startDate;

  while (rangeStart.getTime() <= endDate.getTime()) {
    batchIndex += 1;
    // Anchor on startDate to avoid cumulative day-of-month drift.
    const nextRangeStart = addMonths(startDate, batchIndex * batchMonths);
    // A range ends the day before the next one starts, but never after today.
    const rangeEnd = minDate(addDays(nextRangeStart, -1), endDate);
    ranges.push({ start: toYmd(rangeStart), end: toYmd(rangeEnd) });
    rangeStart = nextRangeStart;
  }

  return ranges;
}
|
||||
|
||||
/** State shared by every request made against one logged-in site. */
type SiteSession = {
  jar: SimpleCookieJar;
  baseUrl: string;
  siteSlug: string;
  mainPageUrl: string;
};

/**
 * Logs in to one site and opens its post-login page.
 *
 * Loads the login page for its hidden form values, posts the credentials (the
 * password is sent as an MD5 hex digest) and then fetches the URL the login
 * response points to.
 *
 * @returns The absolute URL of the post-login page.
 * @throws Error when the account requires SMS verification or the login is rejected.
 */
async function loginToSite(
  jar: SimpleCookieJar,
  baseUrl: string,
  loginId: string,
  loginPassword: string,
): Promise<string> {
  const rootUrl = absoluteUrl(baseUrl, "/");

  const loginPageResponse = await request<string>(jar, {
    method: "GET",
    url: rootUrl,
    responseType: "text",
  });
  const { appName, authStep } = parseLoginPage(loginPageResponse.data);

  const loginBody = new URLSearchParams({
    from: "pc",
    nhoj: loginId,
    mluap: md5(loginPassword),
    app_name: appName,
    captcha_str: "",
    auth_step: authStep,
    force_sms: "n",
  });

  const loginResponse = await request<LoginResponse>(jar, {
    method: "POST",
    url: absoluteUrl(baseUrl, "/login_pcs_jr.php"),
    data: loginBody.toString(),
    responseType: "json",
    headers: {
      "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
      Accept: "application/json, text/javascript, */*; q=0.01",
      Referer: rootUrl,
      Origin: baseUrl,
      "X-Requested-With": "XMLHttpRequest",
    },
  });

  const loginData = loginResponse.data;
  if (loginData.u_sms_a === "y") {
    throw new Error(`[${baseUrl}] This account requires SMS verification. Axios-only automation cannot finish that step.`);
  }

  if (`${loginData.login_succ ?? ""}` !== "1" || !loginData.f_url) {
    throw new Error(`[${baseUrl}] Login failed: ${loginData.ret_msg ?? "unknown error"}`);
  }

  const mainPageUrl = absoluteUrl(baseUrl, loginData.f_url);
  await request<string>(jar, {
    method: "GET",
    url: mainPageUrl,
    responseType: "text",
    headers: {
      Referer: rootUrl,
    },
  });

  return mainPageUrl;
}

/** Posts `fields` to the customer list page as an AJAX form and returns the response HTML. */
async function postCustomerList(session: SiteSession, fields: Record<string, string>): Promise<string> {
  const response = await request<string>(session.jar, {
    method: "POST",
    url: absoluteUrl(session.baseUrl, CUSTOMER_LIST_PATH),
    data: buildFormBody(fields).toString(),
    responseType: "text",
    headers: {
      "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
      Referer: session.mainPageUrl,
      Origin: session.baseUrl,
      "X-Requested-With": "XMLHttpRequest",
    },
  });

  return response.data;
}

/**
 * Searches one date range and, when it has rows, downloads its Excel export
 * into OUTPUT_DIR. Prints either a SKIP line or the written file path.
 */
async function downloadRange(
  session: SiteSession,
  baseSearchFields: Record<string, string>,
  range: { start: string; end: string },
): Promise<void> {
  const { jar, baseUrl, siteSlug } = session;

  // Page defaults first, then the overrides that select the date range.
  const searchFields: Record<string, string> = {
    ...baseSearchFields,
    _ihr: "n",
    _ihl: "n",
    _content_only: "n",
    site_gubun: "pc",
    mode: "search",
    page: "",
    use_date_1: "a.reg_date",
    use_date_1_src: "a.reg_date",
    use_date_1_dsp: "- 접수일자",
    start_date_1: range.start,
    end_date_1: range.end,
  };

  const listHtml = await postCustomerList(session, searchFields);

  const totalCount = parseTotalCount(listHtml);
  if (totalCount === 0) {
    process.stdout.write(`[${siteSlug}] SKIP ${range.start}~${range.end} (0건)\n`);
    return;
  }

  const { action, fields } = parseExcelForm(listHtml);
  fields.e_type = "1";

  // NOTE(review): request() only rejects statuses >= 400 and does not follow
  // redirects, so a 3xx answer here would be saved as the file body — confirm
  // whether the export endpoint can redirect (e.g. on an expired session).
  const excelResponse = await request<ArrayBuffer>(jar, {
    method: "POST",
    url: absoluteUrl(baseUrl, action),
    data: buildFormBody(fields).toString(),
    responseType: "arraybuffer",
    headers: {
      "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
      Referer: absoluteUrl(baseUrl, CUSTOMER_LIST_PATH),
      Origin: baseUrl,
    },
  });

  const dispositionHeader: unknown = excelResponse.headers["content-disposition"];
  const contentDisposition = typeof dispositionHeader === "string" ? dispositionHeader : undefined;

  // The name comes from the server: keep only its last path segment so it can
  // never move the output outside OUTPUT_DIR.
  const serverFileName = getFileNameFromDisposition(contentDisposition) ?? "";
  const leafName = serverFileName.split(/[\\/]/).pop() ?? "";
  const fileName = leafName || `bizmax-customer-list-${range.start}-${range.end}.xlsx`;

  const ext = path.extname(fileName);
  const baseName = ext ? fileName.slice(0, -ext.length) : fileName;
  const rangedFileName = `${siteSlug}_${baseName}_${range.start}_${range.end}${ext || ".xlsx"}`;
  const outputPath = path.join(OUTPUT_DIR, rangedFileName);

  await writeFile(outputPath, Buffer.from(excelResponse.data));
  process.stdout.write(`${outputPath}\n`);
}

/**
 * Entry point: for every configured site, logs in, reads the default search
 * form and downloads one Excel export per date range, sequentially.
 *
 * Any error aborts the whole run, including the remaining sites.
 *
 * @throws Error when a credential variable is missing, the range settings are
 *   invalid, or any request / parsing step fails.
 */
async function main(): Promise<void> {
  const loginId = getRequiredEnv("BIZMAX_ID", LOGIN_ID);
  const loginPassword = getRequiredEnv("BIZMAX_PASSWORD", LOGIN_PASSWORD);
  await mkdir(OUTPUT_DIR, { recursive: true });
  const ranges = buildQuarterRanges(RANGE_START, BATCH_MONTHS);

  for (const baseUrl of BASE_URLS) {
    // Each site gets its own cookie jar, i.e. its own session.
    const jar = new SimpleCookieJar();
    const siteSlug = new URL(baseUrl).hostname.replace(/\./g, "_");

    const mainPageUrl = await loginToSite(jar, baseUrl, loginId, loginPassword);
    const session: SiteSession = { jar, baseUrl, siteSlug, mainPageUrl };

    // Initial load of the list page, used only to capture the form defaults.
    const initialListHtml = await postCustomerList(session, {
      _ihr: "n",
      _ihl: "n",
      _content_only: "n",
      site_gubun: "pc",
    });
    const baseSearchFields = parseSearchForm(initialListHtml);

    for (const range of ranges) {
      await downloadRange(session, baseSearchFields, range);
    }
  }
}
|
||||
|
||||
/**
 * Last-resort handler for a failed run: writes the error message (or the
 * stringified value for non-Error rejections) to stderr and marks the process
 * as failed without forcing an immediate exit.
 */
function reportFatalError(error: unknown): void {
  let message: string;
  if (error instanceof Error) {
    message = error.message;
  } else {
    message = String(error);
  }

  process.stderr.write(`${message}\n`);
  process.exitCode = 1;
}

main().catch(reportFatalError);
|
||||
Reference in New Issue
Block a user