新增数据补全和提交脚本,支持从 job_bundle.json 读取数据并执行 API 请求;实现批量提交功能,记录成功与失败的日志
This commit is contained in:
201
scripts/node/enricher.mjs
Normal file
201
scripts/node/enricher.mjs
Normal file
@@ -0,0 +1,201 @@
|
||||
/**
|
||||
* enricher.mjs — API 数据补全脚本 (Node.js)
|
||||
*
|
||||
* 读取前端生成的 job_bundle.json,执行 enrichment_rules 中的 API 请求,
|
||||
* 将结果合并到 source_data 中,输出最终完整的 JSON 文件。
|
||||
*
|
||||
* 用法:
|
||||
* node enricher.mjs job_bundle.json
|
||||
* node enricher.mjs job_bundle.json -o enriched_data.json
|
||||
* node enricher.mjs job_bundle.json --concurrency 10
|
||||
*/
|
||||
|
||||
import { readFileSync, writeFileSync } from "fs";
|
||||
import { basename, join, dirname } from "path";
|
||||
|
||||
// ── helpers ──
|
||||
|
||||
/**
 * Walk a dot-separated path through nested objects and arrays.
 *
 * Array segments must be plain non-negative decimal indices and object
 * segments must be own properties. A missing key, an out-of-range index,
 * an inherited property, or stepping into a primitive all yield null.
 *
 * @param {unknown} obj - Root value to walk.
 * @param {string} path - Dot-separated path, e.g. "data.items.0.name".
 * @returns {unknown} The value found at the path, or null when it cannot be resolved.
 * @throws {TypeError} If path is not a string.
 */
function resolvePath(obj, path) {
  if (typeof path !== "string") {
    throw new TypeError(`resolvePath: path must be a string, got ${typeof path}`);
  }

  let current = obj;
  for (const key of path.split(".")) {
    if (current == null) return null;

    if (Array.isArray(current)) {
      // Number("") is 0 and Number("0x1") is 1, so only accept plain digits
      // instead of trusting numeric coercion of the segment.
      if (!/^\d+$/.test(key)) return null;
      const idx = Number(key);
      if (idx >= current.length) return null;
      current = current[idx];
    } else if (typeof current === "object") {
      // Own properties only: "constructor" / "toString" must not resolve
      // to values inherited from Object.prototype.
      if (!Object.hasOwn(current, key)) return null;
      current = current[key];
    } else {
      return null;
    }
  }
  return current ?? null;
}
|
||||
|
||||
/**
 * Substitute `{{key}}` placeholders in a template with values from a row.
 *
 * A placeholder is looked up verbatim first and then with surrounding
 * whitespace trimmed, so `{{ id }}` and `{{id}}` both resolve to `row.id`.
 * Only own properties of the row are used; missing, null and undefined
 * values render as an empty string.
 *
 * @param {string} template - Template text; non-string values are stringified.
 * @param {object} row - Record supplying the placeholder values.
 * @returns {string} The template with every placeholder replaced.
 * @throws {TypeError} If template is null or undefined.
 */
function renderTemplate(template, row) {
  if (template == null) {
    throw new TypeError("renderTemplate: template must not be null or undefined");
  }
  return String(template).replace(/\{\{(.+?)\}\}/g, (_, rawKey) => {
    // Prefer the exact key so rows whose keys really contain spaces still work.
    const key = Object.hasOwn(row, rawKey) ? rawKey : rawKey.trim();
    // Own properties only, otherwise "{{constructor}}" would render a function.
    if (!Object.hasOwn(row, key)) return "";
    const val = row[key];
    return val != null ? String(val) : "";
  });
}
|
||||
|
||||
// ── fetch one ──
|
||||
|
||||
/**
 * Execute one enrichment rule for one row and report the extracted value.
 *
 * The request (URL, headers, optional body) is rendered from the rule's
 * templates using the row. Any failure — template error, network error,
 * non-2xx status, invalid JSON — is logged and turned into the rule's
 * fallback value, so a single bad call never rejects the returned promise.
 *
 * @param {object} rule - Enrichment rule; reads target_key, fallback_value,
 *   url_template, method, headers, body_template and response_path.
 * @param {object} row - Source record used to fill the templates.
 * @param {number} rowIndex - Index of the row, echoed back in the result.
 * @param {{acquire: () => Promise<void>, release: () => void}} semaphore -
 *   Concurrency limiter held for the duration of the request.
 * @returns {Promise<{rowIndex: number, targetKey: string, value: unknown, ok: boolean}>}
 *   `ok` is false when the fallback was used because the call failed.
 */
async function fetchOne(rule, row, rowIndex, semaphore) {
  const targetKey = rule.target_key;
  const fallback = rule.fallback_value ?? null;

  await semaphore.acquire();
  try {
    // Rendering happens inside the try so a malformed rule degrades to the
    // fallback like any other per-call failure instead of rejecting.
    const url = renderTemplate(rule.url_template, row);
    const method = (rule.method || "GET").toUpperCase();

    const headers = {};
    for (const [name, template] of Object.entries(rule.headers ?? {})) {
      headers[name] = renderTemplate(template, row);
    }

    /** @type {RequestInit} */
    const opts = { method, headers };

    // GET and HEAD requests cannot carry a body; every other method may.
    if (rule.body_template && method !== "GET" && method !== "HEAD") {
      const body = renderTemplate(rule.body_template, row);
      opts.body = body;

      // Header names are case-insensitive: reuse an existing key rather
      // than adding a second "Content-Type" next to e.g. "content-type".
      const existing = Object.keys(headers).find((name) => name.toLowerCase() === "content-type");
      if (!existing || !headers[existing]) {
        try {
          JSON.parse(body);
          headers[existing ?? "Content-Type"] = "application/json";
        } catch {
          // Body is not JSON: send it as-is without a default Content-Type.
        }
      }
    }

    const resp = await fetch(url, opts);
    if (!resp.ok) {
      console.log(`  [WARN] Row ${rowIndex} | ${targetKey} | HTTP ${resp.status} <- ${url}`);
      return { rowIndex, targetKey, value: fallback, ok: false };
    }
    const data = await resp.json();
    const value = resolvePath(data, rule.response_path);
    return { rowIndex, targetKey, value: value ?? fallback, ok: true };
  } catch (e) {
    // Node's fetch reports network failures as "fetch failed" and keeps the
    // underlying reason (e.g. ECONNREFUSED) in `cause`.
    const base = e instanceof Error ? e.message : String(e);
    const cause = e instanceof Error && e.cause != null ? (e.cause.message ?? String(e.cause)) : "";
    const reason = cause ? `${base}: ${cause}` : base;
    console.log(`  [ERROR] Row ${rowIndex} | ${targetKey} | ${reason}`);
    return { rowIndex, targetKey, value: fallback, ok: false };
  } finally {
    semaphore.release();
  }
}
|
||||
|
||||
// ── semaphore ──
|
||||
|
||||
/**
 * Create a counting semaphore that limits concurrent async work.
 *
 * `acquire()` resolves immediately while fewer than `max` slots are held and
 * otherwise waits in FIFO order. `release()` hands the slot straight to the
 * oldest waiter, or frees it when nobody is waiting.
 *
 * @param {number} max - Maximum number of simultaneously held slots (>= 1).
 * @returns {{acquire: () => Promise<void>, release: () => void}}
 * @throws {RangeError} If max is not a finite number >= 1; such a limit
 *   would leave every acquire() pending forever.
 */
function createSemaphore(max) {
  const limit = Number(max);
  if (!Number.isFinite(limit) || limit < 1) {
    throw new RangeError(`createSemaphore: max must be a number >= 1, got ${String(max)}`);
  }

  let active = 0;
  /** @type {(() => void)[]} */
  const waiters = [];

  return {
    acquire() {
      if (active < limit) {
        active++;
        return Promise.resolve();
      }
      return new Promise((resolve) => waiters.push(resolve));
    },
    release() {
      const next = waiters.shift();
      if (next) {
        // The slot passes directly to the waiter, so the count is unchanged.
        next();
        return;
      }
      // Ignore unmatched releases so the count can never go negative and
      // grant more than `limit` slots later.
      if (active > 0) active--;
    },
  };
}
|
||||
|
||||
// ── main ──
|
||||
|
||||
/**
 * Parse the enricher command line from process.argv.
 *
 * Recognized: a positional bundle path (the first one wins), `-o/--output
 * <file>`, `--concurrency <N>` and `-h/--help`. Prints usage and exits with
 * status 0 on help, or status 1 when no bundle path is given.
 *
 * @returns {{bundle: string, output: string, concurrency: number}}
 */
function parseArgs() {
  const DEFAULT_CONCURRENCY = 5;
  const args = process.argv.slice(2);
  const opts = { bundle: "", output: "", concurrency: DEFAULT_CONCURRENCY };

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === "-o" || arg === "--output") {
      // Keep `output` a string even when the value is missing.
      opts.output = args[++i] ?? "";
    } else if (arg === "--concurrency") {
      const rawValue = args[++i];
      const parsed = Number(rawValue);
      if (Number.isInteger(parsed) && parsed >= 1) {
        opts.concurrency = parsed;
      } else {
        // Only positive integers are usable as a concurrency limit, so
        // anything else falls back to the default.
        console.error(`Warning: invalid --concurrency value "${rawValue}", using ${DEFAULT_CONCURRENCY}.`);
        opts.concurrency = DEFAULT_CONCURRENCY;
      }
    } else if (arg === "--help" || arg === "-h") {
      console.log("Usage: node enricher.mjs <job_bundle.json> [-o output.json] [--concurrency N]");
      process.exit(0);
    } else if (!opts.bundle) {
      opts.bundle = arg;
    }
  }

  if (!opts.bundle) {
    console.error("Error: Please provide a job_bundle.json path.");
    console.error("Usage: node enricher.mjs <job_bundle.json>");
    process.exit(1);
  }
  return opts;
}
|
||||
|
||||
/**
 * Enricher entry point.
 *
 * Reads the job bundle named on the command line, runs every enrichment rule
 * against every source row (bounded by the concurrency option), writes each
 * result into the row under the rule's target key, and saves
 * `{ meta, submission, data }` to the output file.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const opts = parseArgs();

  const raw = readFileSync(opts.bundle, "utf-8");
  const bundle = JSON.parse(raw);

  const meta = bundle.meta || {};
  const config = bundle.config || {};
  const sourceData = bundle.source_data || [];
  const rules = config.enrichment_rules || [];

  console.log(`=== Job Bundle v${meta.version || "?"} ===`);
  console.log(`Generated: ${meta.generated_at || "?"}`);
  console.log(`Rows: ${sourceData.length}`);
  console.log(`Static rules: ${(config.static_rules || []).length}`);
  console.log(`Enrichment rules: ${rules.length}`);
  console.log();

  if (rules.length === 0) {
    console.log("No enrichment rules configured. Outputting source data as-is.");
  } else {
    const totalCalls = sourceData.length * rules.length;
    console.log(`Enriching ${sourceData.length} rows x ${rules.length} rule(s) = ${totalCalls} API calls`);
    console.log(`Concurrency: ${opts.concurrency}`);
    console.log();

    const semaphore = createSemaphore(opts.concurrency);
    // All calls are created up front; the semaphore bounds how many run at once.
    const tasks = sourceData.flatMap((row, rowIdx) =>
      rules.map((rule) => fetchOne(rule, row, rowIdx, semaphore)),
    );

    const results = await Promise.all(tasks);

    let errorCount = 0;
    for (const r of results) {
      // A failed call is one explicitly flagged `ok: false`. Results without
      // an `ok` flag count as successful; the original undefined-value check
      // is kept as well.
      if (r.ok === false || r.value === undefined) errorCount++;
      sourceData[r.rowIndex][r.targetKey] = r.value;
    }

    console.log();
    console.log(`Done. ${totalCalls - errorCount}/${totalCalls} calls succeeded.`);
  }

  const output = {
    meta,
    submission: config.submission || {},
    data: sourceData,
  };

  let outName = opts.output;
  if (!outName) {
    // When the bundle path has no ".json" suffix, replace() would return it
    // unchanged and the write below would overwrite the input, so append.
    outName = /\.json$/i.test(opts.bundle)
      ? opts.bundle.replace(/\.json$/i, "_enriched.json")
      : `${opts.bundle}_enriched.json`;
  }
  writeFileSync(outName, JSON.stringify(output, null, 2), "utf-8");

  console.log(`\nEnriched data saved to: ${outName}`);
  console.log(`Next step: node submitter.mjs ${outName}`);
}
|
||||
|
||||
// Run the script; any rejection that escapes main() is printed and
// reported through a non-zero exit status.
main().catch((fatalError) => {
  console.error(fatalError);
  process.exit(1);
});
|
||||
177
scripts/node/submitter.mjs
Normal file
177
scripts/node/submitter.mjs
Normal file
@@ -0,0 +1,177 @@
|
||||
/**
|
||||
* submitter.mjs — 数据提交脚本 (Node.js)
|
||||
*
|
||||
* 读取 enricher.mjs 输出的 JSON 文件,按批次提交到目标接口。
|
||||
* 记录提交成功和失败的记录到单独的日志文件。
|
||||
*
|
||||
* 用法:
|
||||
* node submitter.mjs enriched_data.json
|
||||
* node submitter.mjs enriched_data.json --batch-size 100
|
||||
* node submitter.mjs enriched_data.json --url https://api.example.com/import
|
||||
* node submitter.mjs enriched_data.json --dry-run
|
||||
*/
|
||||
|
||||
import { readFileSync, writeFileSync } from "fs";
|
||||
|
||||
// ── args ──
|
||||
|
||||
/**
 * Parse the submitter command line from process.argv.
 *
 * Recognized: a positional input path (the first one wins), `--url <URL>`,
 * `--method <VERB>`, `--batch-size <N>`, `--dry-run` and `-h/--help`.
 * Empty-string / zero values mean "not given". Prints usage and exits with
 * status 0 on help, or status 1 when no input path is given.
 *
 * @returns {{input: string, url: string, method: string, batchSize: number, dryRun: boolean}}
 */
function parseArgs() {
  const args = process.argv.slice(2);
  const opts = { input: "", url: "", method: "", batchSize: 0, dryRun: false };

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === "--url") {
      // Keep string options as strings even when the value is missing.
      opts.url = args[++i] ?? "";
    } else if (arg === "--method") {
      opts.method = args[++i] ?? "";
    } else if (arg === "--batch-size") {
      const rawValue = args[++i];
      const parsed = Number(rawValue);
      if (Number.isInteger(parsed) && parsed >= 1) {
        opts.batchSize = parsed;
      } else {
        // A negative or fractional step would break batching, so reject it
        // here and report "not given" (0) instead.
        console.error(`Warning: invalid --batch-size value "${rawValue}", ignoring it.`);
        opts.batchSize = 0;
      }
    } else if (arg === "--dry-run") {
      opts.dryRun = true;
    } else if (arg === "--help" || arg === "-h") {
      console.log("Usage: node submitter.mjs <enriched.json> [--url URL] [--method POST|PUT] [--batch-size N] [--dry-run]");
      process.exit(0);
    } else if (!opts.input) {
      opts.input = arg;
    }
  }

  if (!opts.input) {
    console.error("Error: Please provide an enriched JSON file path.");
    console.error("Usage: node submitter.mjs <enriched.json>");
    process.exit(1);
  }
  return opts;
}
|
||||
|
||||
// ── submit ──
|
||||
|
||||
/**
 * Send one batch of records to the target endpoint as a JSON array.
 *
 * Never rejects: HTTP and network failures are logged and reported through
 * the result object instead.
 *
 * @param {string} url - Target endpoint.
 * @param {string} method - HTTP method to use.
 * @param {object[]} batch - Records to send.
 * @param {number} batchIndex - 1-based batch number, used for logging.
 * @param {number} totalBatches - Total number of batches, used for logging.
 * @returns {Promise<{ok: boolean, status: number, body: string, data: object[]}>}
 *   `ok` is true for statuses below 400; on a thrown error `status` is 0 and
 *   `body` holds the error message.
 */
async function submitBatch(url, method, batch, batchIndex, totalBatches) {
  try {
    const resp = await fetch(url, {
      method,
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(batch),
    });
    const body = await resp.text();
    const ok = resp.status < 400;
    const tag = ok ? "OK" : "FAIL";
    console.log(`  Batch ${batchIndex}/${totalBatches}: HTTP ${resp.status} ${tag} (${batch.length} records)`);
    return { ok, status: resp.status, body, data: batch };
  } catch (e) {
    // Node's fetch reports network failures as "fetch failed" and keeps the
    // underlying reason (e.g. ECONNREFUSED) in `cause`; include it. Always
    // produce a string so callers can safely slice the body.
    const base = e instanceof Error ? e.message : String(e);
    const cause = e instanceof Error && e.cause != null ? (e.cause.message ?? String(e.cause)) : "";
    const msg = cause ? `${base}: ${cause}` : base;
    console.log(`  Batch ${batchIndex}/${totalBatches}: ERROR - ${msg} (${batch.length} records)`);
    return { ok: false, status: 0, body: msg, data: batch };
  }
}
|
||||
|
||||
// ── main ──
|
||||
|
||||
/**
 * Submitter entry point.
 *
 * Reads the enriched JSON file, splits `data` into batches and sends them
 * one after another to the target URL. Command-line options override the
 * `submission` settings stored in the file. Afterwards writes a success log,
 * a failure report and a retry payload next to the input file, each only
 * when it has records.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const DEFAULT_BATCH_SIZE = 50;
  const opts = parseArgs();

  const raw = readFileSync(opts.input, "utf-8");
  const payload = JSON.parse(raw);

  const data = payload.data || [];
  const submission = payload.submission || {};

  const targetUrl = opts.url || submission.target_url || "";
  const method = (opts.method || submission.method || "POST").toUpperCase();
  const requestedBatchSize = opts.batchSize || submission.batch_size || DEFAULT_BATCH_SIZE;
  const batchSize = Number(requestedBatchSize);

  if (!targetUrl) {
    console.error("Error: No target URL configured. Use --url or set in job bundle.");
    process.exit(1);
  }
  if (!Array.isArray(data)) {
    console.error("Error: Input file must contain a \"data\" array.");
    process.exit(1);
  }
  // The batching loop advances by batchSize: a negative step never
  // terminates and a string step concatenates, so require a positive integer.
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    console.error(`Error: Invalid batch size: ${requestedBatchSize}`);
    process.exit(1);
  }

  console.log("=== Submitter ===");
  console.log(`Input: ${opts.input} (${data.length} records)`);
  console.log(`Target: ${method} ${targetUrl}`);
  console.log(`Batch size: ${batchSize}`);
  console.log();

  // Split into batches.
  const batches = [];
  for (let i = 0; i < data.length; i += batchSize) {
    batches.push(data.slice(i, i + batchSize));
  }
  const totalBatches = batches.length;

  if (opts.dryRun) {
    console.log(`[DRY RUN] Would submit ${data.length} records in ${totalBatches} batch(es)`);
    for (let i = 0; i < batches.length; i++) {
      console.log(`  Batch ${i + 1}/${totalBatches}: ${batches[i].length} records`);
    }
    console.log("\nDry run complete. No data was sent.");
    return;
  }

  /** @type {object[]} */
  const successRecords = [];
  /** @type {object[]} */
  const failedRecords = [];
  /** @type {object[]} */
  const failedDetails = [];

  console.log(`Submitting ${data.length} records in ${totalBatches} batch(es)...\n`);

  // Batches are sent one at a time, in order.
  for (let i = 0; i < batches.length; i++) {
    const result = await submitBatch(targetUrl, method, batches[i], i + 1, totalBatches);
    if (result.ok) {
      successRecords.push(...result.data);
    } else {
      failedRecords.push(...result.data);
      failedDetails.push({
        batch_index: i + 1,
        status: result.status,
        // Guard against a non-string body and cap the stored response text.
        response: String(result.body ?? "").slice(0, 500),
        record_count: result.data.length,
      });
    }
  }

  // Result summary.
  console.log();
  console.log("=== Result ===");
  console.log(`Success: ${successRecords.length} records`);
  console.log(`Failed:  ${failedRecords.length} records`);

  // UTC timestamp as YYYYMMDD_HHMMSS. The ISO string is cut before the
  // fractional seconds so no stray "." ends up in the file names.
  const timestamp = new Date().toISOString().slice(0, 19).replace(/[-:]/g, "").replace("T", "_");
  const baseName = opts.input.replace(/\.json$/i, "");

  if (successRecords.length > 0) {
    const successFile = `${baseName}_success_${timestamp}.json`;
    writeFileSync(successFile, JSON.stringify(successRecords, null, 2), "utf-8");
    console.log(`\nSuccess log: ${successFile}`);
  }

  if (failedRecords.length > 0) {
    const failedFile = `${baseName}_failed_${timestamp}.json`;
    const report = {
      summary: {
        total_failed: failedRecords.length,
        failed_batches: failedDetails.length,
        target_url: targetUrl,
        timestamp,
      },
      batch_errors: failedDetails,
      failed_records: failedRecords,
    };
    writeFileSync(failedFile, JSON.stringify(report, null, 2), "utf-8");
    console.log(`Failed log:  ${failedFile}`);

    // The retry payload has the same { submission, data } shape this script
    // reads, so it can be passed straight back in.
    const retryFile = `${baseName}_retry_${timestamp}.json`;
    const retryPayload = { submission, data: failedRecords };
    writeFileSync(retryFile, JSON.stringify(retryPayload, null, 2), "utf-8");
    console.log(`\nTo retry failed records:`);
    console.log(`  node submitter.mjs ${retryFile}`);
  }

  if (successRecords.length === 0 && failedRecords.length === 0) {
    console.log("\nNo records to submit.");
  }
}
|
||||
|
||||
// Run the script; any rejection that escapes main() is printed and
// reported through a non-zero exit status.
main().catch((fatalError) => {
  console.error(fatalError);
  process.exit(1);
});
|
||||
Reference in New Issue
Block a user