diff --git a/CLAUDE.md b/CLAUDE.md index 85f13e0..807a4e9 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,14 +4,15 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -**excel2json** — a SvelteKit web application for converting Excel files to JSON. Built with Svelte 5, SvelteKit 2, TypeScript, and Tailwind CSS 4. +**excel2json** — a SvelteKit web application for converting Excel files to JSON with visual mapping configuration. Built with Svelte 5, SvelteKit 2, TypeScript, and Tailwind CSS 4. ## Commands -- `npm run dev` — start dev server +- `npm run dev` — start dev server (typically http://localhost:5173) - `npm run build` — production build - `npm run preview` — preview production build - `npm run check` — type-check with svelte-check +- `npm run check:watch` — type-check in watch mode - `npm run test` — run all tests once - `npm run test:unit` — run tests in watch mode - `npm run test:unit -- --run --testNamePattern="pattern"` — run a single test by name @@ -22,10 +23,11 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co - **Styling**: Tailwind CSS 4 with `@tailwindcss/forms` and `@tailwindcss/typography` plugins, configured via `src/routes/layout.css` - **Adapter**: `@sveltejs/adapter-auto` - **TypeScript**: strict mode enabled +- **Dependencies**: `xlsx` (SheetJS) for Excel parsing, `dayjs` for date handling ### Testing -Two Vitest project configurations in `vite.config.ts`: +Two Vitest project configurations in `vitest.config.ts`: - **`client`** — browser tests using Playwright (headless Chromium). Files matching `src/**/*.svelte.{test,spec}.{js,ts}`. Uses `vitest-browser-svelte` for component rendering. - **`server`** — Node.js unit tests. Files matching `src/**/*.{test,spec}.{js,ts}` (excluding `.svelte.` test files). @@ -38,6 +40,32 @@ All tests require assertions (`expect.requireAssertions: true`). 
- Shared library code goes in `src/lib/` (aliased as `$lib`) - Use Svelte 5 runes syntax, not legacy Svelte 4 patterns +### Core Application Structure + +**Single-page application** with split-pane layout: + +- `src/routes/+page.svelte` — Main page containing all application logic +- `src/lib/components/ExcelTable.svelte` — Left panel showing Excel data with column configuration +- `src/lib/components/JsonPreview.svelte` — Right panel showing JSON output with syntax highlighting +- `src/lib/components/ColumnConfig.svelte` — Modal for column mapping configuration +- `src/lib/components/ApiConfigModal.svelte` — API enrichment rules configuration +- `src/lib/components/SubmissionSettings.svelte` — Data submission settings + +**Core libraries:** + +- `src/lib/excel.ts` — Excel/CSV reading and parsing +- `src/lib/converter.ts` — Mapping conversion core logic +- `src/lib/types.ts` — TypeScript type definitions (`MappingConfig`, `RowData`, etc.) + +**Data flow:** File upload → Excel parsing → Mapping configuration → JSON conversion → Preview/Export + +**Key features:** +- Nested object support via dot notation (e.g., `user.address.city`) +- Date formatting including Excel serial date compatibility +- Empty value handling (exclude field or use default value) +- Template import/export for mapping configurations +- API enrichment for dynamic data fetching + ## Svelte MCP Server A Svelte MCP server is available for Svelte 5 / SvelteKit documentation lookup and code validation. 
When writing Svelte code: diff --git a/DEMAND-字典映射.md b/DEMAND-字典映射.md new file mode 100644 index 0000000..67a3fed --- /dev/null +++ b/DEMAND-字典映射.md @@ -0,0 +1,162 @@ + + +这次更新加入了 **“字典映射 (Dictionary Mapping)”** 功能,允许用户对有限的枚举值进行转换(如“是/否”转“Y/N”)。 + +我特别在文档中强调了**“自动扫描”**功能——让 UI 自动读取 Excel 列中的前几十行去重数据,自动列出所有可能的选项,用户只需要填右边的目标值即可,极大提升体验。 + +--- + +### 复制下面的内容发送给 Claude: + +--- + +**Role:** 资深前端架构师 (Svelte 5 + TypeScript) + +**Project Context:** +我们正在开发 "Excel2JSON ETL Blueprint Generator"。 +前端负责解析 Excel、配置映射规则(含静态重命名、字典转换、动态 API)、生成预览,并最终导出包含 **[处理后的基础数据 + API 执行逻辑]** 的 `job_bundle.json`。 + +**Current Task:** +我们需要在 **v2.0 (API Enrichment)** 的基础上,新增 **v2.1 (Dictionary/Enum Mapping)** 功能。 + +### Phase 2.1: 字典映射与高级 ETL 配置需求文档 + +#### 1. 核心功能变更 + +在静态字段映射(Static Mapping)阶段,新增 **"值映射 (Value Map)"** 功能。 + +* **场景:** Excel 中某列是状态词(如 "开/关", "男/女"),目标 JSON 需要代码(如 `1/0`, `M/F`)。 +* **逻辑:** 解析 Excel -> **字典替换** -> 类型转换 -> 生成基础 JSON -> (后续由 Python 处理 API). + +#### 2. 用户界面与交互 (UI/UX) + +##### 2.1 列配置面板升级 + +点击 Excel 表头配置时,除了修改 `Target Key` 和 `Data Type`,新增一个 **"Value Mapping" (值映射)** 开关/折叠面板。 + +**面板内容:** + +1. **自动扫描 (Auto-Scan):** +* UI 自动读取该列的前 50 行数据,提取所有**唯一值 (Unique Values)**。 +* 显示一个“映射表”:左侧是 `Source Value` (Excel 原值),右侧是 `Target Value` (输入框)。 + + +2. **手动添加:** 允许用户手动增加新的映射对(防止前 50 行没覆盖到所有情况)。 +3. **默认值 (Fallback):** +* 如果单元格的值不在映射表中,怎么处理? +* 选项: `Keep Original` (保留原值) / `Set to Null` / `Custom Value` (自定义默认值)。 + + + +##### 2.2 预览逻辑 (Preview Logic) + +* 右侧 JSON 预览必须**实时反映**字典映射的结果。 +* *示例:* 用户在左侧把 "是" 映射为 `true` (Boolean),右侧预览中原本的 "是" 应立即变为 `true`。 + +#### 3. 
核心数据结构 (Updated Interfaces) + +请更新 TypeScript 接口以支持新的映射逻辑: + +```typescript +// 字典映射项 +interface ValueMapItem { + source: string | number; // Excel 里的原始值 (e.g., "是") + target: any; // JSON 里的目标值 (e.g., true, "Y", 1) +} + +// 静态映射规则 (升级版) +interface StaticRule { + type: 'static'; + source_column: string; // Excel 原表头 + target_key: string; // JSON 目标 Key + data_type: 'string' | 'number' | 'boolean' | 'date' | 'array'; // 目标类型 + + // v2.1 新增: 字典映射配置 + use_dictionary: boolean; // 是否启用字典映射 + value_mapping?: ValueMapItem[]; + mapping_fallback?: 'keep' | 'null' | any; // 没匹配到时的默认值 + + // v2.0 已有 + format?: string; // 日期格式 + separator?: string; // 数组分隔符 +} + +// 动态 API 规则 (保持 v2.0 不变) +interface ApiEnrichmentRule { + type: 'api_fetch'; + target_key: string; + url_template: string; // "https://api.com/{{id}}" + method: 'GET' | 'POST'; + headers?: Record<string, string>; + body_template?: string; + response_path: string; // "data.result" +} + +// 提交配置 (保持 v2.0 不变) +interface SubmissionConfig { + target_url: string; + method: 'POST' | 'PUT'; + batch_size: number; +} + +// 最终导出的 Job Bundle +interface JobBundle { + meta: { version: string; generated_at: string }; + config: { + // static_rules 仅用于前端回显,Python 脚本其实只需要 enrichment 和 submission + // 但为了以后能在前端重新导入编辑,建议保留完整配置 + static_rules: StaticRule[]; + enrichment_rules: ApiEnrichmentRule[]; + submission: SubmissionConfig; + }; + // 注意: source_data 是前端已经应用了 "StaticRule" (包括字典映射) 后的干净数据 + source_data: Record<string, any>[]; +} + +``` + +#### 4. 处理流程 (Processing Pipeline) + +前端在生成 `source_data` 时,必须严格按照以下顺序处理每一单元格: + +1. **Extract:** 读取 Excel 单元格原始值。 +2. **Map (字典映射):** +* 如果启用了 `use_dictionary`:查找映射表。 +* 找到 -> 替换为 Target Value。 +* 没找到 -> 应用 `mapping_fallback` 策略。 + + +3. **Cast (类型转换):** +* 将上一步的结果转换为 `data_type` 指定的类型 (e.g., String -> Boolean, String -> Number)。 +* *注意:* 如果字典映射的目标值已经是正确的类型(如 `true`),则跳过此步或确保不会再次转为字符串 "true"。 + + +4. **Format:** (如果是日期或数组) 应用格式化规则。 + +#### 5. 开发任务清单 + +1. 
**组件开发:** +* 修改 `src/lib/components/ColumnConfig.svelte`。 +* 新增 `DictionaryMapper` 子组件:包含“自动扫描”按钮和“键值对”编辑表格。 + + +2. **逻辑核心:** +* 更新 `processRow` 函数,在类型转换前插入字典查找逻辑。 +* 实现 `scanUniqueValues(columnData)` 函数,用于快速提取 Excel 列的去重值。 + + +3. **预览同步:** 确保右侧 JSON 预览能实时响应字典配置的变更。 +4. **导出验证:** 导出 `job_bundle.json`,检查 `source_data` 中的值是否已成功转换为映射后的值(例如 "Y" 而不是 "是")。 + +--- + +### 给 AI 的提示 (Prompt Tip) + +* **性能注意:** 自动扫描 `scanUniqueValues` 时,如果 Excel 数据量极大(>10万行),不要全量扫描。只扫描前 1000 行即可,并提示用户“仅扫描了前 1000 行,如有遗漏请手动添加”。 +* **交互细节:** 字典映射的 Target Value 输入框,应该能智能识别类型。如果用户输入 `true`,应该被识别为 Boolean 而不是字符串 "true"。 + +--- + +### 执行步骤 + +请先基于上述文档,更新 **数据类型定义 (Interfaces)** 和 **核心处理逻辑 (`processRow` 函数)**。 \ No newline at end of file diff --git a/src/lib/components/ColumnConfig.svelte b/src/lib/components/ColumnConfig.svelte index 4232dac..57ac12f 100644 --- a/src/lib/components/ColumnConfig.svelte +++ b/src/lib/components/ColumnConfig.svelte @@ -1,7 +1,16 @@ -
+

列配置

- + {:else} + + {/if} +
+ + {#if config.useDictionary} + + + + + {#if config.valueMapping.length > 0} +
+ + + + + + + + + + {#each config.valueMapping as item, index (index)} + + + + + + {/each} + +
目标
{String(item.source)} + updateTargetValue(index, e.currentTarget.value)} + class="w-full rounded border border-gray-300 px-1 py-0.5 text-xs focus:border-blue-500 focus:ring-1 focus:ring-blue-500" + /> + + +
+
+ {:else} +

暂无映射项

+ {/if} + + +
+ + + +
+ + +
+ 未匹配: + +
+ + + {#if config.mappingFallback === 'custom'} + + {/if} + {/if} +
diff --git a/src/lib/components/ExcelTable.svelte b/src/lib/components/ExcelTable.svelte index 2747e39..67b4665 100644 --- a/src/lib/components/ExcelTable.svelte +++ b/src/lib/components/ExcelTable.svelte @@ -131,7 +131,7 @@ {/if}
{#if activeConfigIndex === i && mappings[i]} - (activeConfigIndex = null)} /> + (activeConfigIndex = null)} /> {/if} {/each} diff --git a/src/lib/converter.ts b/src/lib/converter.ts index 04b3270..ae900d1 100644 --- a/src/lib/converter.ts +++ b/src/lib/converter.ts @@ -7,7 +7,9 @@ import type { ApiEnrichmentRule, SubmissionConfig, StaticRule, - JobBundle + JobBundle, + ValueMapItem, + MappingFallback } from './types.js'; /** @@ -17,31 +19,107 @@ function isEmpty(value: unknown): boolean { return value === undefined || value === null || value === ''; } +/** + * Parse a target value string into the appropriate type. + * Intelligently detects boolean, number, null, and JSON. + */ +function parseTargetValue(value: string): unknown { + if (value === 'null' || value === '') return null; + if (value === 'true') return true; + if (value === 'false') return false; + if (value === 'undefined') return undefined; + + // Try parsing as number + const num = Number(value); + if (!isNaN(num) && value !== '') return num; + + // Try parsing as JSON object/array + if ((value.startsWith('{') && value.endsWith('}')) || (value.startsWith('[') && value.endsWith(']'))) { + try { + return JSON.parse(value); + } catch { + // Not valid JSON, return as string + } + } + + return value; +} + +/** + * Apply dictionary mapping to a value. + * Returns the mapped value or applies the fallback strategy. + */ +function applyDictionaryMapping( + value: unknown, + valueMapping: ValueMapItem[], + fallback: MappingFallback, + customValue?: string +): unknown { + if (value === undefined || value === null) { + return fallback === 'null' ? null : value; + } + + const normalizedValue = typeof value === 'object' ? 
JSON.stringify(value) : String(value); + + // Try to find exact match in mapping + const mappedItem = valueMapping.find( + (item) => String(item.source) === normalizedValue + ); + + if (mappedItem) { + return mappedItem.target; + } + + // Apply fallback strategy + switch (fallback) { + case 'keep': + return value; + case 'null': + return null; + case 'custom': + return customValue !== undefined ? parseTargetValue(customValue) : null; + default: + return value; + } +} + /** * Convert a raw cell value to the specified data type. */ -function convertValue(value: unknown, type: DataType, format?: string): unknown { +function convertValue( + value: unknown, + type: DataType, + format?: string, + mapping?: { enabled: boolean; items: ValueMapItem[]; fallback: MappingFallback; customValue?: string } +): unknown { if (isEmpty(value)) return undefined; + // Apply dictionary mapping first (before type conversion) + let processedValue = value; + if (mapping?.enabled && mapping.items.length > 0) { + processedValue = applyDictionaryMapping(value, mapping.items, mapping.fallback, mapping.customValue); + if (processedValue === null && mapping.fallback === 'null') return null; + } + switch (type) { case 'number': { - const num = Number(value); - return isNaN(num) ? value : num; + const num = Number(processedValue); + return isNaN(num) ? 
processedValue : num; } case 'boolean': { - if (typeof value === 'boolean') return value; - const str = String(value).toLowerCase().trim(); + if (typeof processedValue === 'boolean') return processedValue; + const str = String(processedValue).toLowerCase().trim(); if (['true', '1', 'yes', '是'].includes(str)) return true; if (['false', '0', 'no', '否'].includes(str)) return false; - return Boolean(value); + return Boolean(processedValue); } case 'date': { - return formatDate(value, format); + return formatDate(processedValue, format); } case 'string': default: - if (value instanceof Date) return dayjs(value).format('YYYY-MM-DD HH:mm:ss'); - return String(value); + if (processedValue instanceof Date) return dayjs(processedValue).format('YYYY-MM-DD HH:mm:ss'); + return String(processedValue); } } @@ -119,9 +197,29 @@ export function convertData( const finalValue = isEmptyVal ? (mapping.defaultValue !== undefined && mapping.defaultValue !== '' - ? convertValue(mapping.defaultValue, mapping.type, mapping.format) + ? convertValue( + mapping.defaultValue, + mapping.type, + mapping.format, + mapping.useDictionary ? { + enabled: true, + items: mapping.valueMapping ?? [], + fallback: mapping.mappingFallback ?? 'keep', + customValue: mapping.mappingCustomValue + } : undefined + ) : null) - : convertValue(rawValue, mapping.type, mapping.format); + : convertValue( + rawValue, + mapping.type, + mapping.format, + mapping.useDictionary ? { + enabled: true, + items: mapping.valueMapping ?? [], + fallback: mapping.mappingFallback ?? 'keep', + customValue: mapping.mappingCustomValue + } : undefined + ); setNested(obj, mapping.target, finalValue); } @@ -152,6 +250,27 @@ export function createDefaultMappings(headers: string[], rows?: RowData[]): Mapp /** * Detect the data type of a column by sampling its values. */ + +/** + * Scan unique values from a column for auto-filling dictionary mapping. + * Limits to first 1000 rows for performance. 
+ */ +export function scanUniqueValues(header: string, rows: RowData[]): (string | number)[] { + const MAX_ROWS = 1000; + const sample = rows.slice(0, MAX_ROWS); + const uniqueValues = new Set(); + + for (const row of sample) { + const val = row[header]; + if (val !== undefined && val !== null && val !== '') { + const normalized = typeof val === 'object' ? JSON.stringify(val) : (val as string | number); + uniqueValues.add(normalized); + } + } + + return Array.from(uniqueValues).sort(); +} + function detectColumnType(header: string, rows: RowData[]): DataType { const sample = rows.slice(0, 20); let dateCount = 0; @@ -186,7 +305,12 @@ export function applyTemplate( type: tmpl.type, format: tmpl.format, excludeIfEmpty: tmpl.excludeIfEmpty, - defaultValue: tmpl.defaultValue ?? '' + defaultValue: tmpl.defaultValue ?? '', + // v2.1: Apply dictionary mapping settings + useDictionary: tmpl.useDictionary, + valueMapping: tmpl.valueMapping, + mappingFallback: tmpl.mappingFallback, + mappingCustomValue: tmpl.mappingCustomValue }; } return mapping; @@ -197,10 +321,15 @@ export function applyTemplate( * Export current mappings as a template. 
*/ export function exportTemplate(mappings: MappingConfig[]): MappingTemplate { - return mappings.map(({ source, target, type, format, excludeIfEmpty, defaultValue }) => { + return mappings.map(({ source, target, type, format, excludeIfEmpty, defaultValue, useDictionary, valueMapping, mappingFallback, mappingCustomValue }) => { const entry: MappingTemplate[number] = { source, target, type, excludeIfEmpty }; if (type === 'date' && format) entry.format = format; if (defaultValue) entry.defaultValue = defaultValue; + // v2.1: Export dictionary mapping settings + if (useDictionary) entry.useDictionary = true; + if (valueMapping && valueMapping.length > 0) entry.valueMapping = valueMapping; + if (mappingFallback) entry.mappingFallback = mappingFallback; + if (mappingCustomValue) entry.mappingCustomValue = mappingCustomValue; return entry; }); } @@ -219,6 +348,15 @@ function toStaticRules(mappings: MappingConfig[]): StaticRule[] { dataType: m.type }; if (m.type === 'date' && m.format) rule.format = m.format; + // v2.1: Include dictionary mapping properties + if (m.useDictionary && m.valueMapping && m.valueMapping.length > 0) { + rule.useDictionary = true; + rule.valueMapping = m.valueMapping; + rule.mappingFallback = m.mappingFallback ?? 
'keep'; + if (m.mappingFallback === 'custom') { + rule.mappingCustomValue = m.mappingCustomValue; + } + } return rule; }); } diff --git a/src/lib/types.ts b/src/lib/types.ts index b404ead..bdc83aa 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -1,5 +1,14 @@ export type DataType = 'string' | 'number' | 'boolean' | 'date'; +/** Mapping fallback strategy when value is not found in dictionary */ +export type MappingFallback = 'keep' | 'null' | 'custom'; + +/** Dictionary mapping item - maps source value to target value */ +export interface ValueMapItem { + source: string | number; + target: unknown; +} + export type DateFormat = | 'YYYY-MM-DD' | 'YYYY/MM/DD' @@ -23,6 +32,14 @@ export interface MappingConfig { defaultValue?: string; /** Whether this column is included in output */ enabled: boolean; + /** v2.1: Enable dictionary/value mapping for this column */ + useDictionary?: boolean; + /** v2.1: Value mapping dictionary */ + valueMapping?: ValueMapItem[]; + /** v2.1: Fallback strategy when value not found in mapping */ + mappingFallback?: MappingFallback; + /** v2.1: Custom fallback value (only used when mappingFallback is 'custom') */ + mappingCustomValue?: string; } /** A single row of raw Excel data, keyed by original header */ @@ -31,7 +48,7 @@ export type RowData = Record; /** Template file structure for import/export */ export type MappingTemplate = Pick< MappingConfig, - 'source' | 'target' | 'type' | 'format' | 'excludeIfEmpty' | 'defaultValue' + 'source' | 'target' | 'type' | 'format' | 'excludeIfEmpty' | 'defaultValue' | 'useDictionary' | 'valueMapping' | 'mappingFallback' | 'mappingCustomValue' >[]; /** Static mapping rule in Job Bundle output */ @@ -41,6 +58,14 @@ export interface StaticRule { target: string; dataType: DataType; format?: string; + /** v2.1: Enable dictionary/value mapping */ + useDictionary?: boolean; + /** v2.1: Value mapping dictionary */ + valueMapping?: ValueMapItem[]; + /** v2.1: Fallback strategy when value not found in 
mapping */ + mappingFallback?: MappingFallback; + /** v2.1: Custom fallback value */ + mappingCustomValue?: string; } /** Dynamic API enrichment rule */