feat: initialize MCP RAG Prompts server with embedding management
- Add package.json for project configuration and dependencies. - Create src/index.ts as the entry point for the MCP server. - Implement vectorStore for managing embeddings with local and cloud providers. - Add embeddingProviders for local and cloud-based embedding services (OpenAI, Aliyun, SiliconFlow). - Define types for prompts and embeddings in types.ts. - Implement searchPersona tool for semantic search of expert personas. - Create test.ts for validating vector storage and search functionality. - Configure TypeScript with tsconfig.json for strict type checking and module resolution.
This commit is contained in:
124
src/index.ts
Normal file
124
src/index.ts
Normal file
@@ -0,0 +1,124 @@
|
||||
/**
|
||||
* MCP RAG Prompts Server 入口文件
|
||||
* 基于 MCP 协议的 RAG 提示词管理服务器
|
||||
*/
|
||||
|
||||
// 加载 .env 文件(必须在最前面)
|
||||
import 'dotenv/config';
|
||||
|
||||
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
||||
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
||||
import {
|
||||
CallToolRequestSchema,
|
||||
ListToolsRequestSchema,
|
||||
} from '@modelcontextprotocol/sdk/types.js';
|
||||
|
||||
import { vectorStore } from './lib/vectorStore.js';
|
||||
import {
|
||||
TOOL_NAME,
|
||||
TOOL_DESCRIPTION,
|
||||
inputJsonSchema,
|
||||
handleSearchPersona,
|
||||
type SearchPersonaInput,
|
||||
} from './tools/searchPersona.js';
|
||||
|
||||
/**
 * Builds the MCP server and registers its two request handlers.
 *
 * - `tools/list` advertises a single tool described by `TOOL_NAME`,
 *   `TOOL_DESCRIPTION` and `inputJsonSchema`.
 * - `tools/call` forwards calls for `TOOL_NAME` to `handleSearchPersona`;
 *   any other tool name produces an `isError` text response.
 *
 * Diagnostics are written with `console.error`.
 * NOTE(review): presumably stderr is used so that stdout stays free for the
 * stdio transport — confirm.
 *
 * @returns The configured server; it is not connected to a transport yet.
 */
function createServer(): Server {
  const serverInfo = { name: 'mcp-rag-prompts', version: '1.0.0' };
  const serverOptions = { capabilities: { tools: {} } };
  const server = new Server(serverInfo, serverOptions);

  // tools/list: report every tool this server offers.
  server.setRequestHandler(ListToolsRequestSchema, async () => {
    console.error('[Server] 收到 tools/list 请求');

    const searchTool = {
      name: TOOL_NAME,
      description: TOOL_DESCRIPTION,
      inputSchema: inputJsonSchema,
    };
    return { tools: [searchTool] };
  });

  // tools/call: run the requested tool and return its result.
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
    const toolName = request.params.name;
    const toolArgs = request.params.arguments;

    console.error(`[Server] 收到 tools/call 请求: ${toolName}`);
    console.error(`[Server] 参数: ${JSON.stringify(toolArgs)}`);

    // The handler validates its own input, so the cast only satisfies the signature.
    if (toolName === TOOL_NAME) {
      return handleSearchPersona(toolArgs as SearchPersonaInput);
    }

    // Unknown tool: answer with an error payload instead of throwing.
    const errorText = JSON.stringify({
      success: false,
      error: `未知工具: ${toolName}`,
    });
    return {
      content: [{ type: 'text' as const, text: errorText }],
      isError: true,
    };
  });

  return server;
}
|
||||
|
||||
/**
 * Process entry point.
 *
 * Runs three steps in order: initialise the vector store, build the MCP
 * server, and connect it to a stdio transport. Any failure is logged and the
 * process exits with code 1.
 */
async function main(): Promise<void> {
  const banner = '='.repeat(50);

  console.error(banner);
  console.error('MCP RAG Prompts Server 启动中...');
  console.error(banner);

  try {
    // Step 1: load the prompt data and compute its embeddings.
    console.error('\n[Main] 步骤 1: 初始化向量存储');
    await vectorStore.initialize();

    // Step 2: build the server with its handlers.
    console.error('\n[Main] 步骤 2: 创建 MCP Server');
    const server = createServer();

    // Step 3: attach the stdio transport.
    console.error('\n[Main] 步骤 3: 启动 Stdio Transport');
    await server.connect(new StdioServerTransport());

    console.error('\n[Main] ✓ MCP Server 已启动,等待客户端连接...');
    console.error(banner);
  } catch (startupError) {
    console.error('\n[Main] ✗ 启动失败:', startupError);
    process.exit(1);
  }
}

// Start the server.
main();
|
||||
305
src/lib/embeddingProviders.ts
Normal file
305
src/lib/embeddingProviders.ts
Normal file
@@ -0,0 +1,305 @@
|
||||
/**
|
||||
* Embedding 提供者实现
|
||||
* 支持本地模型和多种云服务 API
|
||||
*/
|
||||
|
||||
import type { IEmbeddingProvider, EmbeddingConfig } from './types.js';
|
||||
|
||||
// ============================================================
// Local model provider (uses @xenova/transformers)
// ============================================================
/**
 * Embedding provider backed by a locally executed feature-extraction pipeline.
 *
 * The model is loaded lazily by `initialize()`. Concurrent `initialize()`
 * calls share one in-flight load, and a failed load can be retried.
 */
export class LocalEmbeddingProvider implements IEmbeddingProvider {
  readonly name = 'local';
  private modelName: string;
  // The pipeline type comes from a dynamically imported package, so it is left untyped.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private extractor: any = null;
  /** In-flight model load, shared by concurrent `initialize()` callers. */
  private loading: Promise<void> | null = null;

  /**
   * @param modelName Model identifier handed to the feature-extraction pipeline.
   */
  constructor(modelName: string = 'Xenova/paraphrase-multilingual-MiniLM-L12-v2') {
    this.modelName = modelName;
  }

  /**
   * Loads the model once. Returns immediately when it is already loaded.
   *
   * @throws Whatever the model load throws; a later call retries the load.
   */
  async initialize(): Promise<void> {
    if (this.extractor) return;

    if (!this.loading) {
      this.loading = this.loadModel().catch((loadError: unknown) => {
        // Forget the failed attempt so the next initialize() starts over.
        this.loading = null;
        throw loadError;
      });
    }
    await this.loading;
  }

  /** Imports the transformers package and builds the extraction pipeline. */
  private async loadModel(): Promise<void> {
    console.error(`[LocalProvider] 正在加载本地模型: ${this.modelName}`);
    // Dynamic import keeps this large dependency out of API-only runs.
    const { pipeline } = await import('@xenova/transformers');
    this.extractor = await pipeline('feature-extraction', this.modelName);
    console.error(`[LocalProvider] 模型加载完成`);
  }

  /**
   * Embeds one text using mean pooling and normalisation.
   *
   * @param text Text to embed.
   * @returns The embedding as a plain number array.
   * @throws Error when `initialize()` has not completed.
   */
  async embed(text: string): Promise<number[]> {
    if (!this.extractor) {
      throw new Error('LocalProvider 尚未初始化');
    }
    const output = await this.extractor(text, {
      pooling: 'mean',
      normalize: true,
    });
    return Array.from(output.data as Float32Array);
  }

  /**
   * Embeds several texts one after another, preserving input order.
   *
   * @param texts Texts to embed; an empty array yields an empty result.
   */
  async embedBatch(texts: string[]): Promise<number[][]> {
    const results: number[][] = [];
    for (const text of texts) {
      results.push(await this.embed(text));
    }
    return results;
  }
}
|
||||
|
||||
// ============================================================
// OpenAI-compatible API provider
// Works with the official OpenAI API and compatible endpoints (e.g. Azure OpenAI)
// ============================================================
/**
 * Embedding provider that calls an OpenAI-style `POST {baseUrl}/embeddings` endpoint.
 */
export class OpenAIEmbeddingProvider implements IEmbeddingProvider {
  readonly name = 'openai';
  private apiKey: string;
  private baseUrl: string;
  private model: string;

  /**
   * @param config.apiKey  Bearer token sent in the Authorization header.
   * @param config.baseUrl API root; defaults to `https://api.openai.com/v1`.
   * @param config.model   Embedding model; defaults to `text-embedding-3-small`.
   */
  constructor(config: { apiKey: string; baseUrl?: string; model?: string }) {
    this.apiKey = config.apiKey;
    // Drop trailing slashes so `${baseUrl}/embeddings` never contains "//".
    this.baseUrl = (config.baseUrl || 'https://api.openai.com/v1').replace(/\/+$/, '');
    this.model = config.model || 'text-embedding-3-small';
  }

  /** Logs the effective model and endpoint; no network access happens here. */
  async initialize(): Promise<void> {
    console.error(`[OpenAIProvider] 使用模型: ${this.model}`);
    console.error(`[OpenAIProvider] API 地址: ${this.baseUrl}`);
  }

  /**
   * Embeds a single text.
   *
   * @throws Error on a non-2xx response or an unexpected response shape.
   */
  async embed(text: string): Promise<number[]> {
    const vectors = await this.requestEmbeddings(text, 1);
    return vectors[0];
  }

  /**
   * Embeds several texts in one request; the result follows input order.
   * An empty input returns `[]` without calling the API.
   *
   * @throws Error on a non-2xx response or an unexpected response shape.
   */
  async embedBatch(texts: string[]): Promise<number[][]> {
    if (texts.length === 0) return [];
    return this.requestEmbeddings(texts, texts.length);
  }

  /**
   * Sends one embeddings request and returns the vectors ordered by `index`.
   *
   * @param input         A single text or an array of texts.
   * @param expectedCount Number of vectors the response must contain.
   */
  private async requestEmbeddings(
    input: string | string[],
    expectedCount: number
  ): Promise<number[][]> {
    const response = await fetch(`${this.baseUrl}/embeddings`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${this.apiKey}`,
      },
      body: JSON.stringify({
        input,
        model: this.model,
      }),
    });

    if (!response.ok) {
      const error = await response.text();
      throw new Error(`OpenAI API 错误: ${response.status} - ${error}`);
    }

    const payload = (await response.json()) as {
      data?: Array<{ index: number; embedding: number[] }>;
    } | null;
    const items = payload?.data;

    // Fail with a readable message instead of a TypeError on `data[0]`.
    if (
      !Array.isArray(items) ||
      items.length !== expectedCount ||
      items.some((item) => !Array.isArray(item?.embedding))
    ) {
      throw new Error(`OpenAI API 返回格式错误: ${JSON.stringify(payload).slice(0, 500)}`);
    }

    // Sort a copy by index so the output lines up with the input order.
    return [...items]
      .sort((a, b) => a.index - b.index)
      .map((item) => item.embedding);
  }
}
|
||||
|
||||
// ============================================================
// Alibaba Cloud Bailian (DashScope) API provider
// ============================================================
/**
 * Embedding provider that calls the DashScope text-embedding endpoint.
 *
 * Batches are split into several requests because DashScope limits the number
 * of texts per request. NOTE(review): the limit of 10 matches the documented
 * cap for text-embedding-v3 (older models allow more) — confirm against the
 * current DashScope documentation.
 */
export class AliyunEmbeddingProvider implements IEmbeddingProvider {
  /** Maximum number of texts sent in a single request. */
  private static readonly MAX_TEXTS_PER_REQUEST = 10;

  readonly name = 'aliyun';
  private apiKey: string;
  private model: string;
  private baseUrl = 'https://dashscope.aliyuncs.com/api/v1/services/embeddings/text-embedding/text-embedding';

  /**
   * @param config.apiKey Bearer token sent in the Authorization header.
   * @param config.model  Embedding model; defaults to `text-embedding-v3`.
   */
  constructor(config: { apiKey: string; model?: string }) {
    this.apiKey = config.apiKey;
    this.model = config.model || 'text-embedding-v3';
  }

  /** Logs the effective model; no network access happens here. */
  async initialize(): Promise<void> {
    console.error(`[AliyunProvider] 使用模型: ${this.model}`);
  }

  /**
   * Embeds a single text.
   *
   * @throws Error on a non-2xx response or an unexpected response shape.
   */
  async embed(text: string): Promise<number[]> {
    const vectors = await this.requestEmbeddings([text]);
    return vectors[0];
  }

  /**
   * Embeds several texts, preserving input order. Chunks are sent one after
   * another; an empty input returns `[]` without calling the API.
   *
   * @throws Error on a non-2xx response or an unexpected response shape.
   */
  async embedBatch(texts: string[]): Promise<number[][]> {
    const limit = AliyunEmbeddingProvider.MAX_TEXTS_PER_REQUEST;
    const vectors: number[][] = [];

    for (let start = 0; start < texts.length; start += limit) {
      const chunk = texts.slice(start, start + limit);
      vectors.push(...(await this.requestEmbeddings(chunk)));
    }
    return vectors;
  }

  /**
   * Sends one request for `texts` and returns the vectors ordered by `text_index`.
   *
   * NOTE(review): `text_type: 'query'` is sent for every call, as before, even
   * when the texts are corpus entries — confirm whether 'document' is wanted there.
   */
  private async requestEmbeddings(texts: string[]): Promise<number[][]> {
    const response = await fetch(this.baseUrl, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${this.apiKey}`,
      },
      body: JSON.stringify({
        model: this.model,
        input: {
          texts,
        },
        parameters: {
          text_type: 'query',
        },
      }),
    });

    if (!response.ok) {
      const error = await response.text();
      throw new Error(`阿里云 API 错误: ${response.status} - ${error}`);
    }

    const data = (await response.json()) as {
      output?: { embeddings?: Array<{ text_index: number; embedding: number[] }> };
    } | null;
    const items = data?.output?.embeddings;

    // Every input text must come back with exactly one embedding.
    if (
      !Array.isArray(items) ||
      items.length !== texts.length ||
      items.some((item) => !Array.isArray(item?.embedding))
    ) {
      throw new Error(`阿里云 API 返回格式错误: ${JSON.stringify(data).slice(0, 500)}`);
    }

    // Sort a copy by text_index so the output lines up with the input order.
    return [...items]
      .sort((a, b) => a.text_index - b.text_index)
      .map((item) => item.embedding);
  }
}
|
||||
|
||||
// ============================================================
// SiliconFlow API provider
// Hosts a range of open-source embedding models
// ============================================================
/**
 * Embedding provider that calls the SiliconFlow embeddings endpoint, whose
 * request and response shapes follow the OpenAI format.
 */
export class SiliconFlowEmbeddingProvider implements IEmbeddingProvider {
  readonly name = 'siliconflow';
  private apiKey: string;
  private model: string;
  private baseUrl = 'https://api.siliconflow.cn/v1/embeddings';

  /**
   * @param config.apiKey Bearer token sent in the Authorization header.
   * @param config.model  Embedding model; defaults to `BAAI/bge-m3`.
   */
  constructor(config: { apiKey: string; model?: string }) {
    this.apiKey = config.apiKey;
    this.model = config.model || 'BAAI/bge-m3';
  }

  /** Logs the effective model; no network access happens here. */
  async initialize(): Promise<void> {
    console.error(`[SiliconFlowProvider] 使用模型: ${this.model}`);
  }

  /**
   * Embeds a single text.
   *
   * @throws Error on a non-2xx response or an unexpected response shape.
   */
  async embed(text: string): Promise<number[]> {
    const vectors = await this.requestEmbeddings(text, 1);
    return vectors[0];
  }

  /**
   * Embeds several texts in one request; the result follows input order.
   * An empty input returns `[]` without calling the API.
   *
   * @throws Error on a non-2xx response or an unexpected response shape.
   */
  async embedBatch(texts: string[]): Promise<number[][]> {
    if (texts.length === 0) return [];
    return this.requestEmbeddings(texts, texts.length);
  }

  /**
   * Sends one embeddings request and returns the vectors ordered by `index`.
   *
   * @param input         A single text or an array of texts.
   * @param expectedCount Number of vectors the response must contain.
   */
  private async requestEmbeddings(
    input: string | string[],
    expectedCount: number
  ): Promise<number[][]> {
    const response = await fetch(this.baseUrl, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${this.apiKey}`,
      },
      body: JSON.stringify({
        model: this.model,
        input,
        encoding_format: 'float',
      }),
    });

    if (!response.ok) {
      const error = await response.text();
      throw new Error(`SiliconFlow API 错误: ${response.status} - ${error}`);
    }

    const payload = (await response.json()) as {
      data?: Array<{ index: number; embedding: number[] }>;
    } | null;
    const items = payload?.data;

    // Fail with a readable message instead of a TypeError on `data[0]`.
    if (
      !Array.isArray(items) ||
      items.length !== expectedCount ||
      items.some((item) => !Array.isArray(item?.embedding))
    ) {
      throw new Error(`SiliconFlow API 返回格式错误: ${JSON.stringify(payload).slice(0, 500)}`);
    }

    // Sort a copy by index so the output lines up with the input order.
    return [...items]
      .sort((a, b) => a.index - b.index)
      .map((item) => item.embedding);
  }
}
|
||||
|
||||
// ============================================================
// Factory: build the provider selected by the configuration
// ============================================================
/**
 * Creates the embedding provider named by `config.provider`.
 *
 * @param config Embedding configuration; cloud providers need their `apiKey`.
 * @returns A provider instance that has not been initialised yet.
 * @throws Error when a cloud provider's `apiKey` is missing or the provider is unknown.
 */
export function createEmbeddingProvider(config: EmbeddingConfig): IEmbeddingProvider {
  const { provider } = config;

  if (provider === 'local') {
    return new LocalEmbeddingProvider(config.local?.modelName);
  }

  if (provider === 'openai') {
    const settings = config.openai;
    if (!settings?.apiKey) {
      throw new Error('OpenAI 配置缺少 apiKey');
    }
    return new OpenAIEmbeddingProvider(settings);
  }

  if (provider === 'aliyun') {
    const settings = config.aliyun;
    if (!settings?.apiKey) {
      throw new Error('阿里云配置缺少 apiKey');
    }
    return new AliyunEmbeddingProvider(settings);
  }

  if (provider === 'siliconflow') {
    const settings = config.siliconflow;
    if (!settings?.apiKey) {
      throw new Error('SiliconFlow 配置缺少 apiKey');
    }
    return new SiliconFlowEmbeddingProvider(settings);
  }

  // Reached only when the value did not come through the type system (e.g. an env var).
  throw new Error(`不支持的 Embedding 提供者: ${provider}`);
}
|
||||
84
src/lib/types.ts
Normal file
84
src/lib/types.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
/**
|
||||
* 类型定义文件
|
||||
* 定义 Prompt 数据结构和向量存储相关类型
|
||||
*/
|
||||
|
||||
/**
 * A prompt record as read from the prompts JSON file.
 */
export interface PromptData {
  /** Identifier of the prompt. */
  id: string;
  /** Keywords attached to the prompt. */
  tags: Array<string>;
  /** Short description of the prompt. */
  description: string;
  /** Full prompt text. */
  content: string;
}
|
||||
|
||||
/**
 * A prompt together with its embedding vector, as cached in memory.
 */
export interface PromptWithEmbedding extends PromptData {
  /** Embedding vector computed for this prompt. */
  embedding: Array<number>;
}
|
||||
|
||||
/**
 * One search hit: the matched prompt and its score.
 */
export interface SearchResult {
  /** The matched prompt, without its embedding. */
  prompt: PromptData;
  /** Score assigned to the match. */
  similarity: number;
}
|
||||
|
||||
/**
 * Identifiers of the supported embedding providers.
 */
export type EmbeddingProvider =
  | 'local'
  | 'openai'
  | 'aliyun'
  | 'siliconflow';
|
||||
|
||||
/**
 * Embedding configuration: the selected provider plus per-provider settings.
 */
export interface EmbeddingConfig {
  /** Which provider to use. */
  provider: EmbeddingProvider;

  /** Settings for the local model. */
  local?: {
    modelName: string;
  };

  /** Settings for OpenAI. */
  openai?: {
    apiKey: string;
    /** Custom base URL, e.g. a proxy. */
    baseUrl?: string;
    /** Defaults to text-embedding-3-small. */
    model?: string;
  };

  /** Settings for Alibaba Cloud Bailian. */
  aliyun?: {
    apiKey: string;
    /** Defaults to text-embedding-v3. */
    model?: string;
  };

  /** Settings for SiliconFlow. */
  siliconflow?: {
    apiKey: string;
    /** Defaults to BAAI/bge-m3. */
    model?: string;
  };
}
|
||||
|
||||
/**
 * Contract that every embedding implementation follows.
 */
export interface IEmbeddingProvider {
  /** Provider name. */
  readonly name: string;

  /** Prepares the provider for use (for example, loads a model). */
  initialize(): Promise<void>;

  /** Produces the embedding vector for one text. */
  embed(text: string): Promise<Array<number>>;

  /** Produces embeddings for several texts; optional optimisation. */
  embedBatch?(texts: Array<string>): Promise<Array<Array<number>>>;
}
|
||||
232
src/lib/vectorStore.ts
Normal file
232
src/lib/vectorStore.ts
Normal file
@@ -0,0 +1,232 @@
|
||||
/**
|
||||
* 向量存储模块 (增强版)
|
||||
* 支持本地模型和云服务 API 切换
|
||||
* 核心功能:Hybrid Search (向量语义 + 关键词加权)
|
||||
*/
|
||||
|
||||
import { readFile } from 'fs/promises';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { dirname, join } from 'path';
|
||||
import type {
|
||||
PromptData,
|
||||
PromptWithEmbedding,
|
||||
SearchResult,
|
||||
EmbeddingConfig,
|
||||
IEmbeddingProvider,
|
||||
} from './types.js';
|
||||
import { createEmbeddingProvider } from './embeddingProviders.js';
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
|
||||
// 相似度阈值
|
||||
const SIMILARITY_THRESHOLD = 0.4;
|
||||
|
||||
// 关键词命中加分
|
||||
const KEYWORD_BOOST_SCORE = 0.3;
|
||||
|
||||
/**
 * Builds the embedding configuration from environment variables.
 *
 * Each setting uses its environment variable when that is set and non-empty,
 * and a built-in default otherwise. `EMBEDDING_PROVIDER` is trimmed and
 * lower-cased, so values such as `OpenAI` or ` openai ` select the `openai`
 * provider. The value is not checked against the supported list here.
 *
 * @returns A configuration with every provider section populated.
 */
function getConfigFromEnv(): EmbeddingConfig {
  const env = process.env;

  // Normalise before the cast so casing and stray whitespace do not matter.
  const provider = (env.EMBEDDING_PROVIDER?.trim().toLowerCase() ||
    'local') as EmbeddingConfig['provider'];

  const config: EmbeddingConfig = {
    provider,
    local: {
      modelName: env.LOCAL_MODEL_NAME || 'Xenova/paraphrase-multilingual-MiniLM-L12-v2',
    },
    openai: {
      apiKey: env.OPENAI_API_KEY || '',
      baseUrl: env.OPENAI_BASE_URL || 'https://api.openai.com/v1',
      model: env.OPENAI_EMBEDDING_MODEL || 'text-embedding-3-small',
    },
    aliyun: {
      apiKey: env.DASHSCOPE_API_KEY || '',
      model: env.ALIYUN_EMBEDDING_MODEL || 'text-embedding-v3',
    },
    siliconflow: {
      apiKey: env.SILICONFLOW_API_KEY || '',
      model: env.SILICONFLOW_EMBEDDING_MODEL || 'BAAI/bge-m3',
    },
  };

  return config;
}
|
||||
|
||||
/**
 * In-memory vector store for prompts.
 *
 * `initialize()` reads the prompts JSON file, embeds each prompt's tags and
 * description, and keeps the results in memory. `search()` scores every prompt
 * by cosine similarity plus a fixed boost when the query contains one of the
 * prompt's tags, and returns the best prompt if it reaches the threshold.
 */
export class VectorStore {
  private prompts: PromptWithEmbedding[] = [];
  private embeddingProvider: IEmbeddingProvider | null = null;
  private initialized = false;
  /** In-flight initialisation, shared by concurrent `initialize()` callers. */
  private initializing: Promise<void> | null = null;
  private config: EmbeddingConfig;

  /**
   * @param config Embedding configuration; read from the environment when omitted.
   */
  constructor(config?: EmbeddingConfig) {
    this.config = config || getConfigFromEnv();
  }

  /**
   * Cosine similarity of two vectors.
   *
   * @returns 0 when the lengths differ or either vector has zero magnitude.
   */
  private cosineSimilarity(vecA: number[], vecB: number[]): number {
    if (vecA.length !== vecB.length) return 0;

    let dotProduct = 0;
    let normA = 0;
    let normB = 0;

    for (let i = 0; i < vecA.length; i++) {
      dotProduct += vecA[i] * vecB[i];
      normA += vecA[i] * vecA[i];
      normB += vecB[i] * vecB[i];
    }

    const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
    return magnitude === 0 ? 0 : dotProduct / magnitude;
  }

  /**
   * Loads and embeds the prompt data once.
   *
   * Calls made while a load is running wait for that same load, so prompts are
   * never added twice. After a failure the store stays uninitialised and a
   * later call retries.
   *
   * @throws Error when the provider cannot be created, the data file cannot be
   *   read or parsed, or the number of embeddings does not match the prompts.
   */
  async initialize(): Promise<void> {
    if (this.initialized) return;

    if (!this.initializing) {
      this.initializing = this.load().finally(() => {
        this.initializing = null;
      });
    }
    await this.initializing;
  }

  /** Does the actual work of `initialize()`; state is committed only on success. */
  private async load(): Promise<void> {
    console.error('='.repeat(50));
    console.error('[VectorStore] 开始初始化...');
    console.error(`[VectorStore] Embedding 提供者: ${this.config.provider}`);

    // Create and initialise the embedding provider.
    const provider = createEmbeddingProvider(this.config);
    await provider.initialize();

    // Read the prompt data.
    const dataPath = join(__dirname, '../../data/prompts.json');
    const rawData = await readFile(dataPath, 'utf-8');
    const parsed: unknown = JSON.parse(rawData);
    if (!Array.isArray(parsed)) {
      throw new Error('[VectorStore] prompts.json 格式错误: 顶层必须是数组');
    }
    const promptsData = parsed as PromptData[];

    console.error(`[VectorStore] 读取到 ${promptsData.length} 条 Prompt 数据`);

    // Text to embed: tags followed by the description. Tags are guarded the
    // same way search() guards them.
    const textsToEmbed = promptsData.map(
      (prompt) => `${(prompt.tags ?? []).join(' ')} ${prompt.description}`
    );

    // Prefer the batch interface when the provider has one.
    let embeddings: number[][];
    if (textsToEmbed.length === 0) {
      // Nothing to embed; avoid sending an empty request to a remote API.
      embeddings = [];
    } else if (provider.embedBatch) {
      console.error('[VectorStore] 使用批量向量化...');
      embeddings = await provider.embedBatch(textsToEmbed);
    } else {
      console.error('[VectorStore] 逐个向量化...');
      embeddings = [];
      for (const text of textsToEmbed) {
        embeddings.push(await provider.embed(text));
      }
    }

    // A short result would leave prompts without a vector and break search().
    if (embeddings.length !== promptsData.length) {
      throw new Error(
        `[VectorStore] 向量数量 (${embeddings.length}) 与 Prompt 数量 (${promptsData.length}) 不一致`
      );
    }

    this.embeddingProvider = provider;
    this.prompts = promptsData.map((prompt, i) => ({
      ...prompt,
      embedding: embeddings[i],
    }));
    this.initialized = true;

    console.error(`[VectorStore] ✓ 初始化完成,加载了 ${promptsData.length} 条数据`);
    console.error('='.repeat(50));
  }

  /**
   * Finds the prompt that best matches `query`.
   *
   * @param query Free-text user question.
   * @returns The best match, or `null` when no prompts are loaded or the best
   *   score is below the similarity threshold.
   * @throws Error when the store has not been initialised.
   */
  async search(query: string): Promise<SearchResult | null> {
    if (!this.initialized || !this.embeddingProvider) {
      throw new Error('VectorStore 尚未初始化');
    }

    console.error(`[VectorStore] 搜索: "${query}"`);

    // An empty store has no best match; return before spending an embedding call.
    if (this.prompts.length === 0) {
      console.error('[VectorStore] ✗ 没有可搜索的 Prompt 数据');
      return null;
    }

    const queryEmbedding = await this.embeddingProvider.embed(query);
    const queryLower = query.toLowerCase();

    const results: SearchResult[] = this.prompts.map((prompt) => {
      // A. Vector similarity.
      const vectorScore = this.cosineSimilarity(queryEmbedding, prompt.embedding);

      // B. Keyword boost: applied once if any non-empty tag occurs in the query.
      const tagHit = (prompt.tags ?? []).some(
        (tag) => tag.length > 0 && queryLower.includes(tag.toLowerCase())
      );
      const boostScore = tagHit ? KEYWORD_BOOST_SCORE : 0;

      // Final score, capped at 1.
      const finalScore = Math.min(vectorScore + boostScore, 1.0);

      return {
        prompt: {
          id: prompt.id,
          tags: prompt.tags,
          description: prompt.description,
          content: prompt.content,
        },
        similarity: finalScore,
      };
    });

    // Highest score first.
    results.sort((a, b) => b.similarity - a.similarity);
    const bestMatch = results[0];

    // Debug output.
    console.error(`--- Top 3 候选 ---`);
    results.slice(0, 3).forEach((r, i) => {
      console.error(`${i + 1}. [${r.prompt.id}] 得分: ${r.similarity.toFixed(4)}`);
    });

    // Threshold check.
    if (bestMatch.similarity < SIMILARITY_THRESHOLD) {
      console.error(
        `[VectorStore] ✗ 最高分 ${bestMatch.similarity.toFixed(4)} 低于阈值 ${SIMILARITY_THRESHOLD}`
      );
      return null;
    }

    console.error(
      `[VectorStore] ✓ 选中: ${bestMatch.prompt.id} (得分: ${bestMatch.similarity.toFixed(4)})`
    );
    return bestMatch;
  }

  /**
   * @returns The ids of all loaded prompts.
   */
  getAvailablePrompts(): string[] {
    return this.prompts.map((p) => p.id);
  }

  /**
   * @returns The configured provider name and whether initialisation finished.
   */
  getProviderInfo(): { provider: string; initialized: boolean } {
    return {
      provider: this.config.provider,
      initialized: this.initialized,
    };
  }
}
|
||||
|
||||
// 导出单例(使用环境变量配置)
|
||||
export const vectorStore = new VectorStore();
|
||||
184
src/test.ts
Normal file
184
src/test.ts
Normal file
@@ -0,0 +1,184 @@
|
||||
/**
|
||||
* 测试脚本:验证向量存储和搜索功能
|
||||
* 包含多场景测试用例
|
||||
*/
|
||||
|
||||
// 加载 .env 文件
|
||||
import 'dotenv/config';
|
||||
|
||||
import { vectorStore } from './lib/vectorStore.js';
|
||||
|
||||
// Shape of one search expectation.
interface TestCase {
  query: string;
  expectedId: string | null; // null means no prompt is expected to match
  category: string;
}

/** Builds the cases of one category; all of its queries share the expected id. */
function casesFor(category: string, expectedId: string | null, queries: string[]): TestCase[] {
  return queries.map((query) => ({ query, expectedId, category }));
}

// Test case set, grouped by category.
const testCases: TestCase[] = [
  // ==================== Programming languages ====================
  ...casesFor('编程-Python', 'python-expert', [
    '如何用 Python 写一个爬虫?',
    '帮我用 FastAPI 搭建一个后端服务',
    'Django ORM 怎么做多表关联查询?',
  ]),
  ...casesFor('编程-Java', 'java-expert', [
    '如何用 Java 实现单例模式?',
    'Spring Boot 项目如何配置多数据源?',
    'JVM 垃圾回收机制是怎样的?',
  ]),
  ...casesFor('编程-Go', 'golang-expert', [
    '用 Go 写一个高并发的 HTTP 服务',
    'Golang 的 channel 和 goroutine 怎么用?',
    '如何用 gRPC 实现微服务通信?',
  ]),
  ...casesFor('编程-JS/TS', 'javascript-expert', [
    'React Hooks 的最佳实践是什么?',
    'TypeScript 泛型怎么用?',
    'Vue 3 的 Composition API 和 Options API 有什么区别?',
    'Node.js 如何处理大文件上传?',
  ]),
  ...casesFor('编程-Rust', 'rust-expert', [
    'Rust 的所有权机制是什么?',
    '如何用 Rust 编写 WebAssembly 模块?',
  ]),
  ...casesFor('编程-C++', 'cpp-expert', [
    'C++ 智能指针有哪几种?',
    '如何优化 C++ 程序的内存使用?',
    'Unreal Engine 游戏开发入门',
  ]),

  // ==================== DevOps / operations ====================
  ...casesFor('DevOps', 'devops-expert', [
    'Docker 镜像怎么优化体积?',
    'Kubernetes 如何实现滚动更新?',
    'GitHub Actions 如何配置 CI/CD?',
    'Terraform 怎么管理云资源?',
  ]),

  // ==================== Databases ====================
  ...casesFor('数据库', 'database-expert', [
    'MySQL 索引优化有哪些技巧?',
    'Redis 缓存穿透怎么解决?',
    'MongoDB 如何设计 Schema?',
    '如何实现数据库读写分离?',
  ]),

  // ==================== Security ====================
  ...casesFor('安全', 'security-expert', [
    'SQL 注入攻击如何防范?',
    'HTTPS 证书怎么配置?',
    'JWT Token 安全最佳实践',
  ]),

  // ==================== AI / ML ====================
  ...casesFor('AI/ML', 'ai-ml-expert', [
    '如何用 PyTorch 训练一个图像分类模型?',
    '大语言模型微调有哪些方法?',
    'RAG 系统怎么搭建?',
    'YOLO 目标检测怎么用?',
  ]),

  // ==================== Front-end UI ====================
  ...casesFor('前端UI', 'frontend-ui-expert', [
    'Tailwind CSS 怎么自定义主题?',
    '如何实现响应式布局?',
    '网站无障碍设计要注意什么?',
  ]),

  // ==================== Mobile ====================
  ...casesFor('移动端', 'mobile-expert', [
    'SwiftUI 和 UIKit 哪个更好?',
    'Flutter 性能优化技巧',
    'React Native 如何调用原生模块?',
    'Android Kotlin 协程怎么用?',
  ]),

  // ==================== Non-technical ====================
  ...casesFor('法律', 'legal-advisor', [
    '帮我写一份劳动合同',
    '商标注册流程是什么?',
    '公司股权架构怎么设计?',
  ]),
  ...casesFor('写作', 'creative-writer', [
    '帮我写一个广告文案',
    '如何写一个吸引人的故事开头?',
    '品牌故事怎么写才能打动人?',
  ]),
  ...casesFor('产品', 'product-manager', [
    '如何做用户调研?',
    'PRD 文档怎么写?',
    'Scrum 和 Kanban 有什么区别?',
  ]),
  ...casesFor('数据分析', 'data-analyst', [
    '用户留存率怎么分析?',
    'SQL 窗口函数怎么用?',
    'Power BI 仪表盘怎么设计?',
  ]),
  ...casesFor('英语', 'english-teacher', [
    '雅思写作怎么拿高分?',
    '英语语法中的虚拟语气怎么用?',
    '托福口语怎么准备?',
  ]),
  ...casesFor('健身', 'fitness-coach', [
    '制定一个增肌训练计划',
    '减脂期间怎么吃?',
    '深蹲的标准动作是什么?',
  ]),
  ...casesFor('理财', 'finance-advisor', [
    '基金定投策略有哪些?',
    '个人所得税怎么优化?',
    '如何分析一只股票?',
  ]),
  ...casesFor('心理', 'psychologist', [
    '最近工作压力很大怎么办?',
    '如何克服社交焦虑?',
    '和同事关系紧张怎么处理?',
  ]),

  // ==================== No match / boundary cases ====================
  ...casesFor('无匹配', null, [
    '今天天气怎么样?',
    '推荐一部好看的电影',
    '附近有什么好吃的?',
  ]),
];
|
||||
|
||||
/**
 * Runs every entry of `testCases` against the vector store and prints a report.
 *
 * A case passes when the id of the returned prompt equals `expectedId`
 * (`null` meaning that no match is expected). When at least one case fails,
 * `process.exitCode` is set to 1 so callers such as CI can detect it.
 */
async function runTests(): Promise<void> {
  const rule = '='.repeat(60);
  const NO_MATCH = '(无匹配)';

  // Shortens a query for display; the ellipsis is added only when text was cut.
  const preview = (query: string): string =>
    query.length > 30 ? `${query.slice(0, 30)}...` : query;

  // Percentage of the total with one decimal; an empty suite reports 0.0.
  const percent = (count: number): string =>
    (testCases.length === 0 ? 0 : (count / testCases.length) * 100).toFixed(1);

  console.log(rule);
  console.log('MCP RAG Prompts - 综合测试');
  console.log(rule);

  // Initialise the store.
  await vectorStore.initialize();

  const providerInfo = vectorStore.getProviderInfo();
  console.log(`\n当前 Embedding 提供者: ${providerInfo.provider}`);
  console.log(`总测试用例数: ${testCases.length}\n`);

  // Counters.
  let passed = 0;
  let failed = 0;
  const failedCases: { query: string; expected: string | null; actual: string | null }[] = [];

  // Tracks the current category so each heading is printed once.
  let currentCategory = '';

  for (const testCase of testCases) {
    if (testCase.category !== currentCategory) {
      currentCategory = testCase.category;
      console.log(`\n--- ${currentCategory} ---`);
    }

    const result = await vectorStore.search(testCase.query);
    const actualId = result?.prompt.id ?? null;

    if (actualId === testCase.expectedId) {
      passed++;
      console.log(`✓ "${preview(testCase.query)}" → ${actualId ?? NO_MATCH}`);
    } else {
      failed++;
      failedCases.push({
        query: testCase.query,
        expected: testCase.expectedId,
        actual: actualId,
      });
      console.log(`✗ "${preview(testCase.query)}"`);
      console.log(`  期望: ${testCase.expectedId ?? NO_MATCH}, 实际: ${actualId ?? NO_MATCH}`);
    }
  }

  // Summary.
  console.log('\n' + rule);
  console.log('测试结果统计');
  console.log(rule);
  console.log(`总计: ${testCases.length} 个用例`);
  console.log(`通过: ${passed} (${percent(passed)}%)`);
  console.log(`失败: ${failed} (${percent(failed)}%)`);

  if (failedCases.length > 0) {
    console.log('\n失败用例详情:');
    failedCases.forEach((fc, i) => {
      console.log(`${i + 1}. "${fc.query}"`);
      console.log(`  期望: ${fc.expected ?? NO_MATCH}`);
      console.log(`  实际: ${fc.actual ?? NO_MATCH}`);
    });
  }

  console.log('\n测试完成!');

  // Report failure through the exit code without cutting off pending output.
  if (failed > 0) {
    process.exitCode = 1;
  }
}

// A crash (for example a failed initialisation) must also yield a non-zero exit code.
runTests().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
129
src/tools/searchPersona.ts
Normal file
129
src/tools/searchPersona.ts
Normal file
@@ -0,0 +1,129 @@
|
||||
/**
|
||||
* MCP Tool: search_expert_persona
|
||||
* 根据用户问题语义搜索最匹配的专家角色设定
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
import { vectorStore } from '../lib/vectorStore.js';
|
||||
|
||||
/**
 * Name under which the tool is registered.
 */
export const TOOL_NAME = 'search_expert_persona';

/**
 * Description of the tool shown to clients.
 */
export const TOOL_DESCRIPTION = [
  '根据用户的具体问题或场景,利用语义搜索找到最匹配的专家角色设定 (System Prompt)。',
  '输入用户的问题或需求描述,返回最适合处理该问题的专家角色 Prompt。',
].join('');
|
||||
|
||||
/** Validation rule for the `query` argument: a non-empty string. */
const queryField = z
  .string()
  .min(1, '查询内容不能为空')
  .describe('用户的原始问题或需求描述,例如:"如何用 Python 实现一个 REST API?"');

/**
 * Zod schema for the tool arguments.
 */
export const inputSchema = z.object({ query: queryField });

/**
 * Argument type inferred from `inputSchema`.
 */
export type SearchPersonaInput = z.infer<typeof inputSchema>;
|
||||
|
||||
/**
 * Hand-written JSON Schema for the tool arguments, as advertised to MCP clients.
 *
 * It is maintained by hand rather than generated, so it must be kept in step
 * with the Zod schema: `query` is a required, non-empty string.
 */
export const inputJsonSchema = {
  type: 'object' as const,
  properties: {
    query: {
      type: 'string',
      // Matches the `.min(1)` rule of the Zod schema so clients see the same constraint.
      minLength: 1,
      description: '用户的原始问题或需求描述,例如:"如何用 Python 实现一个 REST API?"',
    },
  },
  required: ['query'],
};
|
||||
|
||||
/**
 * Handles a `search_expert_persona` call.
 *
 * The input is validated against `inputSchema`, then the vector store is
 * searched. Every outcome is returned as one text item holding pretty-printed JSON:
 * - match: `{ success: true, matchedExpert, systemPrompt }`
 * - no match: `{ success: false, message, availableExperts }`
 * - invalid input or search failure: `{ success: false, error }` with `isError: true`
 *
 * This function does not throw; failures are reported in the response.
 *
 * @param input Tool arguments as received from the client (validated here).
 * @returns The MCP tool response.
 */
export async function handleSearchPersona(input: SearchPersonaInput): Promise<{
  content: Array<{ type: 'text'; text: string }>;
  isError?: boolean;
}> {
  // Wraps a payload as a single pretty-printed JSON text item.
  const asText = (payload: unknown): { type: 'text'; text: string } => ({
    type: 'text',
    text: JSON.stringify(payload, null, 2),
  });

  try {
    // Validate without throwing so the issues can be reported in readable form
    // instead of the raw issue dump carried by ZodError.message.
    const parsed = inputSchema.safeParse(input);
    if (!parsed.success) {
      const reasons = parsed.error.issues
        .map((issue) =>
          issue.path.length > 0
            ? `${issue.path.map(String).join('.')}: ${issue.message}`
            : issue.message
        )
        .join('; ');
      return {
        content: [asText({ success: false, error: reasons })],
        isError: true,
      };
    }

    // Run the semantic search.
    const result = await vectorStore.search(parsed.data.query);

    if (!result) {
      // Nothing reached the threshold: list the available experts instead.
      return {
        content: [
          asText({
            success: false,
            message: '未找到与您问题匹配的专家角色。请尝试更具体地描述您的需求。',
            availableExperts: vectorStore.getAvailablePrompts(),
          }),
        ],
      };
    }

    // Return the match; the score is rounded to two decimals.
    return {
      content: [
        asText({
          success: true,
          matchedExpert: {
            id: result.prompt.id,
            tags: result.prompt.tags,
            description: result.prompt.description,
            similarity: Math.round(result.similarity * 100) / 100,
          },
          systemPrompt: result.prompt.content,
        }),
      ],
    };
  } catch (error) {
    // Search failures (for example an embedding API error) end up here.
    const errorMessage = error instanceof Error ? error.message : '未知错误';

    return {
      content: [asText({ success: false, error: errorMessage })],
      isError: true,
    };
  }
}
|
||||
Reference in New Issue
Block a user