From 596ddc849c7ab5ae302d4fed85723661e20fe3f3 Mon Sep 17 00:00:00 2001 From: evil7 Date: Tue, 4 Mar 2025 13:09:40 +0800 Subject: [PATCH] init --- .gitignore | 36 +++++ README.md | 199 +++++++++++++++++++++++++ dev/OpenAI-API_payload.md | 140 +++++++++++++++++ dev/cf-WorkerAI_example.js | 133 +++++++++++++++++ dev/cf-WorkerAI_schema.json | 290 ++++++++++++++++++++++++++++++++++++ package.json | 16 ++ src/auth.js | 7 + src/config.js | 13 ++ src/cors.js | 8 + src/errors.js | 32 ++++ src/formatters.js | 90 +++++++++++ src/headers.js | 18 +++ src/stream.js | 38 +++++ src/worker.js | 128 ++++++++++++++++ wrangler.toml | 14 ++ 15 files changed, 1162 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 dev/OpenAI-API_payload.md create mode 100644 dev/cf-WorkerAI_example.js create mode 100644 dev/cf-WorkerAI_schema.json create mode 100644 package.json create mode 100644 src/auth.js create mode 100644 src/config.js create mode 100644 src/cors.js create mode 100644 src/errors.js create mode 100644 src/formatters.js create mode 100644 src/headers.js create mode 100644 src/stream.js create mode 100644 src/worker.js create mode 100644 wrangler.toml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..261adc3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,36 @@ +# Node.js +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* +package-lock.json +yarn.lock + +# Cloudflare Workers +.dev.vars +.wrangler/ +dist/ + +# IDE & Editor +.idea/ +.vscode/ +*.swp +*.swo +.DS_Store + +# 测试和临时文件 +test/ +*.test.js +*.spec.js +coverage/ +.env +*.log +*.tmp +temp/ +tmp/ + +# 其他 +.env.local +.env.development.local +.env.test.local +.env.production.local diff --git a/README.md b/README.md new file mode 100644 index 0000000..9401631 --- /dev/null +++ b/README.md @@ -0,0 +1,199 @@ +# Cloudflare Worker AI Endpoint + +一个基于 Cloudflare Worker AI 的模型的 OpenAI API 兼容接口实现 + +支持多模型调用、多 API 密钥、流式输出等特性 + +## ✨ 特性 + +- 🔄 动态获取最新的 Cloudflare AI 模型列表 +- 
🔑 支持多个 API 密钥配置,避免他人滥用 +- 🎯 支持多个 AI 模型配置和调用 +- 🌊 支持流式输出 (SSE) +- ✅ 完整的参数验证 +- 🌐 默认启用 CORS +- 📝 详细的错误提示 + +## 🚀 快速开始 + +### 安装 + +```bash +# 克隆项目 +git clone https://github.com/yourusername/cf-ai-endpoint.git +cd cf-ai-endpoint + +# 安装依赖 +npm install +``` + +### 配置 + +1. 设置 API 密钥 (支持多个,以逗号分隔): + +```bash +# E.g.: 生成单个API密钥并配置 +openssl rand -base64 32 | tr -d '/+' | cut -c1-32 | npx wrangler secret put API_KEY +``` + +2. 配置允许使用的模型列表(wrangler.toml): + +```toml +# E.g.: 允许如下3个模型被调用 +[vars] +MODELS = "@cf/meta/llama-2-7b-chat-int8,@cf/meta/llama-2-7b-chat-fp16,@cf/mistral/mistral-7b-instruct-v0.1" +``` + +同样可以手动在 Cloudflare 后台配置对应的 ENV。 + +> [!WARNING] +> 请在后台使用 **Secret** 格式配置 `API_KEY` 设定访问接口的 API 密钥,并确保 API 密钥存放在安全的地方。 + +### 部署 + +```bash +npm run deploy +# 或者 +npx wrangler deploy +``` + +## 📖 API 参考 + +### 1. 获取可用模型列表 + +```http +GET /v1/models +Authorization: Bearer YOUR_API_KEY +``` + +响应示例: + +```json +{ + "object": "list", + "data": [ + { + "id": "@cf/meta/llama-2-7b-chat-int8", + "object": "model", + "created": 1708661717835, + "owned_by": "cloudflare", + "permission": [], + "root": "@cf/meta/llama-2-7b-chat-int8", + "parent": null, + "metadata": { + "description": "Quantized (int8) generative text model...", + "task": "Text Generation", + "context_window": "8192" + } + } + ] +} +``` + +### 2. 文本补全 + +```http +POST /v1/completions +Authorization: Bearer YOUR_API_KEY +Content-Type: application/json + +{ + "model": "@cf/meta/llama-2-7b-chat-int8", + "prompt": "你好", + "stream": true +} +``` + +### 3. 
对话补全 + +```http +POST /v1/chat/completions +Authorization: Bearer YOUR_API_KEY +Content-Type: application/json + +{ + "model": "@cf/meta/llama-2-7b-chat-int8", + "messages": [ + {"role": "user", "content": "你好"} + ], + "stream": true +} +``` + +## 👀 支持的参数 + +| 参数 | 类型 | 默认值 | 范围 | 说明 | +| ------------------ | ------- | ------ | ------------ | ----------------- | +| model | string | - | - | 必选,模型 ID | +| stream | boolean | false | - | 是否使用流式响应 | +| max_tokens | integer | 256 | ≥1 | 最大生成 token 数 | +| temperature | number | 0.6 | 0-5 | 采样温度 | +| top_p | number | - | 0-2 | 核采样概率 | +| top_k | integer | - | 1-50 | Top-K 采样数量 | +| frequency_penalty | number | - | 0-2 | 频率惩罚 | +| presence_penalty | number | - | 0-2 | 存在惩罚 | +| repetition_penalty | number | - | 0-2 | 重复惩罚 | +| seed | integer | - | 1-9999999999 | 随机种子 | + +## 💻 调用示例 + +### Node.js (使用 OpenAI SDK) + +```javascript +import OpenAI from "openai"; + +const openai = new OpenAI({ + baseURL: "https://your-worker.workers.dev/v1", + apiKey: "YOUR_API_KEY", +}); + +// 流式响应 +const stream = await openai.chat.completions.create({ + model: "@cf/meta/llama-2-7b-chat-int8", + messages: [{ role: "user", content: "你好" }], + stream: true, +}); + +for await (const chunk of stream) { + process.stdout.write(chunk.choices[0]?.delta?.content || ""); +} +``` + +### fetch API + +```javascript +const response = await fetch("https://your-worker.workers.dev/v1/chat/completions", { + method: "POST", + headers: { + Authorization: "Bearer YOUR_API_KEY", + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: "@cf/meta/llama-2-7b-chat-int8", + messages: [{ role: "user", content: "你好" }], + stream: true, + }), +}); + +// 处理流式响应 +const reader = response.body.getReader(); +while (true) { + const { value, done } = await reader.read(); + if (done) break; + console.log(new TextDecoder().decode(value)); +} +``` + +## 📝 注意事项 + +> [!NOTE] +> +> 1. 由于使用了 Cloudflare AI API 获取模型列表,首次请求可能会稍慢 +> 2. 建议在生产环境中设置更严格的 CORS 策略 +> 3. API 密钥支持多个,便于权限管理和轮换 +> 4. 
模型配置支持动态过滤,可随时调整可用模型列表 +> 5. 内容长度限制为 131072 字符 + +## 📄 License + +MIT diff --git a/dev/OpenAI-API_payload.md b/dev/OpenAI-API_payload.md new file mode 100644 index 0000000..ba6b518 --- /dev/null +++ b/dev/OpenAI-API_payload.md @@ -0,0 +1,140 @@ +## REQUEST (stream) + +- POST: /v1/completions + +```json +{"model":"qwen2.5:3b","prompt":"你好啊","temperature":0.7,"stream":true} +``` + +- POST: /v1/chat/completions + +```json +{"model":"qwen2.5:3b","messages":[{"role":"user","content":"你好"}],"temperature":0.7,"stream":true} +``` + +## RESPONSE (stream) + +- Event Stream res for /v1/completions + +```text +data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"你好","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"} + +data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"!","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"} + +data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"很高兴","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"} + +data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"能","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"} + +data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"帮助","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"} + +data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"你","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"} + +data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"。","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"} + +data: 
{"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"有什么","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"} + +data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"我可以","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"} + +data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"做的","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"} + +data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"吗","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"} + +data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"?","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"} + +data: {"id":"cmpl-591","object":"text_completion","created":1740898012,"choices":[{"text":"","index":0,"finish_reason":"stop"}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"} + +data: [DONE] + +``` + +- Event Stream res for /v1/chat/completions + +```text +data: {"id":"chatcmpl-562","object":"chat.completion.chunk","created":1740898282,"model":"qwen2.5:3b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"你好"},"finish_reason":null}]} + +data: {"id":"chatcmpl-562","object":"chat.completion.chunk","created":1740898283,"model":"qwen2.5:3b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"!"},"finish_reason":null}]} + +data: {"id":"chatcmpl-562","object":"chat.completion.chunk","created":1740898283,"model":"qwen2.5:3b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"有什么"},"finish_reason":null}]} + +data: 
{"id":"chatcmpl-562","object":"chat.completion.chunk","created":1740898283,"model":"qwen2.5:3b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"我可以"},"finish_reason":null}]} + +data: {"id":"chatcmpl-562","object":"chat.completion.chunk","created":1740898283,"model":"qwen2.5:3b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"帮助"},"finish_reason":null}]} + +data: {"id":"chatcmpl-562","object":"chat.completion.chunk","created":1740898283,"model":"qwen2.5:3b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"你的"},"finish_reason":null}]} + +data: {"id":"chatcmpl-562","object":"chat.completion.chunk","created":1740898283,"model":"qwen2.5:3b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"吗"},"finish_reason":null}]} + +data: {"id":"chatcmpl-562","object":"chat.completion.chunk","created":1740898283,"model":"qwen2.5:3b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"finish_reason":null}]} + +data: {"id":"chatcmpl-562","object":"chat.completion.chunk","created":1740898283,"model":"qwen2.5:3b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop"}]} + +data: [DONE] + +``` + +## REQUEST (normal) + +- POST: /v1/completions + +```json +{"model":"qwen2.5:3b","prompt":"你好啊","temperature":0.7} +``` + +- POST: /v1/chat/completions + +```json +{"model":"qwen2.5:3b","messages":[{"role":"user","content":"你好"}],"temperature":0.7} +``` + +## RESPONSE (normal) + +- res for /v1/completions + +```json +{ + "id": "cmpl-765", + "object": "text_completion", + "created": 1740898347, + "model": "qwen2.5:3b", + "system_fingerprint": "fp_ollama", + "choices": [ + { + "text": "你好!很高兴为你服务。有什么我可以帮助你的吗?", + "index": 0, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 31, + 
// Model that serves every request in this example. The `model` field of the
// request body is only echoed back in the OpenAI-shaped response.
const EXAMPLE_MODEL = "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b";

/**
 * Re-emit a Workers AI SSE stream as OpenAI `chat.completion.chunk` events.
 *
 * Upstream bytes arrive in arbitrary chunks, so an SSE line (or even a single
 * multi-byte character) may be split across two reads. Text is therefore
 * decoded in streaming mode and buffered until a full line is available;
 * parsing each read on its own would drop the tokens on split lines.
 *
 * @param {ReadableStream<Uint8Array>} upstream - Stream returned by `env.AI.run`.
 * @param {WritableStream<Uint8Array>} writable - Sink connected to the client response.
 * @param {{ id: string, created: number, model: string }} meta - Fields copied into every chunk.
 * @returns {Promise<void>} Resolves once the sink is closed or aborted.
 */
async function pipeAsOpenAIChunks(upstream, writable, { id, created, model }) {
  const reader = upstream.getReader();
  const writer = writable.getWriter();
  const decoder = new TextDecoder();
  const encoder = new TextEncoder();
  let pending = "";

  // Convert one complete upstream line into one OpenAI chunk frame.
  const emitLine = async (rawLine) => {
    const line = rawLine.trim();
    // Skip blank separators and the upstream terminator; ours is sent at the end.
    if (!line || line.endsWith("data: [DONE]")) return;

    let payload;
    try {
      payload = JSON.parse(line.replace(/^data: /, ""));
    } catch (e) {
      console.error("Failed to parse JSON: ", e);
      return;
    }

    const frame = {
      id,
      created,
      object: "chat.completion.chunk",
      model,
      choices: [
        {
          delta: { content: payload.response },
          index: 0,
          finish_reason: null,
        },
      ],
    };
    await writer.write(encoder.encode(`data: ${JSON.stringify(frame)}\n\n`));
  };

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      // `stream: true` keeps a partially received character inside the decoder.
      pending += decoder.decode(value, { stream: true });
      const lines = pending.split("\n");
      // The last element is an unterminated line (possibly empty); keep it buffered.
      pending = lines.pop();
      for (const line of lines) {
        await emitLine(line);
      }
    }

    // Flush whatever is left if the upstream did not end with a newline.
    pending += decoder.decode();
    await emitLine(pending);

    await writer.write(encoder.encode("data: [DONE]\n\n"));
    await writer.close();
  } catch (err) {
    console.error("Stream processing error: ", err);
    // Abort rather than close so the client sees a failed stream, not a clean end.
    // The abort itself may reject if the sink is already errored; nothing more to do then.
    await writer.abort(err).catch(() => {});
  }
}

/**
 * Minimal example Worker exposing an OpenAI-compatible `POST .../v1/chat/completions`
 * endpoint on top of the Workers AI binding (`env.AI`).
 *
 * Any other method or URL yields a plain 404. A body that cannot be parsed, or
 * a failing `env.AI.run` call, yields a 400 JSON error `{ "error": "Invalid request" }`.
 */
const exampleWorker = {
  async fetch(request, env) {
    if (request.method !== "POST" || !request.url.includes("v1/chat/completions")) {
      return new Response("Not Found", { status: 404 });
    }

    // TODO: validate the API key

    try {
      const requestData = await request.json();
      const { model, messages } = requestData;
      const isStream = Boolean(requestData?.stream);

      // `created` is expressed in epoch seconds.
      const created = Math.floor(Date.now() / 1000);
      const id = crypto.randomUUID();

      const result = await env.AI.run(EXAMPLE_MODEL, {
        messages,
        stream: isStream,
      });

      if (!isStream) {
        return Response.json({
          id,
          model,
          created,
          object: "chat.completion",
          choices: [
            {
              index: 0,
              message: {
                role: "assistant",
                content: result.response,
              },
              finish_reason: "stop",
            },
          ],
          usage: {
            prompt_tokens: 0,
            completion_tokens: 0,
            total_tokens: 0,
          },
        });
      }

      // Identity transform: the writable side is fed by the pump below while
      // the readable side is returned to the client immediately.
      const { readable, writable } = new TransformStream();
      // Intentionally not awaited: the pump handles its own errors internally.
      void pipeAsOpenAIChunks(result, writable, { id, created, model });

      return new Response(readable, {
        headers: {
          "content-type": "text/event-stream",
          "Cache-Control": "no-cache",
          Connection: "keep-alive",
        },
      });
    } catch (error) {
      // Log instead of swallowing, so failures can be diagnosed.
      console.error("Request handling error: ", error);
      return new Response(JSON.stringify({ error: "Invalid request" }), {
        status: 400,
        headers: { "Content-Type": "application/json" },
      });
    }
  },
};

export default exampleWorker;
+ }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, + "stream": { + "type": "boolean", + "default": false, + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." + }, + "max_tokens": { + "type": "integer", + "default": 256, + "description": "The maximum number of tokens to generate in the response." + }, + "temperature": { + "type": "number", + "default": 0.6, + "minimum": 0, + "maximum": 5, + "description": "Controls the randomness of the output; higher values produce more random results." + }, + "top_p": { + "type": "number", + "minimum": 0, + "maximum": 2, + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + }, + "top_k": { + "type": "integer", + "minimum": 1, + "maximum": 50, + "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises." + }, + "seed": { + "type": "integer", + "minimum": 1, + "maximum": 9999999999, + "description": "Random seed for reproducibility of the generation." + }, + "repetition_penalty": { + "type": "number", + "minimum": 0, + "maximum": 2, + "description": "Penalty for repeated tokens; higher values discourage repetition." + }, + "frequency_penalty": { + "type": "number", + "minimum": 0, + "maximum": 2, + "description": "Decreases the likelihood of the model repeating the same lines verbatim." + }, + "presence_penalty": { + "type": "number", + "minimum": 0, + "maximum": 2, + "description": "Increases the likelihood of the model introducing new topics." 
+ }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + } + }, + "required": ["prompt"] + }, + { + "title": "Messages", + "properties": { + "messages": { + "type": "array", + "description": "An array of message objects representing the conversation history.", + "items": { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool')." + }, + "content": { + "type": "string", + "maxLength": 131072, + "description": "The content of the message as a string." + } + }, + "required": ["role", "content"] + } + }, + "functions": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "code": { + "type": "string" + } + }, + "required": ["name", "code"] + } + }, + "tools": { + "type": "array", + "description": "A list of tools available for the assistant to use.", + "items": { + "type": "object", + "oneOf": [ + { + "properties": { + "name": { + "type": "string", + "description": "The name of the tool. More descriptive the better." + }, + "description": { + "type": "string", + "description": "A brief description of what the tool does." + }, + "parameters": { + "type": "object", + "description": "Schema defining the parameters accepted by the tool.", + "properties": { + "type": { + "type": "string", + "description": "The type of the parameters object (usually 'object')." + }, + "required": { + "type": "array", + "description": "List of required parameter names.", + "items": { + "type": "string" + } + }, + "properties": { + "type": "object", + "description": "Definitions of each parameter.", + "additionalProperties": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "The data type of the parameter." + }, + "description": { + "type": "string", + "description": "A description of the expected parameter." 
+ } + }, + "required": ["type", "description"] + } + } + }, + "required": ["type", "properties"] + } + }, + "required": ["name", "description", "parameters"] + }, + { + "properties": { + "type": { + "type": "string", + "description": "Specifies the type of tool (e.g., 'function')." + }, + "function": { + "type": "object", + "description": "Details of the function tool.", + "properties": { + "name": { + "type": "string", + "description": "The name of the function." + }, + "description": { + "type": "string", + "description": "A brief description of what the function does." + }, + "parameters": { + "type": "object", + "description": "Schema defining the parameters accepted by the function.", + "properties": { + "type": { + "type": "string", + "description": "The type of the parameters object (usually 'object')." + }, + "required": { + "type": "array", + "description": "List of required parameter names.", + "items": { + "type": "string" + } + }, + "properties": { + "type": "object", + "description": "Definitions of each parameter.", + "additionalProperties": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "The data type of the parameter." + }, + "description": { + "type": "string", + "description": "A description of the expected parameter." + } + }, + "required": ["type", "description"] + } + } + }, + "required": ["type", "properties"] + } + }, + "required": ["name", "description", "parameters"] + } + }, + "required": ["type", "function"] + } + ] + } + }, + "stream": { + "type": "boolean", + "default": false, + "description": "If true, the response will be streamed back incrementally." + }, + "max_tokens": { + "type": "integer", + "default": 256, + "description": "The maximum number of tokens to generate in the response." + }, + "temperature": { + "type": "number", + "default": 0.6, + "minimum": 0, + "maximum": 5, + "description": "Controls the randomness of the output; higher values produce more random results." 
+ }, + "top_p": { + "type": "number", + "minimum": 0, + "maximum": 2, + "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + }, + "top_k": { + "type": "integer", + "minimum": 1, + "maximum": 50, + "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises." + }, + "seed": { + "type": "integer", + "minimum": 1, + "maximum": 9999999999, + "description": "Random seed for reproducibility of the generation." + }, + "repetition_penalty": { + "type": "number", + "minimum": 0, + "maximum": 2, + "description": "Penalty for repeated tokens; higher values discourage repetition." + }, + "frequency_penalty": { + "type": "number", + "minimum": 0, + "maximum": 2, + "description": "Decreases the likelihood of the model repeating the same lines verbatim." + }, + "presence_penalty": { + "type": "number", + "minimum": 0, + "maximum": 2, + "description": "Increases the likelihood of the model introducing new topics." 
+ } + }, + "required": ["messages"] + } + ] +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..69b0013 --- /dev/null +++ b/package.json @@ -0,0 +1,16 @@ +{ + "name": "cf-ai-endpoint", + "version": "1.0.0", + "description": "Cloudflare AI endpoint with OpenAI API compatibility", + "main": "src/worker.js", + "type": "module", + "scripts": { + "dev": "npx wrangler dev", + "deploy": "npx wrangler deploy", + "minify": "esbuild src/worker.js --bundle --minify --format=esm --outfile=dist/worker.min.js" + }, + "devDependencies": { + "wrangler": "^3.0.0", + "esbuild": "^0.25.0" + } +} diff --git a/src/auth.js b/src/auth.js new file mode 100644 index 0000000..edf88f6 --- /dev/null +++ b/src/auth.js @@ -0,0 +1,7 @@ +import { ValidationError } from './errors'; + +export const validateApiKey = (apiKey, validKeys) => { + if (!apiKey || !validKeys.has(apiKey.replace("Bearer ", ""))) { + throw new ValidationError("Invalid API key", "invalid_api_key"); + } +}; diff --git a/src/config.js b/src/config.js new file mode 100644 index 0000000..29c77ea --- /dev/null +++ b/src/config.js @@ -0,0 +1,13 @@ +export const CONFIG = { + MAX_CONTENT_LENGTH: 131072, + PARAM_RANGES: { + max_tokens: { min: 1, type: 'integer', default: 256 }, + temperature: { min: 0, max: 5, default: 0.6 }, + top_p: { min: 0, max: 2 }, + top_k: { min: 1, max: 50, type: 'integer' }, + frequency_penalty: { min: 0, max: 2 }, + presence_penalty: { min: 0, max: 2 }, + repetition_penalty: { min: 0, max: 2 }, + seed: { min: 1, max: 9999999999, type: 'integer' } + } +}; diff --git a/src/cors.js b/src/cors.js new file mode 100644 index 0000000..46bed26 --- /dev/null +++ b/src/cors.js @@ -0,0 +1,8 @@ +import { CORS_HEADERS } from './headers'; + +export const handleCors = (request) => { + if (request.method === "OPTIONS") { + return new Response(null, { headers: CORS_HEADERS }); + } + return null; +}; diff --git a/src/errors.js b/src/errors.js new file mode 100644 index 0000000..b28a68d --- /dev/null 
// Optional generation parameters forwarded to Workers AI when the caller set them.
const CF_OPTIONAL_PARAMS = [
  'max_tokens',
  'temperature',
  'top_p',
  'top_k',
  'seed',
  'repetition_penalty',
  'frequency_penalty',
  'presence_penalty',
  'raw',
  'lora',
];

// The worker labels text completions as 'completion', while the validator
// labels the content field as 'prompt'. Both mean "build the request from `prompt`".
const isPromptType = (type) => type === 'prompt' || type === 'completion';

/**
 * Converters between the OpenAI wire format and the Workers AI wire format.
 */
export const formatters = {
  /**
   * Build the parameter object passed to `env.AI.run`.
   *
   * @param {object} params - Validated request parameters (OpenAI naming).
   * @param {string} type - 'prompt' or 'completion' for text completion; any
   *   other value (e.g. 'chat', 'messages') uses `params.messages` as-is.
   * @returns {object} `{ messages, stream, ... }`. A text prompt is wrapped
   *   into a single user message. Optional parameters that are `undefined`
   *   are omitted, as are empty `functions` / `tools` arrays.
   */
  toCloudflare: (params, type) => {
    const { stream = false, functions, tools, messages, prompt } = params;

    const requestParams = {
      messages: isPromptType(type)
        ? [{ role: 'user', content: prompt }]
        : messages,
      stream,
    };

    // Function-calling inputs are only forwarded when non-empty.
    if (functions?.length) {
      requestParams.functions = functions;
    }
    if (tools?.length) {
      requestParams.tools = tools;
    }

    for (const key of CF_OPTIONAL_PARAMS) {
      if (params[key] !== undefined) {
        requestParams[key] = params[key];
      }
    }

    return requestParams;
  },

  /**
   * Wrap a Workers AI result (or a bare text fragment) in an OpenAI response object.
   *
   * @param {string | { response?: string } | null | undefined} response -
   *   Generated text, or an object carrying it in `response`.
   * @param {string} type - 'chat' for chat completions; anything else is
   *   treated as a text completion.
   * @param {string} model - Model id echoed back to the client.
   * @param {boolean} [isStream=false] - Produce a streaming chunk instead of a full response.
   * @returns {object} Chat results use `chat.completion` / `chat.completion.chunk`
   *   with `message` / `delta`; text results use `text_completion` with `text`.
   *   `created` is in epoch seconds. Non-stream results carry a `usage` block
   *   filled with -1, because token counts are not available here.
   */
  toOpenAI: (response, type, model, isStream = false) => {
    const isChat = type === 'chat';
    // `?.` keeps a null/undefined upstream result from throwing.
    const content = typeof response === 'string' ? response : response?.response || '';

    const choice = {
      index: 0,
      finish_reason: isStream ? null : "stop",
    };
    if (isChat) {
      choice[isStream ? 'delta' : 'message'] = { role: 'assistant', content };
    } else {
      choice.text = content;
    }

    let object = 'text_completion';
    if (isChat) {
      object = isStream ? 'chat.completion.chunk' : 'chat.completion';
    }

    const result = {
      id: `${isChat ? 'chatcmpl' : 'cmpl'}-${crypto.randomUUID()}`,
      object,
      // OpenAI-compatible clients expect seconds, not milliseconds.
      created: Math.floor(Date.now() / 1000),
      model,
      choices: [choice],
    };

    if (!isStream) {
      result.usage = { prompt_tokens: -1, completion_tokens: -1, total_tokens: -1 };
    }

    return result;
  },
};
// Request fields that `formatters.toCloudflare` knows how to forward but that
// have no numeric range to validate.
const PASSTHROUGH_PARAMS = ['tools', 'functions', 'raw', 'lora'];

/** Split a comma-separated setting into trimmed, non-empty entries. */
const parseList = (value) =>
  String(value)
    .split(',')
    .map((entry) => entry.trim())
    .filter(Boolean);

/**
 * Read the configured API keys. This needs no network access, so it can run
 * before authentication.
 *
 * @param {object} env - Worker environment bindings.
 * @returns {Set<string>} Non-empty API keys.
 * @throws {Error} When `API_KEY` is missing or contains only blanks.
 */
function getApiKeys(env) {
  if (!env.API_KEY) throw new Error('API_KEY not set');
  const apiKeys = new Set(parseList(env.API_KEY));
  // A value like " , " must not produce an empty-string key.
  if (!apiKeys.size) throw new Error('API_KEY not set');
  return apiKeys;
}

/**
 * Validate the environment and load the runtime configuration.
 *
 * @param {object} env - Worker environment bindings (`AI`, `API_KEY`, optional `MODELS`).
 * @returns {Promise<{ apiKeys: Set<string>, models: object[] }>} The API keys,
 *   plus the models reported by `env.AI.models()`, restricted to the names
 *   listed in `MODELS` when that variable is set.
 * @throws {Error} When a binding is missing or `MODELS` matches no model.
 */
async function getConfig(env) {
  if (!env.AI) throw new Error('AI binding not configured');
  const apiKeys = getApiKeys(env);

  let models = await env.AI.models();

  if (env.MODELS) {
    const allowedModels = new Set(parseList(env.MODELS));
    models = models.filter((m) => allowedModels.has(m.name));
    if (!models.length) {
      throw new Error('No valid models configured. Check your MODELS environment variable.');
    }
  }

  return { apiKeys, models };
}

/** Build the range error text without ever printing "undefined" for an open upper bound. */
function describeRange(param, range) {
  const kind = range.type === 'integer' ? 'an integer ' : '';
  return range.max === undefined
    ? `${param} must be ${kind}greater than or equal to ${range.min}`
    : `${param} must be ${kind}between ${range.min} and ${range.max}`;
}

/** Reject prompt / message text longer than `CONFIG.MAX_CONTENT_LENGTH`. */
function assertContentLength(content, type) {
  const limit = CONFIG.MAX_CONTENT_LENGTH;
  const texts = type === 'prompt' ? [content] : content.map((message) => message?.content);
  if (texts.some((text) => typeof text === 'string' && text.length > limit)) {
    throw new ValidationError(
      `${type} content must not exceed ${limit} characters`,
      'content_too_long',
      type
    );
  }
}

/**
 * Validate an OpenAI-style request body.
 *
 * @param {object} params - Parsed JSON request body.
 * @param {'prompt' | 'messages'} type - Name of the content field to require.
 * @returns {object} `{ model, stream, [type], ...generation params }`, with
 *   the defaults from `CONFIG.PARAM_RANGES` applied and `tools`, `functions`,
 *   `raw`, `lora` copied through when present.
 * @throws {ValidationError} When the body is not an object, `model` or the
 *   content field is missing, content is too long, or a generation parameter
 *   is not a finite number inside its configured range.
 */
function validateParams(params, type) {
  if (params === null || typeof params !== 'object' || Array.isArray(params)) {
    throw new ValidationError('Request body must be a JSON object', 'invalid_request_body');
  }

  if (typeof params.model !== 'string' || !params.model.trim()) {
    throw new ValidationError('model is required', 'invalid_model', 'model');
  }

  // Require the content field.
  if (type === 'prompt' ? !params.prompt : !Array.isArray(params.messages)) {
    throw new ValidationError(`Invalid ${type}`, `invalid_${type}`, type);
  }
  assertContentLength(params[type], type);

  const validatedParams = {
    model: params.model,
    stream: Boolean(params.stream),
    [type]: params[type],
  };

  // Check generation parameters against their configured ranges.
  for (const [param, range] of Object.entries(CONFIG.PARAM_RANGES)) {
    const value = params[param] ?? range.default;
    if (value === undefined) continue;

    // The explicit type check matters: a string such as "hot" compares false
    // against both bounds and would otherwise slip through.
    const inRange =
      typeof value === 'number' &&
      Number.isFinite(value) &&
      (range.type !== 'integer' || Number.isInteger(value)) &&
      value >= range.min &&
      (range.max === undefined || value <= range.max);

    if (!inRange) {
      throw new ValidationError(describeRange(param, range), `invalid_${param}`, param);
    }
    validatedParams[param] = value;
  }

  // Without this copy these fields never reach `formatters.toCloudflare`.
  for (const key of PASSTHROUGH_PARAMS) {
    if (params[key] !== undefined) {
      validatedParams[key] = params[key];
    }
  }

  return validatedParams;
}

/**
 * Worker entry point: an OpenAI-compatible facade over the Workers AI binding.
 *
 * Routes: `GET /v1/models`, `POST /v1/completions`, `POST /v1/chat/completions`.
 * Paths outside `/v1/` get a plain 404. Every thrown error is converted to a
 * response by `errorHandler`.
 */
const worker = {
  async fetch(request, env, ctx) {
    try {
      // CORS preflight
      const corsResponse = handleCors(request);
      if (corsResponse) return corsResponse;

      const url = new URL(request.url);
      if (!url.pathname.startsWith('/v1/')) {
        return new Response('Not Found', { status: 404 });
      }

      // Authenticate before `getConfig`, which lists models upstream, so that
      // unauthenticated callers cannot trigger that call.
      validateApiKey(request.headers.get("Authorization"), getApiKeys(env));
      const config = await getConfig(env);

      switch (url.pathname) {
        case "/v1/models":
          if (request.method !== "GET") {
            throw new ValidationError("Method not allowed", "method_not_allowed");
          }
          return Response.json({
            object: "list",
            // OpenAI clients look models up by `id`; expose the model name
            // there while keeping every original field.
            data: config.models.map((m) => ({
              object: "model",
              owned_by: "cloudflare",
              ...m,
              id: m.name,
            }))
          }, {
            headers: JSON_HEADERS
          });

        case "/v1/completions":
        case "/v1/chat/completions": {
          if (request.method !== "POST") {
            throw new ValidationError("Method not allowed", "method_not_allowed");
          }

          const isChat = url.pathname === "/v1/chat/completions";
          const type = isChat ? "chat" : "completion";
          const contentKey = isChat ? "messages" : "prompt";

          let requestData;
          try {
            requestData = await request.json();
          } catch (parseError) {
            // A malformed body is the client's fault, not an internal error.
            throw new ValidationError("Request body must be valid JSON", "invalid_json");
          }

          const validatedParams = validateParams(requestData, contentKey);

          // Enforce the MODELS allow-list on inference too, not only on the listing.
          if (env.MODELS && !config.models.some((m) => m.name === validatedParams.model)) {
            throw new ValidationError(
              `Model ${validatedParams.model} is not available`,
              "model_not_found",
              "model"
            );
          }

          // `toCloudflare` switches on the content field name ('prompt'),
          // not on the response type.
          const cfParams = formatters.toCloudflare(validatedParams, contentKey);
          const response = await env.AI.run(validatedParams.model, cfParams);

          return validatedParams.stream
            ? new Response(
                response.pipeThrough(createStreamHandler(type, validatedParams.model)),
                { headers: STREAM_HEADERS }
              )
            : Response.json(
                formatters.toOpenAI(response, type, validatedParams.model),
                { headers: JSON_HEADERS }
              );
        }

        default:
          throw new ValidationError("Not Found", "not_found");
      }
    } catch (error) {
      return errorHandler(error);
    }
  }
};

export default worker;