From 596ddc849c7ab5ae302d4fed85723661e20fe3f3 Mon Sep 17 00:00:00 2001
From: evil7 <bilibili@csu.edu.cn>
Date: Tue, 4 Mar 2025 13:09:40 +0800
Subject: [PATCH] init

---
 .gitignore                  |  36 +++++
 README.md                   | 199 +++++++++++++++++++++++++
 dev/OpenAI-API_payload.md   | 140 +++++++++++++++++
 dev/cf-WorkerAI_example.js  | 133 +++++++++++++++++
 dev/cf-WorkerAI_schema.json | 290 ++++++++++++++++++++++++++++++++++++
 package.json                |  16 ++
 src/auth.js                 |   7 +
 src/config.js               |  13 ++
 src/cors.js                 |   8 +
 src/errors.js               |  32 ++++
 src/formatters.js           |  90 +++++++++++
 src/headers.js              |  18 +++
 src/stream.js               |  38 +++++
 src/worker.js               | 128 ++++++++++++++++
 wrangler.toml               |  14 ++
 15 files changed, 1162 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 dev/OpenAI-API_payload.md
 create mode 100644 dev/cf-WorkerAI_example.js
 create mode 100644 dev/cf-WorkerAI_schema.json
 create mode 100644 package.json
 create mode 100644 src/auth.js
 create mode 100644 src/config.js
 create mode 100644 src/cors.js
 create mode 100644 src/errors.js
 create mode 100644 src/formatters.js
 create mode 100644 src/headers.js
 create mode 100644 src/stream.js
 create mode 100644 src/worker.js
 create mode 100644 wrangler.toml
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..261adc3
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,36 @@
+# Node.js
+node_modules/
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+package-lock.json
+yarn.lock
+
+# Cloudflare Workers
+.dev.vars
+.wrangler/
+dist/
+
+# IDE & Editor
+.idea/
+.vscode/
+*.swp
+*.swo
+.DS_Store
+
+# 测试和临时文件
+test/
+*.test.js
+*.spec.js
+coverage/
+.env
+*.log
+*.tmp
+temp/
+tmp/
+
+# 其他
+.env.local
+.env.development.local
+.env.test.local
+.env.production.local
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9401631
--- /dev/null
+++ b/README.md
@@ -0,0 +1,199 @@
+# Cloudflare Worker AI Endpoint
+
+一个基于 Cloudflare Worker AI 模型的 OpenAI API 兼容接口实现
+
+支持多模型调用、多 API 密钥、流式输出等特性
+
+## ✨ 特性
+
+- 🔄 动态获取最新的 Cloudflare AI 模型列表
+- 🔑 支持多个 API 密钥配置，避免他人滥用
+- 🎯 支持多个 AI 模型配置和调用
+- 🌊 支持流式输出 (SSE)
+- ✅ 完整的参数验证
+- 🌐 默认启用 CORS
+- 📝 详细的错误提示
+
+## 🚀 快速开始
+
+### 安装
+
+```bash
+# 克隆项目
+git clone https://github.com/yourusername/cf-ai-endpoint.git
+cd cf-ai-endpoint
+
+# 安装依赖
+npm install
+```
+
+### 配置
+
+1. 设置 API 密钥 (支持多个，以逗号分隔):
+
+```bash
+# E.g.: 生成单个API密钥并配置
+openssl rand -base64 32 | tr -d '/+' | cut -c1-32 | npx wrangler secret put API_KEY
+```
+
+2. 配置允许使用的模型列表(wrangler.toml):
+
+```toml
+# E.g.: 允许如下3个模型被调用
+[vars]
+MODELS = "@cf/meta/llama-2-7b-chat-int8,@cf/meta/llama-2-7b-chat-fp16,@cf/mistral/mistral-7b-instruct-v0.1"
+```
+
+同样可以手动在 Cloudflare 后台配置对应的 ENV。
+
+> [!WARNING]
+> 请在后台使用 **Secret** 格式配置 `API_KEY` 设定访问接口的 API 密钥，并确保 API 密钥存放在安全的地方。
+
+### 部署
+
+```bash
+npm run deploy
+# 或者
+npx wrangler deploy
+```
+
+## 📖 API 参考
+
+### 1. 获取可用模型列表
+
+```http
+GET /v1/models
+Authorization: Bearer <your-api-key>
+```
+
+响应示例:
+
+```json
+{
+  "object": "list",
+  "data": [
+    {
+      "id": "@cf/meta/llama-2-7b-chat-int8",
+      "object": "model",
+      "created": 1708661717835,
+      "owned_by": "cloudflare",
+      "permission": [],
+      "root": "@cf/meta/llama-2-7b-chat-int8",
+      "parent": null,
+      "metadata": {
+        "description": "Quantized (int8) generative text model...",
+        "task": "Text Generation",
+        "context_window": "8192"
+      }
+    }
+  ]
+}
+```
+
+### 2. 文本补全
+
+```http
+POST /v1/completions
+Authorization: Bearer <your-api-key>
+Content-Type: application/json
+
+{
+    "model": "@cf/meta/llama-2-7b-chat-int8",
+    "prompt": "你好",
+    "stream": true
+}
+```
+
+### 3. 对话补全
+
+```http
+POST /v1/chat/completions
+Authorization: Bearer <your-api-key>
+Content-Type: application/json
+
+{
+    "model": "@cf/meta/llama-2-7b-chat-int8",
+    "messages": [
+        {"role": "user", "content": "你好"}
+    ],
+    "stream": true
+}
+```
+
+## 👀 支持的参数
+
+| 参数               | 类型    | 默认值 | 范围         | 说明              |
+| ------------------ | ------- | ------ | ------------ | ----------------- |
+| model              | string  | -      | -            | 必选，模型 ID     |
+| stream             | boolean | false  | -            | 是否使用流式响应  |
+| max_tokens         | integer | 256    | ≥1           | 最大生成 token 数 |
+| temperature        | number  | 0.6    | 0-5          | 采样温度          |
+| top_p              | number  | -      | 0-2          | 核采样概率        |
+| top_k              | integer | -      | 1-50         | Top-K 采样候选数  |
+| frequency_penalty  | number  | -      | 0-2          | 频率惩罚          |
+| presence_penalty   | number  | -      | 0-2          | 存在惩罚          |
+| repetition_penalty | number  | -      | 0-2          | 重复惩罚          |
+| seed               | integer | -      | 1-9999999999 | 随机种子          |
+
+## 💻 调用示例
+
+### Node.js (使用 OpenAI SDK)
+
+```javascript
+import OpenAI from "openai";
+
+const openai = new OpenAI({
+  baseURL: "https://your-worker.workers.dev/v1",
+  apiKey: "<your-api-key>",
+});
+
+// 流式响应
+const stream = await openai.chat.completions.create({
+  model: "@cf/meta/llama-2-7b-chat-int8",
+  messages: [{ role: "user", content: "你好" }],
+  stream: true,
+});
+
+for await (const chunk of stream) {
+  process.stdout.write(chunk.choices[0]?.delta?.content || "");
+}
+```
+
+### fetch API
+
+```javascript
+const response = await fetch("https://your-worker.workers.dev/v1/chat/completions", {
+  method: "POST",
+  headers: {
+    Authorization: "Bearer <your-api-key>",
+    "Content-Type": "application/json",
+  },
+  body: JSON.stringify({
+    model: "@cf/meta/llama-2-7b-chat-int8",
+    messages: [{ role: "user", content: "你好" }],
+    stream: true,
+  }),
+});
+
+// 处理流式响应
+const reader = response.body.getReader();
+while (true) {
+  const { value, done } = await reader.read();
+  if (done) break;
+  console.log(new TextDecoder().decode(value));
+}
+```
+
+## 📝 注意事项
+
+> [!NOTE]
+>
+> 1. 由于使用了 Cloudflare AI API 获取模型列表，首次请求可能会稍慢
+> 2. 建议在生产环境中设置更严格的 CORS 策略
+> 3. API 密钥支持多个，便于权限管理和轮换
+> 4. 模型配置支持动态过滤，可随时调整可用模型列表
+> 5. 内容长度限制为 131072 字符
+
+## 📄 License
+
+MIT
diff --git a/dev/OpenAI-API_payload.md b/dev/OpenAI-API_payload.md
new file mode 100644
index 0000000..ba6b518
--- /dev/null
+++ b/dev/OpenAI-API_payload.md
@@ -0,0 +1,140 @@
+## REQUEST (stream)
+
+- POST: /v1/completions
+
+```json
+{"model":"qwen2.5:3b","prompt":"你好啊","temperature":0.7,"stream":true}
+```
+
+- POST: /v1/chat/completions
+
+```json
+{"model":"qwen2.5:3b","messages":[{"role":"user","content":"你好"}],"temperature":0.7,"stream":true}
+```
+
+## RESPONSE (stream)
+
+- Event Stream res for /v1/completions
+
+```text
+data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"你好","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"}
+
+data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"！","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"}
+
+data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"很高兴","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"}
+
+data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"能","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"}
+
+data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"帮助","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"}
+
+data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"你","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"}
+
+data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"。","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"}
+
+data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"有什么","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"}
+
+data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"我可以","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"}
+
+data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"做的","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"}
+
+data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"吗","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"}
+
+data: {"id":"cmpl-591","object":"text_completion","created":1740898011,"choices":[{"text":"？","index":0,"finish_reason":null}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"}
+
+data: {"id":"cmpl-591","object":"text_completion","created":1740898012,"choices":[{"text":"","index":0,"finish_reason":"stop"}],"model":"qwen2.5:3b","system_fingerprint":"fp_ollama"}
+
+data: [DONE]
+
+```
+
+- Event Stream res for /v1/chat/completions
+
+```text
+data: {"id":"chatcmpl-562","object":"chat.completion.chunk","created":1740898282,"model":"qwen2.5:3b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"你好"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-562","object":"chat.completion.chunk","created":1740898283,"model":"qwen2.5:3b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"！"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-562","object":"chat.completion.chunk","created":1740898283,"model":"qwen2.5:3b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"有什么"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-562","object":"chat.completion.chunk","created":1740898283,"model":"qwen2.5:3b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"我可以"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-562","object":"chat.completion.chunk","created":1740898283,"model":"qwen2.5:3b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"帮助"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-562","object":"chat.completion.chunk","created":1740898283,"model":"qwen2.5:3b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"你的"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-562","object":"chat.completion.chunk","created":1740898283,"model":"qwen2.5:3b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"吗"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-562","object":"chat.completion.chunk","created":1740898283,"model":"qwen2.5:3b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"？"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-562","object":"chat.completion.chunk","created":1740898283,"model":"qwen2.5:3b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop"}]}
+
+data: [DONE]
+
+```
+
+## REQUEST (normal)
+
+- POST: /v1/completions
+
+```json
+{"model":"qwen2.5:3b","prompt":"你好啊","temperature":0.7}
+```
+
+- POST: /v1/chat/completions
+
+```json
+{"model":"qwen2.5:3b","messages":[{"role":"user","content":"你好"}],"temperature":0.7}
+```
+
+## RESPONSE (normal)
+
+- res for /v1/completions
+
+```json
+{
+    "id": "cmpl-765",
+    "object": "text_completion",
+    "created": 1740898347,
+    "model": "qwen2.5:3b",
+    "system_fingerprint": "fp_ollama",
+    "choices": [
+        {
+            "text": "你好！很高兴为你服务。有什么我可以帮助你的吗？",
+            "index": 0,
+            "finish_reason": "stop"
+        }
+    ],
+    "usage": {
+        "prompt_tokens": 31,
+        "completion_tokens": 13,
+        "total_tokens": 44
+    }
+}
+```
+
+- res for /v1/chat/completions
+
+```json
+{
+    "id": "chatcmpl-323",
+    "object": "chat.completion",
+    "created": 1740898382,
+    "model": "qwen2.5:3b",
+    "system_fingerprint": "fp_ollama",
+    "choices": [
+        {
+            "index": 0,
+            "message": {
+                "role": "assistant",
+                "content": "你好！有什么我可以帮助你的吗？"
+            },
+            "finish_reason": "stop"
+        }
+    ],
+    "usage": {
+        "prompt_tokens": 30,
+        "completion_tokens": 9,
+        "total_tokens": 39
+    }
+}
+```
\ No newline at end of file
diff --git a/dev/cf-WorkerAI_example.js b/dev/cf-WorkerAI_example.js
new file mode 100644
index 0000000..2788690
--- /dev/null
+++ b/dev/cf-WorkerAI_example.js
@@ -0,0 +1,133 @@
+// Reference Worker: relays POST .../v1/chat/completions to Workers AI and
+// re-emits the model output in OpenAI's chat.completion(.chunk) shape.
+export default {
+  async fetch(request, env) {
+    if (request.method === "POST" && request.url.includes("v1/chat/completions")) {
+      // TODO: validate the API key
+
+      try {
+        const requestData = await request.json();
+
+        // `model` is only echoed back; the model that runs is fixed below
+        const { model, messages } = requestData;
+        const isStream = Boolean(requestData?.stream);
+
+        // get the current time in epoch seconds
+        const created = Math.floor(Date.now() / 1000);
+        const uuid = crypto.randomUUID();
+
+        const stream = await env.AI.run("@cf/deepseek-ai/deepseek-r1-distill-qwen-32b", {
+          messages: messages,
+          stream: isStream,
+        });
+
+        if (isStream) {
+          // Create a TransformStream so the reply can be streamed to the client
+          const { readable, writable } = new TransformStream();
+          const writer = writable.getWriter();
+          const reader = stream.getReader();
+
+          // Read the upstream stream and forward it to the client event by event
+          async function processStream() {
+            const decoder = new TextDecoder();
+            const encoder = new TextEncoder();
+            // Holds the trailing partial line until the next chunk completes it
+            let buffer = "";
+
+            try {
+              while (true) {
+                const { done, value } = await reader.read();
+                // Streaming decode keeps multi-byte characters split across chunks intact
+                buffer += decoder.decode(value, { stream: !done });
+
+                // Parse complete lines only; once done, flush whatever is left
+                const lines = buffer.split("\n");
+                buffer = done ? "" : lines.pop();
+                for (const line of lines) {
+                  // Skip blank lines and the non-JSON [DONE] marker
+                  if (!line || line.endsWith("data: [DONE]")) {
+                    continue;
+                  }
+
+                  try {
+                    const json = JSON.parse(line.replace(/^data: /, ""));
+
+                    const formattedValue = `data: ${JSON.stringify({
+                      id: uuid,
+                      created,
+                      object: "chat.completion.chunk",
+                      model,
+                      choices: [
+                        {
+                          delta: { content: json.response },
+                          index: 0,
+                          finish_reason: null,
+                        },
+                      ],
+                    })}\n\n`;
+                    const encodedValue = encoder.encode(formattedValue);
+                    await writer.write(encodedValue);
+                  } catch (e) {
+                    console.error("Failed to parse JSON: ", e);
+                  }
+                }
+                if (done) break;
+              }
+
+              // Send the completion marker
+              await writer.write(encoder.encode("data: [DONE]\n\n"));
+              writer.close();
+            } catch (err) {
+              console.error("Stream processing error: ", err);
+              // Make sure the writer is closed on error as well
+              writer.close();
+            }
+          }
+
+          // Start pumping in the background; the response is returned right away
+          processStream().catch((err) => {
+            console.error("Stream processing error: ", err);
+          });
+
+          return new Response(readable, {
+            headers: {
+              "content-type": "text/event-stream",
+              "Cache-Control": "no-cache",
+              Connection: "keep-alive",
+            },
+          });
+        } else {
+          return Response.json({
+            id: uuid,
+            model,
+            created,
+            object: "chat.completion",
+            choices: [
+              {
+                index: 0,
+                message: {
+                  role: "assistant",
+                  content: stream.response,
+                },
+                finish_reason: "stop",
+              },
+            ],
+            usage: {
+              prompt_tokens: 0,
+              completion_tokens: 0,
+              total_tokens: 0,
+            },
+          });
+        }
+      } catch (error) {
+        // Any error thrown above (unparsable body, failed AI call) is answered with 400
+        return new Response(JSON.stringify({ error: "Invalid request" }), {
+          status: 400,
+          headers: { "Content-Type": "application/json" },
+        });
+      }
+    }
+
+    return new Response("Not Found", { status: 404 });
+  },
+};
diff --git a/dev/cf-WorkerAI_schema.json b/dev/cf-WorkerAI_schema.json
new file mode 100644
index 0000000..63ec4d2
--- /dev/null
+++ b/dev/cf-WorkerAI_schema.json
@@ -0,0 +1,290 @@
+{
+  "type": "object",
+  "oneOf": [
+    {
+      "title": "Prompt",
+      "properties": {
+        "prompt": {
+          "type": "string",
+          "minLength": 1,
+          "maxLength": 131072,
+          "description": "The input text prompt for the model to generate a response."
+        },
+        "raw": {
+          "type": "boolean",
+          "default": false,
+          "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting."
+        },
+        "stream": {
+          "type": "boolean",
+          "default": false,
+          "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events."
+        },
+        "max_tokens": {
+          "type": "integer",
+          "default": 256,
+          "description": "The maximum number of tokens to generate in the response."
+        },
+        "temperature": {
+          "type": "number",
+          "default": 0.6,
+          "minimum": 0,
+          "maximum": 5,
+          "description": "Controls the randomness of the output; higher values produce more random results."
+        },
+        "top_p": {
+          "type": "number",
+          "minimum": 0,
+          "maximum": 2,
+          "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses."
+        },
+        "top_k": {
+          "type": "integer",
+          "minimum": 1,
+          "maximum": 50,
+          "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises."
+        },
+        "seed": {
+          "type": "integer",
+          "minimum": 1,
+          "maximum": 9999999999,
+          "description": "Random seed for reproducibility of the generation."
+        },
+        "repetition_penalty": {
+          "type": "number",
+          "minimum": 0,
+          "maximum": 2,
+          "description": "Penalty for repeated tokens; higher values discourage repetition."
+        },
+        "frequency_penalty": {
+          "type": "number",
+          "minimum": 0,
+          "maximum": 2,
+          "description": "Decreases the likelihood of the model repeating the same lines verbatim."
+        },
+        "presence_penalty": {
+          "type": "number",
+          "minimum": 0,
+          "maximum": 2,
+          "description": "Increases the likelihood of the model introducing new topics."
+        },
+        "lora": {
+          "type": "string",
+          "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model."
+        }
+      },
+      "required": ["prompt"]
+    },
+    {
+      "title": "Messages",
+      "properties": {
+        "messages": {
+          "type": "array",
+          "description": "An array of message objects representing the conversation history.",
+          "items": {
+            "type": "object",
+            "properties": {
+              "role": {
+                "type": "string",
+                "description": "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool')."
+              },
+              "content": {
+                "type": "string",
+                "maxLength": 131072,
+                "description": "The content of the message as a string."
+              }
+            },
+            "required": ["role", "content"]
+          }
+        },
+        "functions": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "name": {
+                "type": "string"
+              },
+              "code": {
+                "type": "string"
+              }
+            },
+            "required": ["name", "code"]
+          }
+        },
+        "tools": {
+          "type": "array",
+          "description": "A list of tools available for the assistant to use.",
+          "items": {
+            "type": "object",
+            "oneOf": [
+              {
+                "properties": {
+                  "name": {
+                    "type": "string",
+                    "description": "The name of the tool. More descriptive the better."
+                  },
+                  "description": {
+                    "type": "string",
+                    "description": "A brief description of what the tool does."
+                  },
+                  "parameters": {
+                    "type": "object",
+                    "description": "Schema defining the parameters accepted by the tool.",
+                    "properties": {
+                      "type": {
+                        "type": "string",
+                        "description": "The type of the parameters object (usually 'object')."
+                      },
+                      "required": {
+                        "type": "array",
+                        "description": "List of required parameter names.",
+                        "items": {
+                          "type": "string"
+                        }
+                      },
+                      "properties": {
+                        "type": "object",
+                        "description": "Definitions of each parameter.",
+                        "additionalProperties": {
+                          "type": "object",
+                          "properties": {
+                            "type": {
+                              "type": "string",
+                              "description": "The data type of the parameter."
+                            },
+                            "description": {
+                              "type": "string",
+                              "description": "A description of the expected parameter."
+                            }
+                          },
+                          "required": ["type", "description"]
+                        }
+                      }
+                    },
+                    "required": ["type", "properties"]
+                  }
+                },
+                "required": ["name", "description", "parameters"]
+              },
+              {
+                "properties": {
+                  "type": {
+                    "type": "string",
+                    "description": "Specifies the type of tool (e.g., 'function')."
+                  },
+                  "function": {
+                    "type": "object",
+                    "description": "Details of the function tool.",
+                    "properties": {
+                      "name": {
+                        "type": "string",
+                        "description": "The name of the function."
+                      },
+                      "description": {
+                        "type": "string",
+                        "description": "A brief description of what the function does."
+                      },
+                      "parameters": {
+                        "type": "object",
+                        "description": "Schema defining the parameters accepted by the function.",
+                        "properties": {
+                          "type": {
+                            "type": "string",
+                            "description": "The type of the parameters object (usually 'object')."
+                          },
+                          "required": {
+                            "type": "array",
+                            "description": "List of required parameter names.",
+                            "items": {
+                              "type": "string"
+                            }
+                          },
+                          "properties": {
+                            "type": "object",
+                            "description": "Definitions of each parameter.",
+                            "additionalProperties": {
+                              "type": "object",
+                              "properties": {
+                                "type": {
+                                  "type": "string",
+                                  "description": "The data type of the parameter."
+                                },
+                                "description": {
+                                  "type": "string",
+                                  "description": "A description of the expected parameter."
+                                }
+                              },
+                              "required": ["type", "description"]
+                            }
+                          }
+                        },
+                        "required": ["type", "properties"]
+                      }
+                    },
+                    "required": ["name", "description", "parameters"]
+                  }
+                },
+                "required": ["type", "function"]
+              }
+            ]
+          }
+        },
+        "stream": {
+          "type": "boolean",
+          "default": false,
+          "description": "If true, the response will be streamed back incrementally."
+        },
+        "max_tokens": {
+          "type": "integer",
+          "default": 256,
+          "description": "The maximum number of tokens to generate in the response."
+        },
+        "temperature": {
+          "type": "number",
+          "default": 0.6,
+          "minimum": 0,
+          "maximum": 5,
+          "description": "Controls the randomness of the output; higher values produce more random results."
+        },
+        "top_p": {
+          "type": "number",
+          "minimum": 0,
+          "maximum": 2,
+          "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses."
+        },
+        "top_k": {
+          "type": "integer",
+          "minimum": 1,
+          "maximum": 50,
+          "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises."
+        },
+        "seed": {
+          "type": "integer",
+          "minimum": 1,
+          "maximum": 9999999999,
+          "description": "Random seed for reproducibility of the generation."
+        },
+        "repetition_penalty": {
+          "type": "number",
+          "minimum": 0,
+          "maximum": 2,
+          "description": "Penalty for repeated tokens; higher values discourage repetition."
+        },
+        "frequency_penalty": {
+          "type": "number",
+          "minimum": 0,
+          "maximum": 2,
+          "description": "Decreases the likelihood of the model repeating the same lines verbatim."
+        },
+        "presence_penalty": {
+          "type": "number",
+          "minimum": 0,
+          "maximum": 2,
+          "description": "Increases the likelihood of the model introducing new topics."
+        }
+      },
+      "required": ["messages"]
+    }
+  ]
+}
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..69b0013
--- /dev/null
+++ b/package.json
@@ -0,0 +1,16 @@
+{
+  "name": "cf-ai-endpoint",
+  "version": "1.0.0",
+  "description": "Cloudflare AI endpoint with OpenAI API compatibility",
+  "main": "src/worker.js",
+  "type": "module",
+  "scripts": {
+    "dev": "npx wrangler dev",
+    "deploy": "npx wrangler deploy",
+    "minify": "esbuild src/worker.js --bundle --minify --format=esm --outfile=dist/worker.min.js"
+  },
+  "devDependencies": {
+    "wrangler": "^3.0.0",
+    "esbuild": "^0.25.0"
+  }
+}
diff --git a/src/auth.js b/src/auth.js
new file mode 100644
index 0000000..edf88f6
--- /dev/null
+++ b/src/auth.js
@@ -0,0 +1,7 @@
+import { ValidationError } from './errors';
+
+// Throws ValidationError unless the Authorization value carries a non-empty key that validKeys.has() accepts.
+export const validateApiKey = (apiKey, validKeys) => {
+  const token = (apiKey ?? "").replace(/^Bearer\s+/i, ""); // strip only a leading scheme; scheme names are case-insensitive
+  if (!token || !validKeys.has(token)) throw new ValidationError("Invalid API key", "invalid_api_key");
+};
diff --git a/src/config.js b/src/config.js
new file mode 100644
index 0000000..29c77ea
--- /dev/null
+++ b/src/config.js
@@ -0,0 +1,13 @@
+export const CONFIG = { // Static request limits. NOTE(review): the code that reads them is outside this chunk
+  MAX_CONTENT_LENGTH: 131072, // same number as maxLength for prompt/content in dev/cf-WorkerAI_schema.json
+  PARAM_RANGES: { // per-parameter min / max / type / default; numbers match dev/cf-WorkerAI_schema.json unless noted
+    max_tokens: { min: 1, type: 'integer', default: 256 }, // the schema declares no minimum for max_tokens; min: 1 is local
+    temperature: { min: 0, max: 5, default: 0.6 },
+    top_p: { min: 0, max: 2 },
+    top_k: { min: 1, max: 50, type: 'integer' },
+    frequency_penalty: { min: 0, max: 2 },
+    presence_penalty: { min: 0, max: 2 },
+    repetition_penalty: { min: 0, max: 2 },
+    seed: { min: 1, max: 9999999999, type: 'integer' }
+  }
+};
diff --git a/src/cors.js b/src/cors.js
new file mode 100644
index 0000000..46bed26
--- /dev/null
+++ b/src/cors.js
@@ -0,0 +1,8 @@
+import { CORS_HEADERS } from './headers';
+
+// Answers an OPTIONS request with an empty body and CORS_HEADERS; returns null for every other method.
+export const handleCors = (request) => {
+  const isPreflight = request.method === "OPTIONS";
+  if (!isPreflight) return null;
+  return new Response(null, { headers: CORS_HEADERS });
+};
diff --git a/src/errors.js b/src/errors.js
new file mode 100644
index 0000000..b28a68d
--- /dev/null
+++ b/src/errors.js
@@ -0,0 +1,32 @@
+import { JSON_HEADERS } from './headers';
+
+// Error raised for bad client input; carries code, param and a fixed type alongside the message.
+export class ValidationError extends Error {
+  constructor(message, code, param) {
+    super(message);
+    // Own properties are created in the order code, param, type.
+    Object.assign(this, { code, param, type: 'invalid_request_error' });
+  }
+}
+
+// Turn any thrown error into a JSON error response (message, type, code, param, metadata).
+// The invalid_api_key check runs first: that error is thrown as a ValidationError,
+// so testing instanceof first answered 400 and left the 401 branch unreachable.
+export const errorHandler = (error) => {
+  const status = error.code === 'invalid_api_key' ? 401
+    : error instanceof ValidationError ? 400
+    : 500;
+
+  return Response.json({
+    error: {
+      message: error.message,
+      type: error.type || 'api_error',
+      code: error.code || 'internal_error',
+      param: error.param,
+      metadata: {
+        timestamp: new Date().toISOString(),
+        request_id: crypto.randomUUID()
+      }
+    }
+  }, { status, headers: JSON_HEADERS });
+};
diff --git a/src/formatters.js b/src/formatters.js
new file mode 100644
index 0000000..ee07d5e
--- /dev/null
+++ b/src/formatters.js
@@ -0,0 +1,90 @@
+export const formatters = {
+  toCloudflare: (params, type) => {
+    // 从参数中提取所有可能的字段
+    const {
+      stream = false,
+      max_tokens,
+      temperature,
+      top_p,
+      top_k,
+      seed,
+      repetition_penalty,
+      frequency_penalty,
+      presence_penalty,
+      raw,
+      lora,
+      functions,
+      tools,
+      messages,
+      prompt
+    } = params;
+
+    // 构建基础请求参数
+    const requestParams = {
+      messages: type === 'prompt' 
+        ? [{ role: "user", content: prompt }] 
+        : messages,
+      stream
+    };
+
+    // 添加各种可选参数，只在有值时添加
+    const optionalParams = {
+      max_tokens,
+      temperature,
+      top_p,
+      top_k,
+      seed,
+      repetition_penalty,
+      frequency_penalty,
+      presence_penalty,
+      raw,
+      lora
+    };
+
+    // 添加函数调用相关参数
+    if (functions?.length) {
+      requestParams.functions = functions;
+    }
+    if (tools?.length) {
+      requestParams.tools = tools;
+    }
+
+    // 过滤掉未定义的可选参数
+    Object.entries(optionalParams).forEach(([key, value]) => {
+      if (value !== undefined) {
+        requestParams[key] = value;
+      }
+    });
+
+    return requestParams;
+  },
+
+  toOpenAI: (response, type, model, isStream = false) => {
+    const content = typeof response === 'string' ? response : response.response || '';
+    const id = `${type === 'chat' ? 'chatcmpl' : 'cmpl'}-${crypto.randomUUID()}`;
+    const timestamp = Date.now();
+
+    const baseResponse = {
+      id,
+      created: timestamp,
+      model,
+      choices: [{
+        index: 0,
+        finish_reason: isStream ? null : "stop"
+      }]
+    };
+
+    if (isStream) {
+      baseResponse.object = `${type}.completion.chunk`;
+      baseResponse.choices[0][type === 'chat' ? 'delta' : 'text'] = 
+        type === 'chat' ? { role: 'assistant', content } : content;
+    } else {
+      baseResponse.object = `${type}.completion`;
+      baseResponse.choices[0][type === 'chat' ? 'message' : 'text'] = 
+        type === 'chat' ? { role: 'assistant', content } : content;
+      baseResponse.usage = { prompt_tokens: -1, completion_tokens: -1, total_tokens: -1 };
+    }
+
+    return baseResponse;
+  }
+};
diff --git a/src/headers.js b/src/headers.js
new file mode 100644
index 0000000..3c00d87
--- /dev/null
+++ b/src/headers.js
@@ -0,0 +1,18 @@
+export const CORS_HEADERS = { // returned on preflight replies and spread into the other header sets in this module
+  "Access-Control-Allow-Origin": "*", // any origin may call the API
+  "Access-Control-Allow-Methods": "POST, GET, OPTIONS",
+  "Access-Control-Allow-Headers": "Content-Type, Authorization",
+  "Access-Control-Max-Age": "86400", // seconds (24 h) a browser may cache the preflight result
+};
+
+export const STREAM_HEADERS = { // headers for streamed (server-sent events) responses
+  ...CORS_HEADERS, // CORS entries first; the keys below do not overlap with them
+  "Content-Type": "text/event-stream",
+  "Cache-Control": "no-cache",
+  "Connection": "keep-alive",
+};
+
+export const JSON_HEADERS = { // headers for JSON replies, including the error bodies built in errors.js
+  ...CORS_HEADERS,
+  "Content-Type": "application/json",
+};
diff --git a/src/stream.js b/src/stream.js
new file mode 100644
index 0000000..9f689f9
--- /dev/null
+++ b/src/stream.js
@@ -0,0 +1,38 @@
+import { formatters } from './formatters';
+
+export const createStreamHandler = (type, model) => {
+  const encoder = new TextEncoder();
+  const decoder = new TextDecoder();
+
+  return new TransformStream({
+    transform(chunk, controller) {
+      const text = decoder.decode(chunk);
+      if (!text.trim()) return;
+
+      const lines = text.split("\n").filter(line => line.trim());
+      
+      for (const line of lines) {
+        if (line.includes('[DONE]')) {
+          const finalResponse = formatters.toOpenAI('', type, model, true);
+          finalResponse.choices[0].finish_reason = 'stop';
+          controller.enqueue(encoder.encode(
+            `data: ${JSON.stringify(finalResponse)}\n\ndata: [DONE]\n\n`
+          ));
+          return;
+        }
+
+        if (line.startsWith('data: ')) {
+          try {
+            const rawResponse = JSON.parse(line.slice(6));
+            if (rawResponse?.response !== undefined) {
+              const formatted = formatters.toOpenAI(rawResponse.response, type, model, true);
+              controller.enqueue(encoder.encode(`data: ${JSON.stringify(formatted)}\n\n`));
+            }
+          } catch (e) {
+            console.error('Stream parse error:', e, 'for line:', line);
+          }
+        }
+      }
+    }
+  });
+};
diff --git a/src/worker.js b/src/worker.js
new file mode 100644
index 0000000..6f8fff9
--- /dev/null
+++ b/src/worker.js
@@ -0,0 +1,128 @@
+import { CONFIG } from './config';
+import { ValidationError, errorHandler } from './errors';
+import { formatters } from './formatters';
+import { createStreamHandler } from './stream';
+import { validateApiKey } from './auth';
+import { handleCors } from './cors';
+import { STREAM_HEADERS, JSON_HEADERS } from './headers';
+
+/**
+ * Validates bindings and builds per-request config: the accepted API keys and
+ * the env.AI.models() list, narrowed to env.MODELS (comma-separated) when set.
+ * @throws {Error} If AI/API_KEY is missing or MODELS matches no model.
+ */
+async function getConfig(env) {
+  if (!env.AI) throw new Error('AI binding not configured');
+  if (!env.API_KEY) throw new Error('API_KEY not set');
+
+  // Drop empty entries ("a,,b", trailing comma) so '' never counts as a key or model.
+  const parseList = (csv) => new Set(csv.split(',').map(s => s.trim()).filter(Boolean));
+  const apiKeys = parseList(env.API_KEY);
+  if (!apiKeys.size) throw new Error('API_KEY not set');
+  let models = await env.AI.models();
+  if (env.MODELS) {
+    const allowed = parseList(env.MODELS);
+    models = models.filter(m => allowed.has(m.name));
+    if (!models.length) throw new Error('No valid models configured. Check your MODELS environment variable.');
+  }
+  return { apiKeys, models };
+}
+
+// 参数验证
+function validateParams(params, type) {
+  // 验证内容
+  if (type === 'prompt' ? !params.prompt : !Array.isArray(params.messages)) {
+    throw new ValidationError(
+      `Invalid ${type}`,
+      `invalid_${type}`,
+      type
+    );
+  }
+
+  // 验证参数范围
+  const validatedParams = {
+    model: params.model,
+    stream: params.stream || false,
+    [type]: params[type]
+  };
+
+  for (const [param, range] of Object.entries(CONFIG.PARAM_RANGES)) {
+    const value = params[param] ?? range.default;
+    if (value !== undefined) {
+      if ((range.type === 'integer' && !Number.isInteger(value)) ||
+          value < range.min || (range.max && value > range.max)) {
+        throw new ValidationError(
+          `${param} must be ${range.type === 'integer' ? 'an integer ' : ''}between ${range.min} and ${range.max}`,
+          `invalid_${param}`,
+          param
+        );
+      }
+      validatedParams[param] = value;
+    }
+  }
+
+  return validatedParams;
+}
+
+export default {
+  async fetch(request, env, ctx) {
+    try {
+      // CORS预检处理
+      const corsResponse = handleCors(request);
+      if (corsResponse) return corsResponse;
+
+      const url = new URL(request.url);
+      if (!url.pathname.startsWith('/v1/')) {
+        return new Response('Not Found', { status: 404 });
+      }
+
+      const config = await getConfig(env);
+      validateApiKey(request.headers.get("Authorization"), config.apiKeys);
+
+      switch (url.pathname) {
+        case "/v1/models":
+          if (request.method !== "GET") {
+            throw new ValidationError("Method not allowed", "method_not_allowed");
+          }
+          return Response.json({
+            object: "list",
+            data: config.models
+          }, {
+            headers: JSON_HEADERS
+          });
+
+        case "/v1/completions":
+        case "/v1/chat/completions": {
+          if (request.method !== "POST") {
+            throw new ValidationError("Method not allowed", "method_not_allowed");
+          }
+          
+          const isChat = url.pathname === "/v1/chat/completions";
+          const type = isChat ? "chat" : "completion";
+          const requestData = await request.json();
+          const validatedParams = validateParams(
+            requestData, 
+            isChat ? "messages" : "prompt"
+          );
+          const cfParams = formatters.toCloudflare(validatedParams, type);
+          const response = await env.AI.run(validatedParams.model, cfParams);
+          
+          return validatedParams.stream
+            ? new Response(
+                response.pipeThrough(createStreamHandler(type, validatedParams.model)),
+                { headers: STREAM_HEADERS }
+              )
+            : Response.json(
+                formatters.toOpenAI(response, type, validatedParams.model),
+                { headers: JSON_HEADERS }
+              );
+        }
+
+        default:
+          throw new ValidationError("Not Found", "not_found");
+      }
+    } catch (error) {
+      return errorHandler(error);
+    }
+  }
+};
diff --git a/wrangler.toml b/wrangler.toml
new file mode 100644
index 0000000..b6655cc
--- /dev/null
+++ b/wrangler.toml
@@ -0,0 +1,14 @@
+name = "ai"
+main = "src/worker.js"
+compatibility_date = "2025-01-01"
+
+[ai]
+binding = "AI"
+
+[vars]
+# 允许使用的模型列表，英文逗号分隔
+MODELS = "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b,@hf/meta-llama/meta-llama-3-8b-instruct,@hf/google/gemma-7b-it"
+
+# 生产环境请使用以下命令设置多个 API_KEY (英文逗号分隔)
+# wrangler secret put API_KEY
+# 开发模式可以在同目录下放置 .dev.vars 临时文件配置API_KEY=test-key
\ No newline at end of file