feat: 初始提交

2025-10-21 09:38:26 +08:00
parent 2965b8e28f
commit c9fc816fab
175 changed files with 23968 additions and 87 deletions
--- a/backend/app/utils/init.py
+++ b/backend/app/utils/init.py
--- a/backend/app/utils/json_utils.py
+++ b/backend/app/utils/json_utils.py
@@ -0,0 +1,81 @@
+import re
+
+
+def remove_think_tags(raw_text: str) -> str:
+    """Drop every ``<think>...</think>`` span (tags included, may cross lines), then trim whitespace."""
+    if raw_text:  # falsy input (None / "") is handed back untouched
+        return re.compile(r"<think>.*?</think>", re.DOTALL).sub("", raw_text).strip()
+    return raw_text
+
+
+def unwrap_markdown_json(raw_text: str) -> str:
+    """Extract the JSON payload from Markdown-fenced or free-form text.
+
+    Order tried: the body of the first ``` fence (an optional json/JSON tag is
+    skipped) when non-empty, then the span from the first "{" or "[" to the last
+    matching closer. Falsy input is returned as-is; otherwise the stripped text.
+    """
+    if not raw_text:
+        return raw_text
+    trimmed = raw_text.strip()
+    fence_match = re.search(r"```(?:json|JSON)?\s*(.*?)\s*```", trimmed, re.DOTALL)
+    if fence_match and fence_match.group(1).strip():
+        return fence_match.group(1).strip()
+    starts = [idx for idx in (trimmed.find("{"), trimmed.find("[")) if idx != -1]
+    if starts:
+        start_idx = min(starts)
+        # Pair the opener with its own closer so trailing prose like "(see [1])"
+        # cannot stretch the span; if that closer is missing, fall back to the
+        # last closer of either kind (the previous best-effort behaviour).
+        end_idx = trimmed.rfind("}" if trimmed[start_idx] == "{" else "]")
+        if end_idx <= start_idx:
+            end_idx = max(trimmed.rfind("}"), trimmed.rfind("]"))
+        if end_idx > start_idx:
+            return trimmed[start_idx : end_idx + 1]
+    return trimmed
+
+
+def sanitize_json_like_text(raw_text: str) -> str:
+    """Repair JSON-like text whose string literals hold raw quotes or control characters.
+
+    Single pass tracking whether the cursor is inside a string literal. There,
+    raw newline / CR / tab become ``\\n`` / ``\\r`` / ``\\t`` and existing
+    backslash escapes are copied verbatim. A ``"`` closes the string only when
+    the next non-whitespace character is ``:``, ``,``, ``}``, ``]`` or the end
+    of the text; any other ``"`` is treated as stray and emitted as ``\\"``.
+
+    Args:
+        raw_text: Candidate JSON text; falsy input is returned unchanged.
+
+    Returns:
+        The cleaned text (heuristic: not guaranteed to be valid JSON).
+    """
+    if not raw_text:
+        return raw_text
+    control_escapes = {"\n": "\\n", "\r": "\\r", "\t": "\\t"}
+    result = []
+    in_string = escape_next = False
+    length = len(raw_text)
+    for i, ch in enumerate(raw_text):
+        if not in_string:
+            in_string = ch == '"'
+            result.append(ch)
+        elif escape_next:
+            result.append(ch)
+            escape_next = False
+        elif ch == "\\":
+            result.append(ch)
+            escape_next = True
+        elif ch == '"':
+            j = i + 1
+            while j < length and raw_text[j] in " \t\r\n":
+                j += 1
+            # ":" is a terminator too: an object key's closing quote precedes ":".
+            if j >= length or raw_text[j] in ":,}]":
+                in_string = False
+                result.append(ch)
+            else:
+                result.append('\\"')
+        else:
+            result.append(control_escapes.get(ch, ch))
+    return "".join(result)
--- a/backend/app/utils/llm_tool.py
+++ b/backend/app/utils/llm_tool.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+"""OpenAI 兼容型 LLM 工具封装，保持与旧项目一致的接口体验。"""
+
+import os
+from dataclasses import asdict, dataclass
+from typing import AsyncGenerator, Dict, List, Optional
+
+from openai import AsyncOpenAI
+
+
+@dataclass
+class ChatMessage:  # One chat message; LLMClient.stream_chat sends its to_dict() form.
+    role: str  # stored under the "role" key of to_dict()
+    content: str  # stored under the "content" key of to_dict()
+
+    def to_dict(self) -> Dict[str, str]:  # field-name -> value mapping built by dataclasses.asdict
+        return asdict(self)
+
+
+class LLMClient:
+    """Async streaming chat wrapper built on the OpenAI SDK's ``AsyncOpenAI`` client."""
+
+    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
+        """Resolve key / base URL (arguments, else OPENAI_API_KEY / OPENAI_API_BASE); raise ValueError if no key."""
+        key = api_key or os.environ.get("OPENAI_API_KEY")
+        if not key:
+            raise ValueError("缺少 OPENAI_API_KEY 配置，请在数据库或环境变量中补全。")
+        self._client = AsyncOpenAI(api_key=key, base_url=base_url or os.environ.get("OPENAI_API_BASE"))
+
+    async def stream_chat(
+        self,
+        messages: List[ChatMessage],
+        model: Optional[str] = None,
+        response_format: Optional[str] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        timeout: int = 120,
+        **kwargs,
+    ) -> AsyncGenerator[Dict[str, Optional[str]], None]:
+        """Stream a chat completion, yielding one dict per chunk that has choices.
+
+        ``model`` falls back to the MODEL env var, then "gpt-3.5-turbo"; ``response_format``
+        is sent as ``{"type": response_format}``; sampling arguments are sent only when
+        not None; extra ``kwargs`` are forwarded to the SDK call (``stream`` stays True).
+
+        Yields:
+            ``{"content", "finish_reason"}`` of the first choice; either value may be None.
+        """
+        payload = {
+            **kwargs,
+            "model": model or os.environ.get("MODEL", "gpt-3.5-turbo"),
+            "messages": [msg.to_dict() for msg in messages],
+            "stream": True,  # placed after **kwargs: the async-for below needs a streamed reply
+            "timeout": timeout,
+        }
+        optional = {"temperature": temperature, "top_p": top_p, "max_tokens": max_tokens}
+        payload.update({name: value for name, value in optional.items() if value is not None})
+        if response_format:
+            payload["response_format"] = {"type": response_format}
+        stream = await self._client.chat.completions.create(**payload)
+        async for chunk in stream:
+            if chunk.choices:
+                choice = chunk.choices[0]
+                yield {"content": choice.delta.content, "finish_reason": choice.finish_reason}