feat: 初始提交
This commit is contained in:
0
backend/app/utils/__init__.py
Normal file
0
backend/app/utils/__init__.py
Normal file
81
backend/app/utils/json_utils.py
Normal file
81
backend/app/utils/json_utils.py
Normal file
@@ -0,0 +1,81 @@
|
||||
import re
|
||||
|
||||
|
||||
def remove_think_tags(raw_text: str) -> str:
    """Remove ``<think>...</think>`` sections so they do not pollute the result.

    Every non-greedy ``<think>`` ... ``</think>`` span (which may cover
    several lines) is deleted and the remaining text is stripped of leading
    and trailing whitespace.

    Args:
        raw_text: Text that may contain ``<think>`` sections.

    Returns:
        The cleaned, stripped text. Falsy input (``None`` or ``""``) is
        returned unchanged.
    """
    if raw_text:
        # DOTALL lets "." cross newlines so multi-line sections are removed.
        without_think = re.sub(r"<think>.*?</think>", "", raw_text, flags=re.DOTALL)
        return without_think.strip()
    return raw_text
|
||||
|
||||
|
||||
def unwrap_markdown_json(raw_text: str) -> str:
    """Extract a JSON string from Markdown or plain text.

    Extraction is attempted in this order:

    1. The content of the first triple-backtick fence (optionally tagged
       ``json`` / ``JSON``), if that content is non-empty.
    2. The span from the first ``{`` or ``[`` up to the last closing
       character of the *matching* kind (``}`` for ``{``, ``]`` for ``[``),
       so that trailing prose containing the other kind of bracket is not
       swallowed into the result.
    3. If no matching closer follows the opener (e.g. truncated output), the
       span up to the last ``}`` or ``]`` of either kind.
    4. Otherwise the stripped input itself.

    Args:
        raw_text: Text that may wrap a JSON document.

    Returns:
        The extracted candidate string. Falsy input (``None`` or ``""``) is
        returned unchanged. The result is not validated as JSON.
    """
    if not raw_text:
        return raw_text

    trimmed = raw_text.strip()

    fence_match = re.search(r"```(?:json|JSON)?\s*(.*?)\s*```", trimmed, re.DOTALL)
    if fence_match:
        candidate = fence_match.group(1).strip()
        if candidate:
            return candidate
        # An empty fence carries nothing useful; fall through to bracket scan.

    json_start_candidates = [
        idx for idx in (trimmed.find("{"), trimmed.find("[")) if idx != -1
    ]
    if json_start_candidates:
        start_idx = min(json_start_candidates)

        # Prefer the closer that pairs with the opener; taking the last closer
        # of either kind would drag trailing text such as "... (see [3])"
        # into an object candidate.
        matching_closer = "}" if trimmed[start_idx] == "{" else "]"
        end_idx = trimmed.rfind(matching_closer)
        if end_idx <= start_idx:
            # No matching closer after the opener: keep the lenient behaviour
            # of cutting at the last closer of either kind.
            end_idx = max(trimmed.rfind("}"), trimmed.rfind("]"))

        if end_idx > start_idx:
            return trimmed[start_idx : end_idx + 1].strip()

    return trimmed
|
||||
|
||||
|
||||
def sanitize_json_like_text(raw_text: str) -> str:
    """Clean JSON-like text that may contain unescaped newlines or quotes.

    The text is scanned character by character while tracking whether the
    scanner is inside a double-quoted string:

    * Raw newline, carriage-return and tab characters inside a string are
      replaced by their ``\\n`` / ``\\r`` / ``\\t`` escape sequences.
    * Existing backslash escapes are copied through untouched.
    * A double quote inside a string is treated as the string's terminator
      only when the next non-whitespace character is ``}``, ``]``, ``,``,
      the end of the text, or ``:`` for strings that are not in a value
      position (so object keys close properly). Any other quote is assumed
      to be an unescaped inner quote and is emitted as ``\\"``.

    A string counts as being in a value position when the last
    non-whitespace character seen outside any string before its opening
    quote was ``:``. For such strings ``:`` is not accepted as a terminator,
    so text like ``"ratio "x": 1"`` keeps its inner quotes escaped.

    This is a heuristic: an inner quote that happens to be followed by one of
    the terminator characters still ends the string early.

    Args:
        raw_text: JSON-like text to clean.

    Returns:
        The cleaned text. Falsy input (``None`` or ``""``) is returned
        unchanged.
    """
    if not raw_text:
        return raw_text

    whitespace = " \t\r\n"
    control_escapes = {"\n": "\\n", "\r": "\\r", "\t": "\\t"}

    result = []
    in_string = False
    escape_next = False
    # True when the current string was opened right after a ":" (object value).
    is_value_string = False
    # Last non-whitespace character seen outside of any string.
    last_structural = ""
    length = len(raw_text)

    for i, ch in enumerate(raw_text):
        if not in_string:
            if ch == '"':
                in_string = True
                is_value_string = last_structural == ":"
            elif ch not in whitespace:
                last_structural = ch
            result.append(ch)
            continue

        if escape_next:
            # Character following a backslash is copied verbatim.
            result.append(ch)
            escape_next = False
        elif ch == "\\":
            result.append(ch)
            escape_next = True
        elif ch == '"':
            # Look past whitespace to decide whether this quote ends the string.
            j = i + 1
            while j < length and raw_text[j] in whitespace:
                j += 1

            # Keys and array items may be followed by ":"; values may not,
            # which keeps `"a "b": c"` style values intact.
            terminators = "}]," if is_value_string else "}],:"
            if j >= length or raw_text[j] in terminators:
                in_string = False
                result.append(ch)
            else:
                result.append('\\"')
        elif ch in control_escapes:
            result.append(control_escapes[ch])
        else:
            result.append(ch)

    return "".join(result)
|
||||
65
backend/app/utils/llm_tool.py
Normal file
65
backend/app/utils/llm_tool.py
Normal file
@@ -0,0 +1,65 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""OpenAI 兼容型 LLM 工具封装,保持与旧项目一致的接口体验。"""
|
||||
|
||||
import os
|
||||
from dataclasses import asdict, dataclass
|
||||
from typing import AsyncGenerator, Dict, List, Optional
|
||||
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
|
||||
@dataclass
class ChatMessage:
    """A single chat message consisting of a role and its content."""

    # Speaker role of the message.
    role: str
    # Text body of the message.
    content: str

    def to_dict(self) -> Dict[str, str]:
        """Return the message as a plain dict keyed by field name."""
        as_mapping: Dict[str, str] = asdict(self)
        return as_mapping
|
||||
|
||||
|
||||
class LLMClient:
    """Async streaming chat wrapper built on the OpenAI SDK client."""

    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
        """Create the underlying ``AsyncOpenAI`` client.

        Args:
            api_key: API key; when falsy, the ``OPENAI_API_KEY`` environment
                variable is used instead.
            base_url: Endpoint base URL; when falsy, the ``OPENAI_API_BASE``
                environment variable is used instead (may be unset).

        Raises:
            ValueError: If no API key is supplied and ``OPENAI_API_KEY`` is
                not set or empty.
        """
        resolved_key = api_key if api_key else os.environ.get("OPENAI_API_KEY")
        if not resolved_key:
            raise ValueError("缺少 OPENAI_API_KEY 配置,请在数据库或环境变量中补全。")

        resolved_base = base_url if base_url else os.environ.get("OPENAI_API_BASE")
        self._client = AsyncOpenAI(api_key=resolved_key, base_url=resolved_base)

    async def stream_chat(
        self,
        messages: List[ChatMessage],
        model: Optional[str] = None,
        response_format: Optional[str] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        max_tokens: Optional[int] = None,
        timeout: int = 120,
        **kwargs,
    ) -> AsyncGenerator[Dict[str, str], None]:
        """Stream a chat completion and yield one dict per received chunk.

        Args:
            messages: Conversation messages; each is converted with
                ``to_dict()``.
            model: Model name; when falsy, the ``MODEL`` environment variable
                is used, defaulting to ``"gpt-3.5-turbo"``.
            response_format: When truthy, sent as
                ``{"type": response_format}``.
            temperature: Sent only when not ``None``.
            top_p: Sent only when not ``None``.
            max_tokens: Sent only when not ``None``.
            timeout: Passed through as the request ``timeout``.
            **kwargs: Extra request arguments merged into the payload.

        Yields:
            Dicts with keys ``"content"`` (the chunk's delta content) and
            ``"finish_reason"``, taken from the first choice of each chunk.
            Chunks without choices are skipped.
        """
        request_args = {
            "model": model or os.environ.get("MODEL", "gpt-3.5-turbo"),
            "messages": [message.to_dict() for message in messages],
            "stream": True,
            "timeout": timeout,
        }
        # Extra arguments are merged before the explicit optional settings,
        # matching the original precedence.
        request_args.update(kwargs)

        if response_format:
            request_args["response_format"] = {"type": response_format}

        optional_settings = (
            ("temperature", temperature),
            ("top_p", top_p),
            ("max_tokens", max_tokens),
        )
        for option_name, option_value in optional_settings:
            if option_value is not None:
                request_args[option_name] = option_value

        response_stream = await self._client.chat.completions.create(**request_args)
        async for chunk in response_stream:
            if chunk.choices:
                first_choice = chunk.choices[0]
                yield {
                    "content": first_choice.delta.content,
                    "finish_reason": first_choice.finish_reason,
                }
|
||||
Reference in New Issue
Block a user