This commit is contained in:
史悦
2025-08-13 19:03:20 +08:00
commit d62a2e9ed9
73 changed files with 7296 additions and 0 deletions

37
embeddings/.gitignore vendored Normal file
View File

@@ -0,0 +1,37 @@
*/target
*/tomcat
*.iml
.idea/
*.class
target/
tomcat/
.project
.settings/
.classpath
src/main/resources/META-INF
.DS_Store
logs/*
.idea/*
application-local.properties
rebel.xml
LOG_DIR_IS_UNDEFINED
index/*
lora.json
ptuning.json
lora
applogs
__pycache__
answers.json
answers.jsonl
answers_back.json
keys.pkl
data.pkl
make_dataset_schedule.d
config.json
venv/
.env
temp/
.ipynb_checkpoints
/modules/*
cert_cache/*
models

18
embeddings/Dockerfile Normal file
View File

@@ -0,0 +1,18 @@
# Base image: Python 3.9 slim
FROM registry.cn-hangzhou.aliyuncs.com/ripper/python:3.9-slim
# Work inside /app
WORKDIR /app
# Upgrade pip (Tsinghua PyPI mirror)
RUN pip install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple
# Install the dependencies before copying the sources, so this layer is only
# rebuilt when requirements.txt itself changes
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
# Copy the rest of the build context into /app; COPY is preferred over ADD for
# plain local files because it never unpacks archives or fetches URLs
COPY . /app
# Port the service listens on
EXPOSE 6008
# Start the FastAPI application defined in main.py with uvicorn
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "6008"]

55
embeddings/README.md Normal file
View File

@@ -0,0 +1,55 @@
# 本地 Embeddings 模型API服务
下载本地 `Embedding模型` 并转为 `OpenAI` 接口格式的 API 服务。
## 准备工作
- Python 3.9+
- 选择合适的模型文件 (根据效果自行测试), 程序支持自动提升维度或降级维度到指定维度(接口中传递的 `dimensions` 参数, 默认为512)
- 下载模型文件,放置在 `./models` 目录下, 国内下载可以去 [魔搭社区](https://www.modelscope.cn/models/BAAI/bge-m3), 速度不受影响
## 环境变量参数
- `sk-key`: 服务的 `API KEY`,默认为 `sk-aaabbbcccdddeeefffggghhhiiijjjkkk`
- `auto-dim`: 是否自动进行维度操作, 若为 `true` 则会自动将向量提升或降级到请求中 `dimensions` 参数指定的维度(默认为512), 默认为 `false`
- `model-name`: 模型目录名称, 默认为 `bge-m3`, 注意必须在models文件夹下有对应的模型文件夹
## 运行服务
```shell
pip install -r requirements.txt
```
```shell
python main.py
```
```bash
curl --location --request POST 'http://127.0.0.1:6008/v1/embeddings' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer sk-aaabbbcccdddeeefffggghhhiiijjjkkk' \
--data-raw '{
"input": [
"解析当前项目"
],
"model": "text-embedding-3-small",
"dimensions": 512
}'
```
## Docker 运行
将 [docker-compose.yml](docker-compose.yml) 文件放在任意目录下, 然后执行命令:
```bash
docker-compose up -d
```
镜像内已经将模型打包好了, 所以首次执行会比较慢, 如果需要更新或新增模型, 直接将新的模型文件放在 `./models` 目录下, 取消 [docker-compose.yml](docker-compose.yml) 中 `volumes` 配置的注释, 并更改环境变量 `model-name` 即可
然后重新启动服务即可:
```bash
docker-compose restart
# 如果未生效, 可以先停止再启动
docker-compose down
docker-compose up -d
```

View File

@@ -0,0 +1,14 @@
version: '3.3'
services:
  embeddings2openai:
    container_name: embeddings2openai
    image: registry.cn-hangzhou.aliyuncs.com/ripper/embeddings2openai:latest
    restart: always
    ports:
      - "6008:6008"
    environment:
      - sk-key=sk-aaabbbcccdddeeefffggghhhiiijjjkkk
      - auto-dim=false
      - model-name=bge-m3
    # To use your own model, put its files under ./models and uncomment the
    # volume mount below.
    # volumes:
    #   - ./models:/app/models

171
embeddings/main.py Normal file
View File

@@ -0,0 +1,171 @@
import os
import secrets
from typing import List, Optional

import numpy as np
import tiktoken
import torch
import uvicorn
from fastapi import FastAPI, Depends, HTTPException, status
from fastapi.middleware.cors import CORSMiddleware
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from pydantic import BaseModel
from pydantic import validator
from scipy.interpolate import interp1d
from sentence_transformers import SentenceTransformer
from sklearn.decomposition import PCA
from sklearn.preprocessing import PolynomialFeatures
# API key that clients must present as a Bearer token. Read from the `sk-key`
# environment variable, falling back to a built-in default.
sk_key = os.environ.get('sk-key', 'sk-aaabbbcccdddeeefffggghhhiiijjjkkk')

# Whether embeddings are resized to the requested dimension. Only the value
# "true" (case-insensitive) in the `auto-dim` variable switches this on.
auto_dim = os.environ.get('auto-dim', 'false').lower() == 'true'

# Directory name of the model; it is loaded from ./models/<model-name>.
model_name = os.environ.get('model-name', 'bge-m3')

# FastAPI application with CORS opened to every origin, method and header.
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Bearer-token extractor used as a dependency by the endpoint.
security = HTTPBearer()

# Load the model once at import time, on the GPU when one is available and on
# the CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    print('本次加载模型的设备为GPU: ', torch.cuda.get_device_name(0))
else:
    print('本次加载模型的设备为CPU.')
print(f'加载模型: {model_name}')
model = SentenceTransformer(f'./models/{model_name}', device=device)

# Module-level slot for a PCA reducer; starts out unset.
pca = None
class EmbeddingRequest(BaseModel):
    """Request body of ``POST /v1/embeddings``.

    ``input`` may be sent either as a list of strings or as one bare string;
    a bare string is converted to a one-element list before validation, so
    code reading ``input`` always sees a list.
    """

    # Texts to embed, one embedding per entry.
    input: List[str]
    # Model identifier supplied by the client (required).
    model: str
    # Requested embedding length; defaults to 512.
    dimensions: Optional[int] = 512

    @validator('input', pre=True)
    def wrap_single_string(cls, value):
        """Wrap a bare string in a list; leave every other value untouched."""
        if isinstance(value, str):
            return [value]
        return value
class EmbeddingResponse(BaseModel):
    """Response body of ``POST /v1/embeddings``."""

    # One entry per embedded input.
    data: list
    # Name of the model reported back to the client.
    model: str
    # Type tag of the response object.
    object: str
    # Token accounting for the request.
    usage: dict
def num_tokens_from_string(string: str) -> int:
    """Return the number of ``cl100k_base`` tokens in ``string``.

    Args:
        string: Text to tokenize. It is treated purely as text, so literal
            special-token markers inside it are counted like any other
            characters.

    Returns:
        The length of the token sequence produced by tiktoken.
    """
    encoding = tiktoken.get_encoding('cl100k_base')
    # With the default disallowed_special="all", encode() raises ValueError as
    # soon as the text contains a special-token literal such as
    # "<|endoftext|>". Passing an empty tuple encodes such text normally, so
    # arbitrary user input can always be counted.
    return len(encoding.encode(string, disallowed_special=()))
# Linear-interpolation resampling
def interpolate_vector(vector, target_length):
    """Resample a 1-D vector to ``target_length`` points by linear interpolation.

    The input values are placed at positions ``0 .. len(vector) - 1`` and the
    result is sampled at ``target_length`` evenly spaced positions over that
    same range, so the first and last values are preserved. This works both
    for stretching and for shrinking a vector.

    Args:
        vector: One-dimensional sequence or array of numbers; must not be
            empty. A single-element vector yields a constant result.
        target_length: Number of points in the result.

    Returns:
        A NumPy array with ``target_length`` elements.

    Raises:
        ValueError: If ``vector`` is empty.
    """
    source_length = len(vector)
    if source_length == 0:
        raise ValueError("cannot interpolate an empty vector")
    source_positions = np.arange(source_length)
    target_positions = np.linspace(0, source_length - 1, target_length)
    # np.interp does the same piecewise-linear evaluation as a linear interp1d
    # but also accepts a single sample point, which interp1d rejects.
    return np.interp(target_positions, source_positions, vector)
def expand_features(embedding, target_length):
    """Expand an embedding with degree-2 polynomial features to a fixed length.

    The embedding is treated as a single sample, its degree-2 polynomial
    features are computed and flattened, and the result is then cut or
    zero-padded at the end so that it has exactly ``target_length`` entries.

    Args:
        embedding: NumPy array holding one embedding.
        target_length: Length of the returned vector.

    Returns:
        A 1-D NumPy array of the polynomial features, truncated or padded
        with trailing zeros.
    """
    single_row = embedding.reshape(1, -1)
    features = PolynomialFeatures(degree=2).fit_transform(single_row).flatten()
    missing = target_length - len(features)
    if missing < 0:
        # More features than requested: keep only the leading ones.
        return features[:target_length]
    if missing > 0:
        # Fewer features than requested: fill the tail with zeros.
        return np.pad(features, (0, missing))
    return features
# Dimension reduction: resample every embedding to target_dim
def reduce_dimensions(embeddings, target_dim=512):
    """Resize each embedding to ``target_dim`` values, independently of the batch.

    Every vector is resampled on its own with ``interpolate_vector``, which
    is what a single-input request has always received. No projection is
    fitted on the request batch: a projection learned from whichever texts
    happen to share a request maps the same text to different vectors in
    different requests, so embeddings produced by separate calls (for example
    stored documents and a later query) could not be compared.

    Args:
        embeddings: Sequence of equally long 1-D embeddings, or a 2-D array
            with one embedding per row.
        target_dim: Number of values each returned embedding has.

    Returns:
        A list with one NumPy array of length ``target_dim`` per input
        embedding, in input order. An empty input gives an empty list.
    """
    vectors = np.asarray(embeddings)
    if vectors.size == 0:
        return []
    if vectors.ndim == 1:
        # A single bare vector was passed; treat it as a batch of one.
        vectors = vectors.reshape(1, -1)
    return [interpolate_vector(vector, target_dim) for vector in vectors]
@app.post("/v1/embeddings", response_model=EmbeddingResponse)
async def get_embeddings(request: EmbeddingRequest, credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Embed every text in ``request.input`` and return them in list form.

    When ``auto_dim`` is enabled the embeddings are resized to
    ``request.dimensions`` (512 if that field is null): longer vectors go
    through ``reduce_dimensions``, shorter ones through
    ``interpolate_vector``. Every embedding is then scaled to unit L2 norm
    and converted to a plain list.

    Args:
        request: Parsed request body.
        credentials: Bearer token taken from the ``Authorization`` header.

    Returns:
        A dict with ``data`` (one ``{"embedding", "index", "object"}`` entry
        per input), ``model``, ``object`` and ``usage``.

    Raises:
        HTTPException: 401 if the bearer token does not match ``sk_key``;
            400 if resizing is enabled and ``dimensions`` is not positive.
    """
    # Compare in constant time so response timing does not reveal how much of
    # the key matched. Bytes are used because compare_digest only accepts
    # ASCII-only str values.
    presented_key = credentials.credentials.encode('utf-8')
    if not secrets.compare_digest(presented_key, sk_key.encode('utf-8')):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid authorization code",
        )

    target_dim = request.dimensions if request.dimensions is not None else 512
    if auto_dim and target_dim <= 0:
        # Reject before running the model: a non-positive length cannot be
        # produced by resampling.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="dimensions must be a positive integer",
        )

    embeddings = [model.encode(text) for text in request.input]

    # An empty input list produces no embeddings, so there is nothing to
    # measure or resize in that case.
    if auto_dim and embeddings:
        embedding_dim = len(embeddings[0])
        if embedding_dim > target_dim:
            embeddings = reduce_dimensions(embeddings, target_dim=target_dim)
        elif embedding_dim < target_dim:
            embeddings = [interpolate_vector(embedding, target_dim) for embedding in embeddings]

    # Scale to unit length. A zero vector is left as is, because dividing by
    # a zero norm would yield NaN values that cannot be serialized to JSON.
    normalized = []
    for embedding in embeddings:
        norm = np.linalg.norm(embedding)
        normalized.append(embedding / norm if norm > 0 else embedding)
    embeddings = [embedding.tolist() for embedding in normalized]

    # An embedding request has no completion part, so the prompt tokens are
    # the total tokens; both use the same tokenizer count.
    total_tokens = sum(num_tokens_from_string(text) for text in request.input)
    prompt_tokens = total_tokens

    response = {
        "data": [
            {
                "embedding": embedding,
                "index": index,
                "object": "embedding"
            } for index, embedding in enumerate(embeddings)
        ],
        "model": model_name,
        "object": "list",
        "usage": {
            "prompt_tokens": prompt_tokens,
            "total_tokens": total_tokens,
        }
    }
    return response
if __name__ == "__main__":
    # Run the service directly with a single uvicorn worker, listening on
    # every interface at port 6008.
    uvicorn.run(
        "main:app",
        host='0.0.0.0',
        port=6008,
        workers=1,
    )

View File

@@ -0,0 +1,11 @@
fastapi==0.100.0
pydantic==1.10.7
sentence-transformers==3.4.1
uvicorn==0.23.1
tiktoken==0.4.0
numpy==2.0.2
scipy==1.13.1
scikit-learn==1.6.1
torch
torchvision
torchaudio