提交
This commit is contained in:
37
embeddings/.gitignore
vendored
Normal file
37
embeddings/.gitignore
vendored
Normal file
@@ -0,0 +1,37 @@
|
||||
*/target
|
||||
*/tomcat
|
||||
*.iml
|
||||
.idea/
|
||||
*.class
|
||||
target/
|
||||
tomcat/
|
||||
.project
|
||||
.settings/
|
||||
.classpath
|
||||
src/main/resources/META-INF
|
||||
.DS_Store
|
||||
logs/*
|
||||
.idea/*
|
||||
application-local.properties
|
||||
rebel.xml
|
||||
LOG_DIR_IS_UNDEFINED
|
||||
index/*
|
||||
lora.json
|
||||
ptuning.json
|
||||
lora
|
||||
applogs
|
||||
__pycache__
|
||||
answers.json
|
||||
answers.jsonl
|
||||
answers_back.json
|
||||
keys.pkl
|
||||
data.pkl
|
||||
make_dataset_schedule.d
|
||||
config.json
|
||||
venv/
|
||||
.env
|
||||
temp/
|
||||
.ipynb_checkpoints
|
||||
/modules/*
|
||||
cert_cache/*
|
||||
models
|
||||
18
embeddings/Dockerfile
Normal file
18
embeddings/Dockerfile
Normal file
@@ -0,0 +1,18 @@
|
||||
# Base image: Python 3.9 (slim variant) pulled from the Aliyun registry
FROM registry.cn-hangzhou.aliyuncs.com/ripper/python:3.9-slim

# Working directory for all following instructions and for the running container
WORKDIR /app

# Upgrade pip first (Tsinghua PyPI mirror)
RUN pip install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple

# Install dependencies BEFORE copying the sources: this layer is then rebuilt
# only when requirements.txt changes, not on every source edit
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

# Copy the rest of the build context into /app
# (COPY instead of ADD: no archive extraction or URL fetching is wanted here)
COPY . /app

# Port the service listens on
EXPOSE 6008

# Serve the FastAPI application object `app` from main.py with uvicorn
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "6008"]
|
||||
55
embeddings/README.md
Normal file
55
embeddings/README.md
Normal file
@@ -0,0 +1,55 @@
|
||||
# 本地 Embeddings 模型API服务
|
||||
|
||||
下载本地 `Embedding模型` 并转为 `OpenAI` 接口格式的 API 服务。
|
||||
|
||||
## 准备工作
|
||||
- Python 3.9+
|
||||
- 选择合适的模型文件 (根据效果自行测试), 程序支持自动提升维度或降级维度到指定维度(接口中传递的 `dimensions` 参数, 默认为512)
|
||||
- 下载模型文件,放置在 `./models` 目录下, 国内下载可以去 [魔搭社区](https://www.modelscope.cn/models/BAAI/bge-m3), 速度不受影响
|
||||
|
||||
## 环境变量参数
|
||||
- `sk-key`: 服务的 `API KEY`,默认为 `sk-aaabbbcccdddeeefffggghhhiiijjjkkk`
|
||||
- `auto-dim`: 是否自动进行维度操作, 若为 `true` 则会自动将向量提升或降低到请求中 `dimensions` 参数指定的维度 (默认为512), 默认为 `false`
|
||||
- `model-name`: 模型目录名称, 默认为 `bge-m3`, 注意必须在models文件夹下有对应的模型文件夹
|
||||
|
||||
|
||||
## 运行服务
|
||||
```shell
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
```shell
|
||||
python main.py
|
||||
```
|
||||
|
||||
```bash
|
||||
curl --location --request POST 'http://127.0.0.1:6008/v1/embeddings' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: Bearer sk-aaabbbcccdddeeefffggghhhiiijjjkkk' \
|
||||
--data-raw '{
|
||||
"input": [
|
||||
"解析当前项目"
|
||||
],
|
||||
"model": "text-embedding-3-small",
|
||||
"dimensions": 512
|
||||
}'
|
||||
```
|
||||
|
||||
## Docker 运行
|
||||
|
||||
将 [docker-compose.yml](docker-compose.yml) 文件放在任意目录下, 然后执行命令:
|
||||
```bash
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
镜像内已经将模型打包好了, 所以首次执行会比较慢, 如果需要更新或新增模型, 直接将新的模型文件放在 `./models` 目录下, 更改环境变量 `model-name` 即可
|
||||
|
||||
然后重新启动服务即可:
|
||||
```bash
|
||||
docker-compose restart
|
||||
|
||||
# 如果未生效, 可以先停止再启动
|
||||
docker-compose down
|
||||
|
||||
docker-compose up -d
|
||||
```
|
||||
14
embeddings/docker-compose.yml
Normal file
14
embeddings/docker-compose.yml
Normal file
@@ -0,0 +1,14 @@
|
||||
version: '3.3'
|
||||
services:
|
||||
embeddings2openai:
|
||||
container_name: embeddings2openai
|
||||
image: registry.cn-hangzhou.aliyuncs.com/ripper/embeddings2openai:latest
|
||||
restart: always
|
||||
ports:
|
||||
- 6008:6008
|
||||
environment:
|
||||
- sk-key=sk-aaabbbcccdddeeefffggghhhiiijjjkkk
|
||||
- auto-dim=false
|
||||
- model-name=bge-m3
|
||||
# volumes:
|
||||
# - ./models:/app/models # 如果需要自己更改模型, 将模型文件放到models文件夹下, 并取消注释
|
||||
171
embeddings/main.py
Normal file
171
embeddings/main.py
Normal file
@@ -0,0 +1,171 @@
|
||||
from fastapi import FastAPI, Depends, HTTPException, status
|
||||
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from pydantic import BaseModel
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
import uvicorn
|
||||
import tiktoken
|
||||
import numpy as np
|
||||
from scipy.interpolate import interp1d
|
||||
from typing import List, Optional
|
||||
from sklearn.preprocessing import PolynomialFeatures
|
||||
from sklearn.decomposition import PCA
|
||||
import torch
|
||||
import os
|
||||
|
||||
# Runtime configuration, read once from environment variables at import time.
#
# The documented variable names contain hyphens ("sk-key", "auto-dim",
# "model-name"). docker-compose can set such names, but a POSIX shell cannot
# `export` them, so every setting also accepts an upper-case underscore alias
# ("SK_KEY", "AUTO_DIM", "MODEL_NAME"). The hyphenated name wins when both are
# present, so existing deployments behave exactly as before.
def _env_setting(default, *names):
    """Return the first environment variable in *names* that is set, else *default*."""
    for name in names:
        value = os.environ.get(name)
        if value is not None:
            return value
    return default


# API key that clients must present as the Bearer token.
# NOTE(review): the fallback is a placeholder value; override it in any real deployment.
sk_key = _env_setting('sk-aaabbbcccdddeeefffggghhhiiijjjkkk', 'sk-key', 'SK_KEY')
# Whether embeddings are resized to the requested size; only the string
# "true" (case-insensitive) enables it, anything else means disabled.
auto_dim = _env_setting('false', 'auto-dim', 'AUTO_DIM').lower() == 'true'
# Directory name of the model; a folder of that name must exist under ./models.
model_name = _env_setting('bge-m3', 'model-name', 'MODEL_NAME')
|
||||
|
||||
# Application object that routes and middleware are registered on.
app = FastAPI()

# Cross-origin policy: every origin, method and header is accepted, and
# credentialed requests are allowed.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive — confirm this is intended for the deployment.
_cors_options = dict(
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
app.add_middleware(CORSMiddleware, **_cors_options)

# HTTP Bearer scheme instance, used to read the Authorization header.
security = HTTPBearer()
|
||||
|
||||
# Choose the inference device once at start-up: CUDA when available, else CPU.
_cuda_available = torch.cuda.is_available()
device = torch.device('cuda' if _cuda_available else 'cpu')
if not _cuda_available:
    print('本次加载模型的设备为CPU.')
else:
    print('本次加载模型的设备为GPU: ', torch.cuda.get_device_name(0))

# Load the embedding model from ./models/<model_name> onto the chosen device.
print(f'加载模型: {model_name}')
_model_dir = f'./models/{model_name}'
model = SentenceTransformer(_model_dir, device=device)

# PCA reducer; stays None until a reduction call fits one.
pca = None
|
||||
|
||||
class EmbeddingRequest(BaseModel):
    """Request body of the embeddings endpoint."""

    # Texts to embed, as a list of strings.
    input: List[str]
    # Model identifier supplied by the client (required).
    model: str
    # Requested embedding size; 512 when the field is omitted.
    dimensions: Optional[int] = 512
|
||||
|
||||
class EmbeddingResponse(BaseModel):
    """Response body of the embeddings endpoint."""

    # List of embedding entries.
    data: list
    # Name of the model reported back to the client.
    model: str
    # Type tag of the response object.
    object: str
    # Token accounting for the request.
    usage: dict
|
||||
|
||||
def num_tokens_from_string(string: str) -> int:
    """Count the tokens that tiktoken's ``cl100k_base`` encoding yields for *string*."""
    tokenizer = tiktoken.get_encoding('cl100k_base')
    return len(tokenizer.encode(string))
|
||||
|
||||
# Linear-interpolation resampling
def interpolate_vector(vector, target_length):
    """Resample a 1-D vector to ``target_length`` points by linear interpolation.

    The input samples are treated as lying at positions ``0 .. len(vector)-1``;
    the output is evaluated at ``target_length`` evenly spaced positions over
    that same range, so the first and last values are preserved whenever
    ``target_length >= 2``. A single-element vector yields a constant output.

    Args:
        vector: Non-empty 1-D sequence or array of numbers.
        target_length: Number of points in the result.

    Returns:
        A float ``numpy.ndarray`` of length ``target_length``.

    Raises:
        ValueError: If ``vector`` is empty or not one-dimensional.
    """
    values = np.asarray(vector, dtype=float)
    if values.ndim != 1 or values.size == 0:
        raise ValueError("vector must be a non-empty 1-D sequence")

    source_positions = np.arange(values.size)
    target_positions = np.linspace(0, values.size - 1, target_length)
    # np.interp performs the piecewise-linear interpolation and, unlike an
    # interpolator built from two or more points, also works for one sample.
    return np.interp(target_positions, source_positions, values)
|
||||
|
||||
def expand_features(embedding, target_length):
    """Expand an embedding with degree-2 polynomial features, then fit it to ``target_length``.

    The embedding is reshaped to a single row, expanded with
    ``PolynomialFeatures(degree=2)`` and flattened. A result longer than
    ``target_length`` is truncated; a shorter one is zero-padded at the end.

    Args:
        embedding: 1-D numpy array (must support ``reshape``).
        target_length: Desired length of the returned vector.

    Returns:
        1-D numpy array of length ``target_length``.
    """
    features = PolynomialFeatures(degree=2).fit_transform(embedding.reshape(1, -1)).flatten()
    shortfall = target_length - len(features)
    if shortfall < 0:
        # Too many expanded features: keep only the leading ones.
        return features[:target_length]
    if shortfall > 0:
        # Too few expanded features: append zeros up to the target length.
        return np.pad(features, (0, shortfall))
    return features
|
||||
|
||||
# Dimensionality reduction: PCA for batches, interpolation for a single vector
def reduce_dimensions(embeddings, target_dim=512):
    """Reduce a batch of embeddings to ``target_dim`` values each.

    Behaviour by batch size:
      * empty batch  -> returns ``[]``;
      * one sample   -> PCA cannot be fitted, so the vector is resampled with
        ``interpolate_vector`` instead;
      * two or more  -> PCA with ``min(target_dim, n_samples, n_features)``
        components; if that is fewer than ``target_dim`` the PCA output is
        stretched to ``target_dim`` with ``interpolate_vector``.

    The fitted PCA is cached in the module-level ``pca`` and reused by later
    calls that need the same number of components on the same feature count;
    otherwise it is refitted on the current batch.

    NOTE(review): single-sample output (interpolation) and batch output (PCA)
    are produced by different transforms — confirm callers do not compare them.

    Args:
        embeddings: Sequence of equal-length 1-D vectors.
        target_dim: Desired length of every returned vector.

    Returns:
        A list with one 1-D numpy array per input vector.
    """
    global pca

    embeddings_array = np.array(embeddings)

    # An empty batch has nothing to reduce and no second axis to inspect.
    if embeddings_array.size == 0:
        return []

    n_samples = embeddings_array.shape[0]
    n_features = embeddings_array.shape[1]

    # PCA is not usable on a single sample; fall back to interpolation.
    if n_samples == 1:
        return [interpolate_vector(embeddings_array[0], target_dim)]

    # PCA cannot produce more components than min(n_samples, n_features).
    actual_target_dim = min(target_dim, n_samples, n_features)
    if actual_target_dim < target_dim:
        print(f"警告:目标维度{target_dim}超过了可能的最大值,已调整为{actual_target_dim}")

    # Refit when there is no cached PCA, when the component count changed, or
    # when the cached PCA was fitted on a different number of input features
    # (transform() would fail on a feature-count mismatch).
    needs_fit = (
        pca is None
        or pca.n_components != actual_target_dim
        or getattr(pca, 'n_features_in_', n_features) != n_features
    )
    if needs_fit:
        pca = PCA(n_components=actual_target_dim)
        reduced_embeddings = pca.fit_transform(embeddings_array)
    else:
        reduced_embeddings = pca.transform(embeddings_array)

    # Stretch to the requested length when PCA had to use fewer components.
    if actual_target_dim < target_dim:
        reduced_embeddings = [
            interpolate_vector(embedding, target_dim) for embedding in reduced_embeddings
        ]

    return list(reduced_embeddings)
|
||||
|
||||
@app.post("/v1/embeddings", response_model=EmbeddingResponse)
async def get_embeddings(request: EmbeddingRequest, credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Embed every text in the request and return an OpenAI-style embeddings response.

    Steps: check the Bearer token against ``sk_key``; encode each input text
    with the preloaded model; when ``auto_dim`` is enabled, resize the vectors
    to the requested ``dimensions``; L2-normalise; report token usage.

    Args:
        request: Parsed request body (texts, client model name, dimensions).
        credentials: Bearer credentials extracted from the Authorization header.

    Returns:
        dict with ``data`` (one ``{"embedding", "index", "object"}`` entry per
        input text), ``model``, ``object`` and ``usage``.

    Raises:
        HTTPException: 401 when the token does not match; 400 when ``auto_dim``
            is enabled and ``dimensions`` is not a positive integer.
    """
    # Local import keeps this block self-contained; the file's import header is unchanged.
    import secrets

    # Constant-time comparison so response timing does not leak key prefixes.
    # Both sides are encoded because compare_digest rejects non-ASCII str.
    supplied_key = credentials.credentials.encode("utf-8")
    expected_key = sk_key.encode("utf-8")
    if not secrets.compare_digest(supplied_key, expected_key):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid authorization code",
        )

    # `dimensions` is Optional: an explicit null falls back to the field default.
    target_dim = 512 if request.dimensions is None else request.dimensions
    if auto_dim and target_dim < 1:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="dimensions must be a positive integer",
        )

    # NOTE(review): model.encode is a synchronous call inside an async handler.
    embeddings = [model.encode(text) for text in request.input]

    # Resize only when enabled and there is at least one vector to inspect.
    if auto_dim and embeddings:
        embedding_dim = len(embeddings[0])
        if embedding_dim > target_dim:
            # Model output is wider than requested: reduce.
            embeddings = reduce_dimensions(embeddings, target_dim=target_dim)
        elif embedding_dim < target_dim:
            # Model output is narrower than requested: stretch by interpolation.
            embeddings = [interpolate_vector(embedding, target_dim) for embedding in embeddings]

    # L2-normalise and convert to plain lists. A zero vector is left as-is:
    # dividing by a zero norm would produce NaN values.
    normalized = []
    for embedding in embeddings:
        norm = np.linalg.norm(embedding)
        unit = embedding / norm if norm > 0 else embedding
        normalized.append(unit.tolist())

    # An embeddings request has no completion tokens, so prompt and total are
    # the same count; both use the tokenizer rather than a whitespace split,
    # which undercounts text without spaces.
    total_tokens = sum(num_tokens_from_string(text) for text in request.input)
    prompt_tokens = total_tokens

    response = {
        "data": [
            {
                "embedding": embedding,
                "index": index,
                "object": "embedding"
            } for index, embedding in enumerate(normalized)
        ],
        "model": model_name,
        "object": "list",
        "usage": {
            "prompt_tokens": prompt_tokens,
            "total_tokens": total_tokens,
        }
    }

    return response
|
||||
|
||||
# Script entry point: serve the app with uvicorn on all interfaces,
# port 6008, with a single worker process.
if __name__ == "__main__":
    uvicorn.run(
        "main:app",
        workers=1,
        port=6008,
        host='0.0.0.0',
    )
|
||||
11
embeddings/requirements.txt
Normal file
11
embeddings/requirements.txt
Normal file
@@ -0,0 +1,11 @@
|
||||
fastapi==0.100.0
|
||||
pydantic==1.10.7
|
||||
sentence-transformers==3.4.1
|
||||
uvicorn==0.23.1
|
||||
tiktoken==0.4.0
|
||||
numpy==2.0.2
|
||||
scipy==1.13.1
|
||||
scikit-learn==1.6.1
|
||||
torch
|
||||
torchvision
|
||||
torchaudio
|
||||
Reference in New Issue
Block a user