172 lines
6.2 KiB
Python
172 lines
6.2 KiB
Python
import os
import secrets
from typing import List, Optional

import numpy as np
import tiktoken
import torch
import uvicorn
from fastapi import Depends, FastAPI, HTTPException, status
from fastapi.middleware.cors import CORSMiddleware
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from pydantic import BaseModel
from scipy.interpolate import interp1d
from sentence_transformers import SentenceTransformer
from sklearn.decomposition import PCA
from sklearn.preprocessing import PolynomialFeatures
|
||
|
||
# API key that callers must present as the bearer token. Read from the
# 'sk-key' environment variable.
# NOTE(review): the fallback is a hard-coded key; deployments should always
# set the variable explicitly.
sk_key = os.getenv('sk-key', 'sk-aaabbbcccdddeeefffggghhhiiijjjkkk')

# Whether embeddings are automatically resized to the requested dimension.
# Only the value 'true' (case-insensitive) in 'auto-dim' enables it; the
# default is disabled.
auto_dim = os.getenv('auto-dim', 'false').lower() == 'true'

# Model name; a folder with this name must exist under ./models.
model_name = os.getenv('model-name', 'bge-m3')
|
||
|
||
# The FastAPI application object that the route decorators attach to.
app = FastAPI()

# Fully permissive CORS: every origin, method and header is accepted.
# NOTE(review): wildcard origins combined with allow_credentials=True is a
# very open policy - confirm this is intended for the deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Bearer-token extractor used as a dependency by the endpoint.
security = HTTPBearer()
|
||
|
||
# Preload the model once at import time, on the GPU when CUDA is available
# and on the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
    print('本次加载模型的设备为GPU: ', torch.cuda.get_device_name(0))
else:
    device = torch.device('cpu')
    print('本次加载模型的设备为CPU.')

print(f'加载模型: {model_name}')
model = SentenceTransformer(f'./models/{model_name}', device=device)

# Slot for the PCA reducer; it is created lazily by reduce_dimensions.
pca = None
|
||
|
||
class EmbeddingRequest(BaseModel):
    # Request body of the /v1/embeddings endpoint.

    # Texts to embed; the handler produces one embedding per entry.
    input: List[str]

    # Model identifier sent by the client (the handler in this file reports
    # the server-side model name in its response instead).
    model: str

    # Requested embedding length; only consulted when automatic dimension
    # handling is enabled. Defaults to 512.
    dimensions: Optional[int] = 512
|
||
|
||
class EmbeddingResponse(BaseModel):
    # Response body of the /v1/embeddings endpoint.

    # One entry per input text.
    data: list

    # Name of the model that produced the embeddings.
    model: str

    # Kind of payload being returned.
    object: str

    # Token accounting for the request.
    usage: dict
|
||
|
||
def num_tokens_from_string(string: str) -> int:
    """Return the number of ``cl100k_base`` tokens in ``string``.

    Args:
        string: Arbitrary text, typically user input.

    Returns:
        The length of the token sequence produced by the ``cl100k_base``
        encoding.
    """
    encoding = tiktoken.get_encoding('cl100k_base')
    # By default ``encode`` raises ValueError when the text happens to
    # contain the literal spelling of a special token (for example
    # "<|endoftext|>"). The input here is untrusted text being counted, so
    # such sequences are encoded as ordinary text instead of failing.
    return len(encoding.encode(string, disallowed_special=()))
|
||
|
||
# Linear-interpolation resampling
def interpolate_vector(vector, target_length):
    """Resample a 1-D vector to ``target_length`` points by linear interpolation.

    The input samples are treated as lying on the integer positions
    ``0 .. len(vector) - 1``; the output is evaluated at ``target_length``
    evenly spaced positions across that same range, so the first and last
    values are preserved whenever ``target_length >= 2``.

    Args:
        vector: Non-empty 1-D sequence of numbers.
        target_length: Number of points in the result (non-negative).

    Returns:
        A float ``numpy.ndarray`` of length ``target_length``.

    Raises:
        ValueError: If ``vector`` is empty or not one-dimensional, or if
            ``target_length`` is negative (raised by ``numpy.linspace``).
    """
    values = np.asarray(vector, dtype=float)
    if values.ndim != 1 or values.size == 0:
        raise ValueError("vector must be a non-empty 1-D sequence")

    source_positions = np.arange(values.size)
    target_positions = np.linspace(0, values.size - 1, target_length)
    # np.interp performs the same piecewise-linear interpolation without the
    # legacy scipy interp1d class, and a single-sample input simply yields
    # that constant value at every target position.
    return np.interp(target_positions, source_positions, values)
|
||
|
||
def expand_features(embedding, target_length):
    """Expand an embedding with degree-2 polynomial features to a fixed length.

    The embedding is treated as a single sample, expanded with
    ``PolynomialFeatures(degree=2)`` and flattened. The result is then cut
    or zero-padded on the right so that it has exactly ``target_length``
    entries.

    Args:
        embedding: Array exposing ``reshape`` (a numpy array).
        target_length: Length of the returned vector.

    Returns:
        A 1-D array of length ``target_length``.
    """
    single_sample = embedding.reshape(1, -1)
    features = PolynomialFeatures(degree=2).fit_transform(single_sample).flatten()
    current_length = len(features)

    # Too many features: keep only the leading ones.
    if current_length > target_length:
        return features[:target_length]

    # Too few features: pad the tail with zeros.
    if current_length < target_length:
        return np.pad(features, (0, target_length - current_length))

    return features
|
||
|
||
# Dimensionality reduction: project embeddings down to target_dim with PCA
def reduce_dimensions(embeddings, target_dim=512):
    """Reduce a batch of embeddings to ``target_dim`` values per vector.

    Behaviour by batch size:

    * empty batch - returns an empty list;
    * one sample - PCA cannot be fitted, so the vector is resampled with
      ``interpolate_vector`` instead;
    * several samples - PCA is applied with as many components as the data
      allows (at most ``min(target_dim, n_samples, n_features)``); if that is
      fewer than ``target_dim`` the projected vectors are stretched to
      ``target_dim`` with ``interpolate_vector``.

    The fitted PCA object is kept in the module-level ``pca`` variable and
    reused while its component count and input feature count still match.
    NOTE(review): reuse means later batches are transformed with a projection
    fitted on an earlier batch - confirm this is the intended behaviour.

    Args:
        embeddings: Sequence of equal-length 1-D vectors.
        target_dim: Desired length of every returned vector.

    Returns:
        A list with one vector per input sample.
    """
    global pca

    # Convert the incoming list to a numpy array.
    embeddings_array = np.array(embeddings)

    # An empty batch has no feature axis to inspect; there is nothing to do.
    if embeddings_array.shape[0] == 0:
        return []

    n_samples = embeddings_array.shape[0]
    n_features = embeddings_array.shape[1]

    # PCA needs more than one sample; fall back to interpolation.
    if n_samples == 1:
        return [interpolate_vector(embeddings_array[0], target_dim)]

    # The number of components cannot exceed either dimension of the data.
    actual_target_dim = min(target_dim, n_samples, n_features)
    if actual_target_dim < target_dim:
        print(f"警告:目标维度{target_dim}超过了可能的最大值,已调整为{actual_target_dim}")

    # Refit on first use, when the component count changes, or when the
    # cached model was fitted on a different input width (transforming with
    # a mismatched width would raise inside scikit-learn).
    fitted_features = getattr(pca, "n_features_in_", None)
    needs_refit = (
        pca is None
        or pca.n_components != actual_target_dim
        or (fitted_features is not None and fitted_features != n_features)
    )
    if needs_refit:
        pca = PCA(n_components=actual_target_dim)
        reduced_embeddings = pca.fit_transform(embeddings_array)
    else:
        # Reuse the already fitted projection.
        reduced_embeddings = pca.transform(embeddings_array)

    # Stretch the projected vectors when PCA could not deliver target_dim.
    if actual_target_dim < target_dim:
        reduced_embeddings = [
            interpolate_vector(embedding, target_dim)
            for embedding in reduced_embeddings
        ]

    return list(reduced_embeddings)
|
||
|
||
@app.post("/v1/embeddings", response_model=EmbeddingResponse)
async def get_embeddings(request: EmbeddingRequest, credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Embed every input text and return the vectors with token usage.

    Steps: check the bearer token, encode each text with the preloaded
    model, optionally resize the vectors to ``request.dimensions`` (only when
    automatic dimension handling is enabled), L2-normalise them and build the
    response payload.

    Args:
        request: Parsed request body.
        credentials: Bearer credentials extracted from the request.

    Returns:
        A dict with ``data``, ``model``, ``object`` and ``usage`` keys.

    Raises:
        HTTPException: 401 when the bearer token does not match the
            configured key; 400 when automatic dimension handling is enabled
            and ``dimensions`` is not a positive integer.
    """
    # Constant-time comparison so that response timing does not reveal how
    # much of a guessed key is correct. Bytes are compared because
    # compare_digest only accepts ASCII-only str arguments.
    supplied_key = credentials.credentials.encode("utf-8")
    if not secrets.compare_digest(supplied_key, sk_key.encode("utf-8")):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid authorization code",
        )

    texts = request.input

    # NOTE(review): encode() is called synchronously inside an async handler,
    # so other requests presumably wait while a batch is being embedded.
    embeddings = [model.encode(text) for text in texts]

    # Optional resizing; skipped for an empty batch, which has no first
    # vector to measure.
    if auto_dim and embeddings:
        # An explicit null in the request falls back to the declared default.
        target_dim = 512 if request.dimensions is None else request.dimensions
        if target_dim < 1:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="dimensions must be a positive integer",
            )

        embedding_dim = len(embeddings[0])
        if embedding_dim > target_dim:
            # Vectors longer than requested are reduced.
            embeddings = reduce_dimensions(embeddings, target_dim=target_dim)
        elif embedding_dim < target_dim:
            # Vectors shorter than requested are stretched by interpolation.
            embeddings = [interpolate_vector(embedding, target_dim) for embedding in embeddings]

    # L2-normalise. A zero vector is left unchanged: dividing by a zero norm
    # would produce NaN values, which cannot be serialised as JSON.
    normalized = []
    for embedding in embeddings:
        vector = np.asarray(embedding)
        norm = np.linalg.norm(vector)
        normalized.append(vector / norm if norm > 0 else vector)

    # Convert numpy arrays to plain lists for serialisation.
    embeddings = [vector.tolist() for vector in normalized]

    # An embeddings request has no completion tokens, so the prompt count
    # and the total are the same tokenizer-based number.
    total_tokens = sum(num_tokens_from_string(text) for text in texts)
    prompt_tokens = total_tokens

    response = {
        "data": [
            {
                "embedding": embedding,
                "index": index,
                "object": "embedding",
            }
            for index, embedding in enumerate(embeddings)
        ],
        "model": model_name,
        "object": "list",
        "usage": {
            "prompt_tokens": prompt_tokens,
            "total_tokens": total_tokens,
        },
    }

    return response
|
||
|
||
# Script entry point: serve the app defined in this module ("main:app") on
# all interfaces, port 6008, with a single worker process.
if __name__ == "__main__":
    uvicorn.run(
        "main:app",
        host='0.0.0.0',
        port=6008,
        workers=1,
    )
|