feat: enhance TTS request handling by refactoring methods, adding OpenAI request support, and improving text segmentation

This commit is contained in:
王锦强
2025-03-16 20:24:04 +08:00
parent 6fa5c1f467
commit 9100930d3a
5 changed files with 348 additions and 348 deletions

3
go.mod
View File

@@ -5,6 +5,7 @@ go 1.23.0
toolchain go1.24.0 toolchain go1.24.0
require ( require (
github.com/gin-gonic/gin v1.10.0
github.com/google/uuid v1.6.0 github.com/google/uuid v1.6.0
github.com/sirupsen/logrus v1.9.3 github.com/sirupsen/logrus v1.9.3
github.com/spf13/viper v1.19.0 github.com/spf13/viper v1.19.0
@@ -14,11 +15,9 @@ require (
github.com/bytedance/sonic v1.13.1 // indirect github.com/bytedance/sonic v1.13.1 // indirect
github.com/bytedance/sonic/loader v0.2.4 // indirect github.com/bytedance/sonic/loader v0.2.4 // indirect
github.com/cloudwego/base64x v0.1.5 // indirect github.com/cloudwego/base64x v0.1.5 // indirect
github.com/cloudwego/iasm v0.2.0 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/gabriel-vasile/mimetype v1.4.8 // indirect github.com/gabriel-vasile/mimetype v1.4.8 // indirect
github.com/gin-contrib/sse v1.0.0 // indirect github.com/gin-contrib/sse v1.0.0 // indirect
github.com/gin-gonic/gin v1.10.0 // indirect
github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/go-playground/validator/v10 v10.25.0 // indirect github.com/go-playground/validator/v10 v10.25.0 // indirect

13
go.sum
View File

@@ -5,7 +5,6 @@ github.com/bytedance/sonic/loader v0.2.4 h1:ZWCw4stuXUsn1/+zQDqeE7JKP+QO47tz7QCN
github.com/bytedance/sonic/loader v0.2.4/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI= github.com/bytedance/sonic/loader v0.2.4/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI=
github.com/cloudwego/base64x v0.1.5 h1:XPciSp1xaq2VCSt6lF0phncD4koWyULpl5bUxbfCyP4= github.com/cloudwego/base64x v0.1.5 h1:XPciSp1xaq2VCSt6lF0phncD4koWyULpl5bUxbfCyP4=
github.com/cloudwego/base64x v0.1.5/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= github.com/cloudwego/base64x v0.1.5/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -21,6 +20,8 @@ github.com/gin-contrib/sse v1.0.0 h1:y3bT1mUWUxDpW4JLQg/HnTqV4rozuW4tC9eFKTxYI9E
github.com/gin-contrib/sse v1.0.0/go.mod h1:zNuFdwarAygJBht0NTKiSi3jRf6RbqeILZ9Sp6Slhe0= github.com/gin-contrib/sse v1.0.0/go.mod h1:zNuFdwarAygJBht0NTKiSi3jRf6RbqeILZ9Sp6Slhe0=
github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU= github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
@@ -59,8 +60,6 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M= github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M=
github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc= github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
@@ -94,8 +93,7 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8=
github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=
@@ -117,12 +115,8 @@ golang.org/x/net v0.37.0 h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c=
golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
@@ -136,4 +130,3 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=

View File

@@ -13,6 +13,7 @@ import (
"tts/internal/config" "tts/internal/config"
"tts/internal/models" "tts/internal/models"
"tts/internal/tts" "tts/internal/tts"
"tts/internal/utils"
"unicode/utf8" "unicode/utf8"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
@@ -106,151 +107,8 @@ func NewTTSHandler(service tts.Service, cfg *config.Config) *TTSHandler {
} }
} }
// HandleOpenAITTS 处理OpenAI兼容的TTS请求 // processTTSRequest 处理TTS请求的核心逻辑
func (h *TTSHandler) HandleOpenAITTS(c *gin.Context) { func (h *TTSHandler) processTTSRequest(c *gin.Context, req models.TTSRequest, startTime time.Time, parseTime time.Duration, requestType string) {
// 记录请求开始时间
startTime := time.Now()
// 只支持POST请求
if c.Request.Method != http.MethodPost {
c.AbortWithStatusJSON(http.StatusMethodNotAllowed, gin.H{"error": "仅支持POST请求"})
return
}
// 解析请求
var openaiReq struct {
Model string `json:"model"`
Input string `json:"input"`
Voice string `json:"voice"`
Speed float64 `json:"speed"`
}
if err := c.ShouldBindJSON(&openaiReq); err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "无效的JSON请求: " + err.Error()})
return
}
// 记录解析时间
parseTime := time.Since(startTime)
// 检查必需字段
if openaiReq.Input == "" {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "input字段不能为空"})
return
}
// 映射OpenAI声音到Microsoft声音
msVoice := openaiReq.Voice
if openaiReq.Voice != "" && h.config.TTS.VoiceMapping[openaiReq.Voice] != "" {
msVoice = h.config.TTS.VoiceMapping[openaiReq.Voice]
}
// 转换速度参数到微软格式
msRate := h.config.TTS.DefaultRate
if openaiReq.Speed != 0 {
// OpenAI速度转换为微软速度格式
// OpenAI: 0.5(慢速), 1.0(正常), 2.0(快速)
// 微软: "-50%"(慢), "+0%"(中), "+100%"(快)
speedPercentage := (openaiReq.Speed - 1.0) * 100
if speedPercentage >= 0 {
msRate = fmt.Sprintf("+%.0f", speedPercentage)
} else {
msRate = fmt.Sprintf("%.0f", speedPercentage)
}
}
// 创建内部TTS请求
req := models.TTSRequest{
Text: openaiReq.Input,
Voice: msVoice,
Rate: msRate,
Pitch: h.config.TTS.DefaultPitch,
Style: openaiReq.Model,
}
log.Printf("OpenAI TTS请求: model=%s, voice=%s → %s, speed=%.2f → %s, 文本长度=%d",
openaiReq.Model, openaiReq.Voice, msVoice, openaiReq.Speed, msRate, len(req.Text))
// 检查文本长度
if len(req.Text) > h.config.TTS.MaxTextLength {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "文本长度超过限制"})
return
}
// 检查是否需要分段处理
segmentThreshold := h.config.TTS.SegmentThreshold
if len(req.Text) > segmentThreshold && len(req.Text) <= h.config.TTS.MaxTextLength {
log.Printf("文本长度 %d 超过阈值 %d使用分段处理", len(req.Text), segmentThreshold)
// 使用分段处理
h.handleSegmentedTTS(c, req)
return
}
// 非流式模式处理
synthStart := time.Now()
resp, err := h.ttsService.SynthesizeSpeech(c.Request.Context(), req)
synthTime := time.Since(synthStart)
log.Printf("TTS合成耗时: %v, 文本长度: %d", synthTime, len(req.Text))
if err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "语音合成失败: " + err.Error()})
return
}
// 设置响应
c.Header("Content-Type", "audio/mpeg")
writeStart := time.Now()
c.Writer.Write(resp.AudioContent)
writeTime := time.Since(writeStart)
// 记录总耗时
totalTime := time.Since(startTime)
log.Printf("OpenAI TTS请求总耗时: %v (解析: %v, 合成: %v, 写入: %v), 音频大小: %s",
totalTime, parseTime, synthTime, writeTime, formatFileSize(len(resp.AudioContent)))
}
// HandleTTS 处理TTS请求
func (h *TTSHandler) HandleTTS(c *gin.Context) {
// 记录请求开始时间
startTime := time.Now()
// 解析请求参数
var req models.TTSRequest
switch c.Request.Method {
case http.MethodGet:
// 从URL参数获取
req = models.TTSRequest{
Text: c.Query("t"),
Voice: c.Query("v"),
Rate: c.Query("r"),
Pitch: c.Query("p"),
Style: c.Query("s"),
}
case http.MethodPost:
// 从POST JSON体获取
if c.ContentType() == "application/json" {
if err := c.ShouldBindJSON(&req); err != nil {
log.Printf("JSON解析错误: %v", err)
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "无效的JSON请求"})
return
}
} else {
// 表单数据
if err := c.ShouldBind(&req); err != nil {
log.Printf("表单解析错误: %v", err)
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "无法解析表单数据"})
return
}
}
default:
log.Printf("不支持的HTTP方法: %s", c.Request.Method)
c.AbortWithStatusJSON(http.StatusMethodNotAllowed, gin.H{"error": "仅支持GET和POST请求"})
return
}
parseTime := time.Since(startTime)
// 验证必要参数 // 验证必要参数
if req.Text == "" { if req.Text == "" {
log.Print("错误: 未提供文本参数") log.Print("错误: 未提供文本参数")
@@ -259,6 +117,51 @@ func (h *TTSHandler) HandleTTS(c *gin.Context) {
} }
// 使用默认值填充空白参数 // 使用默认值填充空白参数
h.fillDefaultValues(&req)
// 检查文本长度
reqTextLength := utf8.RuneCountInString(req.Text)
if reqTextLength > h.config.TTS.MaxTextLength {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "文本长度超过限制"})
return
}
// 检查是否需要分段处理
segmentThreshold := h.config.TTS.SegmentThreshold
if reqTextLength > segmentThreshold && reqTextLength <= h.config.TTS.MaxTextLength {
log.Printf("文本长度 %d 超过阈值 %d使用分段处理", reqTextLength, segmentThreshold)
h.handleSegmentedTTS(c, req)
return
}
synthStart := time.Now()
resp, err := h.ttsService.SynthesizeSpeech(c.Request.Context(), req)
synthTime := time.Since(synthStart)
log.Printf("TTS合成耗时: %v, 文本长度: %d", synthTime, reqTextLength)
if err != nil {
log.Printf("TTS合成失败: %v", err)
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "语音合成失败: " + err.Error()})
return
}
// 设置响应
c.Header("Content-Type", "audio/mpeg")
writeStart := time.Now()
if _, err := c.Writer.Write(resp.AudioContent); err != nil {
log.Printf("写入响应失败: %v", err)
return
}
writeTime := time.Since(writeStart)
// 记录总耗时
totalTime := time.Since(startTime)
log.Printf("%s请求总耗时: %v (解析: %v, 合成: %v, 写入: %v), 音频大小: %s",
requestType, totalTime, parseTime, synthTime, writeTime, formatFileSize(len(resp.AudioContent)))
}
// fillDefaultValues 填充默认值
func (h *TTSHandler) fillDefaultValues(req *models.TTSRequest) {
if req.Voice == "" { if req.Voice == "" {
req.Voice = h.config.TTS.DefaultVoice req.Voice = h.config.TTS.DefaultVoice
} }
@@ -268,81 +171,182 @@ func (h *TTSHandler) HandleTTS(c *gin.Context) {
if req.Pitch == "" { if req.Pitch == "" {
req.Pitch = h.config.TTS.DefaultPitch req.Pitch = h.config.TTS.DefaultPitch
} }
// 检查文本长度
if len(req.Text) > h.config.TTS.MaxTextLength {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "文本长度超过限制"})
return
}
// 检查是否需要分段处理
segmentThreshold := h.config.TTS.SegmentThreshold
if len(req.Text) > segmentThreshold && len(req.Text) <= h.config.TTS.MaxTextLength {
log.Printf("文本长度 %d 超过阈值 %d使用分段处理", len(req.Text), segmentThreshold)
// 如果文本长度超过阈值但小于最大限制,使用分段处理
h.handleSegmentedTTS(c, req)
return
}
synthStart := time.Now()
resp, err := h.ttsService.SynthesizeSpeech(c.Request.Context(), req)
synthTime := time.Since(synthStart)
log.Printf("TTS合成耗时: %v, 文本长度: %d", synthTime, len(req.Text))
if err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "语音合成失败: " + err.Error()})
return
}
// 设置响应
c.Header("Content-Type", "audio/mpeg")
writeStart := time.Now()
c.Writer.Write(resp.AudioContent)
writeTime := time.Since(writeStart)
// 记录总耗时
totalTime := time.Since(startTime)
log.Printf("TTS请求总耗时: %v (解析: %v, 合成: %v, 写入: %v), 音频大小: %s",
totalTime, parseTime, synthTime, writeTime, formatFileSize(len(resp.AudioContent)))
} }
// handleSegmentedTTS 处理长文本的分段TTS请求 // HandleTTS 处理TTS请求
// HandleTTS is the method-agnostic entry point for the TTS endpoint.
// GET requests are forwarded to HandleTTSGet, POST requests to
// HandleTTSPost, and every other HTTP method is rejected with
// 405 Method Not Allowed and a JSON error body.
func (h *TTSHandler) HandleTTS(c *gin.Context) {
	method := c.Request.Method

	if method == http.MethodGet {
		h.HandleTTSGet(c)
		return
	}
	if method == http.MethodPost {
		h.HandleTTSPost(c)
		return
	}

	// Neither GET nor POST: refuse the request.
	c.AbortWithStatusJSON(http.StatusMethodNotAllowed, gin.H{"error": "仅支持GET和POST请求"})
}
// HandleTTSGet handles a TTS request whose parameters arrive in the URL
// query string: t (text), v (voice), r (rate), p (pitch) and s (style).
// The assembled request is passed to processTTSRequest together with the
// start time and the time spent reading the parameters.
func (h *TTSHandler) HandleTTSGet(c *gin.Context) {
	begin := time.Now()

	// Populate the request field by field from the query parameters.
	var req models.TTSRequest
	req.Text = c.Query("t")
	req.Voice = c.Query("v")
	req.Rate = c.Query("r")
	req.Pitch = c.Query("p")
	req.Style = c.Query("s")

	parseElapsed := time.Since(begin)
	h.processTTSRequest(c, req, begin, parseElapsed, "TTS GET")
}
// HandleTTSPost handles a TTS request submitted via POST. A body with
// content type application/json is decoded with ShouldBindJSON; any other
// content type goes through ShouldBind. A binding failure is logged and
// answered with 400 Bad Request; otherwise the request is passed to
// processTTSRequest together with the start time and parse duration.
func (h *TTSHandler) HandleTTSPost(c *gin.Context) {
	begin := time.Now()

	var (
		req       models.TTSRequest
		bindErr   error
		logFormat string
		clientMsg string
	)

	// Pick the binder and the matching error texts in one place so the
	// failure path below is shared by both body formats.
	switch c.ContentType() {
	case "application/json":
		bindErr = c.ShouldBindJSON(&req)
		logFormat, clientMsg = "JSON解析错误: %v", "无效的JSON请求"
	default:
		bindErr = c.ShouldBind(&req)
		logFormat, clientMsg = "表单解析错误: %v", "无法解析表单数据"
	}

	if bindErr != nil {
		log.Printf(logFormat, bindErr)
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": clientMsg})
		return
	}

	parseElapsed := time.Since(begin)
	h.processTTSRequest(c, req, begin, parseElapsed, "TTS POST")
}
// HandleOpenAITTS serves the OpenAI-compatible TTS endpoint. It accepts
// only POST requests carrying a JSON body shaped like models.OpenAIRequest,
// requires a non-empty input field, converts the payload into the internal
// request format via convertOpenAIRequest, logs the mapping, and passes the
// result to processTTSRequest.
//
// Error responses: 405 for non-POST methods, 400 for an undecodable body or
// an empty input field.
func (h *TTSHandler) HandleOpenAITTS(c *gin.Context) {
	begin := time.Now()

	// Reject every method except POST.
	if method := c.Request.Method; method != http.MethodPost {
		c.AbortWithStatusJSON(http.StatusMethodNotAllowed, gin.H{"error": "仅支持POST请求"})
		return
	}

	// Decode the JSON body.
	var payload models.OpenAIRequest
	bindErr := c.ShouldBindJSON(&payload)
	if bindErr != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "无效的JSON请求: " + bindErr.Error()})
		return
	}
	parseElapsed := time.Since(begin)

	// The text to synthesize is mandatory.
	if len(payload.Input) == 0 {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "input字段不能为空"})
		return
	}

	// Translate to the internal request and log how voice and speed were mapped.
	internalReq := h.convertOpenAIRequest(payload)
	runeLen := utf8.RuneCountInString(internalReq.Text)
	log.Printf("OpenAI TTS请求: model=%s, voice=%s → %s, speed=%.2f → %s, 文本长度=%d",
		payload.Model, payload.Voice, internalReq.Voice, payload.Speed, internalReq.Rate, runeLen)

	h.processTTSRequest(c, internalReq, begin, parseElapsed, "OpenAI TTS")
}
// convertOpenAIRequest translates an OpenAI-style request into the internal
// models.TTSRequest.
//
//   - Voice: replaced by its entry in the configured VoiceMapping when the
//     voice is non-empty and the mapping holds a non-empty value for it;
//     otherwise passed through unchanged.
//   - Rate: the configured default rate when Speed is 0; otherwise
//     (Speed-1)*100 rounded to a whole number, with an explicit "+" prefix
//     for non-negative results (e.g. 1.5 -> "+50", 0.5 -> "-50").
//   - Pitch: always the configured default pitch.
//   - Text and Style: copied from Input and Model respectively.
func (h *TTSHandler) convertOpenAIRequest(openaiReq models.OpenAIRequest) models.TTSRequest {
	ttsCfg := h.config.TTS

	// Resolve the voice with a single lookup into the mapping table.
	voice := openaiReq.Voice
	if mapped := ttsCfg.VoiceMapping[openaiReq.Voice]; openaiReq.Voice != "" && mapped != "" {
		voice = mapped
	}

	// Convert the speed multiplier into a signed percentage offset.
	rate := ttsCfg.DefaultRate
	if speed := openaiReq.Speed; speed != 0 {
		switch pct := (speed - 1.0) * 100; {
		case pct >= 0:
			rate = fmt.Sprintf("+%.0f", pct)
		default:
			rate = fmt.Sprintf("%.0f", pct)
		}
	}

	var out models.TTSRequest
	out.Text = openaiReq.Input
	out.Voice = voice
	out.Rate = rate
	out.Pitch = ttsCfg.DefaultPitch
	out.Style = openaiReq.Model
	return out
}
// sentenceSynthesisResult records per-sentence statistics gathered while
// handleSegmentedTTS synthesizes a long text sentence by sentence. The
// values are collected from the worker goroutines and printed afterwards
// as one summary table instead of being logged individually.
type sentenceSynthesisResult struct {
// index is the zero-based position of the sentence in the split text.
index int
// length is the sentence length in runes (not bytes).
length int
// audioSize is the size in bytes of the audio returned for the sentence.
audioSize int
// content is a shortened copy of the sentence text for log output.
content string
// duration is how long the synthesis call for this sentence took.
duration time.Duration
}
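// handleSegmentedTTS handles long texts: it splits the text into sentences, synthesizes them concurrently (bounded by the configured MaxConcurrent), logs a per-sentence result table, merges the audio segments in order and writes the result as audio/mpeg.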
func (h *TTSHandler) handleSegmentedTTS(c *gin.Context, req models.TTSRequest) { func (h *TTSHandler) handleSegmentedTTS(c *gin.Context, req models.TTSRequest) {
segmentStart := time.Now() // 分段处理开始时间 segmentStart := time.Now()
text := req.Text text := req.Text
// 开始计时:分割文本 // 开始计时:分割文本
splitStart := time.Now() splitStart := time.Now()
// 按句子分段处理
sentences := splitTextBySentences(text) sentences := splitTextBySentences(text)
segmentCount := len(sentences) segmentCount := len(sentences)
splitTime := time.Since(splitStart) splitTime := time.Since(splitStart)
log.Printf("分割文本耗时: %v, 文本总长度: %d, 分段数: %d, 平均句子长度: %.2f", log.Printf("分割文本耗时: %v, 文本总长度: %d, 分段数: %d, 平均句子长度: %.2f",
splitTime, len(text), segmentCount, float64(len(text))/float64(segmentCount)) splitTime, utf8.RuneCountInString(text), segmentCount, float64(utf8.RuneCountInString(text))/float64(segmentCount))
// 创建用于存储每段音频的切片 // 创建用于存储每段音频的切片
results := make([][]byte, segmentCount) results := make([][]byte, segmentCount)
errChan := make(chan error, segmentCount) // 创建用于收集合成结果信息的切片
var wg sync.WaitGroup synthResults := make([]sentenceSynthesisResult, segmentCount)
// 限制并发数量避免创建过多goroutine errChan := make(chan error, 1)
var wg sync.WaitGroup
var synthMutex sync.Mutex
// 限制并发数量
maxConcurrent := h.config.TTS.MaxConcurrent maxConcurrent := h.config.TTS.MaxConcurrent
semaphore := make(chan struct{}, maxConcurrent) semaphore := make(chan struct{}, maxConcurrent)
// 用于记录每个分段处理的时间
segmentTimes := make([]time.Duration, segmentCount)
// 合成阶段开始时间 // 合成阶段开始时间
synthesisStart := time.Now() synthesisStart := time.Now()
// 并发处理每一个句子 // 并发处理每一个句子
for i := 0; i < segmentCount; i++ { for i := 0; i < segmentCount; i++ {
wg.Add(1) wg.Add(1)
semaphore <- struct{}{} // 获取信号量
go func(index int) { go func(index int) {
defer wg.Done() defer wg.Done()
defer func() { <-semaphore }() // 释放信号量
select {
case semaphore <- struct{}{}: // 获取信号量
defer func() { <-semaphore }() // 释放信号量
case <-c.Request.Context().Done():
select {
case errChan <- c.Request.Context().Err():
default:
}
return
}
// 创建该句的请求 // 创建该句的请求
segReq := models.TTSRequest{ segReq := models.TTSRequest{
@@ -350,193 +354,129 @@ func (h *TTSHandler) handleSegmentedTTS(c *gin.Context, req models.TTSRequest) {
Voice: req.Voice, Voice: req.Voice,
Rate: req.Rate, Rate: req.Rate,
Pitch: req.Pitch, Pitch: req.Pitch,
Style: req.Style,
} }
log.Printf("开始处理句子 #%d: 长度=%d, 内容='%s'", startTime := time.Now()
index+1,
utf8.RuneCountInString(sentences[index]),
truncateForLog(sentences[index], 20))
// 记录该段合成开始时间
segStart := time.Now()
// 合成该段音频 // 合成该段音频
resp, err := h.ttsService.SynthesizeSpeech(c.Request.Context(), segReq) resp, err := h.ttsService.SynthesizeSpeech(c.Request.Context(), segReq)
synthDuration := time.Since(startTime)
// 记录该段合成耗时
segTime := time.Since(segStart)
segmentTimes[index] = segTime
if err != nil { if err != nil {
log.Printf("句子 #%d 合成失败,耗时: %v, 错误: %v", index+1, segTime, err)
select { select {
case errChan <- fmt.Errorf("句子 %d 合成失败: %w", index+1, err): case errChan <- fmt.Errorf("句子 %d 合成失败: %w", index+1, err):
default: default:
// 已经有错误了,忽略
} }
return return
} }
log.Printf("句子 #%d 合成成功:长度=%d, 耗时=%v, 音频大小=%s", // 收集合成结果信息,而不是立即打印
index+1, utf8.RuneCountInString(sentences[index]), segTime, formatFileSize(len(resp.AudioContent))) result := sentenceSynthesisResult{
index: index,
length: utf8.RuneCountInString(sentences[index]),
audioSize: len(resp.AudioContent),
content: truncateForLog(sentences[index], 20),
duration: synthDuration,
}
// 存储该段结果 synthMutex.Lock()
synthResults[index] = result
results[index] = resp.AudioContent results[index] = resp.AudioContent
synthMutex.Unlock()
}(i) }(i)
} }
// 等待所有goroutine完成 // 等待所有goroutine完成或出错
wg.Wait() done := make(chan struct{})
close(errChan) go func() {
wg.Wait()
close(done)
}()
// 记录所有分段合成总耗时 select {
case <-done:
// 所有goroutine正常完成
case err := <-errChan:
// 发生错误
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
case <-c.Request.Context().Done():
// 请求被取消
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "请求被取消"})
return
}
// 打印表格格式的合成结果
log.Println("句子合成结果表:")
log.Println("-------------------------------------------------------------")
log.Println("序号 | 长度 | 音频大小 | 耗时 | 内容")
log.Println("-------------------------------------------------------------")
for i := 0; i < segmentCount; i++ {
result := synthResults[i]
log.Printf("#%-3d | %4d | %12s | %10v | %s",
i+1,
result.length,
formatFileSize(result.audioSize),
result.duration.Round(time.Millisecond),
result.content)
}
log.Println("-------------------------------------------------------------")
// 记录合成总耗时
synthesisTime := time.Since(synthesisStart) synthesisTime := time.Since(synthesisStart)
log.Printf("所有分段合成总耗时: %v, 平均每段耗时: %v", log.Printf("所有分段合成总耗时: %v, 平均每段耗时: %v",
synthesisTime, synthesisTime/time.Duration(segmentCount)) synthesisTime, synthesisTime/time.Duration(segmentCount))
// 检查是否有错误发生 // 合并音频
if err := <-errChan; err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "语音合成失败: " + err.Error()})
return
}
// 记录写入开始时间
writeStart := time.Now() writeStart := time.Now()
audioData, err := audioMerge(results)
var audioData []byte
var err error
audioData, err = audioMerge(results)
if err != nil { if err != nil {
log.Printf("合并音频失败: %v", err) log.Printf("合并音频失败: %v", err)
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "音频合并失败: " + err.Error()}) c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "音频合并失败: " + err.Error()})
return return
} }
// 设置响应内容类型 // 设置响应内容类型并写入数据
c.Header("Content-Type", "audio/mpeg") c.Header("Content-Type", "audio/mpeg")
if _, err := c.Writer.Write(audioData); err != nil {
// 写入合并后的音频数据 log.Printf("写入响应失败: %v", err)
totalSize := len(audioData) return
if _, writeErr := c.Writer.Write(audioData); writeErr != nil {
log.Printf("写入响应失败: %v", writeErr)
} }
// 记录写入耗时 // 记录写入耗时和总耗时
writeTime := time.Since(writeStart) writeTime := time.Since(writeStart)
// 记录总耗时
totalTime := time.Since(segmentStart) totalTime := time.Since(segmentStart)
log.Printf("分段TTS请求总耗时: %v (分割: %v, 合成: %v, 写入: %v), 总音频大小: %s", log.Printf("分段TTS请求总耗时: %v (分割: %v, 合成: %v, 写入: %v), 总音频大小: %s",
totalTime, splitTime, synthesisTime, writeTime, formatFileSize(totalSize)) totalTime, splitTime, synthesisTime, writeTime, formatFileSize(len(audioData)))
}
// sentenceEnders 定义句子结束的标点符号
var sentenceEnders = map[rune]bool{
'。': true,
'': true,
'': true,
'…': true,
'.': true,
'!': true,
'?': true,
'\n': true,
} }
// splitTextBySentences 将文本按句子分割 // splitTextBySentences 将文本按句子分割
func splitTextBySentences(text string) []string { func splitTextBySentences(text string) []string {
// 定义句子结束的标点符号
sentenceEnders := []string{"。", "", "", "…", ".", "!", "?", "…", "\n"}
// 如果文本过短,直接作为一个句子返回 // 如果文本过短,直接作为一个句子返回
if utf8.RuneCountInString(text) < 100 { if utf8.RuneCountInString(text) < 100 {
return []string{text} return []string{text}
} }
var sentences []string cfg := config.Get().TTS
var currentSentence strings.Builder maxLen := cfg.MaxSentenceLength
maxSentenceLength := config.Get().TTS.MaxSentenceLength // 设置单个句子的最大长度,避免过长句子 minLen := cfg.MinSentenceLength
runeCount := 0 // 当前句子的实际字符数量
for _, char := range text { // 第一次分割:按标点和长度限制分割
currentSentence.WriteRune(char) sentences := utils.SplitAndFilterEmptyLines(text)
runeCount++ // 第二次处理:合并过短的句子
shortSentences := utils.MergeStringsWithLimit(sentences, minLen, maxLen)
// 检查是否到达句子结束标点 log.Printf("分割后的句子数: %d → %d", len(sentences), len(shortSentences))
lastChar := string(char) return shortSentences
isSentenceEnder := false
for _, ender := range sentenceEnders {
if lastChar == ender {
isSentenceEnder = true
break
}
}
// 判断是否结束一个句子 - 使用字符数量而非字节长度
if isSentenceEnder || runeCount >= maxSentenceLength {
// 添加当前句子到结果中
sentence := currentSentence.String()
if len(sentence) > 0 {
sentences = append(sentences, sentence)
}
currentSentence.Reset() // 重置构建器
runeCount = 0 // 重置字符计数器
}
}
// 处理可能的最后一个句子
if currentSentence.Len() > 0 {
lastSentence := currentSentence.String()
sentences = append(sentences, lastSentence)
}
// 合并过短的句子
minSentenceLength := config.Get().TTS.MinSentenceLength // 设置最小句子长度阈值
if len(sentences) > 1 {
mergedSentences := []string{}
var currentMerged strings.Builder
currentMergedLength := 0
for i, sentence := range sentences {
sentenceLength := utf8.RuneCountInString(sentence)
// 如果当前句子太短,且不是最后一个,考虑合并
if sentenceLength < minSentenceLength && i < len(sentences)-1 {
// 检查合并后是否会超过最大长度
if currentMergedLength+sentenceLength > maxSentenceLength {
// 合并后会超长,先保存当前内容
if currentMerged.Len() > 0 {
mergedSentences = append(mergedSentences, currentMerged.String())
currentMerged.Reset()
currentMergedLength = 0
}
}
// 当前句子过短,添加到合并缓冲区
currentMerged.WriteString(sentence)
currentMergedLength += sentenceLength
} else {
// 句子足够长或是最后一句
if currentMerged.Len() > 0 {
// 检查合并后是否会超过最大长度
if currentMergedLength+sentenceLength <= maxSentenceLength {
// 有待合并的内容,将当前句子也合并进去
currentMerged.WriteString(sentence)
mergedSentence := currentMerged.String()
mergedSentences = append(mergedSentences, mergedSentence)
} else {
// 合并后会超长,分别添加
mergedSentence := currentMerged.String()
mergedSentences = append(mergedSentences, mergedSentence)
mergedSentences = append(mergedSentences, sentence)
}
currentMerged.Reset()
currentMergedLength = 0
} else {
// 没有待合并内容,直接添加当前句子
mergedSentences = append(mergedSentences, sentence)
}
}
}
// 处理可能剩余的合并内容
if currentMerged.Len() > 0 {
mergedSentence := currentMerged.String()
mergedSentences = append(mergedSentences, mergedSentence)
}
return mergedSentences
}
return sentences
} }

View File

@@ -15,3 +15,11 @@ type TTSResponse struct {
ContentType string `json:"content_type"` // MIME类型 ContentType string `json:"content_type"` // MIME类型
CacheHit bool `json:"cache_hit"` // 是否命中缓存 CacheHit bool `json:"cache_hit"` // 是否命中缓存
} }
// OpenAIRequest is the JSON request body accepted by the OpenAI-compatible
// TTS endpoint.
type OpenAIRequest struct {
// Model is the model name sent by the client; the handler copies it into
// the Style field of the internal TTS request.
Model string `json:"model"`
// Input is the text to synthesize; the handler rejects an empty value.
Input string `json:"input"`
// Voice is the requested voice name; the handler maps it through the
// configured voice mapping when an entry exists.
Voice string `json:"voice"`
// Speed is the playback speed multiplier (1.0 maps to a +0 rate offset).
// A value of 0 makes the handler use the configured default rate.
Speed float64 `json:"speed"`
}

View File

@@ -12,6 +12,7 @@ import (
"net/url" "net/url"
"strings" "strings"
"time" "time"
"unicode/utf8"
"github.com/google/uuid" "github.com/google/uuid"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
@@ -97,3 +98,62 @@ func Sign(urlStr string) string {
signBase64 := base64.StdEncoding.EncodeToString(secretKey) signBase64 := base64.StdEncoding.EncodeToString(secretKey)
return fmt.Sprintf("MSTranslatorAndroidApp::%s::%s::%s", signBase64, formattedDate, uuidStr) return fmt.Sprintf("MSTranslatorAndroidApp::%s::%s::%s", signBase64, formattedDate, uuidStr)
} }
// SplitAndFilterEmptyLines breaks text at every "\n", trims leading and
// trailing whitespace from each line, and returns the lines that are still
// non-empty, in their original order. It returns nil when no line survives
// (including for empty input).
func SplitAndFilterEmptyLines(text string) []string {
	var kept []string

	// Peel off one line per iteration; the final segment (after the last
	// newline, or the whole text when there is none) is processed too.
	for rest, more := text, true; more; {
		var line string
		line, rest, more = strings.Cut(rest, "\n")
		if line = strings.TrimSpace(line); line != "" {
			kept = append(kept, line)
		}
	}

	return kept
}
// MergeStringsWithLimit greedily merges consecutive entries of strs into
// larger chunks, joining merged entries with "\n". All lengths are measured
// in runes, not bytes.
//
// Starting from the next unused entry, following entries are appended while
// the chunk is still shorter than minLen. Merging of the current chunk stops
// early when appending the next entry would either
//
//   - push the chunk past the soft cap of 1.2 × minLen, or
//   - push the chunk (including the "\n" separator) past maxLen.
//
// A maxLen of zero or less disables the hard cap, which keeps the behaviour
// callers saw when maxLen was ignored. A single entry that is already longer
// than maxLen is emitted unchanged; this function never splits entries.
//
// The result preserves the input order and is nil when strs is empty.
func MergeStringsWithLimit(strs []string, minLen int, maxLen int) []string {
	if len(strs) == 0 {
		return nil
	}

	// Loop-invariant soft cap; same arithmetic as the previous inline form.
	softCap := int(float64(minLen) * 1.2)
	result := make([]string, 0, len(strs))

	for i := 0; i < len(strs); {
		// Seed a new chunk with the current entry.
		var chunk strings.Builder
		chunk.WriteString(strs[i])
		// Track the rune length incrementally instead of re-scanning the
		// builder contents on every iteration.
		chunkLen := utf8.RuneCountInString(strs[i])
		i++

		for i < len(strs) && chunkLen < minLen {
			nextLen := utf8.RuneCountInString(strs[i])

			// Soft cap: do not overshoot minLen by more than 20%.
			if chunkLen+nextLen > softCap {
				break
			}
			// Hard cap: the merged chunk, separator included, must fit maxLen.
			if maxLen > 0 && chunkLen+1+nextLen > maxLen {
				break
			}

			chunk.WriteString("\n")
			chunk.WriteString(strs[i])
			chunkLen += 1 + nextLen
			i++
		}

		result = append(result, chunk.String())
	}

	return result
}