feat: 重构项目以符合 Go 规范,添加 OpenAI 接口适配,优化长文本朗读功能(切割后合并)

This commit is contained in:
王锦强
2025-03-09 13:02:28 +08:00
parent 539f6d9ef5
commit 8f2fd68ebe
31 changed files with 2487 additions and 647 deletions

27
.gitignore vendored Normal file
View File

@@ -0,0 +1,27 @@
### Go template
# If you prefer the allow list template instead of the deny list, see community template:
# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
#
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
# Test binary, built with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out
# Dependency directories (remove the comment below to include it)
# vendor/
# Go workspace file
go.work
go.work.sum
# env file
.env

6
.idea/git_toolbox_blame.xml generated Normal file
View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="GitToolBoxBlameSettings">
<option name="version" value="2" />
</component>
</project>

58
cmd/api/main.go Normal file
View File

@@ -0,0 +1,58 @@
package main
import (
"flag"
"log"
"os"
"path/filepath"
"tts/internal/http/server"
)
// main locates the configuration file, builds the application from it and
// runs it. Any failure along the way terminates the process via log.Fatalf.
func main() {
	// An explicit -config flag always wins over the default locations.
	cfgFlag := flag.String("config", "", "配置文件路径")
	flag.Parse()

	chosen := *cfgFlag
	if chosen == "" {
		// Probe the well-known locations in order; the first one that can be
		// stat'ed successfully is used.
		candidates := [...]string{
			"./configs/config.yaml",
			"../configs/config.yaml",
			"/etc/tts/config.yaml",
		}
		for i := 0; i < len(candidates) && chosen == ""; i++ {
			if _, statErr := os.Stat(candidates[i]); statErr == nil {
				chosen = candidates[i]
			}
		}
		// Nothing was found on disk: fall back to the conventional path and
		// let the application report the problem when it tries to load it.
		if chosen == "" {
			chosen = "./configs/config.yaml"
		}
	}

	// The application is always handed an absolute path.
	absPath, absErr := filepath.Abs(chosen)
	if absErr != nil {
		log.Fatalf("无法获取配置文件的绝对路径: %v", absErr)
	}
	log.Printf("使用配置文件: %s", absPath)

	application, newErr := server.NewApp(absPath)
	if newErr != nil {
		log.Fatalf("初始化应用失败: %v", newErr)
	}

	runErr := application.Start()
	if runErr != nil {
		log.Fatalf("应用运行出错: %v", runErr)
	}
}

27
configs/config.yaml Normal file
View File

@@ -0,0 +1,27 @@
server:
port: 8080
read_timeout: 30
write_timeout: 30
base_path: ""
tts:
region: "eastasia"
default_voice: "zh-CN-XiaoxiaoNeural"
default_rate: "0"
default_pitch: "0"
default_format: "audio-24khz-48kbitrate-mono-mp3"
max_text_length: 65535
request_timeout: 30
max_concurrent: 10
segment_threshold: 300
min_sentence_length: 200
max_sentence_length: 300
# OpenAI 到微软 TTS 中文语音的映射
voice_mapping:
alloy: "zh-CN-XiaoyiNeural" # 中性女声
echo: "zh-CN-YunxiNeural" # 年轻男声
fable: "zh-CN-XiaochenNeural" # 儿童声
onyx: "zh-CN-YunjianNeural" # 成熟男声
nova: "zh-CN-XiaohanNeural" # 活力女声
shimmer: "zh-CN-XiaomoNeural" # 温柔女声

28
go.mod
View File

@@ -3,36 +3,12 @@ module tts
go 1.22
require (
github.com/gin-gonic/gin v1.10.0
github.com/google/uuid v1.6.0
github.com/sirupsen/logrus v1.9.3
gopkg.in/yaml.v3 v3.0.1
)
require (
github.com/bytedance/sonic v1.11.6 // indirect
github.com/bytedance/sonic/loader v0.1.1 // indirect
github.com/cloudwego/base64x v0.1.4 // indirect
github.com/cloudwego/iasm v0.2.0 // indirect
github.com/gabriel-vasile/mimetype v1.4.3 // indirect
github.com/gin-contrib/sse v0.1.0 // indirect
github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/go-playground/validator/v10 v10.20.0 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/cpuid/v2 v2.2.7 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.23.0 // indirect
golang.org/x/net v0.25.0 // indirect
github.com/stretchr/testify v1.9.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/text v0.15.0 // indirect
google.golang.org/protobuf v1.34.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

103
go.sum
View File

@@ -1,124 +1,21 @@
github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM=
github.com/bytedance/sonic v1.10.0-rc/go.mod h1:ElCzW+ufi8qKqNW0FY314xriJhyJhuoJ3gFZdAHF7NM=
github.com/bytedance/sonic v1.11.4 h1:8+OMLSSDDm2/qJc6ld5K5Sm62NK9VHcUKk0NzBoMAM4=
github.com/bytedance/sonic v1.11.4/go.mod h1:YrWEqYtlBPS6LUA0vpuG79a1trsh4Ae41uWUWUreHhE=
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY=
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk=
github.com/chenzhuoyu/iasm v0.9.0/go.mod h1:Xjy2NpN3h7aUqeqM+woSuuvxmIe6+DDsiNLIrkAmYog=
github.com/cloudwego/base64x v0.1.0 h1:Tg5q9tq1khq9Y9UwfoC6zkHK0FypN2GLDvhqFceOL8U=
github.com/cloudwego/base64x v0.1.0/go.mod h1:lM8nFiNbg74QgesNo6EAtv8N9tlRjBWExmHoNDa3PkU=
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
github.com/cloudwego/iasm v0.0.9/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
github.com/cloudwego/iasm v0.1.1 h1:Py/XoYVR3xFd2pXmvmOnoS5vHTlYT9SnGK28ES8JOIk=
github.com/cloudwego/iasm v0.1.1/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg=
github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU=
github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.19.0 h1:ol+5Fu+cSq9JD7SoSqe04GMI92cbn0+wvQ3bZ8b/AU4=
github.com/go-playground/validator/v10 v10.19.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/pelletier/go-toml/v2 v2.2.1 h1:9TA9+T8+8CUCO2+WYnDLCgrYi9+omqKXyjDtosvtEhg=
github.com/pelletier/go-toml/v2 v2.2.1/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/arch v0.7.0 h1:pskyeJh/3AmoQ8CPE95vxHLqp1G1GfGNXTmcl9NEKTc=
golang.org/x/arch v0.7.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30=
golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M=
golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w=
golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8=
golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o=
golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=

View File

@@ -1,119 +0,0 @@
package handlers
import (
"github.com/gin-gonic/gin"
"net/http"
"strings"
"tts/utils"
)
// GetVoiceList responds with the voices returned by utils.VoiceList as JSON
// under the "voices" key.
//
// Query parameters:
//   - l: when non-empty, only voices whose "Locale" field contains this
//     substring are kept.
//   - d: when present (with any value), the full voice objects are returned;
//     otherwise each voice is reduced to its "LocalName" and "ShortName".
//
// A failure of utils.VoiceList yields a 500 response carrying the error text.
// Entries that are not maps, or that lack the string fields being read, are
// skipped rather than triggering a panic on a failed type assertion.
func GetVoiceList(c *gin.Context) {
	locale := c.Query("l")
	voices, err := utils.VoiceList()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	if locale != "" {
		filteredVoices := make([]interface{}, 0, len(voices))
		for _, voice := range voices {
			entry, ok := voice.(map[string]interface{})
			if !ok {
				continue
			}
			voiceLocale, ok := entry["Locale"].(string)
			if ok && strings.Contains(voiceLocale, locale) {
				filteredVoices = append(filteredVoices, voice)
			}
		}
		voices = filteredVoices
	}

	// Only the presence of "d" matters, not its value.
	if _, detail := c.GetQuery("d"); detail {
		c.JSON(http.StatusOK, gin.H{"voices": voices})
		return
	}

	voiceSimpleList := make([]map[string]string, 0, len(voices))
	for _, voice := range voices {
		entry, ok := voice.(map[string]interface{})
		if !ok {
			continue
		}
		localName, okLocal := entry["LocalName"].(string)
		shortName, okShort := entry["ShortName"].(string)
		if !okLocal || !okShort {
			continue
		}
		voiceSimpleList = append(voiceSimpleList, map[string]string{
			"LocalName": localName,
			"ShortName": shortName,
		})
	}
	c.JSON(http.StatusOK, gin.H{"voices": voiceSimpleList})
}
// SynthesizeVoice handles the query-string variant of the synthesis endpoint.
// It reads t (text), v (voice), r (rate), p (pitch), o (output format) and
// s (style) from the URL, falling back to built-in defaults for v, r, p and o,
// and replies with the audio bytes as audio/mpeg or a 500 JSON error.
func SynthesizeVoice(c *gin.Context) {
	var (
		text         = c.Query("t")
		voiceName    = c.DefaultQuery("v", "zh-CN-XiaoxiaoMultilingualNeural")
		rate         = c.DefaultQuery("r", "0")
		pitch        = c.DefaultQuery("p", "0")
		outputFormat = c.DefaultQuery("o", "audio-24khz-48kbitrate-mono-mp3")
		style        = c.Query("s")
	)

	audio, synthErr := utils.GetVoice(text, voiceName, rate, pitch, outputFormat, style)
	if synthErr == nil {
		c.Data(http.StatusOK, "audio/mpeg", audio)
		return
	}
	c.JSON(http.StatusInternalServerError, gin.H{"error": synthErr.Error()})
}
// Index renders the "index.html" template with the page title "TTS".
func Index(c *gin.Context) {
	pageData := gin.H{"title": "TTS"}
	c.HTML(http.StatusOK, "index.html", pageData)
}
// ApiDoc renders the "api-doc.html" template with the page title "TTS".
func ApiDoc(c *gin.Context) {
	pageData := gin.H{"title": "TTS"}
	c.HTML(http.StatusOK, "api-doc.html", pageData)
}
// SynthesizeVoiceRequest is the JSON body decoded by SynthesizeVoicePost.
// The single-letter JSON keys mirror the query parameters read by
// SynthesizeVoice.
type SynthesizeVoiceRequest struct {
	Text         string `json:"t"` // text to synthesize
	VoiceName    string `json:"v"` // voice name passed to utils.GetVoice
	Rate         string `json:"r"` // speaking rate
	Pitch        string `json:"p"` // pitch
	OutputFormat string `json:"o"` // requested audio output format
	Style        string `json:"s"` // speaking style
}
// SynthesizeVoiceOpenAIRequest is the JSON body decoded by
// SynthesizeVoiceOpenAI. Model is decoded but not read by that handler.
type SynthesizeVoiceOpenAIRequest struct {
	Model string `json:"model"`
	Input string `json:"input"` // text to synthesize
	Voice string `json:"voice"` // voice name, forwarded to utils.GetVoice as-is
}
// SynthesizeVoicePost handles the JSON-body variant of the synthesis endpoint.
// A body that cannot be bound yields 400, a synthesis failure yields 500, and
// success returns the audio bytes as audio/mpeg.
func SynthesizeVoicePost(c *gin.Context) {
	var body SynthesizeVoiceRequest
	if bindErr := c.BindJSON(&body); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
		return
	}

	audio, synthErr := utils.GetVoice(
		body.Text,
		body.VoiceName,
		body.Rate,
		body.Pitch,
		body.OutputFormat,
		body.Style,
	)
	if synthErr == nil {
		c.Data(http.StatusOK, "audio/mpeg", audio)
		return
	}
	c.JSON(http.StatusInternalServerError, gin.H{"error": synthErr.Error()})
}
// SynthesizeVoiceOpenAI handles the OpenAI-shaped synthesis request. Text and
// voice come from the JSON body; rate (r), pitch (p), output format (o) and
// style (s) are taken from the query string. A body that cannot be bound
// yields 400, a synthesis failure yields 500, and success returns the audio
// bytes as audio/mpeg.
func SynthesizeVoiceOpenAI(c *gin.Context) {
	var body SynthesizeVoiceOpenAIRequest
	if bindErr := c.BindJSON(&body); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
		return
	}

	var (
		rate         = c.Query("r")
		pitch        = c.Query("p")
		outputFormat = c.Query("o")
		style        = c.Query("s")
	)
	audio, synthErr := utils.GetVoice(body.Input, body.Voice, rate, pitch, outputFormat, style)
	if synthErr == nil {
		c.Data(http.StatusOK, "audio/mpeg", audio)
		return
	}
	c.JSON(http.StatusInternalServerError, gin.H{"error": synthErr.Error()})
}

129
internal/config/config.go Normal file
View File

@@ -0,0 +1,129 @@
package config
import (
	"fmt"
	"os"
	"strconv"
	"sync"

	"gopkg.in/yaml.v3"
)
// Config holds all application configuration, decoded from the "server" and
// "tts" sections of the YAML configuration file.
type Config struct {
	Server ServerConfig `yaml:"server"`
	TTS    TTSConfig    `yaml:"tts"`
}
// ServerConfig holds the HTTP server settings.
type ServerConfig struct {
	Port         int    `yaml:"port"`          // overridable through the TTS_SERVER_PORT environment variable
	ReadTimeout  int    `yaml:"read_timeout"`  // in seconds
	WriteTimeout int    `yaml:"write_timeout"` // in seconds
	BasePath     string `yaml:"base_path"`
}
// TTSConfig holds the Microsoft TTS API settings.
type TTSConfig struct {
	APIKey            string            `yaml:"api_key"` // overridable through the TTS_API_KEY environment variable
	Region            string            `yaml:"region"`  // overridable through the TTS_API_REGION environment variable
	DefaultVoice      string            `yaml:"default_voice"`
	DefaultRate       string            `yaml:"default_rate"`
	DefaultPitch      string            `yaml:"default_pitch"`
	DefaultFormat     string            `yaml:"default_format"`
	MaxTextLength     int               `yaml:"max_text_length"`
	RequestTimeout    int               `yaml:"request_timeout"` // in seconds
	MaxConcurrent     int               `yaml:"max_concurrent"`
	SegmentThreshold  int               `yaml:"segment_threshold"`
	MinSentenceLength int               `yaml:"min_sentence_length"`
	MaxSentenceLength int               `yaml:"max_sentence_length"`
	VoiceMapping      map[string]string `yaml:"voice_mapping"` // maps OpenAI voice names to Azure voice names
}
var (
	config Config
	once   sync.Once
	// loadErr records the outcome of the one-time load so that every Load
	// call, not only the first, reports a failure.
	loadErr error
)

// Load initialises the package-level configuration exactly once and returns
// a pointer to it.
//
// The first call applies the built-in defaults, then decodes the YAML file at
// configPath (skipped when configPath is empty), then applies environment
// variable overrides. Later calls ignore configPath and return the result of
// that first call.
//
// If the file cannot be loaded, Load returns a nil config and the wrapped
// error on the first call and on every subsequent call. Previously the error
// was held in a per-call local, so a second call silently returned a
// partially initialised configuration with a nil error.
func Load(configPath string) (*Config, error) {
	once.Do(func() {
		// Defaults first, so values missing from the file keep sane settings.
		setDefaults()

		if configPath != "" {
			if err := loadFromFile(configPath); err != nil {
				loadErr = fmt.Errorf("加载配置文件失败: %w", err)
				return
			}
		}

		// Environment variables take precedence over the file.
		overrideFromEnv()
	})

	if loadErr != nil {
		return nil, loadErr
	}
	return &config, nil
}
// 设置默认配置值
func setDefaults() {
config = Config{
Server: ServerConfig{
Port: 8080,
ReadTimeout: 30,
WriteTimeout: 30,
BasePath: "",
},
TTS: TTSConfig{
DefaultVoice: "zh-CN-XiaoxiaoNeural",
DefaultRate: "0%",
DefaultPitch: "0%",
DefaultFormat: "audio-24khz-48kbitrate-mono-mp3",
MaxTextLength: 5000,
RequestTimeout: 30,
MaxConcurrent: 10,
SegmentThreshold: 500,
MinSentenceLength: 200,
MaxSentenceLength: 300,
VoiceMapping: make(map[string]string),
},
}
}
// loadFromFile reads the file at path and decodes its YAML content into the
// package-level configuration. Read and decode errors are returned unwrapped.
func loadFromFile(path string) error {
	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		return readErr
	}
	if decodeErr := yaml.Unmarshal(raw, &config); decodeErr != nil {
		return decodeErr
	}
	return nil
}
// overrideFromEnv overlays environment variables on the package-level
// configuration:
//
//   - TTS_SERVER_PORT -> Server.Port (must be a decimal integer)
//   - TTS_API_KEY     -> TTS.APIKey
//   - TTS_API_REGION  -> TTS.Region
//
// Empty or unset variables leave the current value untouched. A
// TTS_SERVER_PORT that is not a clean integer is reported on stderr and
// ignored; previously the parse error was dropped and a value such as "80abc"
// was silently accepted as 80.
func overrideFromEnv() {
	if raw := os.Getenv("TTS_SERVER_PORT"); raw != "" {
		port, err := strconv.Atoi(raw)
		if err != nil {
			fmt.Fprintf(os.Stderr, "忽略无效的 TTS_SERVER_PORT 值 %q: %v\n", raw, err)
		} else {
			config.Server.Port = port
		}
	}

	if apiKey := os.Getenv("TTS_API_KEY"); apiKey != "" {
		config.TTS.APIKey = apiKey
	}

	if region := os.Getenv("TTS_API_REGION"); region != "" {
		config.TTS.Region = region
	}

	// More environment overrides can be added here.
}
// Get returns a pointer to the package-level configuration. It performs no
// loading itself: the value is whatever Load has put there so far, which is
// the zero Config if Load has not run. The same pointer is handed to every
// caller.
func Get() *Config {
	return &config
}

View File

@@ -0,0 +1,76 @@
package handlers
import (
"html/template"
"net/http"
"path/filepath"
"tts/internal/config"
)
// PagesHandler serves the HTML pages.
type PagesHandler struct {
	templates *template.Template // parsed "*.html" templates, looked up by file name
	config    *config.Config     // source of the values exposed to the templates
}
// NewPagesHandler parses every "*.html" file in templatesDir and returns a
// page handler backed by those templates and cfg. The parse error, if any, is
// returned unchanged.
func NewPagesHandler(templatesDir string, cfg *config.Config) (*PagesHandler, error) {
	pattern := filepath.Join(templatesDir, "*.html")

	parsed, parseErr := template.ParseGlob(pattern)
	if parseErr != nil {
		return nil, parseErr
	}

	handler := &PagesHandler{templates: parsed, config: cfg}
	return handler, nil
}
// HandleIndex renders "index.html" for the paths "/" and "/index.html" and
// answers 404 for anything else. A template execution failure is reported as
// a 500 response containing the error text.
func (h *PagesHandler) HandleIndex(w http.ResponseWriter, r *http.Request) {
	switch r.URL.Path {
	case "/", "/index.html":
		// Served below.
	default:
		http.NotFound(w, r)
		return
	}

	// Values made available to the template.
	view := make(map[string]interface{}, 4)
	view["BasePath"] = h.config.Server.BasePath
	view["DefaultVoice"] = h.config.TTS.DefaultVoice
	view["DefaultRate"] = h.config.TTS.DefaultRate
	view["DefaultPitch"] = h.config.TTS.DefaultPitch

	w.Header().Set("Content-Type", "text/html; charset=utf-8")

	renderErr := h.templates.ExecuteTemplate(w, "index.html", view)
	if renderErr == nil {
		return
	}
	http.Error(w, "模板渲染失败: "+renderErr.Error(), http.StatusInternalServerError)
}
// HandleAPIDoc renders "api-doc.html" with the base path and the default
// voice, rate, pitch and format from the configuration. A template execution
// failure is reported as a 500 response containing the error text.
func (h *PagesHandler) HandleAPIDoc(w http.ResponseWriter, r *http.Request) {
	// Values made available to the template.
	view := make(map[string]interface{}, 5)
	view["BasePath"] = h.config.Server.BasePath
	view["DefaultVoice"] = h.config.TTS.DefaultVoice
	view["DefaultRate"] = h.config.TTS.DefaultRate
	view["DefaultPitch"] = h.config.TTS.DefaultPitch
	view["DefaultFormat"] = h.config.TTS.DefaultFormat

	w.Header().Set("Content-Type", "text/html; charset=utf-8")

	renderErr := h.templates.ExecuteTemplate(w, "api-doc.html", view)
	if renderErr == nil {
		return
	}
	http.Error(w, "模板渲染失败: "+renderErr.Error(), http.StatusInternalServerError)
}

View File

@@ -0,0 +1,553 @@
package handlers
import (
"encoding/json"
"fmt"
"log"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"time"
"tts/internal/config"
"tts/internal/models"
"tts/internal/tts"
"unicode/utf8"
)
// TTSHandler serves the TTS HTTP endpoints.
type TTSHandler struct {
	ttsService tts.Service    // performs the actual speech synthesis
	config     *config.Config // defaults, limits and the OpenAI voice mapping
}
// NewTTSHandler returns a TTS handler that synthesizes through service and
// reads its defaults and limits from cfg.
func NewTTSHandler(service tts.Service, cfg *config.Config) *TTSHandler {
	handler := new(TTSHandler)
	handler.ttsService = service
	handler.config = cfg
	return handler
}
// HandleOpenAITTS serves the OpenAI-compatible speech endpoint.
//
// It accepts a POST whose JSON body carries model, input, voice and speed.
// The voice is translated through the configured voice mapping (falling back
// to the default voice when unmapped or empty), the speed is converted to a
// signed percentage offset, and the synthesized audio is written back as
// audio/mpeg. Input longer than the configured segment threshold is handed
// to handleSegmentedTTS.
//
// Status codes: 405 for non-POST requests; 400 for malformed JSON, empty
// input or input longer than MaxTextLength; 500 when synthesis fails.
func (h *TTSHandler) HandleOpenAITTS(w http.ResponseWriter, r *http.Request) {
	// Start of request handling, used for the timing log at the end.
	startTime := time.Now()

	if r.Method != http.MethodPost {
		http.Error(w, "仅支持POST请求", http.StatusMethodNotAllowed)
		return
	}

	var openaiReq struct {
		Model string  `json:"model"`
		Input string  `json:"input"`
		Voice string  `json:"voice"`
		Speed float64 `json:"speed"`
	}
	if err := json.NewDecoder(r.Body).Decode(&openaiReq); err != nil {
		http.Error(w, "无效的JSON请求: "+err.Error(), http.StatusBadRequest)
		return
	}
	parseTime := time.Since(startTime)

	if openaiReq.Input == "" {
		http.Error(w, "input字段不能为空", http.StatusBadRequest)
		return
	}

	// Map the OpenAI voice name to a Microsoft voice; unknown names fall back
	// to the default voice.
	msVoice := h.config.TTS.DefaultVoice
	if openaiReq.Voice != "" {
		if mappedVoice, exists := h.config.TTS.VoiceMapping[openaiReq.Voice]; exists {
			msVoice = mappedVoice
		}
	}

	// Convert the OpenAI speed multiplier (0.5 slow, 1.0 normal, 2.0 fast)
	// into a signed percentage offset (-50, +0, +100). A speed of 0 means
	// "not provided" and keeps the configured default rate.
	msRate := h.config.TTS.DefaultRate
	if openaiReq.Speed != 0 {
		speedPercentage := (openaiReq.Speed - 1.0) * 100
		if speedPercentage >= 0 {
			msRate = fmt.Sprintf("+%.0f", speedPercentage)
		} else {
			msRate = fmt.Sprintf("%.0f", speedPercentage)
		}
	}

	req := models.TTSRequest{
		Text:  openaiReq.Input,
		Voice: msVoice,
		Rate:  msRate,
		Pitch: h.config.TTS.DefaultPitch,
	}
	log.Printf("OpenAI TTS请求: model=%s, voice=%s → %s, speed=%.2f → %s, 文本长度=%d",
		openaiReq.Model, openaiReq.Voice, msVoice, openaiReq.Speed, msRate, len(req.Text))

	// Lengths here are byte lengths, not rune counts.
	if len(req.Text) > h.config.TTS.MaxTextLength {
		http.Error(w, "文本长度超过限制", http.StatusBadRequest)
		return
	}

	// The upper bound is already guaranteed by the check above, so only the
	// threshold decides whether to segment.
	segmentThreshold := h.config.TTS.SegmentThreshold
	if len(req.Text) > segmentThreshold {
		log.Printf("文本长度 %d 超过阈值 %d使用分段处理", len(req.Text), segmentThreshold)
		h.handleSegmentedTTS(w, r, req)
		return
	}

	// Short text: synthesize in a single call.
	synthStart := time.Now()
	resp, err := h.ttsService.SynthesizeSpeech(r.Context(), req)
	synthTime := time.Since(synthStart)
	log.Printf("TTS合成耗时: %v, 文本长度: %d", synthTime, len(req.Text))
	if err != nil {
		http.Error(w, "语音合成失败: "+err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "audio/mpeg")
	writeStart := time.Now()
	// The status line is already committed at this point, so a failed write
	// (typically a client disconnect) can only be logged.
	if _, writeErr := w.Write(resp.AudioContent); writeErr != nil {
		log.Printf("写入响应失败: %v", writeErr)
	}
	writeTime := time.Since(writeStart)

	totalTime := time.Since(startTime)
	log.Printf("OpenAI TTS请求总耗时: %v (解析: %v, 合成: %v, 写入: %v), 音频大小: %s",
		totalTime, parseTime, synthTime, writeTime, formatFileSize(len(resp.AudioContent)))
}
// HandleTTS serves the native TTS endpoint.
//
// Request parameters are taken from:
//   - GET: the query parameters t (text), v (voice), r (rate), p (pitch);
//   - POST with a JSON media type: the JSON body decoded into models.TTSRequest;
//   - any other POST: the form fields text, voice, rate, pitch.
//
// The JSON media type is matched case-insensitively and ignores parameters,
// so "application/json; charset=utf-8" is treated as JSON. It was previously
// compared exactly and fell through to form parsing.
//
// Empty voice, rate and pitch are filled from the configuration. Text longer
// than the configured segment threshold is handed to handleSegmentedTTS;
// otherwise the audio is synthesized in one call and written as audio/mpeg.
//
// Status codes: 405 for other methods; 400 for an unparsable body, missing
// text or text longer than MaxTextLength; 500 when synthesis fails.
func (h *TTSHandler) HandleTTS(w http.ResponseWriter, r *http.Request) {
	// Start of request handling, used for the timing log at the end.
	startTime := time.Now()

	var req models.TTSRequest
	switch r.Method {
	case http.MethodGet:
		q := r.URL.Query()
		req = models.TTSRequest{
			Text:  q.Get("t"),
			Voice: q.Get("v"),
			Rate:  q.Get("r"),
			Pitch: q.Get("p"),
		}
	case http.MethodPost:
		// Compare only the media type; parameters such as charset follow ';'.
		mediaType, _, _ := strings.Cut(r.Header.Get("Content-Type"), ";")
		if strings.EqualFold(strings.TrimSpace(mediaType), "application/json") {
			if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
				log.Printf("JSON解析错误: %v", err)
				http.Error(w, "无效的JSON请求", http.StatusBadRequest)
				return
			}
		} else {
			if err := r.ParseForm(); err != nil {
				log.Printf("表单解析错误: %v", err)
				http.Error(w, "无法解析表单数据", http.StatusBadRequest)
				return
			}
			req = models.TTSRequest{
				Text:  r.FormValue("text"),
				Voice: r.FormValue("voice"),
				Rate:  r.FormValue("rate"),
				Pitch: r.FormValue("pitch"),
			}
		}
	default:
		log.Printf("不支持的HTTP方法: %s", r.Method)
		http.Error(w, "仅支持GET和POST请求", http.StatusMethodNotAllowed)
		return
	}

	parseTime := time.Since(startTime)
	log.Printf("请求参数解析耗时: %v", parseTime)

	if req.Text == "" {
		log.Print("错误: 未提供文本参数")
		http.Error(w, "必须提供文本参数", http.StatusBadRequest)
		return
	}

	// Fill unset parameters from the configured defaults.
	if req.Voice == "" {
		req.Voice = h.config.TTS.DefaultVoice
	}
	if req.Rate == "" {
		req.Rate = h.config.TTS.DefaultRate
	}
	if req.Pitch == "" {
		req.Pitch = h.config.TTS.DefaultPitch
	}

	// Lengths here are byte lengths, not rune counts.
	if len(req.Text) > h.config.TTS.MaxTextLength {
		http.Error(w, "文本长度超过限制", http.StatusBadRequest)
		return
	}

	// The upper bound is already guaranteed by the check above, so only the
	// threshold decides whether to segment.
	segmentThreshold := h.config.TTS.SegmentThreshold
	if len(req.Text) > segmentThreshold {
		log.Printf("文本长度 %d 超过阈值 %d使用分段处理", len(req.Text), segmentThreshold)
		h.handleSegmentedTTS(w, r, req)
		return
	}

	// Short text: synthesize in a single call.
	synthStart := time.Now()
	resp, err := h.ttsService.SynthesizeSpeech(r.Context(), req)
	synthTime := time.Since(synthStart)
	log.Printf("TTS合成耗时: %v, 文本长度: %d", synthTime, len(req.Text))
	if err != nil {
		http.Error(w, "语音合成失败: "+err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "audio/mpeg")
	writeStart := time.Now()
	// The status line is already committed at this point, so a failed write
	// (typically a client disconnect) can only be logged.
	if _, writeErr := w.Write(resp.AudioContent); writeErr != nil {
		log.Printf("写入响应失败: %v", writeErr)
	}
	writeTime := time.Since(writeStart)

	totalTime := time.Since(startTime)
	log.Printf("TTS请求总耗时: %v (解析: %v, 合成: %v, 写入: %v), 音频大小: %s",
		totalTime, parseTime, synthTime, writeTime, formatFileSize(len(resp.AudioContent)))
}
// handleSegmentedTTS synthesizes a long text by splitting it into sentences,
// synthesizing the pieces concurrently, merging the resulting audio with
// audioMerge and writing the merged audio as audio/mpeg.
//
// Concurrency is bounded by the configured MaxConcurrent. A value below 1 is
// treated as 1: a zero-capacity semaphore would block the dispatch loop
// forever and a negative capacity would panic. If any piece fails, one of the
// errors is reported with a 500 response after all pieces have finished; a
// merge failure is likewise a 500. An empty split result is rejected with 400
// before any division by the segment count takes place.
func (h *TTSHandler) handleSegmentedTTS(w http.ResponseWriter, r *http.Request, req models.TTSRequest) {
	segmentStart := time.Now()
	text := req.Text

	// Phase 1: split the text into sentence-sized pieces.
	splitStart := time.Now()
	sentences := splitTextBySentences(text)
	segmentCount := len(sentences)
	splitTime := time.Since(splitStart)
	if segmentCount == 0 {
		http.Error(w, "没有可合成的文本", http.StatusBadRequest)
		return
	}
	log.Printf("分割文本耗时: %v, 文本总长度: %d, 分段数: %d, 平均句子长度: %.2f",
		splitTime, len(text), segmentCount, float64(len(text))/float64(segmentCount))

	// One result slot per piece; each goroutine writes only its own index,
	// which keeps the audio in sentence order.
	results := make([][]byte, segmentCount)
	errChan := make(chan error, segmentCount)
	var wg sync.WaitGroup

	maxConcurrent := h.config.TTS.MaxConcurrent
	if maxConcurrent < 1 {
		maxConcurrent = 1
	}
	semaphore := make(chan struct{}, maxConcurrent)

	// Per-piece synthesis durations.
	segmentTimes := make([]time.Duration, segmentCount)

	// Phase 2: synthesize every piece, at most maxConcurrent at a time.
	synthesisStart := time.Now()
	for i := 0; i < segmentCount; i++ {
		wg.Add(1)
		semaphore <- struct{}{} // acquire a slot before starting the goroutine

		go func(index int) {
			defer wg.Done()
			defer func() { <-semaphore }() // release the slot

			segReq := models.TTSRequest{
				Text:  sentences[index],
				Voice: req.Voice,
				Rate:  req.Rate,
				Pitch: req.Pitch,
			}
			log.Printf("开始处理句子 #%d: 长度=%d, 内容='%s'",
				index+1,
				utf8.RuneCountInString(sentences[index]),
				truncateForLog(sentences[index], 20))

			segStart := time.Now()
			resp, err := h.ttsService.SynthesizeSpeech(r.Context(), segReq)
			segTime := time.Since(segStart)
			segmentTimes[index] = segTime

			if err != nil {
				log.Printf("句子 #%d 合成失败,耗时: %v, 错误: %v", index+1, segTime, err)
				// Non-blocking send; the buffer holds one error per piece.
				select {
				case errChan <- fmt.Errorf("句子 %d 合成失败: %w", index+1, err):
				default:
				}
				return
			}

			log.Printf("句子 #%d 合成成功:长度=%d, 耗时=%v, 音频大小=%s",
				index+1, utf8.RuneCountInString(sentences[index]), segTime, formatFileSize(len(resp.AudioContent)))
			results[index] = resp.AudioContent
		}(i)
	}

	wg.Wait()
	close(errChan)

	synthesisTime := time.Since(synthesisStart)
	log.Printf("所有分段合成总耗时: %v, 平均每段耗时: %v",
		synthesisTime, synthesisTime/time.Duration(segmentCount))

	// A receive on the closed channel yields nil when no piece failed.
	if err := <-errChan; err != nil {
		http.Error(w, "语音合成失败: "+err.Error(), http.StatusInternalServerError)
		return
	}

	// Phase 3: merge and write. The write timer includes the merge.
	writeStart := time.Now()
	audioData, err := audioMerge(results)
	if err != nil {
		log.Printf("合并音频失败: %v", err)
		http.Error(w, "音频合并失败: "+err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "audio/mpeg")
	totalSize := len(audioData)
	if _, writeErr := w.Write(audioData); writeErr != nil {
		log.Printf("写入响应失败: %v", writeErr)
	}
	writeTime := time.Since(writeStart)

	totalTime := time.Since(segmentStart)
	log.Printf("分段TTS请求总耗时: %v (分割: %v, 合成: %v, 写入: %v), 总音频大小: %s",
		totalTime, splitTime, synthesisTime, writeTime, formatFileSize(totalSize))
}
// splitTextBySentences splits text into pieces suitable for separate
// synthesis requests.
//
// Text shorter than 100 runes is returned as a single piece. Otherwise the
// text is cut after every sentence-ending rune (。！？… . ! ? or newline) and
// whenever a piece reaches the configured MaxSentenceLength in runes. Pieces
// shorter than the configured MinSentenceLength are then merged with their
// neighbours as long as the merged piece does not exceed MaxSentenceLength.
// Concatenating the returned pieces reproduces text.
//
// The ender list previously contained two empty strings, which can never
// equal a rune, so the full-width '！' and '？' never ended a sentence. They
// are matched explicitly here.
func splitTextBySentences(text string) []string {
	// Too short to be worth splitting.
	if utf8.RuneCountInString(text) < 100 {
		return []string{text}
	}

	maxSentenceLength := config.Get().TTS.MaxSentenceLength // cap per piece, in runes
	minSentenceLength := config.Get().TTS.MinSentenceLength // pieces below this are merged

	// Pass 1: cut at sentence enders or at the length cap.
	var sentences []string
	var currentSentence strings.Builder
	runeCount := 0 // runes (not bytes) in the piece being built
	for _, char := range text {
		currentSentence.WriteRune(char)
		runeCount++

		isSentenceEnder := false
		switch char {
		case '。', '！', '？', '…', '.', '!', '?', '\n':
			isSentenceEnder = true
		}

		if isSentenceEnder || runeCount >= maxSentenceLength {
			sentences = append(sentences, currentSentence.String())
			currentSentence.Reset()
			runeCount = 0
		}
	}
	// Trailing text without a terminating ender.
	if currentSentence.Len() > 0 {
		sentences = append(sentences, currentSentence.String())
	}

	if len(sentences) <= 1 {
		return sentences
	}

	// Pass 2: merge pieces that are too short.
	mergedSentences := []string{}
	var currentMerged strings.Builder
	currentMergedLength := 0

	for i, sentence := range sentences {
		sentenceLength := utf8.RuneCountInString(sentence)

		if sentenceLength < minSentenceLength && i < len(sentences)-1 {
			// Short and not the last piece: buffer it, flushing the buffer
			// first when adding this piece would exceed the cap.
			if currentMergedLength+sentenceLength > maxSentenceLength {
				if currentMerged.Len() > 0 {
					mergedSentences = append(mergedSentences, currentMerged.String())
					currentMerged.Reset()
					currentMergedLength = 0
				}
			}
			currentMerged.WriteString(sentence)
			currentMergedLength += sentenceLength
			continue
		}

		// Long enough, or the last piece.
		if currentMerged.Len() == 0 {
			mergedSentences = append(mergedSentences, sentence)
			continue
		}
		if currentMergedLength+sentenceLength <= maxSentenceLength {
			// Fits: append to the buffered text and emit both as one piece.
			currentMerged.WriteString(sentence)
			mergedSentences = append(mergedSentences, currentMerged.String())
		} else {
			// Would exceed the cap: emit the buffer and this piece separately.
			mergedSentences = append(mergedSentences, currentMerged.String())
			mergedSentences = append(mergedSentences, sentence)
		}
		currentMerged.Reset()
		currentMergedLength = 0
	}

	// Defensive: the last piece always flushes the buffer above, so this is
	// not expected to trigger.
	if currentMerged.Len() > 0 {
		mergedSentence := currentMerged.String()
		mergedSentences = append(mergedSentences, mergedSentence)
		log.Printf("添加最后剩余的合并句子,长度=%d", utf8.RuneCountInString(mergedSentence))
	}

	return mergedSentences
}
// truncateForLog shortens text for log output while keeping both its
// beginning and its end visible.
//
// "\n" and "\r" are first replaced by spaces. If the result has at most
// maxLength runes it is returned as is; otherwise the first and the last
// maxLength/2 runes are returned, joined by "...".
func truncateForLog(text string, maxLength int) string {
	flat := strings.NewReplacer("\n", " ", "\r", " ").Replace(text)

	chars := []rune(flat)
	if len(chars) > maxLength {
		keep := maxLength / 2 // runes shown on each side of the ellipsis
		head, tail := chars[:keep], chars[len(chars)-keep:]
		return string(head) + "..." + string(tail)
	}
	return flat
}
// audioMerge concatenates MP3 segments into a single MP3 using ffmpeg's
// concat demuxer with stream copy (no re-encoding).
//
// Each segment is written to a private temporary directory, listed in a
// concat file, and merged by an external "ffmpeg" process that must be on
// PATH. The temporary directory is removed before returning.
//
// It returns an error when audioSegments is empty, when any temporary file
// cannot be written or read, or when ffmpeg exits unsuccessfully; in the
// last case the error carries ffmpeg's combined output.
func audioMerge(audioSegments [][]byte) ([]byte, error) {
	if len(audioSegments) == 0 {
		return nil, fmt.Errorf("没有音频片段可合并")
	}

	tempDir, err := os.MkdirTemp("", "audio_merge_")
	if err != nil {
		return nil, fmt.Errorf("创建临时目录失败: %w", err)
	}
	defer os.RemoveAll(tempDir)

	// Build the concat list in memory and write it in one call, so no file
	// handle is left open on any early return.
	var list strings.Builder
	for i, seg := range audioSegments {
		segFile := filepath.Join(tempDir, fmt.Sprintf("seg_%d.mp3", i))
		if err := os.WriteFile(segFile, seg, 0644); err != nil {
			return nil, fmt.Errorf("写入音频片段 %d 失败: %w", i, err)
		}
		// Inside single quotes the concat syntax needs a literal quote
		// written as '\''; the temp dir comes from the environment.
		fmt.Fprintf(&list, "file '%s'\n", strings.ReplaceAll(segFile, "'", `'\''`))
	}

	listFile := filepath.Join(tempDir, "concat.txt")
	if err := os.WriteFile(listFile, []byte(list.String()), 0644); err != nil {
		return nil, fmt.Errorf("写入合并列表失败: %w", err)
	}

	outputFile := filepath.Join(tempDir, "output.mp3")
	cmd := exec.Command("ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", listFile, "-c", "copy", outputFile)
	// Capture ffmpeg's output so a failure is diagnosable from the error.
	if out, err := cmd.CombinedOutput(); err != nil {
		return nil, fmt.Errorf("ffmpeg 合并失败: %w: %s", err, strings.TrimSpace(string(out)))
	}

	mergedData, err := os.ReadFile(outputFile)
	if err != nil {
		return nil, fmt.Errorf("读取合并结果失败: %w", err)
	}
	log.Printf("使用ffmpeg合并完成总大小: %s", formatFileSize(len(mergedData)))
	return mergedData, nil
}
// formatFileSize renders a byte count as a human-readable string.
//
// Values below 1024 are printed as whole bytes ("%d B"); larger values are
// printed with two decimals in KB, MB or GB, using 1024-based units.
func formatFileSize(size int) string {
	const (
		kb = 1024
		mb = 1024 * kb
		gb = 1024 * mb
	)

	if size < kb {
		return fmt.Sprintf("%d B", size)
	}

	// Pick the largest unit that does not exceed the value.
	unit, divisor := "GB", float64(gb)
	if size < mb {
		unit, divisor = "KB", kb
	} else if size < gb {
		unit, divisor = "MB", mb
	}
	return fmt.Sprintf("%.2f %s", float64(size)/divisor, unit)
}

View File

@@ -0,0 +1,41 @@
package handlers
import (
"encoding/json"
"net/http"
"tts/internal/tts"
)
// VoicesHandler serves voice-list requests.
type VoicesHandler struct {
	// ttsService is the backend whose ListVoices method HandleVoices calls.
	ttsService tts.Service
}
// NewVoicesHandler returns a VoicesHandler that answers requests using the
// given TTS service.
func NewVoicesHandler(service tts.Service) *VoicesHandler {
	handler := new(VoicesHandler)
	handler.ttsService = service
	return handler
}
// HandleVoices writes the list of available voices as JSON.
//
// The optional "locale" query parameter is passed to the TTS service as a
// filter. A failure to fetch or to encode the list yields a 500 response.
func (h *VoicesHandler) HandleVoices(w http.ResponseWriter, r *http.Request) {
	// Optional locale filter from the query string.
	locale := r.URL.Query().Get("locale")

	voices, err := h.ttsService.ListVoices(r.Context(), locale)
	if err != nil {
		http.Error(w, "获取语音列表失败: "+err.Error(), http.StatusInternalServerError)
		return
	}

	// Encode into memory first: once any body byte is sent the status is
	// fixed at 200, so an encoding error could no longer be reported as 500.
	payload, err := json.Marshal(voices)
	if err != nil {
		http.Error(w, "JSON编码失败", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	// The trailing newline matches what json.Encoder would have produced.
	// A write error means the client went away; nothing more can be sent.
	_, _ = w.Write(append(payload, '\n'))
}

View File

@@ -0,0 +1,22 @@
package middleware
import "net/http"
// CORS is middleware that adds permissive cross-origin headers to every
// response: any origin, the GET/POST/OPTIONS methods, and the Content-Type
// and Authorization request headers.
//
// OPTIONS (preflight) requests are answered directly with 200 and are not
// passed to next; all other requests continue to next.
func CORS(next http.Handler) http.Handler {
	serve := func(w http.ResponseWriter, r *http.Request) {
		headers := w.Header()
		headers.Set("Access-Control-Allow-Origin", "*")
		headers.Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
		headers.Set("Access-Control-Allow-Headers", "Content-Type, Authorization")

		if r.Method != http.MethodOptions {
			// Regular request: hand over to the wrapped handler.
			next.ServeHTTP(w, r)
			return
		}
		// Preflight request: the headers above are the whole answer.
		w.WriteHeader(http.StatusOK)
	}
	return http.HandlerFunc(serve)
}

View File

@@ -0,0 +1,46 @@
package middleware
import (
"log"
"net/http"
"time"
)
// Logger is middleware that logs one line per request after the wrapped
// handler returns: method, request URI, remote address, response status code
// and elapsed time.
func Logger(next http.Handler) http.Handler {
	logged := func(w http.ResponseWriter, r *http.Request) {
		begin := time.Now()

		// The wrapper records the status code; it starts at 200 because that
		// is what is sent when the handler never calls WriteHeader.
		recorder := &responseWriterWrapper{ResponseWriter: w, statusCode: http.StatusOK}
		next.ServeHTTP(recorder, r)

		elapsed := time.Since(begin)
		log.Printf("[%s] %s %s %d %s", r.Method, r.RequestURI, r.RemoteAddr, recorder.statusCode, elapsed)
	}
	return http.HandlerFunc(logged)
}
// responseWriterWrapper wraps an http.ResponseWriter so that the status code
// passed to WriteHeader can be read back afterwards.
type responseWriterWrapper struct {
	http.ResponseWriter     // underlying writer; all other methods are promoted from it
	statusCode          int // last status code passed to WriteHeader
}
// WriteHeader remembers the status code and then forwards it to the wrapped
// ResponseWriter.
func (rw *responseWriterWrapper) WriteHeader(code int) {
	rw.statusCode = code
	rw.ResponseWriter.WriteHeader(code)
}

View File

@@ -0,0 +1,83 @@
package server
import (
"context"
"fmt"
"log"
"os"
"os/signal"
"syscall"
"time"
"tts/internal/config"
)
// App represents the whole TTS application.
type App struct {
	server *Server        // HTTP server started and shut down by Start
	cfg    *config.Config // configuration loaded by NewApp
}
// NewApp builds an application from the configuration file at configPath.
//
// It loads the configuration, initializes the TTS service, sets up the
// routes and wraps them in an HTTP server. The first step that fails aborts
// construction and its error is returned wrapped with a description of the
// step.
func NewApp(configPath string) (*App, error) {
	cfg, err := config.Load(configPath)
	if err != nil {
		return nil, fmt.Errorf("加载配置失败: %w", err)
	}

	service, err := InitializeServices(cfg)
	if err != nil {
		return nil, fmt.Errorf("初始化服务失败: %w", err)
	}

	routes, err := SetupRoutes(cfg, service)
	if err != nil {
		return nil, fmt.Errorf("设置路由失败: %w", err)
	}

	app := &App{cfg: cfg, server: New(cfg, routes)}
	return app, nil
}
// Start runs the HTTP server and blocks until it stops.
//
// The server is started in its own goroutine. Start returns the server's
// error if it stops on its own, or, after SIGINT/SIGTERM is received, the
// result of a graceful shutdown that is given at most 10 seconds.
func (a *App) Start() error {
	// Buffered so the server goroutine can always deliver its result, even
	// when Start has already returned via the signal path.
	errChan := make(chan error, 1)

	quit := make(chan os.Signal, 1)
	signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
	// Release the signal registration when Start returns.
	defer signal.Stop(quit)

	go func() {
		log.Printf("启动TTS服务监听端口 %d...\n", a.cfg.Server.Port)
		errChan <- a.server.Start()
	}()

	select {
	case err := <-errChan:
		return err
	case <-quit:
		log.Println("接收到退出信号,正在优雅关闭...")

		// Bound the time allowed for in-flight requests to finish.
		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		defer cancel()

		if err := a.server.Shutdown(ctx); err != nil {
			return fmt.Errorf("服务器关闭出错: %w", err)
		}
		log.Println("服务器已优雅关闭")
		return nil
	}
}

View File

@@ -0,0 +1,65 @@
package server
import (
"net/http"
"tts/internal/config"
"tts/internal/http/handlers"
"tts/internal/http/middleware"
"tts/internal/tts"
"tts/internal/tts/microsoft"
)
// SetupRoutes builds the HTTP handler for the whole application.
//
// It registers the index and API-doc pages, the /tts and /voices endpoints,
// the OpenAI-style speech endpoints and the static file server. If
// cfg.Server.BasePath is non-empty that prefix is stripped from request
// paths before routing. The result is wrapped in the Logger middleware and,
// outermost, the CORS middleware. An error is returned only when the page
// handler cannot be created.
func SetupRoutes(cfg *config.Config, ttsService tts.Service) (http.Handler, error) {
	ttsHandler := handlers.NewTTSHandler(ttsService, cfg)
	voicesHandler := handlers.NewVoicesHandler(ttsService)
	pagesHandler, err := handlers.NewPagesHandler("./web/templates", cfg)
	if err != nil {
		return nil, err
	}

	mux := http.NewServeMux()

	// Pattern-to-handler table for all function-based routes.
	routes := []struct {
		pattern string
		handle  func(http.ResponseWriter, *http.Request)
	}{
		{"/", pagesHandler.HandleIndex},
		{"/api-doc", pagesHandler.HandleAPIDoc},
		{"/tts", ttsHandler.HandleTTS},
		{"/voices", voicesHandler.HandleVoices},
		{"/v1/audio/speech", ttsHandler.HandleOpenAITTS},
		{"/audio/speech", ttsHandler.HandleOpenAITTS},
	}
	for _, route := range routes {
		mux.HandleFunc(route.pattern, route.handle)
	}

	// Static assets are served from ./web/static under /static/.
	mux.Handle("/static/", http.StripPrefix("/static/", http.FileServer(http.Dir("./web/static"))))

	var root http.Handler = mux
	if base := cfg.Server.BasePath; base != "" {
		root = http.StripPrefix(base, mux)
	}

	// Logger runs inside CORS, so preflight requests answered by CORS are
	// not logged.
	return middleware.CORS(middleware.Logger(root)), nil
}
// InitializeServices creates the TTS service used by the application, which
// is a Microsoft TTS client built from cfg. The returned error is always nil.
func InitializeServices(cfg *config.Config) (tts.Service, error) {
	return microsoft.NewClient(cfg), nil
}

View File

@@ -0,0 +1,45 @@
package server
import (
"context"
"fmt"
"net/http"
"time"
"tts/internal/config"
)
// Server wraps the standard library HTTP server.
type Server struct {
	server   *http.Server // underlying server, configured by New
	basePath string       // copy of cfg.Server.BasePath taken in New
}
// New creates a Server that listens on cfg.Server.Port and serves handler.
//
// Read and write timeouts come from cfg.Server and are interpreted as
// seconds; the idle timeout is fixed at 120 seconds.
func New(cfg *config.Config, handler http.Handler) *Server {
	srv := &Server{basePath: cfg.Server.BasePath}
	srv.server = &http.Server{
		Addr:         fmt.Sprintf(":%d", cfg.Server.Port),
		Handler:      handler,
		ReadTimeout:  time.Duration(cfg.Server.ReadTimeout) * time.Second,
		WriteTimeout: time.Duration(cfg.Server.WriteTimeout) * time.Second,
		IdleTimeout:  120 * time.Second,
	}
	return srv
}
// Start prints the listen address to standard output and then serves
// requests until the underlying server stops, returning the error from
// ListenAndServe.
func (s *Server) Start() error {
	addr := s.server.Addr
	fmt.Printf("服务启动在 %s\n", addr)
	return s.server.ListenAndServe()
}
// Shutdown prints a notice to standard output and gracefully shuts the
// underlying server down, passing ctx through to http.Server.Shutdown.
func (s *Server) Shutdown(ctx context.Context) error {
	fmt.Println("正在关闭HTTP服务器...")
	err := s.server.Shutdown(ctx)
	return err
}

16
internal/models/tts.go Normal file
View File

@@ -0,0 +1,16 @@
package models
// TTSRequest describes a single speech-synthesis request.
type TTSRequest struct {
	Text  string `json:"text"`  // text to convert to speech
	Voice string `json:"voice"` // voice ID
	Rate  string `json:"rate"`  // speaking rate (-100% to +100%)
	Pitch string `json:"pitch"` // pitch (-100% to +100%)
}
// TTSResponse describes the result of a speech-synthesis request.
type TTSResponse struct {
	AudioContent []byte `json:"audio_content"` // audio data
	ContentType  string `json:"content_type"`  // MIME type of AudioContent
	CacheHit     bool   `json:"cache_hit"`     // whether the result came from a cache
}

14
internal/models/voice.go Normal file
View File

@@ -0,0 +1,14 @@
package models
// Voice describes one speech-synthesis voice.
type Voice struct {
	Name            string   `json:"name"`                 // unique voice identifier
	DisplayName     string   `json:"display_name"`         // display name
	LocalName       string   `json:"local_name"`           // localized name
	ShortName       string   `json:"short_name"`           // short name, e.g. zh-CN-XiaoxiaoNeural
	Gender          string   `json:"gender"`               // gender: Female, Male
	Locale          string   `json:"locale"`               // locale, e.g. zh-CN
	LocaleName      string   `json:"locale_name"`          // display name of the locale
	StyleList       []string `json:"style_list,omitempty"` // supported speaking styles
	SampleRateHertz string   `json:"sample_rate_hertz"`    // sample rate
}

View File

@@ -0,0 +1,290 @@
package microsoft
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"html"
"io"
"log"
"net/http"
"strings"
"sync"
"time"
"tts/internal/config"
"tts/internal/models"
"tts/internal/utils"
)
// Request constants for the Microsoft speech service.
const (
	// userAgent is sent as the User-Agent header of synthesis requests.
	userAgent = "okhttp/4.5.0"
	// voicesEndpoint is the voice-list URL; %s receives the endpoint's "r" value.
	voicesEndpoint = "https://%s.tts.speech.microsoft.com/cognitiveservices/voices/list"
	// ttsEndpoint is the synthesis URL; %s receives the endpoint's "r" value.
	ttsEndpoint = "https://%s.tts.speech.microsoft.com/cognitiveservices/v1"
	// ssmlTemplate is the SSML request body. Its verbs are filled, in order,
	// with: locale, voice name, rate, pitch and the escaped text. The style
	// is fixed to "general".
	ssmlTemplate = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts="http://www.w3.org/2001/mstts" xml:lang='%s'>
<voice name='%s'>
<mstts:express-as style="general" styledegree="1.0" role="default">
<prosody rate='%s%%' pitch='%s%%' volume="medium">
%s
</prosody>
</mstts:express-as>
</voice>
</speak>`
)
// Client is the Microsoft TTS API client.
type Client struct {
	defaultVoice  string // voice used when a request names none
	defaultRate   string // rate used when a request names none
	defaultPitch  string // pitch used when a request names none
	defaultFormat string // value of the X-Microsoft-OutputFormat header
	maxTextLength int    // upper bound checked against len(req.Text)
	httpClient    *http.Client

	// Voice-list cache, guarded by voicesCacheMu.
	voicesCache       []models.Voice
	voicesCacheMu     sync.RWMutex
	voicesCacheExpiry time.Time

	// Endpoint and authentication info, guarded by endpointMu. The map's
	// "r" entry is formatted into the service URLs and its "t" entry is sent
	// as the Authorization header.
	endpoint       map[string]interface{}
	endpointMu     sync.RWMutex
	endpointExpiry time.Time
}
// HandleOpenAITTS is an unimplemented placeholder. It answers every request
// with 501 Not Implemented instead of panicking inside the HTTP server.
func (c *Client) HandleOpenAITTS(w http.ResponseWriter, r *http.Request) {
	// TODO: implement, or remove if the client is not meant to serve HTTP.
	http.Error(w, "not implemented", http.StatusNotImplemented)
}
// NewClient creates a Microsoft TTS client from cfg.TTS.
//
// The HTTP client's timeout is cfg.TTS.RequestTimeout seconds. Both caches
// start empty: their expiry times are the zero time.Time.
func NewClient(cfg *config.Config) *Client {
	httpClient := &http.Client{
		Timeout: time.Duration(cfg.TTS.RequestTimeout) * time.Second,
	}

	// voicesCacheExpiry and endpointExpiry are left at their zero value,
	// which the cache checks treat as "nothing cached yet".
	return &Client{
		defaultVoice:  cfg.TTS.DefaultVoice,
		defaultRate:   cfg.TTS.DefaultRate,
		defaultPitch:  cfg.TTS.DefaultPitch,
		defaultFormat: cfg.TTS.DefaultFormat,
		maxTextLength: cfg.TTS.MaxTextLength,
		httpClient:    httpClient,
	}
}
// getEndpoint returns the endpoint and authentication info, using the cached
// copy while it is still valid and fetching a fresh one otherwise.
//
// A fresh payload is cached for 45 minutes. It is accepted only when its "r"
// (region) and "t" (token) entries are strings, because callers format "r"
// into the service URL and send "t" as the Authorization header. ctx is
// currently unused since utils.GetEndpoint takes no context.
func (c *Client) getEndpoint(ctx context.Context) (map[string]interface{}, error) {
	c.endpointMu.RLock()
	cached, expiry := c.endpoint, c.endpointExpiry
	c.endpointMu.RUnlock()
	if cached != nil && !expiry.IsZero() && time.Now().Before(expiry) {
		return cached, nil
	}

	// Fetch outside the lock so readers are not blocked by the network call.
	endpoint, err := utils.GetEndpoint()
	if err != nil {
		return nil, err
	}

	// Reject malformed payloads instead of caching them.
	if _, ok := endpoint["r"].(string); !ok {
		return nil, errors.New("端点响应缺少区域信息")
	}
	if _, ok := endpoint["t"].(string); !ok {
		return nil, errors.New("端点响应缺少认证令牌")
	}

	c.endpointMu.Lock()
	c.endpoint = endpoint
	// Refresh ahead of the token's expiry (the original note says tokens
	// usually last one hour).
	c.endpointExpiry = time.Now().Add(45 * time.Minute)
	c.endpointMu.Unlock()
	return endpoint, nil
}
// ListVoices returns the available voices, optionally filtered by locale.
//
// The full list is fetched from the service at most once per hour and cached.
// If locale is non-empty, only voices whose Locale starts with it are
// returned; when nothing matches the result is an empty, non-nil slice so
// that it encodes as a JSON array. With an empty locale the cached slice
// itself is returned and must not be modified by the caller.
func (c *Client) ListVoices(ctx context.Context, locale string) ([]models.Voice, error) {
	// filterByLocale applies the optional locale prefix filter.
	filterByLocale := func(all []models.Voice) []models.Voice {
		if locale == "" {
			return all
		}
		filtered := make([]models.Voice, 0, len(all))
		for _, voice := range all {
			if strings.HasPrefix(voice.Locale, locale) {
				filtered = append(filtered, voice)
			}
		}
		return filtered
	}

	// Serve from the cache while it is valid.
	c.voicesCacheMu.RLock()
	cached, expiry := c.voicesCache, c.voicesCacheExpiry
	c.voicesCacheMu.RUnlock()
	if len(cached) > 0 && !expiry.IsZero() && time.Now().Before(expiry) {
		return filterByLocale(cached), nil
	}

	// Cache miss: fetch the list from the API.
	endpoint, err := c.getEndpoint(ctx)
	if err != nil {
		return nil, err
	}
	// Checked assertion: a payload without a string token must not panic.
	token, ok := endpoint["t"].(string)
	if !ok {
		return nil, errors.New("端点响应缺少认证令牌")
	}

	url := fmt.Sprintf(voicesEndpoint, endpoint["r"])
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("Authorization", token)

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// The body is included for diagnostics only; a read error is ignored.
		body, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("API error: %s, status: %d", string(body), resp.StatusCode)
	}

	var msVoices []MicrosoftVoice
	if err := json.NewDecoder(resp.Body).Decode(&msVoices); err != nil {
		return nil, err
	}

	// Convert to the provider-independent model.
	voices := make([]models.Voice, len(msVoices))
	for i, v := range msVoices {
		voices[i] = models.Voice{
			Name:            v.Name,
			DisplayName:     v.DisplayName,
			LocalName:       v.LocalName,
			ShortName:       v.ShortName,
			Gender:          v.Gender,
			Locale:          v.Locale,
			LocaleName:      v.LocaleName,
			StyleList:       v.StyleList,
			SampleRateHertz: v.SampleRateHertz,
		}
	}

	c.voicesCacheMu.Lock()
	c.voicesCache = voices
	c.voicesCacheExpiry = time.Now().Add(1 * time.Hour)
	c.voicesCacheMu.Unlock()

	return filterByLocale(voices), nil
}
// SynthesizeSpeech converts the request's text to speech and returns the
// complete audio in memory.
//
// The response's ContentType is always "audio/mpeg" and CacheHit is always
// false. Errors from building or sending the request, or from reading the
// response body, are returned unchanged.
func (c *Client) SynthesizeSpeech(ctx context.Context, req models.TTSRequest) (*models.TTSResponse, error) {
	httpResp, err := c.createTTSRequest(ctx, req)
	if err != nil {
		return nil, err
	}
	defer httpResp.Body.Close()

	payload, readErr := io.ReadAll(httpResp.Body)
	if readErr != nil {
		return nil, readErr
	}

	result := new(models.TTSResponse)
	result.AudioContent = payload
	result.ContentType = "audio/mpeg"
	result.CacheHit = false
	return result, nil
}
// createTTSRequest builds the SSML for req, sends it to the synthesis
// endpoint and returns the HTTP response.
//
// Empty Voice, Rate and Pitch fall back to the client's defaults. The locale
// is taken from the first two "-"-separated parts of the voice name and
// defaults to "zh-CN". On success the caller owns resp.Body and must close
// it. An error is returned for empty or over-long text, for endpoint or
// transport failures, and for any non-200 status, in which case the response
// body is included in the error and logged.
func (c *Client) createTTSRequest(ctx context.Context, req models.TTSRequest) (*http.Response, error) {
	if req.Text == "" {
		return nil, errors.New("文本不能为空")
	}
	// NOTE(review): len counts bytes, so multi-byte text reaches the limit
	// sooner than its character count suggests — confirm this is intended.
	if len(req.Text) > c.maxTextLength {
		return nil, fmt.Errorf("文本长度超过限制 (%d > %d)", len(req.Text), c.maxTextLength)
	}

	// Fill unset parameters from the defaults.
	voice := req.Voice
	if voice == "" {
		voice = c.defaultVoice
	}
	rate := req.Rate
	if rate == "" {
		rate = c.defaultRate
	}
	pitch := req.Pitch
	if pitch == "" {
		pitch = c.defaultPitch
	}

	// Derive the locale from the voice name, e.g. "zh-CN-XiaoxiaoNeural".
	locale := "zh-CN"
	if parts := strings.Split(voice, "-"); len(parts) >= 2 {
		locale = parts[0] + "-" + parts[1]
	}

	// Escape every interpolated value, not only the text: voice, rate and
	// pitch come from the caller and end up inside quoted SSML attributes.
	ssml := fmt.Sprintf(ssmlTemplate,
		html.EscapeString(locale),
		html.EscapeString(voice),
		html.EscapeString(rate),
		html.EscapeString(pitch),
		html.EscapeString(req.Text),
	)

	endpoint, err := c.getEndpoint(ctx)
	if err != nil {
		return nil, err
	}
	// Checked assertion: a payload without a string token must not panic.
	token, ok := endpoint["t"].(string)
	if !ok {
		return nil, errors.New("端点响应缺少认证令牌")
	}

	url := fmt.Sprintf(ttsEndpoint, endpoint["r"])
	reqBody := bytes.NewBufferString(ssml)
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, reqBody)
	if err != nil {
		return nil, err
	}
	httpReq.Header.Set("Authorization", token)
	httpReq.Header.Set("Content-Type", "application/ssml+xml")
	httpReq.Header.Set("X-Microsoft-OutputFormat", c.defaultFormat)
	httpReq.Header.Set("User-Agent", userAgent)

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return nil, err
	}

	if resp.StatusCode != http.StatusOK {
		// Read the body for diagnostics and close it here, because the
		// caller gets no response to close on this path.
		body, _ := io.ReadAll(resp.Body)
		resp.Body.Close()
		log.Printf("TTS API错误: %s, 状态码: %d", string(body), resp.StatusCode)
		return nil, fmt.Errorf("TTS API错误: %s, 状态码: %d", string(body), resp.StatusCode)
	}
	return resp, nil
}

View File

@@ -0,0 +1,45 @@
package microsoft
// MicrosoftVoice is one voice entry as returned by the Microsoft TTS service.
// The JSON tags use the service's capitalized key names.
type MicrosoftVoice struct {
	Name            string   `json:"Name"`
	DisplayName     string   `json:"DisplayName"`
	LocalName       string   `json:"LocalName"`
	ShortName       string   `json:"ShortName"`
	Gender          string   `json:"Gender"`
	Locale          string   `json:"Locale"`
	LocaleName      string   `json:"LocaleName"`
	StyleList       []string `json:"StyleList,omitempty"`
	SampleRateHertz string   `json:"SampleRateHertz"` // kept as a string
	VoiceType       string   `json:"VoiceType"`
	Status          string   `json:"Status"`
}
// SSMLRequest holds the parts of an SSML request sent to the Microsoft TTS
// service.
type SSMLRequest struct {
	XMLHeader string
	Voice     string
	Language  string
	Rate      string
	Pitch     string
	Text      string
}
// FormatContentTypeMap maps an audio output format name to its MIME type.
var FormatContentTypeMap = map[string]string{
	// 8 kHz and 16 kHz formats
	"raw-16khz-16bit-mono-pcm":         "audio/pcm",
	"raw-8khz-8bit-mono-mulaw":         "audio/basic",
	"riff-8khz-8bit-mono-alaw":         "audio/alaw",
	"riff-8khz-8bit-mono-mulaw":        "audio/mulaw",
	"riff-16khz-16bit-mono-pcm":        "audio/wav",
	"audio-16khz-128kbitrate-mono-mp3": "audio/mp3",
	"audio-16khz-64kbitrate-mono-mp3":  "audio/mp3",
	"audio-16khz-32kbitrate-mono-mp3":  "audio/mp3",
	// 24 kHz formats
	"raw-24khz-16bit-mono-pcm":         "audio/pcm",
	"riff-24khz-16bit-mono-pcm":        "audio/wav",
	"audio-24khz-160kbitrate-mono-mp3": "audio/mp3",
	"audio-24khz-96kbitrate-mono-mp3":  "audio/mp3",
	"audio-24khz-48kbitrate-mono-mp3":  "audio/mp3",
	"ogg-24khz-16bit-mono-opus":        "audio/ogg",
	"webm-24khz-16bit-mono-opus":       "audio/webm",
}

15
internal/tts/service.go Normal file
View File

@@ -0,0 +1,15 @@
package tts
import (
"context"
"tts/internal/models"
)
// Service 定义TTS服务接口
type Service interface {
// ListVoices 获取可用的语音列表
ListVoices(ctx context.Context, locale string) ([]models.Voice, error)
// SynthesizeSpeech 将文本转换为语音
SynthesizeSpeech(ctx context.Context, req models.TTSRequest) (*models.TTSResponse, error)
}

87
internal/utils/utils.go Normal file
View File

@@ -0,0 +1,87 @@
package utils
import (
"crypto/hmac"
"crypto/sha256"
"encoding/base64"
"encoding/json"
"fmt"
"net/http"
"net/url"
"strings"
"time"
"github.com/google/uuid"
"github.com/sirupsen/logrus"
)
// Package-level helpers shared by the functions in this file.
var (
	// log is the logger used for request failures.
	log = logrus.New()
	// client sends the endpoint request. The timeout keeps a stalled
	// connection from blocking the caller indefinitely.
	client = &http.Client{Timeout: 30 * time.Second}
)
// Fixed values used to request and sign the endpoint call.
const (
	// endpointURL is the address GetEndpoint posts to and Sign signs.
	endpointURL = "https://dev.microsofttranslator.com/apps/endpoint?api-version=1.0"
	// The following values are sent as request headers by GetEndpoint.
	userAgent            = "okhttp/4.5.0"
	clientVersion        = "4.0.530a 5fe1dc6c"
	userId               = "0f04d16a175c411e"
	homeGeographicRegion = "zh-Hans-CN"
	clientTraceId        = "aab069b9-70a7-4844-a734-96cd78d94be9"
	// voiceDecodeKey is base64-decoded by Sign and used as the HMAC-SHA256 key.
	voiceDecodeKey = "oik6PdDdMnOXemTbwvMn9de/h9lFnfBaCWbGMMZqqoSaQaqUOqjVGm5NqsmjcBI1x+sS9ugjB55HEJWRiFXYFw=="
)
// GetEndpoint requests the speech-synthesis endpoint information and returns
// the decoded JSON object.
//
// The request is a signed, body-less POST to endpointURL. An error is
// returned when the request cannot be built or sent, when the status is not
// 2xx, or when the response is not valid JSON.
func GetEndpoint() (map[string]interface{}, error) {
	signature := Sign(endpointURL)

	// Accept-Encoding is deliberately not set here: the transport adds
	// "gzip" itself and then decompresses the reply transparently, which it
	// does not do when the header is set by the caller.
	headers := map[string]string{
		"Accept-Language":        "zh-Hans",
		"X-ClientVersion":        clientVersion,
		"X-UserId":               userId,
		"X-HomeGeographicRegion": homeGeographicRegion,
		"X-ClientTraceId":        clientTraceId,
		"X-MT-Signature":         signature,
		"User-Agent":             userAgent,
		"Content-Type":           "application/json; charset=utf-8",
		"Content-Length":         "0",
	}

	req, err := http.NewRequest(http.MethodPost, endpointURL, nil)
	if err != nil {
		return nil, err
	}
	for k, v := range headers {
		req.Header.Set(k, v)
	}

	resp, err := client.Do(req)
	if err != nil {
		log.Error("failed to do request: ", err)
		return nil, err
	}
	defer resp.Body.Close()

	// An error reply must not be handed to callers as endpoint data.
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return nil, fmt.Errorf("endpoint request failed: status %d", resp.StatusCode)
	}

	var result map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, err
	}
	return result, nil
}
// Sign builds the signature value for urlStr.
//
// The signed payload is the lower-cased concatenation of the app ID, the
// query-escaped URL without its scheme, the current UTC time and a fresh
// UUID without dashes. It is signed with HMAC-SHA256 using the decoded
// voiceDecodeKey, and the result has the form
// "MSTranslatorAndroidApp::<base64 signature>::<date>::<uuid>".
// Each call yields a different value because of the time and the UUID.
func Sign(urlStr string) string {
	// Drop the scheme. A URL without "://" is signed as is rather than
	// causing an index-out-of-range panic.
	u := urlStr
	if parts := strings.Split(urlStr, "://"); len(parts) > 1 {
		u = parts[1]
	}
	encodedUrl := url.QueryEscape(u)
	uuidStr := strings.ReplaceAll(uuid.New().String(), "-", "")
	formattedDate := strings.ToLower(time.Now().UTC().Format("Mon, 02 Jan 2006 15:04:05")) + "gmt"

	bytesToSign := fmt.Sprintf("MSTranslatorAndroidApp%s%s%s", encodedUrl, formattedDate, uuidStr)
	bytesToSign = strings.ToLower(bytesToSign)

	// The key is a compile-time constant, so the decode error is ignored.
	decode, _ := base64.StdEncoding.DecodeString(voiceDecodeKey)
	hash := hmac.New(sha256.New, decode)
	hash.Write([]byte(bytesToSign))
	secretKey := hash.Sum(nil)
	signBase64 := base64.StdEncoding.EncodeToString(secretKey)

	return fmt.Sprintf("MSTranslatorAndroidApp::%s::%s::%s", signBase64, formattedDate, uuidStr)
}

View File

@@ -1,23 +0,0 @@
package routes
import (
"tts/handlers"
"github.com/gin-gonic/gin"
)
// SetupRouter creates the gin engine, loads the HTML templates from
// "templates/*" and registers all routes: the voice list, speech synthesis
// via GET and POST, the OpenAI-style speech endpoint, the index page and the
// documentation page.
func SetupRouter() *gin.Engine {
	engine := gin.Default()

	// Templates must be loaded before the page handlers can render them.
	engine.LoadHTMLGlob("templates/*")

	// API endpoints.
	engine.GET("/voices", handlers.GetVoiceList)
	engine.POST("/tts", handlers.SynthesizeVoicePost)
	engine.GET("/tts", handlers.SynthesizeVoice)
	engine.GET("/v1/audio/speech", handlers.SynthesizeVoiceOpenAI)

	// Pages.
	engine.GET("/", handlers.Index)
	engine.GET("/doc", handlers.ApiDoc)

	return engine
}

View File

@@ -1,37 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>TTS</title>
</head>
<body>
<h1> 支持接口 </h1>
<h2>语音合成</h2>
<div>
<strong>/tts</strong> | GET / POST(json)
<a target="_blank" href="/tts?t=岂曰无衣?与子同袍。王于兴师,修我戈矛,与子同仇!岂曰无衣?与子同泽。王于兴师,修我矛戟,与子偕作!岂曰无衣?与子同裳。王于兴师,修我甲兵,与子偕行!&v=zh-CN-XiaoxiaoMultilingualNeural&r=0&p=0&o=audio-24khz-48kbitrate-mono-mp3">try</a>
</div>
<pre>
参数列表:
1. t: 文本内容 (必填)
2. v: 语音名称 (可选), 默认为 zh-CN-XiaoxiaoMultilingualNeural
3. r: 语速 (可选), 默认为 0
4. p: 语调 (可选), 默认为 0
5. o: 输出格式 (可选), 默认为audio-24khz-48kbitrate-mono-mp3
</pre>
<h2>声音列表</h2>
<div>
<strong>/voices</strong> | GET <a target="_blank" href="/voices?l=zh">try</a>
</div>
<pre>
参数列表:
1. l: 语言区域 (可选), 使用 contains 匹配,如 l=zh
2. d: 显示详细信息 (可选) , 默认为 false, 如需显示详细信息, 请添加参数d , 如 /voices?d
</pre>
</body>
</html>

View File

@@ -1,121 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>TTS Demo</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
<script src="https://cdn.tailwindcss.com"></script>
<style>
.top-right {
position: absolute;
top: 20px;
right: 20px;
}
</style>
</head>
<body class="bg-gradient-to-r from-blue-100 to-purple-100 min-h-screen flex items-center justify-center p-4">
<div class="top-right">
<a href="/doc" class="hover:underline p-2 rounded">Documentation</a>
</div>
<div class="bg-white p-8 rounded-xl shadow-lg w-full max-w-4xl">
<h1 class="text-4xl font-bold mb-8 text-center text-gray-800">语音合成演示</h1>
<div id="ttsForm" class="space-y-6">
<textarea id="textInput" rows="6" class="w-full p-4 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 text-gray-700 text-lg resize-none" placeholder="请输入要合成的文本">欢迎使用我们的语音合成演示系统。这项技术能够将文字转换成自然流畅的语音。您可以尝试调整语速和语调,体验不同的合成效果。我们提供多种语言和声音选项,满足您的各种需求。无论是阅读文章、语言学习,还是辅助视障人士,语音合成技术都能发挥重要作用。希望这个演示能让您感受到科技的魅力。祝您使用愉快!</textarea>
<div class="grid grid-cols-2 gap-4">
<div>
<label for="localeSelect" class="block text-sm font-medium text-gray-700 mb-1">语言</label>
<select id="localeSelect" class="w-full p-3 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 text-gray-700">
<option value="zh-CN">中文 (中国)</option>
<option value="en-US">English (US)</option>
<option value="ja-JP">日本語 (日本)</option>
</select>
</div>
<div>
<label for="voiceSelect" class="block text-sm font-medium text-gray-700 mb-1">声音</label>
<select id="voiceSelect" class="w-full p-3 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 text-gray-700"></select>
</div>
<div>
<label for="styleSelect" class="block text-sm font-medium text-gray-700 mb-1">风格</label>
<select id="styleSelect" class="w-full p-3 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 text-gray-700"></select>
</div>
</div>
<div class="flex space-x-4">
<div class="w-1/2 space-y-2">
<label for="rateInput" class="block text-sm font-medium text-gray-700">语速</label>
<input type="range" id="rateInput" min="-100" max="100" value="0" class="w-full h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer">
</div>
<div class="w-1/2 space-y-2">
<label for="pitchInput" class="block text-sm font-medium text-gray-700">语调</label>
<input type="range" id="pitchInput" min="-100" max="100" value="0" class="w-full h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer">
</div>
</div>
<button id="synthesizeButton" class="w-full bg-gradient-to-r from-blue-500 to-purple-600 text-white py-3 px-6 rounded-lg hover:from-blue-600 hover:to-purple-700 focus:outline-none focus:ring-2 focus:ring-purple-500 focus:ring-opacity-50 transition duration-300 text-lg font-semibold shadow-md">合成并播放</button>
</div>
<audio id="audioPlayer" controls class="w-full mt-6 hidden"></audio>
</div>
<script>
// Page logic: fills the voice and style selectors from /voices and plays
// synthesized audio from /tts.
$(document).ready(function() {
    // Voices returned by the most recent /voices request.
    let globalVoices = []

    // Reloads the voice selector for the given locale, then refreshes the
    // style selector for whichever voice ends up selected.
    function updateVoices(locale) {
        $('#voiceSelect').empty();
        $.get('/voices?d&l=' + locale, function(voices) {
            // The response wraps the list in a "voices" property.
            globalVoices = voices.voices
            globalVoices.forEach(function(voice) {
                $('#voiceSelect').append($('<option>', {
                    value: voice.ShortName,
                    text: voice.LocalName + ' (' + voice.ShortName + ')'
                }));
            });
            updateStyles($('#voiceSelect').val());
        });
    }

    // Rebuilds the style selector from the StyleList of the voice whose
    // ShortName matches. Nothing changes when no voice matches; a voice
    // without StyleList leaves the selector empty.
    function updateStyles(voice) {
        const currentVoice = globalVoices.filter(v => v.ShortName === voice)[0]
        if (currentVoice) {
            $('#styleSelect').empty()
            currentVoice?.StyleList?.forEach(function(style) {
                $('#styleSelect').append($('<option>', {
                    value: style,
                    text: style
                }));
            });
        }
    }

    // Initial load for the preselected locale.
    updateVoices($('#localeSelect').val());

    $('#localeSelect').change(function() {
        updateVoices($(this).val());
    });
    $('#voiceSelect').change(function() {
        updateStyles($(this).val());
    });

    // Builds the /tts URL from the form values and plays it in the audio
    // element. Only text and voice are URI-encoded.
    $('#synthesizeButton').click(function() {
        var text = $('#textInput').val();
        var voice = $('#voiceSelect').val();
        var rate = $('#rateInput').val();
        var pitch = $('#pitchInput').val();
        var locale = $('#localeSelect').val();
        var style = $('#styleSelect').val();
        var url = `/tts?t=${encodeURIComponent(text)}&v=${encodeURIComponent(voice)}&r=${rate}&p=${pitch}&l=${locale}&s=${style}`;
        $('#audioPlayer').attr('src', url).removeClass('hidden')[0].play();
    });
});
</script>
</body>
</html>

View File

@@ -1,218 +0,0 @@
package utils
import (
"bytes"
"crypto/hmac"
"crypto/sha256"
"encoding/base64"
"encoding/json"
"fmt"
"github.com/google/uuid"
"github.com/sirupsen/logrus"
"html"
"io"
"net/http"
"net/url"
"strings"
"time"
)
// Package-level state shared by the functions in this file.
var (
	// log is the logger used for request failures.
	log = logrus.New()
	// client sends all outgoing HTTP requests; it has no timeout configured.
	client = &http.Client{}
	// voiceListCache holds the last result of VoiceList. It is cleared by the
	// ticker goroutine started in init and is accessed without any locking.
	voiceListCache []interface{}
	cacheDuration  = 1 * time.Hour // how long the cache is kept
)
// init starts a background goroutine that clears voiceListCache every
// cacheDuration. The goroutine runs for the lifetime of the process.
func init() {
	ticker := time.NewTicker(cacheDuration)
	go func() {
		for {
			<-ticker.C
			voiceListCache = nil
		}
	}()
}
// Fixed URLs, header values and synthesis defaults.
const (
	// endpointURL is the address GetEndpoint posts to and Sign signs.
	endpointURL = "https://dev.microsofttranslator.com/apps/endpoint?api-version=1.0"
	// voicesListURL is the address VoiceList fetches the voice list from.
	voicesListURL = "https://eastus.api.speech.microsoft.com/cognitiveservices/voices/list"
	// The following values are sent as request headers by GetEndpoint.
	userAgent            = "okhttp/4.5.0"
	clientVersion        = "4.0.530a 5fe1dc6c"
	userId               = "0f04d16a175c411e"
	homeGeographicRegion = "zh-Hans-CN"
	clientTraceId        = "aab069b9-70a7-4844-a734-96cd78d94be9"
	// voiceDecodeKey is base64-decoded by Sign and used as the HMAC-SHA256 key.
	voiceDecodeKey = "oik6PdDdMnOXemTbwvMn9de/h9lFnfBaCWbGMMZqqoSaQaqUOqjVGm5NqsmjcBI1x+sS9ugjB55HEJWRiFXYFw=="
	// Defaults GetVoice substitutes for empty arguments.
	defaultVoiceName    = "zh-CN-XiaoxiaoMultilingualNeural"
	defaultRate         = "0"
	defaultPitch        = "0"
	defaultOutputFormat = "audio-24khz-48kbitrate-mono-mp3"
	defaultStyle        = "general"
)
// GetEndpoint requests the speech-synthesis endpoint information and returns
// the decoded JSON object.
//
// The request is a signed, body-less POST to endpointURL. An error is
// returned when the request cannot be built or sent, when the status is not
// 2xx, or when the response is not valid JSON.
func GetEndpoint() (map[string]interface{}, error) {
	signature := Sign(endpointURL)

	// Accept-Encoding is deliberately not set here: the transport adds
	// "gzip" itself and then decompresses the reply transparently, which it
	// does not do when the header is set by the caller.
	headers := map[string]string{
		"Accept-Language":        "zh-Hans",
		"X-ClientVersion":        clientVersion,
		"X-UserId":               userId,
		"X-HomeGeographicRegion": homeGeographicRegion,
		"X-ClientTraceId":        clientTraceId,
		"X-MT-Signature":         signature,
		"User-Agent":             userAgent,
		"Content-Type":           "application/json; charset=utf-8",
		"Content-Length":         "0",
	}

	req, err := http.NewRequest(http.MethodPost, endpointURL, nil)
	if err != nil {
		return nil, err
	}
	for k, v := range headers {
		req.Header.Set(k, v)
	}

	resp, err := client.Do(req)
	if err != nil {
		log.Error("failed to do request: ", err)
		return nil, err
	}
	defer resp.Body.Close()

	// An error reply must not be handed to callers as endpoint data.
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return nil, fmt.Errorf("endpoint request failed: status %d", resp.StatusCode)
	}

	var result map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, err
	}
	return result, nil
}
// Sign builds the signature value for urlStr.
//
// The signed payload is the lower-cased concatenation of the app ID, the
// query-escaped URL without its scheme, the current UTC time and a fresh
// UUID without dashes. It is signed with HMAC-SHA256 using the decoded
// voiceDecodeKey, and the result has the form
// "MSTranslatorAndroidApp::<base64 signature>::<date>::<uuid>".
// Each call yields a different value because of the time and the UUID.
func Sign(urlStr string) string {
	// Drop the scheme. A URL without "://" is signed as is rather than
	// causing an index-out-of-range panic.
	u := urlStr
	if parts := strings.Split(urlStr, "://"); len(parts) > 1 {
		u = parts[1]
	}
	encodedUrl := url.QueryEscape(u)
	uuidStr := strings.ReplaceAll(uuid.New().String(), "-", "")
	formattedDate := strings.ToLower(time.Now().UTC().Format("Mon, 02 Jan 2006 15:04:05")) + "gmt"

	bytesToSign := fmt.Sprintf("MSTranslatorAndroidApp%s%s%s", encodedUrl, formattedDate, uuidStr)
	bytesToSign = strings.ToLower(bytesToSign)

	// The key is a compile-time constant, so the decode error is ignored.
	decode, _ := base64.StdEncoding.DecodeString(voiceDecodeKey)
	hash := hmac.New(sha256.New, decode)
	hash.Write([]byte(bytesToSign))
	secretKey := hash.Sum(nil)
	signBase64 := base64.StdEncoding.EncodeToString(secretKey)

	return fmt.Sprintf("MSTranslatorAndroidApp::%s::%s::%s", signBase64, formattedDate, uuidStr)
}
// GetVoice synthesizes text and returns the audio bytes.
//
// Empty voiceName, rate, pitch, outputFormat and style fall back to the
// package defaults. A fresh endpoint is requested on every call. An error is
// returned when the endpoint cannot be obtained or has no string token, when
// the request fails, or when the service answers with a non-200 status.
func GetVoice(text, voiceName, rate, pitch, outputFormat, style string) ([]byte, error) {
	if voiceName == "" {
		voiceName = defaultVoiceName
	}
	if rate == "" {
		rate = defaultRate
	}
	if pitch == "" {
		pitch = defaultPitch
	}
	if outputFormat == "" {
		outputFormat = defaultOutputFormat
	}
	if style == "" {
		style = defaultStyle
	}

	endpoint, err := GetEndpoint()
	if err != nil {
		return nil, err
	}
	// Checked assertion: a payload without a string token must not panic.
	token, ok := endpoint["t"].(string)
	if !ok {
		return nil, fmt.Errorf("endpoint response has no string token")
	}

	u := fmt.Sprintf("https://%s.tts.speech.microsoft.com/cognitiveservices/v1", endpoint["r"])
	headers := map[string]string{
		"Authorization":            token,
		"Content-Type":             "application/ssml+xml",
		"X-Microsoft-OutputFormat": outputFormat,
	}

	ssml := GetSsml(text, voiceName, rate, pitch, style)
	req, err := http.NewRequest(http.MethodPost, u, bytes.NewBufferString(ssml))
	if err != nil {
		return nil, err
	}
	for k, v := range headers {
		req.Header.Set(k, v)
	}

	resp, err := client.Do(req)
	if err != nil {
		log.Error("failed to do request: ", err)
		return nil, err
	}
	defer resp.Body.Close()

	// Without this check an error document would be returned as audio.
	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("tts request failed: status %d: %s", resp.StatusCode, body)
	}
	return io.ReadAll(resp.Body)
}
// GetSsml renders the SSML document for a synthesis request.
//
// Every argument is HTML-escaped before being inserted: the text because it
// becomes element content, and voiceName, style, rate and pitch because they
// are placed inside double-quoted attributes. The document language is fixed
// to zh-CN.
func GetSsml(text, voiceName, rate, pitch, style string) string {
	esc := html.EscapeString
	return fmt.Sprintf(`
<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" version="1.0" xml:lang="zh-CN">
<voice name="%s">
<mstts:express-as style="%s" styledegree="1.0" role="default">
<prosody rate="%s%%" pitch="%s%%" volume="medium">
%s
</prosody>
</mstts:express-as>
</voice>
</speak>
`, esc(voiceName), esc(style), esc(rate), esc(pitch), esc(text))
}
// VoiceList returns the list of available voices as decoded JSON values.
//
// A cached list is returned when voiceListCache is non-nil. Otherwise the
// list is fetched from voicesListURL with browser-like headers, decoded,
// stored in the cache and returned. Request and decoding errors are returned
// to the caller; the response status code is not checked.
func VoiceList() ([]interface{}, error) {
	if cached := voiceListCache; cached != nil {
		return cached, nil
	}

	req, err := http.NewRequest("GET", voicesListURL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.26")
	req.Header.Set("X-Ms-Useragent", "SpeechStudio/2021.05.001")
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Origin", "https://azure.microsoft.com")
	req.Header.Set("Referer", "https://azure.microsoft.com")

	resp, err := client.Do(req)
	if err != nil {
		log.Error("failed to do request: ", err)
		return nil, err
	}
	defer resp.Body.Close()

	var voices []interface{}
	if decodeErr := json.NewDecoder(resp.Body).Decode(&voices); decodeErr != nil {
		return nil, decodeErr
	}

	// Remember the result for later calls.
	voiceListCache = voices
	return voices, nil
}

271
web/static/css/style.css Normal file
View File

@@ -0,0 +1,271 @@
/* Stylesheet shared by the TTS home page and the API documentation page. */
/* Basic style reset */
* {
box-sizing: border-box;
margin: 0;
padding: 0;
}
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
line-height: 1.6;
color: #333;
background-color: #f5f7fa;
padding: 20px;
}
/* Container: centered column with a capped width */
.container {
max-width: 1000px;
margin: 0 auto;
}
/* Page header */
header {
text-align: center;
margin-bottom: 30px;
padding: 20px;
}
header h1 {
font-size: 2.5rem;
margin-bottom: 10px;
color: #2c3e50;
}
header p {
font-size: 1.2rem;
color: #7f8c8d;
margin-bottom: 20px;
}
/* Navigation */
nav {
display: flex;
justify-content: center;
margin-top: 20px;
}
nav a {
text-decoration: none;
color: #3498db;
margin: 0 15px;
padding: 5px 10px;
border-radius: 5px;
transition: all 0.3s ease;
}
/* Hovered and active links share the same filled appearance */
nav a:hover {
background-color: #3498db;
color: #fff;
}
nav a.active {
background-color: #3498db;
color: #fff;
}
/* Cards */
.card {
background-color: #fff;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
padding: 25px;
margin-bottom: 25px;
}
/* Headings */
h2 {
color: #2c3e50;
margin-bottom: 20px;
border-bottom: 1px solid #ecf0f1;
padding-bottom: 10px;
}
h3 {
color: #3498db;
margin: 20px 0 10px;
}
/* Input area */
/* position: relative makes this the positioning context for .char-counter */
.input-group {
position: relative;
margin-bottom: 20px;
}
textarea {
width: 100%;
padding: 15px;
border: 1px solid #ddd;
border-radius: 5px;
resize: none;
font-size: 1rem;
font-family: inherit;
}
/* The default outline is replaced by a colored border and a soft ring */
textarea:focus {
outline: none;
border-color: #3498db;
box-shadow: 0 0 0 2px rgba(52, 152, 219, 0.2);
}
/* Character counter pinned to the bottom-right corner of .input-group */
.char-counter {
position: absolute;
bottom: 10px;
right: 10px;
font-size: 0.8rem;
color: #7f8c8d;
}
/* Settings area: as many columns of at least 250px as fit the row */
.settings {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: 20px;
margin-bottom: 20px;
}
.setting-group {
display: flex;
flex-direction: column;
}
label {
margin-bottom: 5px;
font-weight: bold;
color: #2c3e50;
}
select, input[type="range"] {
padding: 8px;
border: 1px solid #ddd;
border-radius: 5px;
background-color: #fff;
}
select:focus {
outline: none;
border-color: #3498db;
}
/* Buttons */
.actions {
display: flex;
justify-content: center;
margin-top: 20px;
}
button {
padding: 10px 20px;
border: none;
border-radius: 5px;
cursor: pointer;
font-size: 1rem;
transition: all 0.3s ease;
}
.primary-button {
background-color: #3498db;
color: #fff;
}
.primary-button:hover {
background-color: #2980b9;
}
.secondary-button {
background-color: #ecf0f1;
color: #2c3e50;
margin: 0 5px;
}
.secondary-button:hover {
background-color: #bdc3c7;
}
/* Audio player */
.audio-player {
display: flex;
flex-direction: column;
align-items: center;
}
audio {
width: 100%;
margin-bottom: 15px;
}
.audio-controls {
display: flex;
justify-content: center;
}
/* Tables */
table {
width: 100%;
border-collapse: collapse;
margin: 20px 0;
}
th, td {
padding: 12px 15px;
text-align: left;
border-bottom: 1px solid #ddd;
}
th {
background-color: #f8f9fa;
font-weight: bold;
}
/* Code */
code, pre {
font-family: SFMono-Regular, Menlo, Monaco, Consolas, monospace;
background-color: #f8f9fa;
border-radius: 3px;
padding: 2px 5px;
font-size: 0.9rem;
}
/* Block-level code: larger padding, horizontal scrolling for long lines */
pre {
padding: 15px;
overflow-x: auto;
margin: 15px 0;
}
/* Code nested in <pre> drops its own padding and background */
pre code {
padding: 0;
background-color: transparent;
}
/* Footer */
footer {
text-align: center;
margin-top: 40px;
padding: 20px;
color: #7f8c8d;
font-size: 0.9rem;
}
footer a {
color: #3498db;
text-decoration: none;
}
footer a:hover {
text-decoration: underline;
}
/* Responsive adjustments for viewports up to 768px wide */
@media (max-width: 768px) {
.settings {
grid-template-columns: 1fr;
}
header h1 {
font-size: 2rem;
}
.card {
padding: 15px;
}
}

176
web/static/js/app.js Normal file
View File

@@ -0,0 +1,176 @@
document.addEventListener('DOMContentLoaded', function() {
// Look up the page elements the script works with.
const byId = (id) => document.getElementById(id);
const textInput = byId('text');
const voiceSelect = byId('voice');
const rateInput = byId('rate');
const rateValue = byId('rateValue');
const pitchInput = byId('pitch');
const pitchValue = byId('pitchValue');
const speakButton = byId('speak');
const downloadButton = byId('download');
const copyLinkButton = byId('copyLink');
const audioPlayer = byId('audioPlayer');
const resultSection = byId('resultSection');
const charCount = byId('charCount');

// URL of the most recently generated audio.
let lastAudioUrl = '';

// Start-up: load the voice list and attach the button handlers.
initVoicesList();
initEventListeners();

// Keep the character counter in step with the text area.
textInput.addEventListener('input', () => {
    charCount.textContent = textInput.value.length;
});

// Mirror the rate slider position into its label.
rateInput.addEventListener('input', () => {
    rateValue.textContent = rateInput.value + '%';
});

// Mirror the pitch slider position into its label.
pitchInput.addEventListener('input', () => {
    pitchValue.textContent = pitchInput.value + '%';
});
// Load the voice catalogue from the server and rebuild the voice <select>
// with one <optgroup> per locale. On any failure the select is replaced by a
// single option saying the list could not be loaded.
async function initVoicesList() {
    try {
        const response = await fetch(`${config.basePath}/voices`);
        if (!response.ok) throw new Error('获取语音列表失败');
        const voices = await response.json();

        // Remove whatever options are currently shown before rebuilding.
        voiceSelect.innerHTML = '';

        // Bucket the voices by locale, keeping the order they arrived in.
        const grouped = voices.reduce((buckets, voice) => {
            const key = voice.locale;
            if (!buckets[key]) {
                buckets[key] = [];
            }
            buckets[key].push(voice);
            return buckets;
        }, {});

        // One option group per locale, labelled with the locale's display name.
        Object.keys(grouped).forEach((locale) => {
            const members = grouped[locale];
            const group = document.createElement('optgroup');
            group.label = members[0].locale_name;

            for (const voice of members) {
                const item = document.createElement('option');
                item.value = voice.short_name;
                item.textContent = `${voice.local_name || voice.display_name} (${voice.gender})`;
                // Preselect the configured default voice.
                if (voice.short_name === config.defaultVoice) {
                    item.selected = true;
                }
                group.appendChild(item);
            }

            voiceSelect.appendChild(group);
        });
    } catch (error) {
        console.error('获取语音列表失败:', error);
        voiceSelect.innerHTML = '<option value="">无法加载语音列表</option>';
    }
}
// Attach the click handlers for the convert, download and copy-link buttons.
function initEventListeners() {
    // Convert button: generate speech from the current form values.
    speakButton.addEventListener('click', generateSpeech);

    // Download button: trigger a download of the last generated audio through
    // a temporary <a download> element.
    downloadButton.addEventListener('click', function() {
        if (lastAudioUrl) {
            const a = document.createElement('a');
            a.href = lastAudioUrl;
            a.download = 'speech.mp3';
            document.body.appendChild(a);
            a.click();
            document.body.removeChild(a);
        }
    });

    // Fallback copy path based on a temporary <textarea> and execCommand,
    // used when the asynchronous Clipboard API is missing or rejects.
    function copyWithExecCommand(value) {
        const textArea = document.createElement('textarea');
        textArea.value = value;
        document.body.appendChild(textArea);
        textArea.focus();
        textArea.select();
        try {
            // execCommand reports failure through its return value as well as
            // by throwing, so only announce success when it returns true.
            if (document.execCommand('copy')) {
                alert('链接已复制到剪贴板');
            } else {
                console.error('复制失败:', new Error('execCommand("copy") returned false'));
            }
        } catch (err) {
            console.error('复制失败:', err);
        } finally {
            document.body.removeChild(textArea);
        }
    }

    // Copy-link button: put the URL of the last generated audio on the clipboard.
    copyLinkButton.addEventListener('click', function() {
        if (!lastAudioUrl) {
            return;
        }
        // lastAudioUrl is built from config.basePath and may be relative;
        // resolve it against the page so the copied link works elsewhere.
        const absoluteUrl = new URL(lastAudioUrl, window.location.href).href;

        // navigator.clipboard is undefined outside secure contexts; calling
        // writeText on it would throw before any .catch() could run, so go
        // straight to the fallback in that case.
        if (!navigator.clipboard || typeof navigator.clipboard.writeText !== 'function') {
            copyWithExecCommand(absoluteUrl);
            return;
        }

        navigator.clipboard.writeText(absoluteUrl).then(() => {
            alert('链接已复制到剪贴板');
        }).catch(err => {
            console.error('复制失败:', err);
            copyWithExecCommand(absoluteUrl);
        });
    });
}
// Build the /tts URL from the current form values, load it into the audio
// player and start playback. The convert button is disabled while the audio
// is being fetched and re-enabled once playback has started or failed.
async function generateSpeech() {
    const text = textInput.value.trim();
    if (!text) {
        alert('请输入要转换的文本');
        return;
    }
    const voice = voiceSelect.value;
    const rate = rateInput.value;
    const pitch = pitchInput.value;

    // Disable the button and show the loading label.
    speakButton.disabled = true;
    speakButton.textContent = '生成中...';
    try {
        // URLSearchParams takes care of encoding the query values.
        const params = new URLSearchParams({
            t: text,
            v: voice,
            r: rate,
            p: pitch
        });
        const url = `${config.basePath}/tts?${params.toString()}`;

        // Point the player at the new audio and remember the URL for the
        // download and copy-link buttons.
        audioPlayer.src = url;
        lastAudioUrl = url;

        // Reveal the result section.
        resultSection.style.display = 'block';

        // play() returns a promise that rejects when the audio cannot be
        // loaded or started. Awaiting it routes that failure into the catch
        // below and keeps the button disabled until playback actually begins.
        await audioPlayer.play();
    } catch (error) {
        console.error('生成语音失败:', error);
        alert('生成语音失败,请重试');
    } finally {
        // Restore the button.
        speakButton.disabled = false;
        speakButton.textContent = '转换为语音';
    }
}
});

310
web/templates/api-doc.html Normal file
View File

@@ -0,0 +1,310 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>API文档 - TTS服务</title>
<link rel="stylesheet" href="{{.BasePath}}/static/css/style.css">
<meta name="description" content="TTS服务API文档">
</head>
<body>
<div class="container">
<header>
<h1>TTS服务 API文档</h1>
<p>快速、高质量的文本转语音API服务</p>
<nav>
<a href="{{.BasePath}}/">主页</a>
<a href="{{.BasePath}}/api-doc" class="active">API文档</a>
</nav>
</header>
<main>
<section class="card">
<h2>API概述</h2>
<p>TTS服务API提供了简单而强大的方式，将文本转换为自然语音。我们支持多种语言和声音，并允许您调节语速、语调，以适应不同场景需求。</p>
<p>基础URL: <code>{{.BasePath}}</code></p>
<p>所有API请求均使用HTTP协议，返回标准HTTP状态码表示请求结果。</p>
</section>
<section class="card">
<h2>文本转语音 API</h2>
<h3>端点</h3>
<code>GET {{.BasePath}}/tts</code>
<h3>参数</h3>
<table>
<thead>
<tr>
<th>参数</th>
<th>类型</th>
<th>必选</th>
<th>描述</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>t</code></td>
<td>string</td>
<td>是</td>
<td>要转换的文本，需要进行URL编码</td>
</tr>
<tr>
<td><code>v</code></td>
<td>string</td>
<td>否</td>
<td>语音名称，使用short_name格式，默认: {{.DefaultVoice}}。可通过/voices接口获取所有可用语音</td>
</tr>
<tr>
<td><code>r</code></td>
<td>string</td>
<td>否</td>
<td>语速调整,范围: -100%到100%,默认: {{.DefaultRate}}。正值加快语速,负值减慢语速</td>
</tr>
<tr>
<td><code>p</code></td>
<td>string</td>
<td>否</td>
<td>语调调整,范围: -100%到100%,默认: {{.DefaultPitch}}。正值提高语调,负值降低语调</td>
</tr>
<tr>
<td><code>o</code></td>
<td>string</td>
<td>否</td>
<td>输出音频格式,默认: {{.DefaultFormat}}。详见下方支持的格式列表</td>
</tr>
<tr>
<td><code>s</code></td>
<td>string</td>
<td>否</td>
<td>情感风格，可用值取决于所选语音的style_list属性。例如"cheerful"、"sad"等</td>
</tr>
</tbody>
</table>
<h3>示例请求</h3>
<pre><code>curl "{{.BasePath}}/tts?t=%E4%BD%A0%E5%A5%BD%EF%BC%8C%E4%B8%96%E7%95%8C&v=zh-CN-XiaoxiaoNeural&r=0%25&p=0%25"</code></pre>
<h3>另一个示例(带情感风格)</h3>
<pre><code>curl "{{.BasePath}}/tts?t=%E4%BB%8A%E5%A4%A9%E5%A4%A9%E6%B0%94%E7%9C%9F%E5%A5%BD&v=zh-CN-XiaoxiaoNeural&s=cheerful"</code></pre>
<h3>响应</h3>
<p>返回音频文件，内容类型取决于请求的输出格式。正常响应状态码为200。</p>
<h3>错误响应</h3>
<p>如果请求参数有误或服务出现问题，将返回对应的HTTP错误码和错误消息。</p>
<table>
<thead>
<tr>
<th>状态码</th>
<th>描述</th>
</tr>
</thead>
<tbody>
<tr>
<td>400</td>
<td>参数错误或缺失必要参数</td>
</tr>
<tr>
<td>404</td>
<td>请求的资源不存在</td>
</tr>
<tr>
<td>500</td>
<td>服务器内部错误</td>
</tr>
</tbody>
</table>
</section>
<section class="card">
<h2>获取可用语音 API</h2>
<h3>端点</h3>
<code>GET {{.BasePath}}/voices</code>
<h3>参数</h3>
<table>
<thead>
<tr>
<th>参数</th>
<th>类型</th>
<th>必选</th>
<th>描述</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>locale</code></td>
<td>string</td>
<td>否</td>
<td>筛选特定语言的语音，例如: zh-CN(中文)、en-US(英文)</td>
</tr>
<tr>
<td><code>gender</code></td>
<td>string</td>
<td>否</td>
<td>筛选特定性别的语音，可选值: Male(男性)、Female(女性)</td>
</tr>
</tbody>
</table>
<h3>示例请求</h3>
<pre><code>curl "{{.BasePath}}/voices?locale=zh-CN&gender=Female"</code></pre>
<h3>响应</h3>
<p>返回JSON格式的可用语音列表:</p>
<pre><code>[
{
"name": "Microsoft Server Speech Text to Speech Voice (zh-CN, XiaoxiaoNeural)",
"display_name": "Xiaoxiao",
"local_name": "晓晓",
"short_name": "zh-CN-XiaoxiaoNeural",
"gender": "Female",
"locale": "zh-CN",
"locale_name": "中文(中国)",
"style_list": ["cheerful", "sad", "angry", "fearful", "disgruntled"]
},
...
]</code></pre>
<p>响应字段说明:</p>
<ul>
<li><strong>name</strong>:语音的完整名称</li>
<li><strong>display_name</strong>:显示用名称(拉丁字符)</li>
<li><strong>local_name</strong>:本地化名称</li>
<li><strong>short_name</strong>:简短名称，用于API调用的v参数</li>
<li><strong>gender</strong>:性别(Male或Female)</li>
<li><strong>locale</strong>:语言代码</li>
<li><strong>locale_name</strong>:语言本地化名称</li>
<li><strong>style_list</strong>:支持的情感风格列表(如有)</li>
</ul>
</section>
<section class="card">
<h2>兼容OpenAI接口 API</h2>
<h3>语音合成</h3>
<code>POST {{.BasePath}}/v1/audio/speech</code>
<h3>请求体 (JSON)</h3>
<table>
<thead>
<tr>
<th>参数</th>
<th>类型</th>
<th>必选</th>
<th>描述</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>model</code></td>
<td>string</td>
<td></td>
<td>当前仅支持值: "tts-1"</td>
</tr>
<tr>
<td><code>input</code></td>
<td>string</td>
<td></td>
<td>要转换的文本内容</td>
</tr>
<tr>
<td><code>voice</code></td>
<td>string</td>
<td></td>
<td>声音名称，使用Microsoft语音格式，例如: ja-JP-KeitaNeural、zh-CN-XiaoxiaoNeural</td>
</tr>
<tr>
<td><code>speed</code></td>
<td>number</td>
<td></td>
<td>语速调整,范围: 0.5到2.0,默认: 1.0</td>
</tr>
</tbody>
</table>
<h3>示例请求</h3>
<pre><code>curl -X POST "{{.BasePath}}/v1/audio/speech" \
-H "Content-Type: application/json" \
-d '{
"model": "tts-1",
"input": "你好,世界!",
"voice": "zh-CN-XiaoxiaoNeural"
}'</code></pre>
<h3>另一个示例(带速度调整)</h3>
<pre><code>curl -X POST "{{.BasePath}}/v1/audio/speech" \
-H "Content-Type: application/json" \
-d '{
"model": "tts-1",
"input": "こんにちは、世界!",
"voice": "ja-JP-NanamiNeural",
"speed": 1.2
}'</code></pre>
<h3>响应</h3>
<p>返回音频文件，内容类型取决于请求的输出格式。正常响应状态码为200。</p>
<h3>错误响应</h3>
<p>如果请求有误，将返回JSON格式的错误信息:</p>
<pre><code>{
"error": {
"message": "错误信息描述",
"type": "错误类型",
"code": "错误代码"
}
}</code></pre>
</section>
<section class="card">
<h2>支持的输出格式</h2>
<table>
<thead>
<tr>
<th>格式名称</th>
<th>描述</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>audio-16khz-32kbitrate-mono-mp3</code></td>
<td>MP3格式，16kHz, 32kbps</td>
</tr>
<tr>
<td><code>audio-16khz-64kbitrate-mono-mp3</code></td>
<td>MP3格式，16kHz, 64kbps</td>
</tr>
<tr>
<td><code>audio-16khz-128kbitrate-mono-mp3</code></td>
<td>MP3格式，16kHz, 128kbps</td>
</tr>
<tr>
<td><code>audio-24khz-48kbitrate-mono-mp3</code></td>
<td>MP3格式，24kHz, 48kbps</td>
</tr>
<tr>
<td><code>audio-24khz-96kbitrate-mono-mp3</code></td>
<td>MP3格式，24kHz, 96kbps</td>
</tr>
<tr>
<td><code>audio-24khz-160kbitrate-mono-mp3</code></td>
<td>MP3格式，24kHz, 160kbps</td>
</tr>
<tr>
<td><code>riff-16khz-16bit-mono-pcm</code></td>
<td>WAV格式，16kHz</td>
</tr>
<tr>
<td><code>riff-24khz-16bit-mono-pcm</code></td>
<td>WAV格式，24kHz</td>
</tr>
</tbody>
</table>
</section>
</main>
<footer>
<p>© 2025 TTS服务 | <a href="{{.BasePath}}/">返回主页</a></p>
</footer>
</div>
</body>
</html>

83
web/templates/index.html Normal file
View File

@@ -0,0 +1,83 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>文本转语音 - TTS服务</title>
<link rel="stylesheet" href="{{.BasePath}}/static/css/style.css">
<meta name="description" content="基于Microsoft Azure语音服务的在线文本转语音工具">
</head>
<body>
<div class="container">
<header>
<h1>文本转语音 (TTS)</h1>
<p>将文本转换为自然流畅的语音</p>
<nav>
<a href="{{.BasePath}}/" class="active">主页</a>
<a href="{{.BasePath}}/api-doc">API文档</a>
</nav>
</header>
<main>
<section class="card">
<h2>输入文本</h2>
<div class="input-group">
<textarea id="text" placeholder="输入要转换的文本..." rows="6" maxlength="5000"></textarea>
<div class="char-counter"><span id="charCount">0</span>/5000</div>
</div>
<div class="settings">
<div class="setting-group">
<label for="voice">语音:</label>
<select id="voice">
<option value="loading">加载中...</option>
</select>
</div>
<div class="setting-group">
<label for="rate">语速:</label>
<input type="range" id="rate" min="-50" max="50" value="0">
<span id="rateValue">0%</span>
</div>
<div class="setting-group">
<label for="pitch">语调:</label>
<input type="range" id="pitch" min="-50" max="50" value="0">
<span id="pitchValue">0%</span>
</div>
</div>
<div class="actions">
<button id="speak" class="primary-button">转换为语音</button>
</div>
</section>
<section class="card" id="resultSection" style="display:none;">
<h2>语音输出</h2>
<div class="audio-player">
<audio id="audioPlayer" controls></audio>
<div class="audio-controls">
<button id="download" class="secondary-button">下载音频</button>
<button id="copyLink" class="secondary-button">复制链接</button>
</div>
</div>
</section>
</main>
<footer>
<p>© 2025 TTS服务 | <a href="{{.BasePath}}/api-doc">API文档</a></p>
</footer>
</div>
<script>
// 存储一些全局配置
const config = {
basePath: "{{.BasePath}}",
defaultVoice: "{{.DefaultVoice}}",
defaultRate: "{{.DefaultRate}}",
defaultPitch: "{{.DefaultPitch}}"
};
</script>
<script src="{{.BasePath}}/static/js/app.js"></script>
</body>
</html>