diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f597e4b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,27 @@
+### Go template
+# If you prefer the allow list template instead of the deny list, see community template:
+# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
+#
+# Binaries for programs and plugins
+*.exe
+*.exe~
+*.dll
+*.so
+*.dylib
+
+# Test binary, built with `go test -c`
+*.test
+
+# Output of the go coverage tool, specifically when used with LiteIDE
+*.out
+
+# Dependency directories (remove the comment below to include it)
+# vendor/
+
+# Go workspace file
+go.work
+go.work.sum
+
+# env file
+.env
+
diff --git a/.idea/git_toolbox_blame.xml b/.idea/git_toolbox_blame.xml
new file mode 100644
index 0000000..7dc1249
--- /dev/null
+++ b/.idea/git_toolbox_blame.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/cmd/api/main.go b/cmd/api/main.go
new file mode 100644
index 0000000..7887c1a
--- /dev/null
+++ b/cmd/api/main.go
@@ -0,0 +1,58 @@
+package main
+
+import (
+ "flag"
+ "log"
+ "os"
+ "path/filepath"
+
+ "tts/internal/http/server"
+)
+
+// main resolves which configuration file to use, then builds the application
+// and runs it, terminating the process on any fatal error.
+func main() {
+	// An explicit -config flag always wins.
+	cfgFlag := flag.String("config", "", "配置文件路径")
+	flag.Parse()
+
+	selected := *cfgFlag
+	if selected == "" {
+		// No explicit path: probe the well-known locations in priority order
+		// and keep the first one that exists. The first candidate doubles as
+		// the fallback when none of them can be stat'ed.
+		candidates := []string{
+			"./configs/config.yaml",
+			"../configs/config.yaml",
+			"/etc/tts/config.yaml",
+		}
+		selected = candidates[0]
+		for _, candidate := range candidates {
+			if _, statErr := os.Stat(candidate); statErr != nil {
+				continue
+			}
+			selected = candidate
+			break
+		}
+	}
+
+	// Pass an absolute path on, so it no longer depends on the working directory.
+	absPath, absErr := filepath.Abs(selected)
+	if absErr != nil {
+		log.Fatalf("无法获取配置文件的绝对路径: %v", absErr)
+	}
+	log.Printf("使用配置文件: %s", absPath)
+
+	// Build the application from the resolved configuration file.
+	app, initErr := server.NewApp(absPath)
+	if initErr != nil {
+		log.Fatalf("初始化应用失败: %v", initErr)
+	}
+
+	// Start returns only when the application stops; a non-nil error is fatal.
+	if runErr := app.Start(); runErr != nil {
+		log.Fatalf("应用运行出错: %v", runErr)
+	}
+}
diff --git a/configs/config.yaml b/configs/config.yaml
new file mode 100644
index 0000000..b60501e
--- /dev/null
+++ b/configs/config.yaml
@@ -0,0 +1,27 @@
+server:
+ port: 8080
+ read_timeout: 30
+ write_timeout: 30
+ base_path: ""
+
+tts:
+ region: "eastasia"
+ default_voice: "zh-CN-XiaoxiaoNeural"
+ default_rate: "0"
+ default_pitch: "0"
+ default_format: "audio-24khz-48kbitrate-mono-mp3"
+ max_text_length: 65535
+ request_timeout: 30
+ max_concurrent: 10
+ segment_threshold: 300
+ min_sentence_length: 200
+ max_sentence_length: 300
+
+ # OpenAI 到微软 TTS 中文语音的映射
+ voice_mapping:
+ alloy: "zh-CN-XiaoyiNeural" # 中性女声
+ echo: "zh-CN-YunxiNeural" # 年轻男声
+ fable: "zh-CN-XiaochenNeural" # 儿童声
+ onyx: "zh-CN-YunjianNeural" # 成熟男声
+ nova: "zh-CN-XiaohanNeural" # 活力女声
+ shimmer: "zh-CN-XiaomoNeural" # 温柔女声
diff --git a/go.mod b/go.mod
index d52b110..a78185a 100644
--- a/go.mod
+++ b/go.mod
@@ -3,36 +3,12 @@ module tts
go 1.22
require (
- github.com/gin-gonic/gin v1.10.0
github.com/google/uuid v1.6.0
github.com/sirupsen/logrus v1.9.3
+ gopkg.in/yaml.v3 v3.0.1
)
require (
- github.com/bytedance/sonic v1.11.6 // indirect
- github.com/bytedance/sonic/loader v0.1.1 // indirect
- github.com/cloudwego/base64x v0.1.4 // indirect
- github.com/cloudwego/iasm v0.2.0 // indirect
- github.com/gabriel-vasile/mimetype v1.4.3 // indirect
- github.com/gin-contrib/sse v0.1.0 // indirect
- github.com/go-playground/locales v0.14.1 // indirect
- github.com/go-playground/universal-translator v0.18.1 // indirect
- github.com/go-playground/validator/v10 v10.20.0 // indirect
- github.com/goccy/go-json v0.10.2 // indirect
- github.com/json-iterator/go v1.1.12 // indirect
- github.com/klauspost/cpuid/v2 v2.2.7 // indirect
- github.com/leodido/go-urn v1.4.0 // indirect
- github.com/mattn/go-isatty v0.0.20 // indirect
- github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
- github.com/modern-go/reflect2 v1.0.2 // indirect
- github.com/pelletier/go-toml/v2 v2.2.2 // indirect
- github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
- github.com/ugorji/go/codec v1.2.12 // indirect
- golang.org/x/arch v0.8.0 // indirect
- golang.org/x/crypto v0.23.0 // indirect
- golang.org/x/net v0.25.0 // indirect
+ github.com/stretchr/testify v1.9.0 // indirect
golang.org/x/sys v0.20.0 // indirect
- golang.org/x/text v0.15.0 // indirect
- google.golang.org/protobuf v1.34.1 // indirect
- gopkg.in/yaml.v3 v3.0.1 // indirect
)
diff --git a/go.sum b/go.sum
index 8ba6d3d..57c0620 100644
--- a/go.sum
+++ b/go.sum
@@ -1,124 +1,21 @@
-github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM=
-github.com/bytedance/sonic v1.10.0-rc/go.mod h1:ElCzW+ufi8qKqNW0FY314xriJhyJhuoJ3gFZdAHF7NM=
-github.com/bytedance/sonic v1.11.4 h1:8+OMLSSDDm2/qJc6ld5K5Sm62NK9VHcUKk0NzBoMAM4=
-github.com/bytedance/sonic v1.11.4/go.mod h1:YrWEqYtlBPS6LUA0vpuG79a1trsh4Ae41uWUWUreHhE=
-github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
-github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
-github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
-github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
-github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY=
-github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk=
-github.com/chenzhuoyu/iasm v0.9.0/go.mod h1:Xjy2NpN3h7aUqeqM+woSuuvxmIe6+DDsiNLIrkAmYog=
-github.com/cloudwego/base64x v0.1.0 h1:Tg5q9tq1khq9Y9UwfoC6zkHK0FypN2GLDvhqFceOL8U=
-github.com/cloudwego/base64x v0.1.0/go.mod h1:lM8nFiNbg74QgesNo6EAtv8N9tlRjBWExmHoNDa3PkU=
-github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
-github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
-github.com/cloudwego/iasm v0.0.9/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
-github.com/cloudwego/iasm v0.1.1 h1:Py/XoYVR3xFd2pXmvmOnoS5vHTlYT9SnGK28ES8JOIk=
-github.com/cloudwego/iasm v0.1.1/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
-github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
-github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
-github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
-github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
-github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
-github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg=
-github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU=
-github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
-github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
-github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
-github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
-github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
-github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
-github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
-github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
-github.com/go-playground/validator/v10 v10.19.0 h1:ol+5Fu+cSq9JD7SoSqe04GMI92cbn0+wvQ3bZ8b/AU4=
-github.com/go-playground/validator/v10 v10.19.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
-github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
-github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
-github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
-github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
-github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
-github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
-github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
-github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
-github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
-github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
-github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
-github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
-github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
-github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
-github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
-github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
-github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
-github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
-github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
-github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
-github.com/pelletier/go-toml/v2 v2.2.1 h1:9TA9+T8+8CUCO2+WYnDLCgrYi9+omqKXyjDtosvtEhg=
-github.com/pelletier/go-toml/v2 v2.2.1/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
-github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
-github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
-github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
-github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
-github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
-github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
-github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
-github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
-github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
-github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
-github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
-github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
-golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
-golang.org/x/arch v0.7.0 h1:pskyeJh/3AmoQ8CPE95vxHLqp1G1GfGNXTmcl9NEKTc=
-golang.org/x/arch v0.7.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
-golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
-golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
-golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30=
-golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M=
-golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI=
-golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
-golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w=
-golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8=
-golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
-golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o=
-golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
-golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
-golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
-golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
-golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
-golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
-google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
-google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
-google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
-nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
-rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
diff --git a/handlers/handlers.go b/handlers/handlers.go
deleted file mode 100644
index 8feabf9..0000000
--- a/handlers/handlers.go
+++ /dev/null
@@ -1,119 +0,0 @@
-package handlers
-
-import (
- "github.com/gin-gonic/gin"
- "net/http"
- "strings"
- "tts/utils"
-)
-
-func GetVoiceList(c *gin.Context) {
- locale := c.Query("l")
- voices, err := utils.VoiceList()
- if err != nil {
- c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
- return
- }
-
- if locale != "" {
- filteredVoices := make([]interface{}, 0)
- for _, voice := range voices {
- if strings.Contains(voice.(map[string]interface{})["Locale"].(string), locale) {
- filteredVoices = append(filteredVoices, voice)
- }
- }
- voices = filteredVoices
- }
-
- _, detail := c.GetQuery("d")
- if detail {
- c.JSON(http.StatusOK, gin.H{"voices": voices})
- } else {
- voiceSimpleList := make([]map[string]string, 0)
- for _, voice := range voices {
- localName := voice.(map[string]interface{})["LocalName"].(string)
- shortName := voice.(map[string]interface{})["ShortName"].(string)
- voiceSimpleList = append(voiceSimpleList, map[string]string{
- "LocalName": localName,
- "ShortName": shortName,
- })
- }
- c.JSON(http.StatusOK, gin.H{"voices": voiceSimpleList})
- }
-
-}
-
-func SynthesizeVoice(c *gin.Context) {
- text := c.Query("t")
- voiceName := c.DefaultQuery("v", "zh-CN-XiaoxiaoMultilingualNeural")
- rate := c.DefaultQuery("r", "0")
- pitch := c.DefaultQuery("p", "0")
- outputFormat := c.DefaultQuery("o", "audio-24khz-48kbitrate-mono-mp3")
-
- voice, err := utils.GetVoice(text, voiceName, rate, pitch, outputFormat, c.Query("s"))
- if err != nil {
- c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
- return
- }
-
- c.Data(http.StatusOK, "audio/mpeg", voice)
-}
-
-func Index(c *gin.Context) {
- c.HTML(http.StatusOK, "index.html", gin.H{
- "title": "TTS",
- })
-}
-
-func ApiDoc(c *gin.Context) {
- c.HTML(http.StatusOK, "api-doc.html", gin.H{
- "title": "TTS",
- })
-}
-
-type SynthesizeVoiceRequest struct {
- Text string `json:"t"`
- VoiceName string `json:"v"`
- Rate string `json:"r"`
- Pitch string `json:"p"`
- OutputFormat string `json:"o"`
- Style string `json:"s"`
-}
-
-type SynthesizeVoiceOpenAIRequest struct {
- Model string `json:"model"`
- Input string `json:"input"`
- Voice string `json:"voice"`
-}
-
-func SynthesizeVoicePost(c *gin.Context) {
- var request SynthesizeVoiceRequest
- if err := c.BindJSON(&request); err != nil {
- c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
- return
- }
-
- voice, err := utils.GetVoice(request.Text, request.VoiceName, request.Rate, request.Pitch, request.OutputFormat, request.Style)
- if err != nil {
- c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
- return
- }
-
- c.Data(http.StatusOK, "audio/mpeg", voice)
-}
-
-func SynthesizeVoiceOpenAI(c *gin.Context) {
- var request SynthesizeVoiceOpenAIRequest
- if err := c.BindJSON(&request); err != nil {
- c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
- return
- }
-
- voice, err := utils.GetVoice(request.Input, request.Voice, c.Query("r"), c.Query("p"), c.Query("o"), c.Query("s"))
-
- if err != nil {
- c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
- return
- }
- c.Data(http.StatusOK, "audio/mpeg", voice)
-}
diff --git a/internal/config/config.go b/internal/config/config.go
new file mode 100644
index 0000000..d062940
--- /dev/null
+++ b/internal/config/config.go
@@ -0,0 +1,129 @@
+package config
+
+import (
+ "fmt"
+ "os"
+ "sync"
+
+ "gopkg.in/yaml.v3"
+)
+
+// Config holds all application configuration. It is split into an HTTP server
+// section and a TTS section, populated from the YAML keys named in the struct
+// tags.
+type Config struct {
+ Server ServerConfig `yaml:"server"`
+ TTS TTSConfig `yaml:"tts"`
+}
+
+// ServerConfig holds the HTTP server settings.
+type ServerConfig struct {
+ Port int `yaml:"port"`
+ ReadTimeout int `yaml:"read_timeout"` // unit: seconds
+ WriteTimeout int `yaml:"write_timeout"` // unit: seconds
+ BasePath string `yaml:"base_path"`
+}
+
+// TTSConfig holds the Microsoft TTS API settings: credentials and region, the
+// default synthesis parameters, and the limits that govern text length,
+// concurrency and sentence segmentation.
+type TTSConfig struct {
+ APIKey string `yaml:"api_key"`
+ Region string `yaml:"region"`
+ DefaultVoice string `yaml:"default_voice"`
+ DefaultRate string `yaml:"default_rate"`
+ DefaultPitch string `yaml:"default_pitch"`
+ DefaultFormat string `yaml:"default_format"`
+ MaxTextLength int `yaml:"max_text_length"`
+ RequestTimeout int `yaml:"request_timeout"` // unit: seconds
+ MaxConcurrent int `yaml:"max_concurrent"`
+ SegmentThreshold int `yaml:"segment_threshold"`
+ MinSentenceLength int `yaml:"min_sentence_length"`
+ MaxSentenceLength int `yaml:"max_sentence_length"`
+ VoiceMapping map[string]string `yaml:"voice_mapping"` // maps OpenAI voice names to Azure voice names
+}
+
+var (
+	// config is the process-wide configuration populated by Load.
+	config Config
+	// once guarantees the load sequence runs a single time.
+	once sync.Once
+	// loadErr keeps the outcome of that single load so that every later call
+	// to Load reports the same failure instead of silently succeeding.
+	loadErr error
+)
+
+// Load initialises the package-level configuration exactly once and returns a
+// pointer to it.
+//
+// The first call applies the built-in defaults, then the YAML file at
+// configPath (skipped when configPath is empty), then environment-variable
+// overrides. Later calls ignore configPath and return the result of the first
+// call: the same *Config on success, or the same error on failure.
+func Load(configPath string) (*Config, error) {
+	once.Do(func() {
+		// Start from the defaults so keys missing from the file keep sane values.
+		setDefaults()
+
+		if configPath != "" {
+			if err := loadFromFile(configPath); err != nil {
+				// Stored at package level: a closure-local error would be lost
+				// on the next call because once.Do does not run again.
+				loadErr = fmt.Errorf("加载配置文件失败: %w", err)
+				return
+			}
+		}
+
+		// Environment variables take precedence over the file.
+		overrideFromEnv()
+	})
+
+	if loadErr != nil {
+		return nil, loadErr
+	}
+	return &config, nil
+}
+
+// setDefaults resets the package-level configuration to its built-in baseline
+// values. Fields not listed here (such as the API key and region) are left at
+// their zero value.
+func setDefaults() {
+	serverDefaults := ServerConfig{
+		Port:         8080,
+		ReadTimeout:  30,
+		WriteTimeout: 30,
+		BasePath:     "",
+	}
+
+	ttsDefaults := TTSConfig{
+		DefaultVoice:      "zh-CN-XiaoxiaoNeural",
+		DefaultRate:       "0%",
+		DefaultPitch:      "0%",
+		DefaultFormat:     "audio-24khz-48kbitrate-mono-mp3",
+		MaxTextLength:     5000,
+		RequestTimeout:    30,
+		MaxConcurrent:     10,
+		SegmentThreshold:  500,
+		MinSentenceLength: 200,
+		MaxSentenceLength: 300,
+		// Non-nil empty map so lookups and later merges never hit a nil map.
+		VoiceMapping: make(map[string]string),
+	}
+
+	config = Config{Server: serverDefaults, TTS: ttsDefaults}
+}
+
+// loadFromFile reads the YAML file at path and decodes it on top of the
+// package-level configuration. It returns the read or decode error unchanged.
+func loadFromFile(path string) error {
+	raw, readErr := os.ReadFile(path)
+	if readErr != nil {
+		return readErr
+	}
+
+	if decodeErr := yaml.Unmarshal(raw, &config); decodeErr != nil {
+		return decodeErr
+	}
+	return nil
+}
+
+// 从环境变量中覆盖配置
+func overrideFromEnv() {
+ if port := os.Getenv("TTS_SERVER_PORT"); port != "" {
+ fmt.Sscanf(port, "%d", &config.Server.Port)
+ }
+
+ if apiKey := os.Getenv("TTS_API_KEY"); apiKey != "" {
+ config.TTS.APIKey = apiKey
+ }
+
+ if region := os.Getenv("TTS_API_REGION"); region != "" {
+ config.TTS.Region = region
+ }
+
+ // 可以添加更多环境变量覆盖
+}
+
+// Get returns a pointer to the package-level configuration. It performs no
+// loading itself, so the defaults, file values and environment overrides are
+// only present once Load has run.
+func Get() *Config {
+ return &config
+}
diff --git a/internal/http/handlers/pages.go b/internal/http/handlers/pages.go
new file mode 100644
index 0000000..71f4898
--- /dev/null
+++ b/internal/http/handlers/pages.go
@@ -0,0 +1,76 @@
+package handlers
+
+import (
+ "html/template"
+ "net/http"
+ "path/filepath"
+
+ "tts/internal/config"
+)
+
+// PagesHandler serves the HTML pages. It holds the parsed templates and the
+// configuration whose values are passed into them.
+type PagesHandler struct {
+ templates *template.Template
+ config *config.Config
+}
+
+// NewPagesHandler builds a PagesHandler from every *.html template found
+// directly inside templatesDir. It returns the parse error when the templates
+// cannot be loaded.
+func NewPagesHandler(templatesDir string, cfg *config.Config) (*PagesHandler, error) {
+	pattern := filepath.Join(templatesDir, "*.html")
+
+	parsed, err := template.ParseGlob(pattern)
+	if err != nil {
+		return nil, err
+	}
+
+	handler := &PagesHandler{config: cfg, templates: parsed}
+	return handler, nil
+}
+
+// HandleIndex renders the index.html template for "/" and "/index.html" and
+// answers 404 for any other path routed to it.
+func (h *PagesHandler) HandleIndex(w http.ResponseWriter, r *http.Request) {
+	switch r.URL.Path {
+	case "/", "/index.html":
+		// Recognised index paths; rendered below.
+	default:
+		http.NotFound(w, r)
+		return
+	}
+
+	// Values exposed to the template.
+	ttsCfg := &h.config.TTS
+	data := map[string]any{
+		"BasePath":     h.config.Server.BasePath,
+		"DefaultVoice": ttsCfg.DefaultVoice,
+		"DefaultRate":  ttsCfg.DefaultRate,
+		"DefaultPitch": ttsCfg.DefaultPitch,
+	}
+
+	w.Header().Set("Content-Type", "text/html; charset=utf-8")
+
+	err := h.templates.ExecuteTemplate(w, "index.html", data)
+	if err != nil {
+		http.Error(w, "模板渲染失败: "+err.Error(), http.StatusInternalServerError)
+	}
+}
+
+// HandleAPIDoc renders the api-doc.html template, passing it the base path and
+// the default TTS parameters from the configuration.
+func (h *PagesHandler) HandleAPIDoc(w http.ResponseWriter, r *http.Request) {
+	// Values exposed to the template.
+	ttsCfg := &h.config.TTS
+	data := map[string]any{
+		"BasePath":      h.config.Server.BasePath,
+		"DefaultVoice":  ttsCfg.DefaultVoice,
+		"DefaultRate":   ttsCfg.DefaultRate,
+		"DefaultPitch":  ttsCfg.DefaultPitch,
+		"DefaultFormat": ttsCfg.DefaultFormat,
+	}
+
+	w.Header().Set("Content-Type", "text/html; charset=utf-8")
+
+	err := h.templates.ExecuteTemplate(w, "api-doc.html", data)
+	if err != nil {
+		http.Error(w, "模板渲染失败: "+err.Error(), http.StatusInternalServerError)
+	}
+}
diff --git a/internal/http/handlers/tts.go b/internal/http/handlers/tts.go
new file mode 100644
index 0000000..8cf45b9
--- /dev/null
+++ b/internal/http/handlers/tts.go
@@ -0,0 +1,553 @@
+package handlers
+
+import (
+ "encoding/json"
+ "fmt"
+ "log"
+ "net/http"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strings"
+ "sync"
+ "time"
+ "tts/internal/config"
+ "tts/internal/models"
+ "tts/internal/tts"
+ "unicode/utf8"
+)
+
+// TTSHandler serves the TTS HTTP endpoints. It holds the synthesis service it
+// calls and the configuration that supplies defaults and limits.
+type TTSHandler struct {
+ ttsService tts.Service
+ config *config.Config
+}
+
+// NewTTSHandler returns a TTSHandler wired to the given synthesis service and
+// configuration.
+func NewTTSHandler(service tts.Service, cfg *config.Config) *TTSHandler {
+	handler := new(TTSHandler)
+	handler.ttsService = service
+	handler.config = cfg
+	return handler
+}
+
+// HandleOpenAITTS serves the OpenAI-compatible speech endpoint.
+//
+// It accepts POST only, with a JSON body carrying model, input, voice and
+// speed. The voice is translated through the configured voice mapping (falling
+// back to the default voice), and the speed multiplier becomes a signed
+// relative rate. Text longer than the segment threshold is passed to
+// handleSegmentedTTS; otherwise it is synthesised in one call and written back
+// as audio/mpeg.
+//
+// Responses: 405 for non-POST, 400 for invalid JSON, empty input or text over
+// the maximum length, 500 when synthesis fails.
+func (h *TTSHandler) HandleOpenAITTS(w http.ResponseWriter, r *http.Request) {
+	startTime := time.Now()
+
+	if r.Method != http.MethodPost {
+		http.Error(w, "仅支持POST请求", http.StatusMethodNotAllowed)
+		return
+	}
+
+	// Request body in the OpenAI speech format.
+	var openaiReq struct {
+		Model string  `json:"model"`
+		Input string  `json:"input"`
+		Voice string  `json:"voice"`
+		Speed float64 `json:"speed"`
+	}
+
+	if err := json.NewDecoder(r.Body).Decode(&openaiReq); err != nil {
+		http.Error(w, "无效的JSON请求: "+err.Error(), http.StatusBadRequest)
+		return
+	}
+
+	parseTime := time.Since(startTime)
+
+	if openaiReq.Input == "" {
+		http.Error(w, "input字段不能为空", http.StatusBadRequest)
+		return
+	}
+
+	// Map the OpenAI voice name to a Microsoft voice; unknown or missing names
+	// fall back to the configured default.
+	msVoice := h.config.TTS.DefaultVoice
+	if openaiReq.Voice != "" {
+		if mappedVoice, exists := h.config.TTS.VoiceMapping[openaiReq.Voice]; exists {
+			msVoice = mappedVoice
+		}
+	}
+
+	// Convert the OpenAI speed multiplier (1.0 = normal) into a signed relative
+	// change: 0.5 -> "-50", 2.0 -> "+100". A zero speed means "not provided".
+	// NOTE(review): no trailing "%" is appended here, while the built-in
+	// default rate is "0%" — presumably the service normalises this; confirm.
+	msRate := h.config.TTS.DefaultRate
+	if openaiReq.Speed != 0 {
+		speedPercentage := (openaiReq.Speed - 1.0) * 100
+		if speedPercentage >= 0 {
+			msRate = fmt.Sprintf("+%.0f", speedPercentage)
+		} else {
+			msRate = fmt.Sprintf("%.0f", speedPercentage)
+		}
+	}
+
+	req := models.TTSRequest{
+		Text:  openaiReq.Input,
+		Voice: msVoice,
+		Rate:  msRate,
+		Pitch: h.config.TTS.DefaultPitch,
+	}
+
+	log.Printf("OpenAI TTS请求: model=%s, voice=%s → %s, speed=%.2f → %s, 文本长度=%d",
+		openaiReq.Model, openaiReq.Voice, msVoice, openaiReq.Speed, msRate, len(req.Text))
+
+	// Length limits are compared against the byte length of the text.
+	if len(req.Text) > h.config.TTS.MaxTextLength {
+		http.Error(w, "文本长度超过限制", http.StatusBadRequest)
+		return
+	}
+
+	// The upper bound was already enforced above, so only the threshold decides
+	// whether to segment.
+	segmentThreshold := h.config.TTS.SegmentThreshold
+	if len(req.Text) > segmentThreshold {
+		log.Printf("文本长度 %d 超过阈值 %d,使用分段处理", len(req.Text), segmentThreshold)
+		h.handleSegmentedTTS(w, r, req)
+		return
+	}
+
+	// Single-shot synthesis.
+	synthStart := time.Now()
+	resp, err := h.ttsService.SynthesizeSpeech(r.Context(), req)
+	synthTime := time.Since(synthStart)
+	log.Printf("TTS合成耗时: %v, 文本长度: %d", synthTime, len(req.Text))
+
+	if err != nil {
+		http.Error(w, "语音合成失败: "+err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	w.Header().Set("Content-Type", "audio/mpeg")
+	writeStart := time.Now()
+	// Headers are already sent, so a failed write (typically a client
+	// disconnect) can only be logged.
+	if _, writeErr := w.Write(resp.AudioContent); writeErr != nil {
+		log.Printf("写入响应失败: %v", writeErr)
+	}
+	writeTime := time.Since(writeStart)
+
+	totalTime := time.Since(startTime)
+	log.Printf("OpenAI TTS请求总耗时: %v (解析: %v, 合成: %v, 写入: %v), 音频大小: %s",
+		totalTime, parseTime, synthTime, writeTime, formatFileSize(len(resp.AudioContent)))
+}
+
+// HandleTTS serves the native TTS endpoint.
+//
+// Parameters come from one of three places:
+//   - GET: query parameters t (text), v (voice), r (rate), p (pitch);
+//   - POST with a JSON content type: the body decoded into models.TTSRequest;
+//   - any other POST: form fields text, voice, rate, pitch.
+//
+// Missing voice, rate and pitch fall back to the configured defaults. Text
+// longer than the segment threshold is passed to handleSegmentedTTS; otherwise
+// it is synthesised in one call and written back as audio/mpeg.
+//
+// Responses: 405 for other methods, 400 for an unparsable body, missing text
+// or text over the maximum length, 500 when synthesis fails.
+func (h *TTSHandler) HandleTTS(w http.ResponseWriter, r *http.Request) {
+	startTime := time.Now()
+
+	var req models.TTSRequest
+
+	switch r.Method {
+	case http.MethodGet:
+		q := r.URL.Query()
+		req = models.TTSRequest{
+			Text:  q.Get("t"),
+			Voice: q.Get("v"),
+			Rate:  q.Get("r"),
+			Pitch: q.Get("p"),
+		}
+	case http.MethodPost:
+		// Match on the media type only: clients commonly send
+		// "application/json; charset=utf-8", which an exact comparison would
+		// misroute to the form parser.
+		contentType := strings.ToLower(strings.TrimSpace(r.Header.Get("Content-Type")))
+		if strings.HasPrefix(contentType, "application/json") {
+			if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+				log.Printf("JSON解析错误: %v", err)
+				http.Error(w, "无效的JSON请求", http.StatusBadRequest)
+				return
+			}
+		} else {
+			if err := r.ParseForm(); err != nil {
+				log.Printf("表单解析错误: %v", err)
+				http.Error(w, "无法解析表单数据", http.StatusBadRequest)
+				return
+			}
+			req = models.TTSRequest{
+				Text:  r.FormValue("text"),
+				Voice: r.FormValue("voice"),
+				Rate:  r.FormValue("rate"),
+				Pitch: r.FormValue("pitch"),
+			}
+		}
+	default:
+		log.Printf("不支持的HTTP方法: %s", r.Method)
+		http.Error(w, "仅支持GET和POST请求", http.StatusMethodNotAllowed)
+		return
+	}
+
+	parseTime := time.Since(startTime)
+	log.Printf("请求参数解析耗时: %v", parseTime)
+
+	if req.Text == "" {
+		log.Print("错误: 未提供文本参数")
+		http.Error(w, "必须提供文本参数", http.StatusBadRequest)
+		return
+	}
+
+	// Fill unset synthesis parameters from the configuration.
+	if req.Voice == "" {
+		req.Voice = h.config.TTS.DefaultVoice
+	}
+	if req.Rate == "" {
+		req.Rate = h.config.TTS.DefaultRate
+	}
+	if req.Pitch == "" {
+		req.Pitch = h.config.TTS.DefaultPitch
+	}
+
+	// Length limits are compared against the byte length of the text.
+	if len(req.Text) > h.config.TTS.MaxTextLength {
+		http.Error(w, "文本长度超过限制", http.StatusBadRequest)
+		return
+	}
+
+	// The upper bound was already enforced above, so only the threshold decides
+	// whether to segment.
+	segmentThreshold := h.config.TTS.SegmentThreshold
+	if len(req.Text) > segmentThreshold {
+		log.Printf("文本长度 %d 超过阈值 %d,使用分段处理", len(req.Text), segmentThreshold)
+		h.handleSegmentedTTS(w, r, req)
+		return
+	}
+
+	// Single-shot synthesis.
+	synthStart := time.Now()
+	resp, err := h.ttsService.SynthesizeSpeech(r.Context(), req)
+	synthTime := time.Since(synthStart)
+	log.Printf("TTS合成耗时: %v, 文本长度: %d", synthTime, len(req.Text))
+
+	if err != nil {
+		http.Error(w, "语音合成失败: "+err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	w.Header().Set("Content-Type", "audio/mpeg")
+	writeStart := time.Now()
+	// Headers are already sent, so a failed write (typically a client
+	// disconnect) can only be logged.
+	if _, writeErr := w.Write(resp.AudioContent); writeErr != nil {
+		log.Printf("写入响应失败: %v", writeErr)
+	}
+	writeTime := time.Since(writeStart)
+
+	totalTime := time.Since(startTime)
+	log.Printf("TTS请求总耗时: %v (解析: %v, 合成: %v, 写入: %v), 音频大小: %s",
+		totalTime, parseTime, synthTime, writeTime, formatFileSize(len(resp.AudioContent)))
+}
+
+// handleSegmentedTTS synthesises long text sentence by sentence and returns
+// the merged audio.
+//
+// The text is split with splitTextBySentences, each sentence is synthesised in
+// its own goroutine (bounded by the configured MaxConcurrent), the per-sentence
+// audio is kept in input order and merged with audioMerge, and the result is
+// written as audio/mpeg. If any sentence fails, one of the errors is reported
+// with a 500 and nothing is merged.
+func (h *TTSHandler) handleSegmentedTTS(w http.ResponseWriter, r *http.Request, req models.TTSRequest) {
+	segmentStart := time.Now()
+	text := req.Text
+
+	splitStart := time.Now()
+	sentences := splitTextBySentences(text)
+	segmentCount := len(sentences)
+	splitTime := time.Since(splitStart)
+
+	// Nothing to synthesise; bail out before the per-segment averages below
+	// divide by the segment count.
+	if segmentCount == 0 {
+		http.Error(w, "必须提供文本参数", http.StatusBadRequest)
+		return
+	}
+
+	log.Printf("分割文本耗时: %v, 文本总长度: %d, 分段数: %d, 平均句子长度: %.2f",
+		splitTime, len(text), segmentCount, float64(len(text))/float64(segmentCount))
+
+	// results[i] receives the audio of sentence i, so order is preserved no
+	// matter which goroutine finishes first.
+	results := make([][]byte, segmentCount)
+	// Buffered for one error per segment so senders never block.
+	errChan := make(chan error, segmentCount)
+	var wg sync.WaitGroup
+
+	// A non-positive limit would make the semaphore unbuffered (the first
+	// acquire below would block forever) or panic in make; fall back to
+	// sequential processing instead.
+	maxConcurrent := h.config.TTS.MaxConcurrent
+	if maxConcurrent < 1 {
+		maxConcurrent = 1
+	}
+	semaphore := make(chan struct{}, maxConcurrent)
+
+	segmentTimes := make([]time.Duration, segmentCount)
+
+	synthesisStart := time.Now()
+
+	for i := 0; i < segmentCount; i++ {
+		wg.Add(1)
+		semaphore <- struct{}{} // acquire a slot; blocks while the limit is reached
+		go func(index int) {
+			defer wg.Done()
+			defer func() { <-semaphore }() // release the slot
+
+			// Same voice settings as the parent request, one sentence of text.
+			segReq := models.TTSRequest{
+				Text:  sentences[index],
+				Voice: req.Voice,
+				Rate:  req.Rate,
+				Pitch: req.Pitch,
+			}
+
+			log.Printf("开始处理句子 #%d: 长度=%d, 内容='%s'",
+				index+1,
+				utf8.RuneCountInString(sentences[index]),
+				truncateForLog(sentences[index], 20))
+
+			segStart := time.Now()
+			resp, err := h.ttsService.SynthesizeSpeech(r.Context(), segReq)
+			segTime := time.Since(segStart)
+			segmentTimes[index] = segTime
+
+			if err != nil {
+				log.Printf("句子 #%d 合成失败,耗时: %v, 错误: %v", index+1, segTime, err)
+				select {
+				case errChan <- fmt.Errorf("句子 %d 合成失败: %w", index+1, err):
+				default:
+					// Channel full: an error is already recorded.
+				}
+				return
+			}
+
+			log.Printf("句子 #%d 合成成功:长度=%d, 耗时=%v, 音频大小=%s",
+				index+1, utf8.RuneCountInString(sentences[index]), segTime, formatFileSize(len(resp.AudioContent)))
+
+			results[index] = resp.AudioContent
+		}(i)
+	}
+
+	wg.Wait()
+	close(errChan)
+
+	synthesisTime := time.Since(synthesisStart)
+	log.Printf("所有分段合成总耗时: %v, 平均每段耗时: %v",
+		synthesisTime, synthesisTime/time.Duration(segmentCount))
+
+	// The channel is closed, so this yields a recorded error or nil when every
+	// segment succeeded.
+	if err := <-errChan; err != nil {
+		http.Error(w, "语音合成失败: "+err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	writeStart := time.Now()
+
+	audioData, err := audioMerge(results)
+	if err != nil {
+		log.Printf("合并音频失败: %v", err)
+		http.Error(w, "音频合并失败: "+err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	w.Header().Set("Content-Type", "audio/mpeg")
+
+	totalSize := len(audioData)
+	if _, writeErr := w.Write(audioData); writeErr != nil {
+		log.Printf("写入响应失败: %v", writeErr)
+	}
+
+	// Write time as logged includes the merge step above.
+	writeTime := time.Since(writeStart)
+
+	totalTime := time.Since(segmentStart)
+	log.Printf("分段TTS请求总耗时: %v (分割: %v, 合成: %v, 写入: %v), 总音频大小: %s",
+		totalTime, splitTime, synthesisTime, writeTime, formatFileSize(totalSize))
+}
+
+// splitTextBySentences splits text into segments: text under 100 runes is returned whole; longer text is cut after each sentence ender or every TTS.MaxSentenceLength runes, then pieces shorter than TTS.MinSentenceLength are merged with their neighbours.
+func splitTextBySentences(text string) []string {
+ // Characters that end a sentence (punctuation and newline).
+ sentenceEnders := []string{"。", "!", "?", "…", ".", "!", "?", "…", "\n"}
+
+ // Short text is returned as a single segment.
+ if utf8.RuneCountInString(text) < 100 {
+ return []string{text}
+ }
+
+ var sentences []string
+ var currentSentence strings.Builder
+ maxSentenceLength := config.Get().TTS.MaxSentenceLength // upper bound, in runes, for a single segment
+ runeCount := 0 // runes accumulated in the current segment
+
+ for _, char := range text {
+ currentSentence.WriteRune(char)
+ runeCount++
+
+ // Check whether this rune is a sentence ender.
+ lastChar := string(char)
+ isSentenceEnder := false
+ for _, ender := range sentenceEnders {
+ if lastChar == ender {
+ isSentenceEnder = true
+ break
+ }
+ }
+
+ // Close the segment on an ender or at the length cap; the cap counts runes, not bytes.
+ if isSentenceEnder || runeCount >= maxSentenceLength {
+ // Append the current segment (the ender stays attached to it).
+ sentence := currentSentence.String()
+ if len(sentence) > 0 {
+ sentences = append(sentences, sentence)
+ }
+ currentSentence.Reset() // reset the builder
+ runeCount = 0 // reset the rune counter
+ }
+ }
+
+ // Flush trailing text that did not end with an ender.
+ if currentSentence.Len() > 0 {
+ lastSentence := currentSentence.String()
+ sentences = append(sentences, lastSentence)
+ }
+
+ // Second pass: merge segments that are too short.
+ minSentenceLength := config.Get().TTS.MinSentenceLength // segments below this rune count are merge candidates
+
+ if len(sentences) > 1 {
+ mergedSentences := []string{}
+ var currentMerged strings.Builder
+ currentMergedLength := 0
+
+ for i, sentence := range sentences {
+ sentenceLength := utf8.RuneCountInString(sentence)
+
+ // A short segment that is not the last one is buffered for merging.
+ if sentenceLength < minSentenceLength && i < len(sentences)-1 {
+ // Would adding it push the buffer past the maximum length?
+ if currentMergedLength+sentenceLength > maxSentenceLength {
+ // Yes: emit the buffered content first.
+ if currentMerged.Len() > 0 {
+ mergedSentences = append(mergedSentences, currentMerged.String())
+ currentMerged.Reset()
+ currentMergedLength = 0
+ }
+ }
+
+ // Add the short segment to the merge buffer.
+ currentMerged.WriteString(sentence)
+ currentMergedLength += sentenceLength
+ } else {
+ // The segment is long enough, or it is the last one.
+ if currentMerged.Len() > 0 {
+ // Does the buffer plus this segment still fit within the maximum length?
+ if currentMergedLength+sentenceLength <= maxSentenceLength {
+ // It fits: append this segment to the buffer and emit them as one.
+ currentMerged.WriteString(sentence)
+ mergedSentence := currentMerged.String()
+ mergedSentences = append(mergedSentences, mergedSentence)
+ } else {
+ // Too long together: emit the buffer and this segment separately.
+ mergedSentence := currentMerged.String()
+ mergedSentences = append(mergedSentences, mergedSentence)
+ mergedSentences = append(mergedSentences, sentence)
+ }
+ currentMerged.Reset()
+ currentMergedLength = 0
+ } else {
+ // Nothing buffered: emit this segment as is.
+ mergedSentences = append(mergedSentences, sentence)
+ }
+ }
+ }
+
+ // Flush leftover buffer. NOTE(review): appears unreachable, since the last segment always takes the else branch above, which empties the buffer.
+ if currentMerged.Len() > 0 {
+ mergedSentence := currentMerged.String()
+ mergedSentences = append(mergedSentences, mergedSentence)
+ log.Printf("添加最后剩余的合并句子,长度=%d", utf8.RuneCountInString(mergedSentence))
+ }
+
+ return mergedSentences
+ }
+
+ return sentences
+}
+
+// truncateForLog flattens line breaks to spaces and, when the result exceeds maxLength runes, keeps only its first and last maxLength/2 runes joined by "...".
+func truncateForLog(text string, maxLength int) string {
+	// Replace "\n" and then "\r" with a space so the text stays on one log line.
+	flat := strings.ReplaceAll(strings.ReplaceAll(text, "\n", " "), "\r", " ")
+	chars := []rune(flat)
+	total := len(chars)
+	if total <= maxLength {
+		return flat
+	}
+	// Head and tail each get half of the budget (integer division).
+	keep := maxLength / 2
+	head, tail := chars[:keep], chars[total-keep:]
+	return string(head) + "..." + string(tail)
+}
+
+// audioMerge concatenates the given MP3 segments in order by running ffmpeg's concat demuxer over temporary files; it returns an error for an empty input.
+func audioMerge(audioSegments [][]byte) ([]byte, error) {
+	if len(audioSegments) == 0 {
+		return nil, fmt.Errorf("没有音频片段可合并")
+	}
+
+	tempDir, err := os.MkdirTemp("", "audio_merge_")
+	if err != nil {
+		return nil, err
+	}
+	defer os.RemoveAll(tempDir) // segments, concat list and output all live here and are removed on return
+
+	listFile := filepath.Join(tempDir, "concat.txt")
+	lf, err := os.Create(listFile)
+	if err != nil {
+		return nil, err
+	}
+	defer lf.Close() // releases the handle on early returns; runs before RemoveAll and is harmless after the explicit Close
+	for i, seg := range audioSegments {
+		segFile := filepath.Join(tempDir, fmt.Sprintf("seg_%d.mp3", i))
+		if err := os.WriteFile(segFile, seg, 0644); err != nil {
+			return nil, err
+		}
+		if _, err := fmt.Fprintf(lf, "file '%s'\n", segFile); err != nil {
+			return nil, err
+		}
+	}
+	if err := lf.Close(); err != nil { // surface a failed close before ffmpeg reads the list
+		return nil, err
+	}
+
+	outputFile := filepath.Join(tempDir, "output.mp3")
+
+	cmd := exec.Command("ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", listFile, "-c", "copy", outputFile)
+	if out, err := cmd.CombinedOutput(); err != nil {
+		log.Printf("ffmpeg合并失败, 输出: %s", out) // keep ffmpeg's diagnostics in the server log only
+		return nil, err
+	}
+	mergedData, err := os.ReadFile(outputFile)
+	if err != nil {
+		return nil, err
+	}
+	log.Printf("使用ffmpeg合并完成,总大小: %s", formatFileSize(len(mergedData)))
+	return mergedData, nil
+}
+
+// formatFileSize renders a byte count as "N B" below 1024, otherwise as KB, MB or GB (powers of 1024) with two decimals.
+func formatFileSize(size int) string {
+	const kib, mib, gib = 1 << 10, 1 << 20, 1 << 30
+	switch value := float64(size); {
+	case size >= gib:
+		return fmt.Sprintf("%.2f GB", value/gib)
+	case size >= mib:
+		return fmt.Sprintf("%.2f MB", value/mib)
+	case size >= kib:
+		return fmt.Sprintf("%.2f KB", value/kib)
+	}
+	return fmt.Sprintf("%d B", size)
+}
diff --git a/internal/http/handlers/voices.go b/internal/http/handlers/voices.go
new file mode 100644
index 0000000..3907e97
--- /dev/null
+++ b/internal/http/handlers/voices.go
@@ -0,0 +1,41 @@
+package handlers
+
+import (
+ "encoding/json"
+ "net/http"
+ "tts/internal/tts"
+)
+
+// VoicesHandler serves voice-list requests on top of a tts.Service.
+type VoicesHandler struct {
+ ttsService tts.Service
+}
+
+// NewVoicesHandler returns a VoicesHandler backed by the given service.
+func NewVoicesHandler(service tts.Service) *VoicesHandler {
+ return &VoicesHandler{
+ ttsService: service,
+ }
+}
+
+// HandleVoices writes the voice list as JSON, optionally filtered by the "locale" query parameter; a service error yields a 500 response.
+func (h *VoicesHandler) HandleVoices(w http.ResponseWriter, r *http.Request) {
+ // Optional locale filter from the query string (empty means no filter).
+ locale := r.URL.Query().Get("locale")
+
+ // Fetch the voice list from the service.
+ voices, err := h.ttsService.ListVoices(r.Context(), locale)
+ if err != nil {
+ http.Error(w, "获取语音列表失败: "+err.Error(), http.StatusInternalServerError)
+ return
+ }
+
+ // Set the content type.
+ w.Header().Set("Content-Type", "application/json")
+
+ // Encode to JSON straight into the response. NOTE(review): if Encode fails after part of the body was written, the 500 status below can no longer take effect.
+ if err := json.NewEncoder(w).Encode(voices); err != nil {
+ http.Error(w, "JSON编码失败", http.StatusInternalServerError)
+ return
+ }
+}
diff --git a/internal/http/middleware/cors.go b/internal/http/middleware/cors.go
new file mode 100644
index 0000000..0bd2578
--- /dev/null
+++ b/internal/http/middleware/cors.go
@@ -0,0 +1,22 @@
+package middleware
+
+import "net/http"
+
+// CORS is middleware that adds Access-Control-Allow-* headers (any origin; GET, POST, OPTIONS) to every response and answers OPTIONS requests itself with 200.
+func CORS(next http.Handler) http.Handler {
+ return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ // Set the CORS response headers.
+ w.Header().Set("Access-Control-Allow-Origin", "*")
+ w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
+ w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization")
+
+ // Preflight requests are answered here with 200 and never reach next.
+ if r.Method == http.MethodOptions {
+ w.WriteHeader(http.StatusOK)
+ return
+ }
+
+ // Hand every other request to the next handler.
+ next.ServeHTTP(w, r)
+ })
+}
diff --git a/internal/http/middleware/logger.go b/internal/http/middleware/logger.go
new file mode 100644
index 0000000..0be2f81
--- /dev/null
+++ b/internal/http/middleware/logger.go
@@ -0,0 +1,46 @@
+package middleware
+
+import (
+ "log"
+ "net/http"
+ "time"
+)
+
+// Logger is HTTP middleware that, after the wrapped handler returns, logs the method, request URI, remote address, status code and duration.
+func Logger(next http.Handler) http.Handler {
+ return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ start := time.Now()
+
+ // Wrap the ResponseWriter to capture the status code; it stays 200 unless WriteHeader is called.
+ wrapper := &responseWriterWrapper{
+ ResponseWriter: w,
+ statusCode: http.StatusOK,
+ }
+
+ // Call the next handler with the wrapper.
+ next.ServeHTTP(wrapper, r)
+
+ // Log the request details.
+ duration := time.Since(start)
+ log.Printf(
+ "[%s] %s %s %d %s",
+ r.Method,
+ r.RequestURI,
+ r.RemoteAddr,
+ wrapper.statusCode,
+ duration,
+ )
+ })
+}
+
+// responseWriterWrapper embeds http.ResponseWriter and remembers the status code passed to WriteHeader.
+type responseWriterWrapper struct {
+ http.ResponseWriter
+ statusCode int
+}
+
+// WriteHeader records the status code and then forwards it to the wrapped ResponseWriter.
+func (w *responseWriterWrapper) WriteHeader(statusCode int) {
+ w.statusCode = statusCode
+ w.ResponseWriter.WriteHeader(statusCode)
+}
diff --git a/internal/http/server/app.go b/internal/http/server/app.go
new file mode 100644
index 0000000..edfafdd
--- /dev/null
+++ b/internal/http/server/app.go
@@ -0,0 +1,83 @@
+package server
+
+import (
+ "context"
+ "fmt"
+ "log"
+ "os"
+ "os/signal"
+ "syscall"
+ "time"
+ "tts/internal/config"
+)
+
+// App bundles the HTTP server and the loaded configuration of the TTS application.
+type App struct {
+ server *Server
+ cfg *config.Config
+}
+
+// NewApp loads the configuration at configPath, initializes the TTS service, sets up the routes and returns an App ready to Start; each failure is wrapped with the step that failed.
+func NewApp(configPath string) (*App, error) {
+ // Load the configuration.
+ cfg, err := config.Load(configPath)
+ if err != nil {
+ return nil, fmt.Errorf("加载配置失败: %w", err)
+ }
+
+ // Initialize the services.
+ ttsService, err := InitializeServices(cfg)
+ if err != nil {
+ return nil, fmt.Errorf("初始化服务失败: %w", err)
+ }
+
+ // Set up the routes.
+ handler, err := SetupRoutes(cfg, ttsService)
+ if err != nil {
+ return nil, fmt.Errorf("设置路由失败: %w", err)
+ }
+
+ // Create the HTTP server.
+ server := New(cfg, handler)
+
+ return &App{
+ server: server,
+ cfg: cfg,
+ }, nil
+}
+
+// Start runs the HTTP server in a goroutine and blocks until it returns an error or SIGINT/SIGTERM arrives; on a signal it shuts the server down with a 10-second timeout.
+func (a *App) Start() error {
+ // Buffered so the server goroutine can deliver its result even if nobody is receiving any more.
+ errChan := make(chan error, 1)
+
+ // Channel that receives the termination signals.
+ quit := make(chan os.Signal, 1)
+ signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
+
+ // Start the server in a goroutine.
+ go func() {
+ log.Printf("启动TTS服务,监听端口 %d...\n", a.cfg.Server.Port)
+ errChan <- a.server.Start()
+ }()
+
+ // Wait for a termination signal or a server error, whichever comes first.
+ select {
+ case err := <-errChan:
+ return err
+ case <-quit:
+ log.Println("接收到退出信号,正在优雅关闭...")
+
+ // Timeout context that bounds the graceful shutdown.
+ ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+ defer cancel()
+
+ // Attempt a graceful shutdown of the server.
+ if err := a.server.Shutdown(ctx); err != nil {
+ return fmt.Errorf("服务器关闭出错: %w", err)
+ }
+
+ log.Println("服务器已优雅关闭")
+ return nil
+ }
+}
diff --git a/internal/http/server/routes.go b/internal/http/server/routes.go
new file mode 100644
index 0000000..9a602f6
--- /dev/null
+++ b/internal/http/server/routes.go
@@ -0,0 +1,65 @@
+package server
+
+import (
+ "net/http"
+ "tts/internal/config"
+ "tts/internal/http/handlers"
+ "tts/internal/http/middleware"
+ "tts/internal/tts"
+ "tts/internal/tts/microsoft"
+)
+
+// SetupRoutes registers the page, TTS, voice-list, OpenAI-compatible speech and static-file routes on a new mux, optionally strips cfg.Server.BasePath, and wraps the result in the Logger and CORS middleware.
+func SetupRoutes(cfg *config.Config, ttsService tts.Service) (http.Handler, error) {
+ // Create a new request multiplexer.
+ mux := http.NewServeMux()
+
+ // Create the API handlers.
+ ttsHandler := handlers.NewTTSHandler(ttsService, cfg)
+ voicesHandler := handlers.NewVoicesHandler(ttsService)
+
+ // Create the pages handler; the template directory is relative to the working directory.
+ pagesHandler, err := handlers.NewPagesHandler("./web/templates", cfg)
+ if err != nil {
+ return nil, err
+ }
+
+ // Home page route.
+ mux.HandleFunc("/", pagesHandler.HandleIndex)
+
+ // API documentation route.
+ mux.HandleFunc("/api-doc", pagesHandler.HandleAPIDoc)
+
+ // TTS API route.
+ mux.HandleFunc("/tts", ttsHandler.HandleTTS)
+
+ // Voice-list API route.
+ mux.HandleFunc("/voices", voicesHandler.HandleVoices)
+
+ mux.HandleFunc("/v1/audio/speech", ttsHandler.HandleOpenAITTS)
+ mux.HandleFunc("/audio/speech", ttsHandler.HandleOpenAITTS)
+
+ // Static file serving, relative to the working directory.
+ fs := http.FileServer(http.Dir("./web/static"))
+ mux.Handle("/static/", http.StripPrefix("/static/", fs))
+
+ // Apply the base-path prefix, if one is configured.
+ var handler http.Handler = mux
+ if cfg.Server.BasePath != "" {
+ handler = http.StripPrefix(cfg.Server.BasePath, mux)
+ }
+
+ // Apply middleware; CORS is outermost, so OPTIONS requests it answers never reach Logger.
+ handler = middleware.Logger(handler) // logging middleware
+ handler = middleware.CORS(handler) // CORS middleware
+
+ return handler, nil
+}
+
+// InitializeServices builds the TTS service; it currently returns a Microsoft client and always a nil error.
+func InitializeServices(cfg *config.Config) (tts.Service, error) {
+ // Create the Microsoft TTS client.
+ ttsClient := microsoft.NewClient(cfg)
+
+ return ttsClient, nil
+}
diff --git a/internal/http/server/server.go b/internal/http/server/server.go
new file mode 100644
index 0000000..fe581a8
--- /dev/null
+++ b/internal/http/server/server.go
@@ -0,0 +1,45 @@
+package server
+
+import (
+ "context"
+ "fmt"
+ "net/http"
+ "time"
+
+ "tts/internal/config"
+)
+
+// Server wraps an http.Server together with the configured base path.
+type Server struct {
+ server *http.Server
+ basePath string
+}
+
+// New builds a Server listening on cfg.Server.Port with the given handler; read/write timeouts come from cfg (in seconds) and the idle timeout is fixed at 120 seconds.
+func New(cfg *config.Config, handler http.Handler) *Server {
+ // Create the underlying HTTP server.
+ httpServer := &http.Server{
+ Addr: fmt.Sprintf(":%d", cfg.Server.Port),
+ Handler: handler,
+ ReadTimeout: time.Duration(cfg.Server.ReadTimeout) * time.Second,
+ WriteTimeout: time.Duration(cfg.Server.WriteTimeout) * time.Second,
+ IdleTimeout: 120 * time.Second,
+ }
+
+ return &Server{
+ server: httpServer,
+ basePath: cfg.Server.BasePath,
+ }
+}
+
+// Start prints the listen address and blocks in ListenAndServe, returning its error.
+func (s *Server) Start() error {
+ fmt.Printf("服务启动在 %s\n", s.server.Addr)
+ return s.server.ListenAndServe()
+}
+
+// Shutdown prints a notice and delegates to http.Server.Shutdown with the given context.
+func (s *Server) Shutdown(ctx context.Context) error {
+ fmt.Println("正在关闭HTTP服务器...")
+ return s.server.Shutdown(ctx)
+}
diff --git a/internal/models/tts.go b/internal/models/tts.go
new file mode 100644
index 0000000..67ee7a5
--- /dev/null
+++ b/internal/models/tts.go
@@ -0,0 +1,16 @@
+package models
+
+// TTSRequest is a speech-synthesis request.
+type TTSRequest struct {
+ Text string `json:"text"` // text to convert
+ Voice string `json:"voice"` // voice ID
+ Rate string `json:"rate"` // speaking rate (-100% to +100%)
+ Pitch string `json:"pitch"` // pitch (-100% to +100%)
+}
+
+// TTSResponse is a speech-synthesis response.
+type TTSResponse struct {
+ AudioContent []byte `json:"audio_content"` // audio data
+ ContentType string `json:"content_type"` // MIME type
+ CacheHit bool `json:"cache_hit"` // whether the result came from a cache
+}
diff --git a/internal/models/voice.go b/internal/models/voice.go
new file mode 100644
index 0000000..4391694
--- /dev/null
+++ b/internal/models/voice.go
@@ -0,0 +1,14 @@
+package models
+
+// Voice describes one speech-synthesis voice.
+type Voice struct {
+ Name string `json:"name"` // unique voice identifier
+ DisplayName string `json:"display_name"` // display name
+ LocalName string `json:"local_name"` // localized name
+ ShortName string `json:"short_name"` // short name, e.g. zh-CN-XiaoxiaoNeural
+ Gender string `json:"gender"` // gender: Female, Male
+ Locale string `json:"locale"` // locale, e.g. zh-CN
+ LocaleName string `json:"locale_name"` // locale display name, e.g. Chinese (China)
+ StyleList []string `json:"style_list,omitempty"` // supported speaking styles
+ SampleRateHertz string `json:"sample_rate_hertz"` // sample rate
+}
diff --git a/internal/tts/microsoft/client.go b/internal/tts/microsoft/client.go
new file mode 100644
index 0000000..8e869f7
--- /dev/null
+++ b/internal/tts/microsoft/client.go
@@ -0,0 +1,290 @@
+package microsoft
+
+import (
+ "bytes"
+ "context"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "html"
+ "io"
+ "log"
+ "net/http"
+ "strings"
+ "sync"
+ "time"
+
+ "tts/internal/config"
+ "tts/internal/models"
+ "tts/internal/utils"
+)
+
+const (
+ userAgent = "okhttp/4.5.0" // sent as the User-Agent header on synthesis requests
+ voicesEndpoint = "https://%s.tts.speech.microsoft.com/cognitiveservices/voices/list" // %s is filled with the endpoint's "r" value
+ ttsEndpoint = "https://%s.tts.speech.microsoft.com/cognitiveservices/v1" // %s is filled with the endpoint's "r" value
+ ssmlTemplate = `
+
+
+
+ %s
+
+
+
+ ` // NOTE(review): createTTSRequest passes five arguments (locale, voice, rate, pitch, text) but only one %s is visible here; the SSML markup seems to be missing from this view, confirm against the real file.
+)
+
+// Client is the Microsoft TTS API implementation; it caches the voice list and the endpoint/token info, each behind its own RWMutex.
+type Client struct {
+ defaultVoice string
+ defaultRate string
+ defaultPitch string
+ defaultFormat string
+ maxTextLength int
+ httpClient *http.Client
+ voicesCache []models.Voice
+ voicesCacheMu sync.RWMutex
+ voicesCacheExpiry time.Time
+
+ // Endpoint and authentication info; callers read its "r" and "t" keys.
+ endpoint map[string]interface{}
+ endpointMu sync.RWMutex
+ endpointExpiry time.Time
+}
+
+func (c *Client) HandleOpenAITTS(w http.ResponseWriter, r *http.Request) {
+ // TODO: not implemented; this stub panics on every call.
+ panic("implement me")
+}
+
+// NewClient builds a Microsoft TTS client from cfg.TTS: default voice/rate/pitch/format, the maximum text length and an HTTP client whose timeout is RequestTimeout seconds.
+func NewClient(cfg *config.Config) *Client {
+ client := &Client{
+ defaultVoice: cfg.TTS.DefaultVoice,
+ defaultRate: cfg.TTS.DefaultRate,
+ defaultPitch: cfg.TTS.DefaultPitch,
+ defaultFormat: cfg.TTS.DefaultFormat,
+ maxTextLength: cfg.TTS.MaxTextLength,
+ httpClient: &http.Client{
+ Timeout: time.Duration(cfg.TTS.RequestTimeout) * time.Second,
+ },
+ voicesCacheExpiry: time.Time{}, // zero time: the voice cache starts empty
+ endpointExpiry: time.Time{}, // zero time: no endpoint cached yet
+ }
+
+ return client
+}
+
+// getEndpoint returns the cached endpoint info while it is fresh; otherwise it fetches a new one via utils.GetEndpoint, rejects payloads lacking a string "t" or non-empty string "r", and caches the result for 45 minutes.
+func (c *Client) getEndpoint(ctx context.Context) (map[string]interface{}, error) {
+	c.endpointMu.RLock()
+	if !c.endpointExpiry.IsZero() && time.Now().Before(c.endpointExpiry) && c.endpoint != nil {
+		endpoint := c.endpoint
+		c.endpointMu.RUnlock()
+		return endpoint, nil
+	}
+	c.endpointMu.RUnlock()
+
+	endpoint, err := utils.GetEndpoint() // ctx is not forwarded: utils.GetEndpoint takes no context
+	if err != nil {
+		return nil, err
+	}
+	_, hasToken := endpoint["t"].(string) // callers assert this type unchecked; validating here keeps them from panicking
+	if region, _ := endpoint["r"].(string); !hasToken || region == "" {
+		return nil, errors.New("端点响应缺少有效的 t 或 r 字段")
+	}
+	c.endpointMu.Lock() // publish only a validated endpoint
+	c.endpoint = endpoint
+	c.endpointExpiry = time.Now().Add(45 * time.Minute) // per the original note, tokens usually last 1 hour, so refresh early
+	c.endpointMu.Unlock()
+	return endpoint, nil
+}
+
+// ListVoices returns the available voices, served from a one-hour cache when possible; a non-empty locale keeps only voices whose Locale starts with it (the filtered result is nil when nothing matches).
+func (c *Client) ListVoices(ctx context.Context, locale string) ([]models.Voice, error) {
+ // Serve from the cache while it is valid.
+ c.voicesCacheMu.RLock()
+ if !c.voicesCacheExpiry.IsZero() && time.Now().Before(c.voicesCacheExpiry) && len(c.voicesCache) > 0 {
+ voices := c.voicesCache
+ c.voicesCacheMu.RUnlock()
+
+ // Filter by locale prefix if one was given.
+ if locale != "" {
+ var filtered []models.Voice
+ for _, voice := range voices {
+ if strings.HasPrefix(voice.Locale, locale) {
+ filtered = append(filtered, voice)
+ }
+ }
+ return filtered, nil
+ }
+ return voices, nil
+ }
+ c.voicesCacheMu.RUnlock()
+
+ // Cache miss or expired: fetch from the API.
+ endpoint, err := c.getEndpoint(ctx)
+ if err != nil {
+ return nil, err
+ }
+
+ url := fmt.Sprintf(voicesEndpoint, endpoint["r"])
+ req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+ if err != nil {
+ return nil, err
+ }
+
+ // Authenticate with the endpoint's "t" value; this type assertion panics if "t" is not a string.
+ req.Header.Set("Authorization", endpoint["t"].(string))
+
+ resp, err := c.httpClient.Do(req)
+ if err != nil {
+ return nil, err
+ }
+ defer resp.Body.Close()
+
+ if resp.StatusCode != http.StatusOK {
+ body, _ := io.ReadAll(resp.Body)
+ return nil, fmt.Errorf("API error: %s, status: %d", string(body), resp.StatusCode)
+ }
+
+ var msVoices []MicrosoftVoice
+ if err := json.NewDecoder(resp.Body).Decode(&msVoices); err != nil {
+ return nil, err
+ }
+
+ // Convert to the provider-independent model.
+ voices := make([]models.Voice, len(msVoices))
+ for i, v := range msVoices {
+ voices[i] = models.Voice{
+ Name: v.Name,
+ DisplayName: v.DisplayName,
+ LocalName: v.LocalName,
+ ShortName: v.ShortName,
+ Gender: v.Gender,
+ Locale: v.Locale,
+ LocaleName: v.LocaleName,
+ StyleList: v.StyleList,
+ SampleRateHertz: v.SampleRateHertz, // already a string on both sides, no conversion needed
+ }
+ }
+
+ // Update the cache.
+ c.voicesCacheMu.Lock()
+ c.voicesCache = voices
+ c.voicesCacheExpiry = time.Now().Add(1 * time.Hour) // cache for one hour
+ c.voicesCacheMu.Unlock()
+
+ // Filter by locale prefix if one was given (same rule as the cached path).
+ if locale != "" {
+ var filtered []models.Voice
+ for _, voice := range voices {
+ if strings.HasPrefix(voice.Locale, locale) {
+ filtered = append(filtered, voice)
+ }
+ }
+ return filtered, nil
+ }
+
+ return voices, nil
+}
+
+// SynthesizeSpeech converts the request text to speech and returns the whole audio body in memory; ContentType is always "audio/mpeg" and CacheHit always false.
+func (c *Client) SynthesizeSpeech(ctx context.Context, req models.TTSRequest) (*models.TTSResponse, error) {
+ resp, err := c.createTTSRequest(ctx, req)
+ if err != nil {
+ return nil, err
+ }
+ defer resp.Body.Close()
+
+ // Read the audio data.
+ audio, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return nil, err
+ }
+
+ return &models.TTSResponse{
+ AudioContent: audio,
+ ContentType: "audio/mpeg",
+ CacheHit: false,
+ }, nil
+}
+
+// createTTSRequest validates req, builds the SSML body and POSTs it to the TTS endpoint; on a 200 response it returns the open response, whose Body the caller must close.
+func (c *Client) createTTSRequest(ctx context.Context, req models.TTSRequest) (*http.Response, error) {
+ // Validate input. NOTE(review): the length check below counts bytes (len), not runes; confirm that is the intended unit for maxTextLength.
+ if req.Text == "" {
+ return nil, errors.New("文本不能为空")
+ }
+
+ if len(req.Text) > c.maxTextLength {
+ return nil, fmt.Errorf("文本长度超过限制 (%d > %d)", len(req.Text), c.maxTextLength)
+ }
+
+ // Fill empty parameters with the client defaults.
+ voice := req.Voice
+ if voice == "" {
+ voice = c.defaultVoice
+ }
+
+ rate := req.Rate
+ if rate == "" {
+ rate = c.defaultRate
+ }
+
+ pitch := req.Pitch
+ if pitch == "" {
+ pitch = c.defaultPitch
+ }
+
+ // Derive the locale from the first two dash-separated parts of the voice name.
+ locale := "zh-CN" // fallback when the voice name has fewer than two parts
+ parts := strings.Split(voice, "-")
+ if len(parts) >= 2 {
+ locale = parts[0] + "-" + parts[1]
+ }
+
+ // HTML-escape the text so it cannot break the SSML XML; voice, rate and pitch are inserted unescaped.
+
+ escapedText := html.EscapeString(req.Text)
+
+ // Build the SSML body.
+ ssml := fmt.Sprintf(ssmlTemplate, locale, voice, rate, pitch, escapedText)
+
+ // Get the endpoint info.
+ endpoint, err := c.getEndpoint(ctx)
+ if err != nil {
+ return nil, err
+ }
+
+ // Prepare the request.
+ url := fmt.Sprintf(ttsEndpoint, endpoint["r"])
+ reqBody := bytes.NewBufferString(ssml)
+
+ httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, reqBody)
+ if err != nil {
+ return nil, err
+ }
+
+ httpReq.Header.Set("Authorization", endpoint["t"].(string))
+ httpReq.Header.Set("Content-Type", "application/ssml+xml")
+ httpReq.Header.Set("X-Microsoft-OutputFormat", c.defaultFormat)
+ httpReq.Header.Set("User-Agent", userAgent)
+
+ // Send the request.
+ resp, err := c.httpClient.Do(httpReq)
+
+ if err != nil {
+ return nil, err
+ }
+
+ if resp.StatusCode != http.StatusOK {
+ // Read the body for diagnostics, then close it since the response is not returned.
+ body, _ := io.ReadAll(resp.Body)
+ resp.Body.Close()
+ log.Printf("TTS API错误: %s, 状态码: %d", string(body), resp.StatusCode)
+ return nil, fmt.Errorf("TTS API错误: %s, 状态码: %d", string(body), resp.StatusCode)
+ }
+
+ return resp, nil
+}
diff --git a/internal/tts/microsoft/models.go b/internal/tts/microsoft/models.go
new file mode 100644
index 0000000..27fcc6d
--- /dev/null
+++ b/internal/tts/microsoft/models.go
@@ -0,0 +1,45 @@
+package microsoft
+
+// MicrosoftVoice is one voice entry as decoded from the Microsoft TTS voice-list response.
+type MicrosoftVoice struct {
+ Name string `json:"Name"`
+ DisplayName string `json:"DisplayName"`
+ LocalName string `json:"LocalName"`
+ ShortName string `json:"ShortName"`
+ Gender string `json:"Gender"`
+ Locale string `json:"Locale"`
+ LocaleName string `json:"LocaleName"`
+ StyleList []string `json:"StyleList,omitempty"`
+ SampleRateHertz string `json:"SampleRateHertz"`
+ VoiceType string `json:"VoiceType"`
+ Status string `json:"Status"`
+}
+
+// SSMLRequest holds the fields of an SSML request sent to the Microsoft TTS service.
+type SSMLRequest struct {
+ XMLHeader string
+ Voice string
+ Language string
+ Rate string
+ Pitch string
+ Text string
+}
+
+// FormatContentTypeMap maps Microsoft output-format names to MIME types.
+var FormatContentTypeMap = map[string]string{
+ "raw-16khz-16bit-mono-pcm": "audio/pcm",
+ "raw-8khz-8bit-mono-mulaw": "audio/basic",
+ "riff-8khz-8bit-mono-alaw": "audio/alaw",
+ "riff-8khz-8bit-mono-mulaw": "audio/mulaw",
+ "riff-16khz-16bit-mono-pcm": "audio/wav",
+ "audio-16khz-128kbitrate-mono-mp3": "audio/mp3",
+ "audio-16khz-64kbitrate-mono-mp3": "audio/mp3",
+ "audio-16khz-32kbitrate-mono-mp3": "audio/mp3",
+ "raw-24khz-16bit-mono-pcm": "audio/pcm",
+ "riff-24khz-16bit-mono-pcm": "audio/wav",
+ "audio-24khz-160kbitrate-mono-mp3": "audio/mp3",
+ "audio-24khz-96kbitrate-mono-mp3": "audio/mp3",
+ "audio-24khz-48kbitrate-mono-mp3": "audio/mp3",
+ "ogg-24khz-16bit-mono-opus": "audio/ogg",
+ "webm-24khz-16bit-mono-opus": "audio/webm",
+}
diff --git a/internal/tts/service.go b/internal/tts/service.go
new file mode 100644
index 0000000..ade289c
--- /dev/null
+++ b/internal/tts/service.go
@@ -0,0 +1,15 @@
+package tts
+
+import (
+ "context"
+ "tts/internal/models"
+)
+
+// Service is the interface a TTS backend must implement.
+type Service interface {
+ // ListVoices returns the available voices, filtered by locale.
+ ListVoices(ctx context.Context, locale string) ([]models.Voice, error)
+
+ // SynthesizeSpeech converts the request text to speech.
+ SynthesizeSpeech(ctx context.Context, req models.TTSRequest) (*models.TTSResponse, error)
+}
diff --git a/internal/utils/utils.go b/internal/utils/utils.go
new file mode 100644
index 0000000..8d97dd6
--- /dev/null
+++ b/internal/utils/utils.go
@@ -0,0 +1,87 @@
+package utils
+
+import (
+ "crypto/hmac"
+ "crypto/sha256"
+ "encoding/base64"
+ "encoding/json"
+ "fmt"
+ "net/http"
+ "net/url"
+ "strings"
+ "time"
+
+ "github.com/google/uuid"
+ "github.com/sirupsen/logrus"
+)
+
+var (
+ log = logrus.New() // package-level logrus logger
+ client = &http.Client{} // shared HTTP client; no Timeout is configured
+)
+
+const (
+ endpointURL = "https://dev.microsofttranslator.com/apps/endpoint?api-version=1.0" // URL that GetEndpoint POSTs to and Sign signs
+ userAgent = "okhttp/4.5.0" // sent as User-Agent
+ clientVersion = "4.0.530a 5fe1dc6c" // sent as X-ClientVersion
+ userId = "0f04d16a175c411e" // sent as X-UserId
+ homeGeographicRegion = "zh-Hans-CN" // sent as X-HomeGeographicRegion
+ clientTraceId = "aab069b9-70a7-4844-a734-96cd78d94be9" // sent as X-ClientTraceId
+ voiceDecodeKey = "oik6PdDdMnOXemTbwvMn9de/h9lFnfBaCWbGMMZqqoSaQaqUOqjVGm5NqsmjcBI1x+sS9ugjB55HEJWRiFXYFw==" // base64 HMAC key used by Sign
+)
+
+// GetEndpoint POSTs a signed request to endpointURL and returns the decoded JSON object; a non-200 status is reported as an error instead of being decoded.
+func GetEndpoint() (map[string]interface{}, error) {
+	signature := Sign(endpointURL)
+	headers := map[string]string{
+		"Accept-Language":        "zh-Hans",
+		"X-ClientVersion":        clientVersion,
+		"X-UserId":               userId,
+		"X-HomeGeographicRegion": homeGeographicRegion,
+		"X-ClientTraceId":        clientTraceId,
+		"X-MT-Signature":         signature,
+		"User-Agent":             userAgent,
+		"Content-Type":           "application/json; charset=utf-8",
+		"Content-Length":         "0",
+		// Accept-Encoding is deliberately not set: net/http then requests gzip itself and transparently decompresses the body.
+	}
+	req, err := http.NewRequest("POST", endpointURL, nil)
+	if err != nil {
+		return nil, err
+	}
+	for k, v := range headers {
+		req.Header.Set(k, v)
+	}
+
+	resp, err := client.Do(req)
+	if err != nil {
+		log.Error("failed to do request: ", err)
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("endpoint request: unexpected status %d", resp.StatusCode)
+	}
+
+	var result map[string]interface{}
+	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+		return nil, err
+	}
+	return result, nil
+}
+
+// Sign builds the signature string for urlStr: an HMAC-SHA256 (keyed with the decoded voiceDecodeKey) over the lower-cased app name, escaped URL, date and a fresh UUID, returned as "MSTranslatorAndroidApp::<sig>::<date>::<uuid>". It panics if urlStr contains no "://".
+func Sign(urlStr string) string {
+ u := strings.Split(urlStr, "://")[1]
+ encodedUrl := url.QueryEscape(u)
+ uuidStr := strings.ReplaceAll(uuid.New().String(), "-", "")
+ formattedDate := strings.ToLower(time.Now().UTC().Format("Mon, 02 Jan 2006 15:04:05")) + "gmt"
+ bytesToSign := fmt.Sprintf("MSTranslatorAndroidApp%s%s%s", encodedUrl, formattedDate, uuidStr)
+ bytesToSign = strings.ToLower(bytesToSign)
+ decode, _ := base64.StdEncoding.DecodeString(voiceDecodeKey)
+ hash := hmac.New(sha256.New, decode)
+ hash.Write([]byte(bytesToSign))
+ secretKey := hash.Sum(nil)
+ signBase64 := base64.StdEncoding.EncodeToString(secretKey)
+ return fmt.Sprintf("MSTranslatorAndroidApp::%s::%s::%s", signBase64, formattedDate, uuidStr)
+}
diff --git a/routes/routes.go b/routes/routes.go
deleted file mode 100644
index d8d0083..0000000
--- a/routes/routes.go
+++ /dev/null
@@ -1,23 +0,0 @@
-package routes
-
-import (
- "tts/handlers"
-
- "github.com/gin-gonic/gin"
-)
-
-func SetupRouter() *gin.Engine {
- router := gin.Default()
-
- // 加载模板文件
- router.LoadHTMLGlob("templates/*")
-
- router.GET("/voices", handlers.GetVoiceList)
- router.POST("/tts", handlers.SynthesizeVoicePost)
- router.GET("/tts", handlers.SynthesizeVoice)
- router.GET("/v1/audio/speech", handlers.SynthesizeVoiceOpenAI)
- router.GET("/", handlers.Index)
- router.GET("/doc", handlers.ApiDoc)
-
- return router
-}
diff --git a/templates/api-doc.html b/templates/api-doc.html
deleted file mode 100644
index 5c5f7dd..0000000
--- a/templates/api-doc.html
+++ /dev/null
@@ -1,37 +0,0 @@
-
-
-
-
- TTS
-
-
- 支持接口
-语音合成
-
-
/tts | GET / POST(json)
-
try
-
-
-
-
-参数列表:
-1. t: 文本内容 (必填)
-2. v: 语音名称 (可选), 默认为 zh-CN-XiaoxiaoMultilingualNeural
-3. r: 语速 (可选), 默认为 0
-4. p: 语调 (可选), 默认为 0
-5. o: 输出格式 (可选), 默认为audio-24khz-48kbitrate-mono-mp3
-
-
-
-声音列表
-
-
-
-参数列表:
-1. l: 语言区域 (可选), 使用 contains 匹配,如 l=zh
-2. d: 显示详细信息 (可选) , 默认为 false, 如需显示详细信息, 请添加参数d , 如 /voices?d
-
-
-
diff --git a/templates/index.html b/templates/index.html
deleted file mode 100644
index 9500d00..0000000
--- a/templates/index.html
+++ /dev/null
@@ -1,121 +0,0 @@
-
-
-
-
-
- TTS Demo
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/utils/utils.go b/utils/utils.go
deleted file mode 100644
index 6f858b5..0000000
--- a/utils/utils.go
+++ /dev/null
@@ -1,218 +0,0 @@
-package utils
-
-import (
- "bytes"
- "crypto/hmac"
- "crypto/sha256"
- "encoding/base64"
- "encoding/json"
- "fmt"
- "github.com/google/uuid"
- "github.com/sirupsen/logrus"
- "html"
- "io"
- "net/http"
- "net/url"
- "strings"
- "time"
-)
-
-var (
- log = logrus.New()
- client = &http.Client{}
- voiceListCache []interface{}
- cacheDuration = 1 * time.Hour // 缓存持续时间
-)
-
-func init() {
- ticker := time.NewTicker(cacheDuration)
- go func() {
- for range ticker.C {
- voiceListCache = nil
- }
- }()
-}
-
-const (
- endpointURL = "https://dev.microsofttranslator.com/apps/endpoint?api-version=1.0"
- voicesListURL = "https://eastus.api.speech.microsoft.com/cognitiveservices/voices/list"
- userAgent = "okhttp/4.5.0"
- clientVersion = "4.0.530a 5fe1dc6c"
- userId = "0f04d16a175c411e"
- homeGeographicRegion = "zh-Hans-CN"
- clientTraceId = "aab069b9-70a7-4844-a734-96cd78d94be9"
- voiceDecodeKey = "oik6PdDdMnOXemTbwvMn9de/h9lFnfBaCWbGMMZqqoSaQaqUOqjVGm5NqsmjcBI1x+sS9ugjB55HEJWRiFXYFw=="
- defaultVoiceName = "zh-CN-XiaoxiaoMultilingualNeural"
- defaultRate = "0"
- defaultPitch = "0"
- defaultOutputFormat = "audio-24khz-48kbitrate-mono-mp3"
- defaultStyle = "general"
-)
-
-// GetEndpoint 获取语音合成服务的端点信息
-func GetEndpoint() (map[string]interface{}, error) {
- signature := Sign(endpointURL)
- headers := map[string]string{
- "Accept-Language": "zh-Hans",
- "X-ClientVersion": clientVersion,
- "X-UserId": userId,
- "X-HomeGeographicRegion": homeGeographicRegion,
- "X-ClientTraceId": clientTraceId,
- "X-MT-Signature": signature,
- "User-Agent": userAgent,
- "Content-Type": "application/json; charset=utf-8",
- "Content-Length": "0",
- "Accept-Encoding": "gzip",
- }
- req, err := http.NewRequest("POST", endpointURL, nil)
- if err != nil {
- return nil, err
- }
-
- for k, v := range headers {
- req.Header.Set(k, v)
- }
-
- resp, err := client.Do(req)
- if err != nil {
- log.Error("failed to do request: ", err)
- return nil, err
- }
- defer resp.Body.Close()
-
- var result map[string]interface{}
- err = json.NewDecoder(resp.Body).Decode(&result)
- if err != nil {
- return nil, err
- }
-
- return result, nil
-}
-
-// Sign 生成签名
-func Sign(urlStr string) string {
- u := strings.Split(urlStr, "://")[1]
- encodedUrl := url.QueryEscape(u)
- uuidStr := strings.ReplaceAll(uuid.New().String(), "-", "")
- formattedDate := strings.ToLower(time.Now().UTC().Format("Mon, 02 Jan 2006 15:04:05")) + "gmt"
- bytesToSign := fmt.Sprintf("MSTranslatorAndroidApp%s%s%s", encodedUrl, formattedDate, uuidStr)
- bytesToSign = strings.ToLower(bytesToSign)
- decode, _ := base64.StdEncoding.DecodeString(voiceDecodeKey)
- hash := hmac.New(sha256.New, decode)
- hash.Write([]byte(bytesToSign))
- secretKey := hash.Sum(nil)
- signBase64 := base64.StdEncoding.EncodeToString(secretKey)
- return fmt.Sprintf("MSTranslatorAndroidApp::%s::%s::%s", signBase64, formattedDate, uuidStr)
-}
-
-// GetVoice 获取语音合成结果
-func GetVoice(text, voiceName, rate, pitch, outputFormat, style string) ([]byte, error) {
- if voiceName == "" {
- voiceName = defaultVoiceName
- }
- if rate == "" {
- rate = defaultRate
- }
- if pitch == "" {
- pitch = defaultPitch
- }
- if outputFormat == "" {
- outputFormat = defaultOutputFormat
- }
-
- if style == "" {
- style = defaultStyle
- }
-
- endpoint, err := GetEndpoint()
- if err != nil {
- return nil, err
- }
-
- u := fmt.Sprintf("https://%s.tts.speech.microsoft.com/cognitiveservices/v1", endpoint["r"])
- headers := map[string]string{
- "Authorization": endpoint["t"].(string),
- "Content-Type": "application/ssml+xml",
- "X-Microsoft-OutputFormat": outputFormat,
- }
-
- ssml := GetSsml(text, voiceName, rate, pitch, style)
-
- req, err := http.NewRequest("POST", u, bytes.NewBufferString(ssml))
- if err != nil {
- return nil, err
- }
-
- for k, v := range headers {
- req.Header.Set(k, v)
- }
-
- resp, err := client.Do(req)
- if err != nil {
- log.Error("failed to do request: ", err)
- return nil, err
- }
- defer resp.Body.Close()
-
- return io.ReadAll(resp.Body)
-}
-
-// GetSsml 生成 SSML 格式的文本
-func GetSsml(text, voiceName, rate, pitch, style string) string {
- // 对文本进行转义
- text = html.EscapeString(text)
- return fmt.Sprintf(`
-
-
-
-
- %s
-
-
-
-
- `, voiceName, style, rate, pitch, text)
-}
-
-// VoiceList 获取可用的语音列表
-func VoiceList() ([]interface{}, error) {
- // 如果缓存中有值,直接返回缓存的结果
- if voiceListCache != nil {
- return voiceListCache, nil
- }
-
- headers := map[string]string{
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.26",
- "X-Ms-Useragent": "SpeechStudio/2021.05.001",
- "Content-Type": "application/json",
- "Origin": "https://azure.microsoft.com",
- "Referer": "https://azure.microsoft.com",
- }
-
- req, err := http.NewRequest("GET", voicesListURL, nil)
- if err != nil {
- return nil, err
- }
-
- for k, v := range headers {
- req.Header.Set(k, v)
- }
-
- resp, err := client.Do(req)
- if err != nil {
- log.Error("failed to do request: ", err)
- return nil, err
- }
- defer resp.Body.Close()
-
- var result []interface{}
- err = json.NewDecoder(resp.Body).Decode(&result)
- if err != nil {
- return nil, err
- }
-
- // 将结果存储到缓存中
- voiceListCache = result
-
- return result, nil
-}
diff --git a/web/static/css/style.css b/web/static/css/style.css
new file mode 100644
index 0000000..5970b68
--- /dev/null
+++ b/web/static/css/style.css
@@ -0,0 +1,271 @@
+/* Base reset: border-box sizing and zero default margin/padding on every element */
+* {
+ box-sizing: border-box;
+ margin: 0;
+ padding: 0;
+}
+
+body {
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
+ line-height: 1.6;
+ color: #333;
+ background-color: #f5f7fa;
+ padding: 20px;
+}
+
+/* Page container: horizontally centered, capped at 1000px wide */
+.container {
+ max-width: 1000px;
+ margin: 0 auto;
+}
+
+/* Page header: centered title (h1) and tagline (p) */
+header {
+ text-align: center;
+ margin-bottom: 30px;
+ padding: 20px;
+}
+
+header h1 {
+ font-size: 2.5rem;
+ margin-bottom: 10px;
+ color: #2c3e50;
+}
+
+header p {
+ font-size: 1.2rem;
+ color: #7f8c8d;
+ margin-bottom: 20px;
+}
+
+/* Navigation: centered link row; hovered and .active links share the same filled style */
+nav {
+ display: flex;
+ justify-content: center;
+ margin-top: 20px;
+}
+
+nav a {
+ text-decoration: none;
+ color: #3498db;
+ margin: 0 15px;
+ padding: 5px 10px;
+ border-radius: 5px;
+ transition: all 0.3s ease;
+}
+
+nav a:hover {
+ background-color: #3498db;
+ color: #fff;
+}
+
+nav a.active {
+ background-color: #3498db;
+ color: #fff;
+}
+
+/* Card: white rounded panel with a soft drop shadow */
+.card {
+ background-color: #fff;
+ border-radius: 10px;
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+ padding: 25px;
+ margin-bottom: 25px;
+}
+
+/* Headings: h2 gets a bottom rule, h3 uses the accent blue */
+h2 {
+ color: #2c3e50;
+ margin-bottom: 20px;
+ border-bottom: 1px solid #ecf0f1;
+ padding-bottom: 10px;
+}
+
+h3 {
+ color: #3498db;
+ margin: 20px 0 10px;
+}
+
+/* Text input area: .input-group is the positioning context for the absolutely placed .char-counter */
+.input-group {
+ position: relative;
+ margin-bottom: 20px;
+}
+
+textarea {
+ width: 100%;
+ padding: 15px;
+ border: 1px solid #ddd;
+ border-radius: 5px;
+ resize: none;
+ font-size: 1rem;
+ font-family: inherit;
+}
+
+textarea:focus {
+ outline: none;
+ border-color: #3498db;
+ box-shadow: 0 0 0 2px rgba(52, 152, 219, 0.2);
+}
+
+.char-counter {
+ position: absolute;
+ bottom: 10px;
+ right: 10px;
+ font-size: 0.8rem;
+ color: #7f8c8d;
+}
+
+/* Settings area: auto-fitting grid of columns at least 250px wide, each holding a label above its control */
+.settings {
+ display: grid;
+ grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
+ gap: 20px;
+ margin-bottom: 20px;
+}
+
+.setting-group {
+ display: flex;
+ flex-direction: column;
+}
+
+label {
+ margin-bottom: 5px;
+ font-weight: bold;
+ color: #2c3e50;
+}
+
+select, input[type="range"] {
+ padding: 8px;
+ border: 1px solid #ddd;
+ border-radius: 5px;
+ background-color: #fff;
+}
+
+select:focus {
+ outline: none;
+ border-color: #3498db;
+}
+
+/* Buttons: shared base style plus primary (blue) and secondary (grey) variants */
+.actions {
+ display: flex;
+ justify-content: center;
+ margin-top: 20px;
+}
+
+button {
+ padding: 10px 20px;
+ border: none;
+ border-radius: 5px;
+ cursor: pointer;
+ font-size: 1rem;
+ transition: all 0.3s ease;
+}
+
+.primary-button {
+ background-color: #3498db;
+ color: #fff;
+}
+
+.primary-button:hover {
+ background-color: #2980b9;
+}
+
+.secondary-button {
+ background-color: #ecf0f1;
+ color: #2c3e50;
+ margin: 0 5px;
+}
+
+.secondary-button:hover {
+ background-color: #bdc3c7;
+}
+
+/* Audio player: full-width <audio> element stacked above a centered row of controls */
+.audio-player {
+ display: flex;
+ flex-direction: column;
+ align-items: center;
+}
+
+audio {
+ width: 100%;
+ margin-bottom: 15px;
+}
+
+.audio-controls {
+ display: flex;
+ justify-content: center;
+}
+
+/* Tables: full width, collapsed borders, a bottom rule per row and shaded header cells */
+table {
+ width: 100%;
+ border-collapse: collapse;
+ margin: 20px 0;
+}
+
+th, td {
+ padding: 12px 15px;
+ text-align: left;
+ border-bottom: 1px solid #ddd;
+}
+
+th {
+ background-color: #f8f9fa;
+ font-weight: bold;
+}
+
+/* Code: inline code and pre blocks share a monospace look; code nested in pre drops its own padding and background */
+code, pre {
+ font-family: SFMono-Regular, Menlo, Monaco, Consolas, monospace;
+ background-color: #f8f9fa;
+ border-radius: 3px;
+ padding: 2px 5px;
+ font-size: 0.9rem;
+}
+
+pre {
+ padding: 15px;
+ overflow-x: auto;
+ margin: 15px 0;
+}
+
+pre code {
+ padding: 0;
+ background-color: transparent;
+}
+
+/* Footer: centered, muted small text; links are underlined only on hover */
+footer {
+ text-align: center;
+ margin-top: 40px;
+ padding: 20px;
+ color: #7f8c8d;
+ font-size: 0.9rem;
+}
+
+footer a {
+ color: #3498db;
+ text-decoration: none;
+}
+
+footer a:hover {
+ text-decoration: underline;
+}
+
+/* Responsive overrides for viewports up to 768px: single-column settings, smaller title, tighter cards */
+@media (max-width: 768px) {
+ .settings {
+ grid-template-columns: 1fr;
+ }
+
+ header h1 {
+ font-size: 2rem;
+ }
+
+ .card {
+ padding: 15px;
+ }
+}
\ No newline at end of file
diff --git a/web/static/js/app.js b/web/static/js/app.js
new file mode 100644
index 0000000..d84c5b4
--- /dev/null
+++ b/web/static/js/app.js
@@ -0,0 +1,176 @@
+document.addEventListener('DOMContentLoaded', function() {
+ // Look up the DOM elements this script reads from or updates
+ const textInput = document.getElementById('text');
+ const voiceSelect = document.getElementById('voice');
+ const rateInput = document.getElementById('rate');
+ const rateValue = document.getElementById('rateValue');
+ const pitchInput = document.getElementById('pitch');
+ const pitchValue = document.getElementById('pitchValue');
+ const speakButton = document.getElementById('speak');
+ const downloadButton = document.getElementById('download');
+ const copyLinkButton = document.getElementById('copyLink');
+ const audioPlayer = document.getElementById('audioPlayer');
+ const resultSection = document.getElementById('resultSection');
+ const charCount = document.getElementById('charCount');
+
+ // URL of the most recently generated audio; empty until the first conversion
+ let lastAudioUrl = '';
+
+ // Initialization (both are function declarations further down, so they are hoisted and callable here)
+ initVoicesList();
+ initEventListeners();
+
+ // Keep the character counter in sync with the textarea (String length, i.e. UTF-16 code units)
+ textInput.addEventListener('input', function() {
+ charCount.textContent = this.value.length;
+ });
+
+ // Mirror the rate slider value into its label, suffixed with '%'
+ rateInput.addEventListener('input', function() {
+ const value = this.value;
+ rateValue.textContent = value + '%';
+ });
+
+ // Mirror the pitch slider value into its label, suffixed with '%'
+ pitchInput.addEventListener('input', function() {
+ const value = this.value;
+ pitchValue.textContent = value + '%';
+ });
+
+ // 获取可用语音列表
+ async function initVoicesList() {
+ try {
+ const response = await fetch(`${config.basePath}/voices`);
+ if (!response.ok) throw new Error('获取语音列表失败');
+
+ const voices = await response.json();
+
+ // 清空并重建选项
+ voiceSelect.innerHTML = '';
+
+ // 按语言和名称分组
+ const voicesByLocale = {};
+
+ voices.forEach(voice => {
+ if (!voicesByLocale[voice.locale]) {
+ voicesByLocale[voice.locale] = [];
+ }
+ voicesByLocale[voice.locale].push(voice);
+ });
+
+ // 创建选项组
+ for (const locale in voicesByLocale) {
+ const optgroup = document.createElement('optgroup');
+ optgroup.label = voicesByLocale[locale][0].locale_name;
+
+ voicesByLocale[locale].forEach(voice => {
+ const option = document.createElement('option');
+ option.value = voice.short_name;
+ option.textContent = `${voice.local_name || voice.display_name} (${voice.gender})`;
+
+ // 如果是默认语音则选中
+ if (voice.short_name === config.defaultVoice) {
+ option.selected = true;
+ }
+
+ optgroup.appendChild(option);
+ });
+
+ voiceSelect.appendChild(optgroup);
+ }
+ } catch (error) {
+ console.error('获取语音列表失败:', error);
+ voiceSelect.innerHTML = '无法加载语音列表 ';
+ }
+ }
+
+ // 初始化事件监听器
+ function initEventListeners() {
+ // 转换按钮点击事件
+ speakButton.addEventListener('click', generateSpeech);
+
+ // 下载按钮点击事件
+ downloadButton.addEventListener('click', function() {
+ if (lastAudioUrl) {
+ const a = document.createElement('a');
+ a.href = lastAudioUrl;
+ a.download = 'speech.mp3';
+ document.body.appendChild(a);
+ a.click();
+ document.body.removeChild(a);
+ }
+ });
+
+ // 复制链接按钮点击事件
+ copyLinkButton.addEventListener('click', function() {
+ if (lastAudioUrl) {
+ navigator.clipboard.writeText(lastAudioUrl).then(() => {
+ alert('链接已复制到剪贴板');
+ }).catch(err => {
+ console.error('复制失败:', err);
+ // 兼容处理
+ const textArea = document.createElement('textarea');
+ textArea.value = lastAudioUrl;
+ document.body.appendChild(textArea);
+ textArea.focus();
+ textArea.select();
+
+ try {
+ document.execCommand('copy');
+ alert('链接已复制到剪贴板');
+ } catch (err) {
+ console.error('复制失败:', err);
+ }
+
+ document.body.removeChild(textArea);
+ });
+ }
+ });
+ }
+
+ // 生成语音
+ async function generateSpeech() {
+ const text = textInput.value.trim();
+ if (!text) {
+ alert('请输入要转换的文本');
+ return;
+ }
+
+ const voice = voiceSelect.value;
+ const rate = rateInput.value;
+ const pitch = pitchInput.value;
+
+ // 禁用按钮,显示加载状态
+ speakButton.disabled = true;
+ speakButton.textContent = '生成中...';
+
+ try {
+ // 构建URL参数
+ const params = new URLSearchParams({
+ t: text,
+ v: voice,
+ r: rate,
+ p: pitch
+ });
+
+ const url = `${config.basePath}/tts?${params.toString()}`;
+
+ // 更新音频播放器
+ audioPlayer.src = url;
+ lastAudioUrl = url;
+
+ // 显示结果区域
+ resultSection.style.display = 'block';
+
+ // 播放音频
+ audioPlayer.play();
+ } catch (error) {
+ console.error('生成语音失败:', error);
+ alert('生成语音失败,请重试');
+ } finally {
+ // 恢复按钮状态
+ speakButton.disabled = false;
+ speakButton.textContent = '转换为语音';
+ }
+ }
+});
\ No newline at end of file
diff --git a/web/templates/api-doc.html b/web/templates/api-doc.html
new file mode 100644
index 0000000..55023e5
--- /dev/null
+++ b/web/templates/api-doc.html
@@ -0,0 +1,310 @@
+
+
+
+
+
+ API文档 - TTS服务
+
+
+
+
+
+
+ TTS服务 API文档
+ 快速、高质量的文本转语音API服务
+
+ 主页
+ API文档
+
+
+
+
+
+ API概述
+ TTS服务API提供了简单而强大的方式将文本转换为自然语音。我们支持多种语言和声音,并允许您调节语速、语调以适应不同场景需求。
+ 基础URL: {{.BasePath}}
+ 所有API请求均使用HTTP协议,返回标准HTTP状态码表示请求结果。
+
+
+
+ 文本转语音 API
+ 端点
+ GET {{.BasePath}}/tts
+
+ 参数
+
+
+
+ 参数
+ 类型
+ 必选
+ 描述
+
+
+
+
+ t
+ string
+ 是
+ 要转换的文本(需要进行URL编码)
+
+
+ v
+ string
+ 否
+ 语音名称,使用short_name格式,默认: {{.DefaultVoice}}。可通过/voices接口获取所有可用语音
+
+
+ r
+ string
+ 否
+ 语速调整,范围: -100%到100%,默认: {{.DefaultRate}}。正值加快语速,负值减慢语速
+
+
+ p
+ string
+ 否
+ 语调调整,范围: -100%到100%,默认: {{.DefaultPitch}}。正值提高语调,负值降低语调
+
+
+ o
+ string
+ 否
+ 输出音频格式,默认: {{.DefaultFormat}}。详见下方支持的格式列表
+
+
+ s
+ string
+ 否
+ 情感风格,可用值取决于所选语音的style_list属性。例如:"cheerful"、"sad"等
+
+
+
+
+ 示例请求
+ curl "{{.BasePath}}/tts?t=%E4%BD%A0%E5%A5%BD%EF%BC%8C%E4%B8%96%E7%95%8C&v=zh-CN-XiaoxiaoNeural&r=0%25&p=0%25"
+
+ 另一个示例(带情感风格)
+ curl "{{.BasePath}}/tts?t=%E4%BB%8A%E5%A4%A9%E5%A4%A9%E6%B0%94%E7%9C%9F%E5%A5%BD&v=zh-CN-XiaoxiaoNeural&s=cheerful"
+
+ 响应
+ 返回音频文件,内容类型取决于请求的输出格式。正常响应状态码为200。
+
+ 错误响应
+ 如果请求参数有误或服务出现问题,将返回对应的HTTP错误码和错误消息。
+
+
+
+ 状态码
+ 描述
+
+
+
+
+ 400
+ 参数错误或缺失必要参数
+
+
+ 404
+ 请求的资源不存在
+
+
+ 500
+ 服务器内部错误
+
+
+
+
+
+
+ 获取可用语音 API
+ 端点
+ GET {{.BasePath}}/voices
+
+ 参数
+
+
+
+ 参数
+ 类型
+ 必选
+ 描述
+
+
+
+
+ locale
+ string
+ 否
+ 筛选特定语言的语音,例如:zh-CN(中文)、en-US(英文)
+
+
+ gender
+ string
+ 否
+ 筛选特定性别的语音,可选值:Male(男性)、Female(女性)
+
+
+
+
+ 示例请求
+ curl "{{.BasePath}}/voices?locale=zh-CN&gender=Female"
+
+ 响应
+ 返回JSON格式的可用语音列表:
+ [
+ {
+ "name": "Microsoft Server Speech Text to Speech Voice (zh-CN, XiaoxiaoNeural)",
+ "display_name": "Xiaoxiao",
+ "local_name": "晓晓",
+ "short_name": "zh-CN-XiaoxiaoNeural",
+ "gender": "Female",
+ "locale": "zh-CN",
+ "locale_name": "中文(中国)",
+ "style_list": ["cheerful", "sad", "angry", "fearful", "disgruntled"]
+ },
+ ...
+]
+ 响应字段说明:
+
+ name :语音的完整名称
+ display_name :显示用名称(拉丁字符)
+ local_name :本地化名称
+ short_name :简短名称(用于API调用的v参数)
+ gender :性别(Male或Female)
+ locale :语言代码
+ locale_name :语言本地化名称
+ style_list :支持的情感风格列表(如有)
+
+
+
+
+ 兼容OpenAI接口 API
+ 语音合成
+ POST {{.BasePath}}/v1/audio/speech
+
+ 请求体 (JSON)
+
+
+
+ 参数
+ 类型
+ 必选
+ 描述
+
+
+
+
+ model
+ string
+ 是
+ 当前仅支持值: "tts-1"
+
+
+ input
+ string
+ 是
+ 要转换的文本内容
+
+
+ voice
+ string
+ 是
+ 声音名称,使用Microsoft语音格式,例如:ja-JP-KeitaNeural、zh-CN-XiaoxiaoNeural
+
+
+ speed
+ number
+ 否
+ 语速调整,范围: 0.5到2.0,默认: 1.0
+
+
+
+
+ 示例请求
+ curl -X POST "{{.BasePath}}/v1/audio/speech" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "tts-1",
+ "input": "你好,世界!",
+ "voice": "zh-CN-XiaoxiaoNeural"
+ }'
+
+ 另一个示例(带速度调整)
+ curl -X POST "{{.BasePath}}/v1/audio/speech" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "tts-1",
+ "input": "こんにちは、世界!",
+ "voice": "ja-JP-NanamiNeural",
+ "speed": 1.2
+ }'
+
+ 响应
+ 返回音频文件,内容类型取决于请求的输出格式。正常响应状态码为200。
+
+ 错误响应
+ 如果请求有误,将返回JSON格式的错误信息:
+ {
+ "error": {
+ "message": "错误信息描述",
+ "type": "错误类型",
+ "code": "错误代码"
+ }
+}
+
+
+
+ 支持的输出格式
+
+
+
+ 格式名称
+ 描述
+
+
+
+
+ audio-16khz-32kbitrate-mono-mp3
+ MP3格式,16kHz, 32kbps
+
+
+ audio-16khz-64kbitrate-mono-mp3
+ MP3格式,16kHz, 64kbps
+
+
+ audio-16khz-128kbitrate-mono-mp3
+ MP3格式,16kHz, 128kbps
+
+
+ audio-24khz-48kbitrate-mono-mp3
+ MP3格式,24kHz, 48kbps
+
+
+ audio-24khz-96kbitrate-mono-mp3
+ MP3格式,24kHz, 96kbps
+
+
+ audio-24khz-160kbitrate-mono-mp3
+ MP3格式,24kHz, 160kbps
+
+
+ riff-16khz-16bit-mono-pcm
+ WAV格式,16kHz
+
+
+ riff-24khz-16bit-mono-pcm
+ WAV格式,24kHz
+
+
+
+
+
+
+
+
+
+
+
diff --git a/web/templates/index.html b/web/templates/index.html
new file mode 100644
index 0000000..6bb5bb4
--- /dev/null
+++ b/web/templates/index.html
@@ -0,0 +1,83 @@
+
+
+
+
+
+ 文本转语音 - TTS服务
+
+
+
+
+
+
+ 文本转语音 (TTS)
+ 将文本转换为自然流畅的语音
+
+ 主页
+ API文档
+
+
+
+
+
+ 输入文本
+
+
+
+
+ 语音:
+
+ 加载中...
+
+
+
+
+ 语速:
+
+ 0%
+
+
+
+ 语调:
+
+ 0%
+
+
+
+
+ 转换为语音
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/templates/worker.js b/web/templates/worker.js
similarity index 100%
rename from templates/worker.js
rename to web/templates/worker.js