diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f597e4b --- /dev/null +++ b/.gitignore @@ -0,0 +1,27 @@ +### Go template +# If you prefer the allow list template instead of the deny list, see community template: +# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore +# +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ + +# Go workspace file +go.work +go.work.sum + +# env file +.env + diff --git a/.idea/git_toolbox_blame.xml b/.idea/git_toolbox_blame.xml new file mode 100644 index 0000000..7dc1249 --- /dev/null +++ b/.idea/git_toolbox_blame.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/cmd/api/main.go b/cmd/api/main.go new file mode 100644 index 0000000..7887c1a --- /dev/null +++ b/cmd/api/main.go @@ -0,0 +1,58 @@ +package main + +import ( + "flag" + "log" + "os" + "path/filepath" + + "tts/internal/http/server" +) + +func main() { + // 解析命令行参数 + configPath := flag.String("config", "", "配置文件路径") + flag.Parse() + + // 如果没有指定配置文件,尝试默认位置 + if *configPath == "" { + // 尝试多个位置查找配置文件 + possiblePaths := []string{ + "./configs/config.yaml", + "../configs/config.yaml", + "/etc/tts/config.yaml", + } + + for _, path := range possiblePaths { + if _, err := os.Stat(path); err == nil { + *configPath = path + break + } + } + + // 如果还是没找到,使用默认位置 + if *configPath == "" { + *configPath = "./configs/config.yaml" + } + } + + // 确保配置文件路径是绝对路径 + absConfigPath, err := filepath.Abs(*configPath) + if err != nil { + log.Fatalf("无法获取配置文件的绝对路径: %v", err) + } + + // 打印使用的配置文件路径 + log.Printf("使用配置文件: %s", absConfigPath) + + // 创建并启动应用 + app, err := server.NewApp(absConfigPath) + if err != nil { + log.Fatalf("初始化应用失败: %v", err) + } + + // 启动应用并处理错误 + if err := app.Start(); err != nil { 
+ log.Fatalf("应用运行出错: %v", err) + } +} diff --git a/configs/config.yaml b/configs/config.yaml new file mode 100644 index 0000000..b60501e --- /dev/null +++ b/configs/config.yaml @@ -0,0 +1,27 @@ +server: + port: 8080 + read_timeout: 30 + write_timeout: 30 + base_path: "" + +tts: + region: "eastasia" + default_voice: "zh-CN-XiaoxiaoNeural" + default_rate: "0" + default_pitch: "0" + default_format: "audio-24khz-48kbitrate-mono-mp3" + max_text_length: 65535 + request_timeout: 30 + max_concurrent: 10 + segment_threshold: 300 + min_sentence_length: 200 + max_sentence_length: 300 + + # OpenAI 到微软 TTS 中文语音的映射 + voice_mapping: + alloy: "zh-CN-XiaoyiNeural" # 中性女声 + echo: "zh-CN-YunxiNeural" # 年轻男声 + fable: "zh-CN-XiaochenNeural" # 儿童声 + onyx: "zh-CN-YunjianNeural" # 成熟男声 + nova: "zh-CN-XiaohanNeural" # 活力女声 + shimmer: "zh-CN-XiaomoNeural" # 温柔女声 diff --git a/go.mod b/go.mod index d52b110..a78185a 100644 --- a/go.mod +++ b/go.mod @@ -3,36 +3,12 @@ module tts go 1.22 require ( - github.com/gin-gonic/gin v1.10.0 github.com/google/uuid v1.6.0 github.com/sirupsen/logrus v1.9.3 + gopkg.in/yaml.v3 v3.0.1 ) require ( - github.com/bytedance/sonic v1.11.6 // indirect - github.com/bytedance/sonic/loader v0.1.1 // indirect - github.com/cloudwego/base64x v0.1.4 // indirect - github.com/cloudwego/iasm v0.2.0 // indirect - github.com/gabriel-vasile/mimetype v1.4.3 // indirect - github.com/gin-contrib/sse v0.1.0 // indirect - github.com/go-playground/locales v0.14.1 // indirect - github.com/go-playground/universal-translator v0.18.1 // indirect - github.com/go-playground/validator/v10 v10.20.0 // indirect - github.com/goccy/go-json v0.10.2 // indirect - github.com/json-iterator/go v1.1.12 // indirect - github.com/klauspost/cpuid/v2 v2.2.7 // indirect - github.com/leodido/go-urn v1.4.0 // indirect - github.com/mattn/go-isatty v0.0.20 // indirect - github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect - github.com/modern-go/reflect2 v1.0.2 // indirect - 
github.com/pelletier/go-toml/v2 v2.2.2 // indirect - github.com/twitchyliquid64/golang-asm v0.15.1 // indirect - github.com/ugorji/go/codec v1.2.12 // indirect - golang.org/x/arch v0.8.0 // indirect - golang.org/x/crypto v0.23.0 // indirect - golang.org/x/net v0.25.0 // indirect + github.com/stretchr/testify v1.9.0 // indirect golang.org/x/sys v0.20.0 // indirect - golang.org/x/text v0.15.0 // indirect - google.golang.org/protobuf v1.34.1 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 8ba6d3d..57c0620 100644 --- a/go.sum +++ b/go.sum @@ -1,124 +1,21 @@ -github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= -github.com/bytedance/sonic v1.10.0-rc/go.mod h1:ElCzW+ufi8qKqNW0FY314xriJhyJhuoJ3gFZdAHF7NM= -github.com/bytedance/sonic v1.11.4 h1:8+OMLSSDDm2/qJc6ld5K5Sm62NK9VHcUKk0NzBoMAM4= -github.com/bytedance/sonic v1.11.4/go.mod h1:YrWEqYtlBPS6LUA0vpuG79a1trsh4Ae41uWUWUreHhE= -github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0= -github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4= -github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM= -github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= -github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= -github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= -github.com/chenzhuoyu/iasm v0.9.0/go.mod h1:Xjy2NpN3h7aUqeqM+woSuuvxmIe6+DDsiNLIrkAmYog= -github.com/cloudwego/base64x v0.1.0 h1:Tg5q9tq1khq9Y9UwfoC6zkHK0FypN2GLDvhqFceOL8U= -github.com/cloudwego/base64x v0.1.0/go.mod h1:lM8nFiNbg74QgesNo6EAtv8N9tlRjBWExmHoNDa3PkU= -github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y= -github.com/cloudwego/base64x v0.1.4/go.mod 
h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= -github.com/cloudwego/iasm v0.0.9/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= -github.com/cloudwego/iasm v0.1.1 h1:Py/XoYVR3xFd2pXmvmOnoS5vHTlYT9SnGK28ES8JOIk= -github.com/cloudwego/iasm v0.1.1/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= -github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg= -github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0= -github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk= -github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= -github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= -github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= -github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= -github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU= -github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= -github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= -github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= -github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= -github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= -github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= -github.com/go-playground/universal-translator v0.18.1/go.mod 
h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= -github.com/go-playground/validator/v10 v10.19.0 h1:ol+5Fu+cSq9JD7SoSqe04GMI92cbn0+wvQ3bZ8b/AU4= -github.com/go-playground/validator/v10 v10.19.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM= -github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8= -github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM= -github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= -github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= -github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= -github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= -github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM= -github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= -github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= -github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= -github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= -github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= -github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/modern-go/concurrent 
v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= -github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= -github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/pelletier/go-toml/v2 v2.2.1 h1:9TA9+T8+8CUCO2+WYnDLCgrYi9+omqKXyjDtosvtEhg= -github.com/pelletier/go-toml/v2 v2.2.1/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= -github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= -github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1/go.mod 
h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= -github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= -github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= -github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= -golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= -golang.org/x/arch v0.7.0 h1:pskyeJh/3AmoQ8CPE95vxHLqp1G1GfGNXTmcl9NEKTc= -golang.org/x/arch v0.7.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= -golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc= -golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= -golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= -golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= -golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= -golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= -golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= -golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= -golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= -golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.6.0/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= -golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= -golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= -google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= -google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= -rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/handlers/handlers.go b/handlers/handlers.go deleted file mode 100644 index 8feabf9..0000000 --- a/handlers/handlers.go 
+++ /dev/null @@ -1,119 +0,0 @@ -package handlers - -import ( - "github.com/gin-gonic/gin" - "net/http" - "strings" - "tts/utils" -) - -func GetVoiceList(c *gin.Context) { - locale := c.Query("l") - voices, err := utils.VoiceList() - if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) - return - } - - if locale != "" { - filteredVoices := make([]interface{}, 0) - for _, voice := range voices { - if strings.Contains(voice.(map[string]interface{})["Locale"].(string), locale) { - filteredVoices = append(filteredVoices, voice) - } - } - voices = filteredVoices - } - - _, detail := c.GetQuery("d") - if detail { - c.JSON(http.StatusOK, gin.H{"voices": voices}) - } else { - voiceSimpleList := make([]map[string]string, 0) - for _, voice := range voices { - localName := voice.(map[string]interface{})["LocalName"].(string) - shortName := voice.(map[string]interface{})["ShortName"].(string) - voiceSimpleList = append(voiceSimpleList, map[string]string{ - "LocalName": localName, - "ShortName": shortName, - }) - } - c.JSON(http.StatusOK, gin.H{"voices": voiceSimpleList}) - } - -} - -func SynthesizeVoice(c *gin.Context) { - text := c.Query("t") - voiceName := c.DefaultQuery("v", "zh-CN-XiaoxiaoMultilingualNeural") - rate := c.DefaultQuery("r", "0") - pitch := c.DefaultQuery("p", "0") - outputFormat := c.DefaultQuery("o", "audio-24khz-48kbitrate-mono-mp3") - - voice, err := utils.GetVoice(text, voiceName, rate, pitch, outputFormat, c.Query("s")) - if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) - return - } - - c.Data(http.StatusOK, "audio/mpeg", voice) -} - -func Index(c *gin.Context) { - c.HTML(http.StatusOK, "index.html", gin.H{ - "title": "TTS", - }) -} - -func ApiDoc(c *gin.Context) { - c.HTML(http.StatusOK, "api-doc.html", gin.H{ - "title": "TTS", - }) -} - -type SynthesizeVoiceRequest struct { - Text string `json:"t"` - VoiceName string `json:"v"` - Rate string `json:"r"` - Pitch string `json:"p"` - 
OutputFormat string `json:"o"` - Style string `json:"s"` -} - -type SynthesizeVoiceOpenAIRequest struct { - Model string `json:"model"` - Input string `json:"input"` - Voice string `json:"voice"` -} - -func SynthesizeVoicePost(c *gin.Context) { - var request SynthesizeVoiceRequest - if err := c.BindJSON(&request); err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) - return - } - - voice, err := utils.GetVoice(request.Text, request.VoiceName, request.Rate, request.Pitch, request.OutputFormat, request.Style) - if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) - return - } - - c.Data(http.StatusOK, "audio/mpeg", voice) -} - -func SynthesizeVoiceOpenAI(c *gin.Context) { - var request SynthesizeVoiceOpenAIRequest - if err := c.BindJSON(&request); err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) - return - } - - voice, err := utils.GetVoice(request.Input, request.Voice, c.Query("r"), c.Query("p"), c.Query("o"), c.Query("s")) - - if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) - return - } - c.Data(http.StatusOK, "audio/mpeg", voice) -} diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100644 index 0000000..d062940 --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,129 @@ +package config + +import ( + "fmt" + "os" + "sync" + + "gopkg.in/yaml.v3" +) + +// Config 包含应用程序的所有配置 +type Config struct { + Server ServerConfig `yaml:"server"` + TTS TTSConfig `yaml:"tts"` +} + +// ServerConfig 包含HTTP服务器配置 +type ServerConfig struct { + Port int `yaml:"port"` + ReadTimeout int `yaml:"read_timeout"` // 单位:秒 + WriteTimeout int `yaml:"write_timeout"` // 单位:秒 + BasePath string `yaml:"base_path"` +} + +// TTSConfig 包含Microsoft TTS API配置 +type TTSConfig struct { + APIKey string `yaml:"api_key"` + Region string `yaml:"region"` + DefaultVoice string `yaml:"default_voice"` + DefaultRate string `yaml:"default_rate"` + DefaultPitch string 
`yaml:"default_pitch"` + DefaultFormat string `yaml:"default_format"` + MaxTextLength int `yaml:"max_text_length"` + RequestTimeout int `yaml:"request_timeout"` // 单位:秒 + MaxConcurrent int `yaml:"max_concurrent"` + SegmentThreshold int `yaml:"segment_threshold"` + MinSentenceLength int `yaml:"min_sentence_length"` + MaxSentenceLength int `yaml:"max_sentence_length"` + VoiceMapping map[string]string `yaml:"voice_mapping"` // OpenAI声音到Azure声音的映射 +} + +var ( + config Config + once sync.Once +) + +// Load 从指定路径加载配置文件 +func Load(configPath string) (*Config, error) { + var err error + once.Do(func() { + // 设置默认配置 + setDefaults() + + // 从配置文件加载 + if configPath != "" { + err = loadFromFile(configPath) + if err != nil { + err = fmt.Errorf("加载配置文件失败: %w", err) + return + } + } + + // 从环境变量覆盖 + overrideFromEnv() + }) + + if err != nil { + return nil, err + } + + return &config, nil +} + +// 设置默认配置值 +func setDefaults() { + config = Config{ + Server: ServerConfig{ + Port: 8080, + ReadTimeout: 30, + WriteTimeout: 30, + BasePath: "", + }, + TTS: TTSConfig{ + DefaultVoice: "zh-CN-XiaoxiaoNeural", + DefaultRate: "0%", + DefaultPitch: "0%", + DefaultFormat: "audio-24khz-48kbitrate-mono-mp3", + MaxTextLength: 5000, + RequestTimeout: 30, + MaxConcurrent: 10, + SegmentThreshold: 500, + MinSentenceLength: 200, + MaxSentenceLength: 300, + VoiceMapping: make(map[string]string), + }, + } +} + +// 从配置文件加载配置 +func loadFromFile(path string) error { + data, err := os.ReadFile(path) + if err != nil { + return err + } + + return yaml.Unmarshal(data, &config) +} + +// 从环境变量中覆盖配置 +func overrideFromEnv() { + if port := os.Getenv("TTS_SERVER_PORT"); port != "" { + fmt.Sscanf(port, "%d", &config.Server.Port) + } + + if apiKey := os.Getenv("TTS_API_KEY"); apiKey != "" { + config.TTS.APIKey = apiKey + } + + if region := os.Getenv("TTS_API_REGION"); region != "" { + config.TTS.Region = region + } + + // 可以添加更多环境变量覆盖 +} + +// Get 返回已加载的配置 +func Get() *Config { + return &config +} diff --git 
a/internal/http/handlers/pages.go b/internal/http/handlers/pages.go new file mode 100644 index 0000000..71f4898 --- /dev/null +++ b/internal/http/handlers/pages.go @@ -0,0 +1,76 @@ +package handlers + +import ( + "html/template" + "net/http" + "path/filepath" + + "tts/internal/config" +) + +// PagesHandler 处理页面请求 +type PagesHandler struct { + templates *template.Template + config *config.Config +} + +// NewPagesHandler 创建一个新的页面处理器 +func NewPagesHandler(templatesDir string, cfg *config.Config) (*PagesHandler, error) { + // 解析所有模板文件 + templates, err := template.ParseGlob(filepath.Join(templatesDir, "*.html")) + if err != nil { + return nil, err + } + + return &PagesHandler{ + templates: templates, + config: cfg, + }, nil +} + +// HandleIndex 处理首页请求 +func (h *PagesHandler) HandleIndex(w http.ResponseWriter, r *http.Request) { + // 如果不是根路径,返回404 + if r.URL.Path != "/" && r.URL.Path != "/index.html" { + http.NotFound(w, r) + return + } + + // 准备模板数据 + data := map[string]interface{}{ + "BasePath": h.config.Server.BasePath, + "DefaultVoice": h.config.TTS.DefaultVoice, + "DefaultRate": h.config.TTS.DefaultRate, + "DefaultPitch": h.config.TTS.DefaultPitch, + } + + // 设置内容类型 + w.Header().Set("Content-Type", "text/html; charset=utf-8") + + // 渲染模板 + if err := h.templates.ExecuteTemplate(w, "index.html", data); err != nil { + http.Error(w, "模板渲染失败: "+err.Error(), http.StatusInternalServerError) + return + } +} + +// HandleAPIDoc 处理API文档请求 +func (h *PagesHandler) HandleAPIDoc(w http.ResponseWriter, r *http.Request) { + // 准备模板数据 + data := map[string]interface{}{ + "BasePath": h.config.Server.BasePath, + "DefaultVoice": h.config.TTS.DefaultVoice, + "DefaultRate": h.config.TTS.DefaultRate, + "DefaultPitch": h.config.TTS.DefaultPitch, + "DefaultFormat": h.config.TTS.DefaultFormat, + } + + // 设置内容类型 + w.Header().Set("Content-Type", "text/html; charset=utf-8") + + // 渲染模板 + if err := h.templates.ExecuteTemplate(w, "api-doc.html", data); err != nil { + http.Error(w, "模板渲染失败: 
"+err.Error(), http.StatusInternalServerError) + return + } +} diff --git a/internal/http/handlers/tts.go b/internal/http/handlers/tts.go new file mode 100644 index 0000000..8cf45b9 --- /dev/null +++ b/internal/http/handlers/tts.go @@ -0,0 +1,553 @@ +package handlers + +import ( + "encoding/json" + "fmt" + "log" + "net/http" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" + "time" + "tts/internal/config" + "tts/internal/models" + "tts/internal/tts" + "unicode/utf8" +) + +// TTSHandler 处理TTS请求 +type TTSHandler struct { + ttsService tts.Service + config *config.Config +} + +// NewTTSHandler 创建一个新的TTS处理器 +func NewTTSHandler(service tts.Service, cfg *config.Config) *TTSHandler { + return &TTSHandler{ + ttsService: service, + config: cfg, + } +} + +// HandleOpenAITTS 处理OpenAI兼容的TTS请求 +func (h *TTSHandler) HandleOpenAITTS(w http.ResponseWriter, r *http.Request) { + // 记录请求开始时间 + startTime := time.Now() + + // 只支持POST请求 + if r.Method != http.MethodPost { + http.Error(w, "仅支持POST请求", http.StatusMethodNotAllowed) + return + } + + // 解析请求 + var openaiReq struct { + Model string `json:"model"` + Input string `json:"input"` + Voice string `json:"voice"` + Speed float64 `json:"speed"` + } + + if err := json.NewDecoder(r.Body).Decode(&openaiReq); err != nil { + http.Error(w, "无效的JSON请求: "+err.Error(), http.StatusBadRequest) + return + } + + // 记录解析时间 + parseTime := time.Since(startTime) + + // 检查必需字段 + if openaiReq.Input == "" { + http.Error(w, "input字段不能为空", http.StatusBadRequest) + return + } + + // 映射OpenAI声音到Microsoft声音 + msVoice := h.config.TTS.DefaultVoice + if openaiReq.Voice != "" { + // 检查是否有配置映射 + if mappedVoice, exists := h.config.TTS.VoiceMapping[openaiReq.Voice]; exists { + msVoice = mappedVoice + } + } + + // 转换速度参数到微软格式 + msRate := h.config.TTS.DefaultRate + if openaiReq.Speed != 0 { + // OpenAI速度转换为微软速度格式 + // OpenAI: 0.5(慢速), 1.0(正常), 2.0(快速) + // 微软: "-50%"(慢), "+0%"(中), "+100%"(快) + speedPercentage := (openaiReq.Speed - 1.0) * 100 + if 
speedPercentage >= 0 { + msRate = fmt.Sprintf("+%.0f", speedPercentage) + } else { + msRate = fmt.Sprintf("%.0f", speedPercentage) + } + } + + // 创建内部TTS请求 + req := models.TTSRequest{ + Text: openaiReq.Input, + Voice: msVoice, + Rate: msRate, + Pitch: h.config.TTS.DefaultPitch, + } + + log.Printf("OpenAI TTS请求: model=%s, voice=%s → %s, speed=%.2f → %s, 文本长度=%d", + openaiReq.Model, openaiReq.Voice, msVoice, openaiReq.Speed, msRate, len(req.Text)) + + // 检查文本长度 + if len(req.Text) > h.config.TTS.MaxTextLength { + http.Error(w, "文本长度超过限制", http.StatusBadRequest) + return + } + + // 检查是否需要分段处理 + segmentThreshold := h.config.TTS.SegmentThreshold + if len(req.Text) > segmentThreshold && len(req.Text) <= h.config.TTS.MaxTextLength { + log.Printf("文本长度 %d 超过阈值 %d,使用分段处理", len(req.Text), segmentThreshold) + // 使用分段处理 + h.handleSegmentedTTS(w, r, req) + return + } + + // 非流式模式处理 + synthStart := time.Now() + resp, err := h.ttsService.SynthesizeSpeech(r.Context(), req) + synthTime := time.Since(synthStart) + log.Printf("TTS合成耗时: %v, 文本长度: %d", synthTime, len(req.Text)) + + if err != nil { + http.Error(w, "语音合成失败: "+err.Error(), http.StatusInternalServerError) + return + } + + // 设置响应 + w.Header().Set("Content-Type", "audio/mpeg") + writeStart := time.Now() + w.Write(resp.AudioContent) + writeTime := time.Since(writeStart) + + // 记录总耗时 + totalTime := time.Since(startTime) + log.Printf("OpenAI TTS请求总耗时: %v (解析: %v, 合成: %v, 写入: %v), 音频大小: %s", + totalTime, parseTime, synthTime, writeTime, formatFileSize(len(resp.AudioContent))) +} + +// HandleTTS 处理TTS请求 +func (h *TTSHandler) HandleTTS(w http.ResponseWriter, r *http.Request) { + // 记录请求开始时间 + startTime := time.Now() + + // 解析请求参数 + var req models.TTSRequest + + switch r.Method { + case http.MethodGet: + // 从URL参数获取 + q := r.URL.Query() + req = models.TTSRequest{ + Text: q.Get("t"), + Voice: q.Get("v"), + Rate: q.Get("r"), + Pitch: q.Get("p"), + } + case http.MethodPost: + // 从POST JSON体获取 + if r.Header.Get("Content-Type") == 
"application/json" { + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + log.Printf("JSON解析错误: %v", err) + http.Error(w, "无效的JSON请求", http.StatusBadRequest) + return + } + } else { + // 表单数据 + if err := r.ParseForm(); err != nil { + log.Printf("表单解析错误: %v", err) + http.Error(w, "无法解析表单数据", http.StatusBadRequest) + return + } + req = models.TTSRequest{ + Text: r.FormValue("text"), + Voice: r.FormValue("voice"), + Rate: r.FormValue("rate"), + Pitch: r.FormValue("pitch"), + } + } + default: + log.Printf("不支持的HTTP方法: %s", r.Method) + http.Error(w, "仅支持GET和POST请求", http.StatusMethodNotAllowed) + return + } + + // 记录参数解析耗时 + parseTime := time.Since(startTime) + log.Printf("请求参数解析耗时: %v", parseTime) + + // 验证必要参数 + if req.Text == "" { + log.Print("错误: 未提供文本参数") + http.Error(w, "必须提供文本参数", http.StatusBadRequest) + return + } + + // 使用默认值填充空白参数 + if req.Voice == "" { + req.Voice = h.config.TTS.DefaultVoice + } + if req.Rate == "" { + req.Rate = h.config.TTS.DefaultRate + } + if req.Pitch == "" { + req.Pitch = h.config.TTS.DefaultPitch + } + + // 检查文本长度 + if len(req.Text) > h.config.TTS.MaxTextLength { + http.Error(w, "文本长度超过限制", http.StatusBadRequest) + return + } + + // 检查是否需要分段处理 + segmentThreshold := h.config.TTS.SegmentThreshold + if len(req.Text) > segmentThreshold && len(req.Text) <= h.config.TTS.MaxTextLength { + log.Printf("文本长度 %d 超过阈值 %d,使用分段处理", len(req.Text), segmentThreshold) + // 如果文本长度超过阈值但小于最大限制,使用分段处理 + h.handleSegmentedTTS(w, r, req) + return + } + + // 非流式模式处理(保持原有逻辑) + synthStart := time.Now() + resp, err := h.ttsService.SynthesizeSpeech(r.Context(), req) + synthTime := time.Since(synthStart) + log.Printf("TTS合成耗时: %v, 文本长度: %d", synthTime, len(req.Text)) + + if err != nil { + http.Error(w, "语音合成失败: "+err.Error(), http.StatusInternalServerError) + return + } + + // 设置响应 + w.Header().Set("Content-Type", "audio/mpeg") + writeStart := time.Now() + w.Write(resp.AudioContent) + writeTime := time.Since(writeStart) + + // 记录总耗时 + totalTime := 
time.Since(startTime) + log.Printf("TTS请求总耗时: %v (解析: %v, 合成: %v, 写入: %v), 音频大小: %s", + totalTime, parseTime, synthTime, writeTime, formatFileSize(len(resp.AudioContent))) +} + +// handleSegmentedTTS 处理长文本的分段TTS请求 +func (h *TTSHandler) handleSegmentedTTS(w http.ResponseWriter, r *http.Request, req models.TTSRequest) { + segmentStart := time.Now() // 分段处理开始时间 + text := req.Text + + // 开始计时:分割文本 + splitStart := time.Now() + // 按句子分段处理 + sentences := splitTextBySentences(text) + segmentCount := len(sentences) + splitTime := time.Since(splitStart) + + log.Printf("分割文本耗时: %v, 文本总长度: %d, 分段数: %d, 平均句子长度: %.2f", + splitTime, len(text), segmentCount, float64(len(text))/float64(segmentCount)) + + // 创建用于存储每段音频的切片 + results := make([][]byte, segmentCount) + errChan := make(chan error, segmentCount) + var wg sync.WaitGroup + + // 限制并发数量,避免创建过多goroutine + maxConcurrent := h.config.TTS.MaxConcurrent + semaphore := make(chan struct{}, maxConcurrent) + + // 用于记录每个分段处理的时间 + segmentTimes := make([]time.Duration, segmentCount) + + // 合成阶段开始时间 + synthesisStart := time.Now() + + // 并发处理每一个句子 + for i := 0; i < segmentCount; i++ { + wg.Add(1) + semaphore <- struct{}{} // 获取信号量 + go func(index int) { + defer wg.Done() + defer func() { <-semaphore }() // 释放信号量 + + // 创建该句的请求 + segReq := models.TTSRequest{ + Text: sentences[index], + Voice: req.Voice, + Rate: req.Rate, + Pitch: req.Pitch, + } + + log.Printf("开始处理句子 #%d: 长度=%d, 内容='%s'", + index+1, + utf8.RuneCountInString(sentences[index]), + truncateForLog(sentences[index], 20)) + + // 记录该段合成开始时间 + segStart := time.Now() + + // 合成该段音频 + resp, err := h.ttsService.SynthesizeSpeech(r.Context(), segReq) + + // 记录该段合成耗时 + segTime := time.Since(segStart) + segmentTimes[index] = segTime + + if err != nil { + log.Printf("句子 #%d 合成失败,耗时: %v, 错误: %v", index+1, segTime, err) + select { + case errChan <- fmt.Errorf("句子 %d 合成失败: %w", index+1, err): + default: + // 已经有错误了,忽略 + } + return + } + + log.Printf("句子 #%d 合成成功:长度=%d, 耗时=%v, 音频大小=%s", + 
index+1, utf8.RuneCountInString(sentences[index]), segTime, formatFileSize(len(resp.AudioContent))) + + // 存储该段结果 + results[index] = resp.AudioContent + }(i) + } + + // 等待所有goroutine完成 + wg.Wait() + close(errChan) + + // 记录所有分段合成总耗时 + synthesisTime := time.Since(synthesisStart) + log.Printf("所有分段合成总耗时: %v, 平均每段耗时: %v", + synthesisTime, synthesisTime/time.Duration(segmentCount)) + + // 检查是否有错误发生 + if err := <-errChan; err != nil { + http.Error(w, "语音合成失败: "+err.Error(), http.StatusInternalServerError) + return + } + + // 记录写入开始时间 + writeStart := time.Now() + + var audioData []byte + var err error + + audioData, err = audioMerge(results) + + if err != nil { + log.Printf("合并音频失败: %v", err) + http.Error(w, "音频合并失败: "+err.Error(), http.StatusInternalServerError) + return + } + + // 设置响应内容类型 + w.Header().Set("Content-Type", "audio/mpeg") + + // 写入合并后的音频数据 + totalSize := len(audioData) + if _, writeErr := w.Write(audioData); writeErr != nil { + log.Printf("写入响应失败: %v", writeErr) + } + + // 记录写入耗时 + writeTime := time.Since(writeStart) + + // 记录总耗时 + totalTime := time.Since(segmentStart) + log.Printf("分段TTS请求总耗时: %v (分割: %v, 合成: %v, 写入: %v), 总音频大小: %s", + totalTime, splitTime, synthesisTime, writeTime, formatFileSize(totalSize)) +} + +// splitTextBySentences 将文本按句子分割 +func splitTextBySentences(text string) []string { + // 定义句子结束的标点符号 + sentenceEnders := []string{"。", "!", "?", "…", ".", "!", "?", "…", "\n"} + + // 如果文本过短,直接作为一个句子返回 + if utf8.RuneCountInString(text) < 100 { + return []string{text} + } + + var sentences []string + var currentSentence strings.Builder + maxSentenceLength := config.Get().TTS.MaxSentenceLength // 设置单个句子的最大长度,避免过长句子 + runeCount := 0 // 当前句子的实际字符数量 + + for _, char := range text { + currentSentence.WriteRune(char) + runeCount++ + + // 检查是否到达句子结束标点 + lastChar := string(char) + isSentenceEnder := false + for _, ender := range sentenceEnders { + if lastChar == ender { + isSentenceEnder = true + break + } + } + + // 判断是否结束一个句子 - 使用字符数量而非字节长度 + if 
isSentenceEnder || runeCount >= maxSentenceLength { + // 添加当前句子到结果中 + sentence := currentSentence.String() + if len(sentence) > 0 { + sentences = append(sentences, sentence) + } + currentSentence.Reset() // 重置构建器 + runeCount = 0 // 重置字符计数器 + } + } + + // 处理可能的最后一个句子 + if currentSentence.Len() > 0 { + lastSentence := currentSentence.String() + sentences = append(sentences, lastSentence) + } + + // 合并过短的句子 + minSentenceLength := config.Get().TTS.MinSentenceLength // 设置最小句子长度阈值 + + if len(sentences) > 1 { + mergedSentences := []string{} + var currentMerged strings.Builder + currentMergedLength := 0 + + for i, sentence := range sentences { + sentenceLength := utf8.RuneCountInString(sentence) + + // 如果当前句子太短,且不是最后一个,考虑合并 + if sentenceLength < minSentenceLength && i < len(sentences)-1 { + // 检查合并后是否会超过最大长度 + if currentMergedLength+sentenceLength > maxSentenceLength { + // 合并后会超长,先保存当前内容 + if currentMerged.Len() > 0 { + mergedSentences = append(mergedSentences, currentMerged.String()) + currentMerged.Reset() + currentMergedLength = 0 + } + } + + // 当前句子过短,添加到合并缓冲区 + currentMerged.WriteString(sentence) + currentMergedLength += sentenceLength + } else { + // 句子足够长或是最后一句 + if currentMerged.Len() > 0 { + // 检查合并后是否会超过最大长度 + if currentMergedLength+sentenceLength <= maxSentenceLength { + // 有待合并的内容,将当前句子也合并进去 + currentMerged.WriteString(sentence) + mergedSentence := currentMerged.String() + mergedSentences = append(mergedSentences, mergedSentence) + } else { + // 合并后会超长,分别添加 + mergedSentence := currentMerged.String() + mergedSentences = append(mergedSentences, mergedSentence) + mergedSentences = append(mergedSentences, sentence) + } + currentMerged.Reset() + currentMergedLength = 0 + } else { + // 没有待合并内容,直接添加当前句子 + mergedSentences = append(mergedSentences, sentence) + } + } + } + + // 处理可能剩余的合并内容 + if currentMerged.Len() > 0 { + mergedSentence := currentMerged.String() + mergedSentences = append(mergedSentences, mergedSentence) + log.Printf("添加最后剩余的合并句子,长度=%d", 
utf8.RuneCountInString(mergedSentence)) + } + + return mergedSentences + } + + return sentences +} + +// truncateForLog 截断文本用于日志显示,同时显示开头和结尾 +func truncateForLog(text string, maxLength int) string { + // 先去除换行符 + text = strings.ReplaceAll(text, "\n", " ") + text = strings.ReplaceAll(text, "\r", " ") + + runes := []rune(text) + if len(runes) <= maxLength { + return text + } + // 计算开头和结尾各显示多少字符 + halfLength := maxLength / 2 + return string(runes[:halfLength]) + "..." + string(runes[len(runes)-halfLength:]) +} + +// audioMerge 音频合并 +func audioMerge(audioSegments [][]byte) ([]byte, error) { + if len(audioSegments) == 0 { + return nil, fmt.Errorf("没有音频片段可合并") + } + + // 使用 ffmpeg 合并音频 + tempDir, err := os.MkdirTemp("", "audio_merge_") + if err != nil { + return nil, err + } + defer os.RemoveAll(tempDir) + + listFile := filepath.Join(tempDir, "concat.txt") + lf, err := os.Create(listFile) + if err != nil { + return nil, err + } + + for i, seg := range audioSegments { + segFile := filepath.Join(tempDir, fmt.Sprintf("seg_%d.mp3", i)) + if err := os.WriteFile(segFile, seg, 0644); err != nil { + return nil, err + } + if _, err := lf.WriteString(fmt.Sprintf("file '%s'\n", segFile)); err != nil { + return nil, err + } + } + lf.Close() + + outputFile := filepath.Join(tempDir, "output.mp3") + + cmd := exec.Command("ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", listFile, "-c", "copy", outputFile) + if err := cmd.Run(); err != nil { + return nil, err + } + + mergedData, err := os.ReadFile(outputFile) + if err != nil { + return nil, err + } + log.Printf("使用ffmpeg合并完成,总大小: %s", formatFileSize(len(mergedData))) + return mergedData, nil +} + +// formatFileSize 格式化文件大小 +func formatFileSize(size int) string { + switch { + case size < 1024: + return fmt.Sprintf("%d B", size) + case size < 1024*1024: + return fmt.Sprintf("%.2f KB", float64(size)/1024.0) + case size < 1024*1024*1024: + return fmt.Sprintf("%.2f MB", float64(size)/(1024.0*1024.0)) + default: + return fmt.Sprintf("%.2f 
GB", float64(size)/(1024.0*1024.0*1024.0)) + } +} diff --git a/internal/http/handlers/voices.go b/internal/http/handlers/voices.go new file mode 100644 index 0000000..3907e97 --- /dev/null +++ b/internal/http/handlers/voices.go @@ -0,0 +1,41 @@ +package handlers + +import ( + "encoding/json" + "net/http" + "tts/internal/tts" +) + +// VoicesHandler 处理语音列表请求 +type VoicesHandler struct { + ttsService tts.Service +} + +// NewVoicesHandler 创建一个新的语音列表处理器 +func NewVoicesHandler(service tts.Service) *VoicesHandler { + return &VoicesHandler{ + ttsService: service, + } +} + +// HandleVoices 处理语音列表请求 +func (h *VoicesHandler) HandleVoices(w http.ResponseWriter, r *http.Request) { + // 从查询参数中获取语言筛选 + locale := r.URL.Query().Get("locale") + + // 获取语音列表 + voices, err := h.ttsService.ListVoices(r.Context(), locale) + if err != nil { + http.Error(w, "获取语音列表失败: "+err.Error(), http.StatusInternalServerError) + return + } + + // 设置内容类型 + w.Header().Set("Content-Type", "application/json") + + // 编码为JSON并返回 + if err := json.NewEncoder(w).Encode(voices); err != nil { + http.Error(w, "JSON编码失败", http.StatusInternalServerError) + return + } +} diff --git a/internal/http/middleware/cors.go b/internal/http/middleware/cors.go new file mode 100644 index 0000000..0bd2578 --- /dev/null +++ b/internal/http/middleware/cors.go @@ -0,0 +1,22 @@ +package middleware + +import "net/http" + +// CORS 处理跨域资源共享 +func CORS(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // 设置CORS响应头 + w.Header().Set("Access-Control-Allow-Origin", "*") + w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS") + w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization") + + // 如果是预检请求,直接返回200 + if r.Method == http.MethodOptions { + w.WriteHeader(http.StatusOK) + return + } + + // 继续下一个处理器 + next.ServeHTTP(w, r) + }) +} diff --git a/internal/http/middleware/logger.go b/internal/http/middleware/logger.go new file mode 100644 index 
0000000..0be2f81 --- /dev/null +++ b/internal/http/middleware/logger.go @@ -0,0 +1,46 @@ +package middleware + +import ( + "log" + "net/http" + "time" +) + +// Logger 是一个HTTP中间件,记录请求的详细信息 +func Logger(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + start := time.Now() + + // 包装ResponseWriter以捕获状态码 + wrapper := &responseWriterWrapper{ + ResponseWriter: w, + statusCode: http.StatusOK, + } + + // 调用下一个处理器 + next.ServeHTTP(wrapper, r) + + // 记录请求信息 + duration := time.Since(start) + log.Printf( + "[%s] %s %s %d %s", + r.Method, + r.RequestURI, + r.RemoteAddr, + wrapper.statusCode, + duration, + ) + }) +} + +// responseWriterWrapper 包装http.ResponseWriter以捕获状态码 +type responseWriterWrapper struct { + http.ResponseWriter + statusCode int +} + +// WriteHeader 捕获状态码 +func (w *responseWriterWrapper) WriteHeader(statusCode int) { + w.statusCode = statusCode + w.ResponseWriter.WriteHeader(statusCode) +} diff --git a/internal/http/server/app.go b/internal/http/server/app.go new file mode 100644 index 0000000..edfafdd --- /dev/null +++ b/internal/http/server/app.go @@ -0,0 +1,83 @@ +package server + +import ( + "context" + "fmt" + "log" + "os" + "os/signal" + "syscall" + "time" + "tts/internal/config" +) + +// App 表示整个TTS应用程序 +type App struct { + server *Server + cfg *config.Config +} + +// NewApp 创建一个新的应用程序实例 +func NewApp(configPath string) (*App, error) { + // 加载配置 + cfg, err := config.Load(configPath) + if err != nil { + return nil, fmt.Errorf("加载配置失败: %w", err) + } + + // 初始化服务 + ttsService, err := InitializeServices(cfg) + if err != nil { + return nil, fmt.Errorf("初始化服务失败: %w", err) + } + + // 设置路由 + handler, err := SetupRoutes(cfg, ttsService) + if err != nil { + return nil, fmt.Errorf("设置路由失败: %w", err) + } + + // 创建HTTP服务器 + server := New(cfg, handler) + + return &App{ + server: server, + cfg: cfg, + }, nil +} + +// Start 启动应用程序 +func (a *App) Start() error { + // 创建一个错误通道 + errChan := make(chan error, 1) + + // 
创建一个退出信号通道 + quit := make(chan os.Signal, 1) + signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM) + + // 在一个goroutine中启动服务器 + go func() { + log.Printf("启动TTS服务,监听端口 %d...\n", a.cfg.Server.Port) + errChan <- a.server.Start() + }() + + // 等待退出信号或错误 + select { + case err := <-errChan: + return err + case <-quit: + log.Println("接收到退出信号,正在优雅关闭...") + + // 创建一个超时上下文用于优雅关闭 + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + // 尝试优雅关闭服务器 + if err := a.server.Shutdown(ctx); err != nil { + return fmt.Errorf("服务器关闭出错: %w", err) + } + + log.Println("服务器已优雅关闭") + return nil + } +} diff --git a/internal/http/server/routes.go b/internal/http/server/routes.go new file mode 100644 index 0000000..9a602f6 --- /dev/null +++ b/internal/http/server/routes.go @@ -0,0 +1,65 @@ +package server + +import ( + "net/http" + "tts/internal/config" + "tts/internal/http/handlers" + "tts/internal/http/middleware" + "tts/internal/tts" + "tts/internal/tts/microsoft" +) + +// SetupRoutes 配置所有API路由 +func SetupRoutes(cfg *config.Config, ttsService tts.Service) (http.Handler, error) { + // 创建一个新的路由多路复用器 + mux := http.NewServeMux() + + // 创建处理器 + ttsHandler := handlers.NewTTSHandler(ttsService, cfg) + voicesHandler := handlers.NewVoicesHandler(ttsService) + + // 创建页面处理器 + pagesHandler, err := handlers.NewPagesHandler("./web/templates", cfg) + if err != nil { + return nil, err + } + + // 设置主页路由 + mux.HandleFunc("/", pagesHandler.HandleIndex) + + // 设置API文档路由 + mux.HandleFunc("/api-doc", pagesHandler.HandleAPIDoc) + + // 设置TTS API路由 + mux.HandleFunc("/tts", ttsHandler.HandleTTS) + + // 设置语音列表API路由 + mux.HandleFunc("/voices", voicesHandler.HandleVoices) + + mux.HandleFunc("/v1/audio/speech", ttsHandler.HandleOpenAITTS) + mux.HandleFunc("/audio/speech", ttsHandler.HandleOpenAITTS) + + // 设置静态文件服务 + fs := http.FileServer(http.Dir("./web/static")) + mux.Handle("/static/", http.StripPrefix("/static/", fs)) + + // 应用基础路径前缀 + var handler http.Handler = mux + if 
cfg.Server.BasePath != "" { + handler = http.StripPrefix(cfg.Server.BasePath, mux) + } + + // 应用中间件 + handler = middleware.Logger(handler) // 日志中间件 + handler = middleware.CORS(handler) // CORS中间件 + + return handler, nil +} + +// InitializeServices 初始化所有服务 +func InitializeServices(cfg *config.Config) (tts.Service, error) { + // 创建Microsoft TTS客户端 + ttsClient := microsoft.NewClient(cfg) + + return ttsClient, nil +} diff --git a/internal/http/server/server.go b/internal/http/server/server.go new file mode 100644 index 0000000..fe581a8 --- /dev/null +++ b/internal/http/server/server.go @@ -0,0 +1,45 @@ +package server + +import ( + "context" + "fmt" + "net/http" + "time" + + "tts/internal/config" +) + +// Server 封装HTTP服务器 +type Server struct { + server *http.Server + basePath string +} + +// New 创建新的HTTP服务器 +func New(cfg *config.Config, handler http.Handler) *Server { + // 创建HTTP服务器 + httpServer := &http.Server{ + Addr: fmt.Sprintf(":%d", cfg.Server.Port), + Handler: handler, + ReadTimeout: time.Duration(cfg.Server.ReadTimeout) * time.Second, + WriteTimeout: time.Duration(cfg.Server.WriteTimeout) * time.Second, + IdleTimeout: 120 * time.Second, + } + + return &Server{ + server: httpServer, + basePath: cfg.Server.BasePath, + } +} + +// Start 启动HTTP服务器 +func (s *Server) Start() error { + fmt.Printf("服务启动在 %s\n", s.server.Addr) + return s.server.ListenAndServe() +} + +// Shutdown 优雅关闭服务器 +func (s *Server) Shutdown(ctx context.Context) error { + fmt.Println("正在关闭HTTP服务器...") + return s.server.Shutdown(ctx) +} diff --git a/internal/models/tts.go b/internal/models/tts.go new file mode 100644 index 0000000..67ee7a5 --- /dev/null +++ b/internal/models/tts.go @@ -0,0 +1,16 @@ +package models + +// TTSRequest 表示一个语音合成请求 +type TTSRequest struct { + Text string `json:"text"` // 要转换的文本 + Voice string `json:"voice"` // 语音ID + Rate string `json:"rate"` // 语速 (-100% 到 +100%) + Pitch string `json:"pitch"` // 语调 (-100% 到 +100%) +} + +// TTSResponse 表示一个语音合成响应 +type TTSResponse struct { 
+ AudioContent []byte `json:"audio_content"` // 音频数据 + ContentType string `json:"content_type"` // MIME类型 + CacheHit bool `json:"cache_hit"` // 是否命中缓存 +} diff --git a/internal/models/voice.go b/internal/models/voice.go new file mode 100644 index 0000000..4391694 --- /dev/null +++ b/internal/models/voice.go @@ -0,0 +1,14 @@ +package models + +// Voice 表示一个语音合成声音 +type Voice struct { + Name string `json:"name"` // 语音唯一标识符 + DisplayName string `json:"display_name"` // 语音显示名称 + LocalName string `json:"local_name"` // 本地化名称 + ShortName string `json:"short_name"` // 简称,例如 zh-CN-XiaoxiaoNeural + Gender string `json:"gender"` // 性别: Female, Male + Locale string `json:"locale"` // 语言区域, 如 zh-CN + LocaleName string `json:"locale_name"` // 语言区域显示名称,如 中文(中国) + StyleList []string `json:"style_list,omitempty"` // 支持的说话风格列表 + SampleRateHertz string `json:"sample_rate_hertz"` // 采样率 +} diff --git a/internal/tts/microsoft/client.go b/internal/tts/microsoft/client.go new file mode 100644 index 0000000..8e869f7 --- /dev/null +++ b/internal/tts/microsoft/client.go @@ -0,0 +1,290 @@ +package microsoft + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "html" + "io" + "log" + "net/http" + "strings" + "sync" + "time" + + "tts/internal/config" + "tts/internal/models" + "tts/internal/utils" +) + +const ( + userAgent = "okhttp/4.5.0" + voicesEndpoint = "https://%s.tts.speech.microsoft.com/cognitiveservices/voices/list" + ttsEndpoint = "https://%s.tts.speech.microsoft.com/cognitiveservices/v1" + ssmlTemplate = ` + + + + %s + + + +` +) + +// Client 是Microsoft TTS API的客户端实现 +type Client struct { + defaultVoice string + defaultRate string + defaultPitch string + defaultFormat string + maxTextLength int + httpClient *http.Client + voicesCache []models.Voice + voicesCacheMu sync.RWMutex + voicesCacheExpiry time.Time + + // 端点和认证信息 + endpoint map[string]interface{} + endpointMu sync.RWMutex + endpointExpiry time.Time +} + +func (c *Client) HandleOpenAITTS(w 
http.ResponseWriter, r *http.Request) { + //TODO implement me + panic("implement me") +} + +// NewClient 创建一个新的Microsoft TTS客户端 +func NewClient(cfg *config.Config) *Client { + client := &Client{ + defaultVoice: cfg.TTS.DefaultVoice, + defaultRate: cfg.TTS.DefaultRate, + defaultPitch: cfg.TTS.DefaultPitch, + defaultFormat: cfg.TTS.DefaultFormat, + maxTextLength: cfg.TTS.MaxTextLength, + httpClient: &http.Client{ + Timeout: time.Duration(cfg.TTS.RequestTimeout) * time.Second, + }, + voicesCacheExpiry: time.Time{}, // 初始时缓存为空 + endpointExpiry: time.Time{}, // 初始时端点为空 + } + + return client +} + +// getEndpoint 获取或刷新认证端点 +func (c *Client) getEndpoint(ctx context.Context) (map[string]interface{}, error) { + c.endpointMu.RLock() + if !c.endpointExpiry.IsZero() && time.Now().Before(c.endpointExpiry) && c.endpoint != nil { + endpoint := c.endpoint + c.endpointMu.RUnlock() + return endpoint, nil + } + c.endpointMu.RUnlock() + + // 获取新的端点信息 + endpoint, err := utils.GetEndpoint() + if err != nil { + return nil, err + } + + // 更新缓存 + c.endpointMu.Lock() + c.endpoint = endpoint + c.endpointExpiry = time.Now().Add(45 * time.Minute) // 令牌有效期通常是1小时,提前刷新 + c.endpointMu.Unlock() + + return endpoint, nil +} + +// ListVoices 获取可用的语音列表 +func (c *Client) ListVoices(ctx context.Context, locale string) ([]models.Voice, error) { + // 检查缓存是否有效 + c.voicesCacheMu.RLock() + if !c.voicesCacheExpiry.IsZero() && time.Now().Before(c.voicesCacheExpiry) && len(c.voicesCache) > 0 { + voices := c.voicesCache + c.voicesCacheMu.RUnlock() + + // 如果指定了locale,则过滤结果 + if locale != "" { + var filtered []models.Voice + for _, voice := range voices { + if strings.HasPrefix(voice.Locale, locale) { + filtered = append(filtered, voice) + } + } + return filtered, nil + } + return voices, nil + } + c.voicesCacheMu.RUnlock() + + // 缓存无效,需要从API获取 + endpoint, err := c.getEndpoint(ctx) + if err != nil { + return nil, err + } + + url := fmt.Sprintf(voicesEndpoint, endpoint["r"]) + req, err := 
http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, err + } + + // 使用新的认证方式 + req.Header.Set("Authorization", endpoint["t"].(string)) + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return nil, fmt.Errorf("API error: %s, status: %d", string(body), resp.StatusCode) + } + + var msVoices []MicrosoftVoice + if err := json.NewDecoder(resp.Body).Decode(&msVoices); err != nil { + return nil, err + } + + // 转换为通用模型 + voices := make([]models.Voice, len(msVoices)) + for i, v := range msVoices { + voices[i] = models.Voice{ + Name: v.Name, + DisplayName: v.DisplayName, + LocalName: v.LocalName, + ShortName: v.ShortName, + Gender: v.Gender, + Locale: v.Locale, + LocaleName: v.LocaleName, + StyleList: v.StyleList, + SampleRateHertz: v.SampleRateHertz, // 直接使用字符串,无需转换 + } + } + + // 更新缓存 + c.voicesCacheMu.Lock() + c.voicesCache = voices + c.voicesCacheExpiry = time.Now().Add(1 * time.Hour) // 缓存1小时 + c.voicesCacheMu.Unlock() + + // 如果指定了locale,则过滤结果 + if locale != "" { + var filtered []models.Voice + for _, voice := range voices { + if strings.HasPrefix(voice.Locale, locale) { + filtered = append(filtered, voice) + } + } + return filtered, nil + } + + return voices, nil +} + +// SynthesizeSpeech 将文本转换为语音 +func (c *Client) SynthesizeSpeech(ctx context.Context, req models.TTSRequest) (*models.TTSResponse, error) { + resp, err := c.createTTSRequest(ctx, req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + // 读取音频数据 + audio, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + return &models.TTSResponse{ + AudioContent: audio, + ContentType: "audio/mpeg", + CacheHit: false, + }, nil +} + +// createTTSRequest 创建并执行TTS请求,返回HTTP响应 +func (c *Client) createTTSRequest(ctx context.Context, req models.TTSRequest) (*http.Response, error) { + // 参数验证 + if req.Text == "" { + return 
nil, errors.New("文本不能为空") + } + + if len(req.Text) > c.maxTextLength { + return nil, fmt.Errorf("文本长度超过限制 (%d > %d)", len(req.Text), c.maxTextLength) + } + + // 使用默认值填充空白参数 + voice := req.Voice + if voice == "" { + voice = c.defaultVoice + } + + rate := req.Rate + if rate == "" { + rate = c.defaultRate + } + + pitch := req.Pitch + if pitch == "" { + pitch = c.defaultPitch + } + + // 提取语言 + locale := "zh-CN" // 默认 + parts := strings.Split(voice, "-") + if len(parts) >= 2 { + locale = parts[0] + "-" + parts[1] + } + + // 对文本进行HTML转义,防止XML解析错误 + + escapedText := html.EscapeString(req.Text) + + // 准备SSML内容 + ssml := fmt.Sprintf(ssmlTemplate, locale, voice, rate, pitch, escapedText) + + // 获取端点信息 + endpoint, err := c.getEndpoint(ctx) + if err != nil { + return nil, err + } + + // 准备请求 + url := fmt.Sprintf(ttsEndpoint, endpoint["r"]) + reqBody := bytes.NewBufferString(ssml) + + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, reqBody) + if err != nil { + return nil, err + } + + httpReq.Header.Set("Authorization", endpoint["t"].(string)) + httpReq.Header.Set("Content-Type", "application/ssml+xml") + httpReq.Header.Set("X-Microsoft-OutputFormat", c.defaultFormat) + httpReq.Header.Set("User-Agent", userAgent) + + // 发送请求 + resp, err := c.httpClient.Do(httpReq) + + if err != nil { + return nil, err + } + + if resp.StatusCode != http.StatusOK { + // 获取响应体以便调试 + body, _ := io.ReadAll(resp.Body) + resp.Body.Close() + log.Printf("TTS API错误: %s, 状态码: %d", string(body), resp.StatusCode) + return nil, fmt.Errorf("TTS API错误: %s, 状态码: %d", string(body), resp.StatusCode) + } + + return resp, nil +} diff --git a/internal/tts/microsoft/models.go b/internal/tts/microsoft/models.go new file mode 100644 index 0000000..27fcc6d --- /dev/null +++ b/internal/tts/microsoft/models.go @@ -0,0 +1,45 @@ +package microsoft + +// MicrosoftVoice 表示Microsoft TTS服务中的一个语音 +type MicrosoftVoice struct { + Name string `json:"Name"` + DisplayName string `json:"DisplayName"` + LocalName 
string `json:"LocalName"` + ShortName string `json:"ShortName"` + Gender string `json:"Gender"` + Locale string `json:"Locale"` + LocaleName string `json:"LocaleName"` + StyleList []string `json:"StyleList,omitempty"` + SampleRateHertz string `json:"SampleRateHertz"` + VoiceType string `json:"VoiceType"` + Status string `json:"Status"` +} + +// SSMLRequest 表示发送给Microsoft TTS服务的SSML请求 +type SSMLRequest struct { + XMLHeader string + Voice string + Language string + Rate string + Pitch string + Text string +} + +// FormatContentTypeMap 定义音频格式到MIME类型的映射 +var FormatContentTypeMap = map[string]string{ + "raw-16khz-16bit-mono-pcm": "audio/pcm", + "raw-8khz-8bit-mono-mulaw": "audio/basic", + "riff-8khz-8bit-mono-alaw": "audio/alaw", + "riff-8khz-8bit-mono-mulaw": "audio/mulaw", + "riff-16khz-16bit-mono-pcm": "audio/wav", + "audio-16khz-128kbitrate-mono-mp3": "audio/mp3", + "audio-16khz-64kbitrate-mono-mp3": "audio/mp3", + "audio-16khz-32kbitrate-mono-mp3": "audio/mp3", + "raw-24khz-16bit-mono-pcm": "audio/pcm", + "riff-24khz-16bit-mono-pcm": "audio/wav", + "audio-24khz-160kbitrate-mono-mp3": "audio/mp3", + "audio-24khz-96kbitrate-mono-mp3": "audio/mp3", + "audio-24khz-48kbitrate-mono-mp3": "audio/mp3", + "ogg-24khz-16bit-mono-opus": "audio/ogg", + "webm-24khz-16bit-mono-opus": "audio/webm", +} diff --git a/internal/tts/service.go b/internal/tts/service.go new file mode 100644 index 0000000..ade289c --- /dev/null +++ b/internal/tts/service.go @@ -0,0 +1,15 @@ +package tts + +import ( + "context" + "tts/internal/models" +) + +// Service 定义TTS服务接口 +type Service interface { + // ListVoices 获取可用的语音列表 + ListVoices(ctx context.Context, locale string) ([]models.Voice, error) + + // SynthesizeSpeech 将文本转换为语音 + SynthesizeSpeech(ctx context.Context, req models.TTSRequest) (*models.TTSResponse, error) +} diff --git a/internal/utils/utils.go b/internal/utils/utils.go new file mode 100644 index 0000000..8d97dd6 --- /dev/null +++ b/internal/utils/utils.go @@ -0,0 +1,87 @@ +package utils 
+ +import ( + "crypto/hmac" + "crypto/sha256" + "encoding/base64" + "encoding/json" + "fmt" + "net/http" + "net/url" + "strings" + "time" + + "github.com/google/uuid" + "github.com/sirupsen/logrus" +) + +var ( + log = logrus.New() + client = &http.Client{} +) + +const ( + endpointURL = "https://dev.microsofttranslator.com/apps/endpoint?api-version=1.0" + userAgent = "okhttp/4.5.0" + clientVersion = "4.0.530a 5fe1dc6c" + userId = "0f04d16a175c411e" + homeGeographicRegion = "zh-Hans-CN" + clientTraceId = "aab069b9-70a7-4844-a734-96cd78d94be9" + voiceDecodeKey = "oik6PdDdMnOXemTbwvMn9de/h9lFnfBaCWbGMMZqqoSaQaqUOqjVGm5NqsmjcBI1x+sS9ugjB55HEJWRiFXYFw==" +) + +// GetEndpoint 获取语音合成服务的端点信息 +func GetEndpoint() (map[string]interface{}, error) { + signature := Sign(endpointURL) + headers := map[string]string{ + "Accept-Language": "zh-Hans", + "X-ClientVersion": clientVersion, + "X-UserId": userId, + "X-HomeGeographicRegion": homeGeographicRegion, + "X-ClientTraceId": clientTraceId, + "X-MT-Signature": signature, + "User-Agent": userAgent, + "Content-Type": "application/json; charset=utf-8", + "Content-Length": "0", + "Accept-Encoding": "gzip", + } + req, err := http.NewRequest("POST", endpointURL, nil) + if err != nil { + return nil, err + } + + for k, v := range headers { + req.Header.Set(k, v) + } + + resp, err := client.Do(req) + if err != nil { + log.Error("failed to do request: ", err) + return nil, err + } + defer resp.Body.Close() + + var result map[string]interface{} + err = json.NewDecoder(resp.Body).Decode(&result) + if err != nil { + return nil, err + } + + return result, nil +} + +// Sign 生成签名 +func Sign(urlStr string) string { + u := strings.Split(urlStr, "://")[1] + encodedUrl := url.QueryEscape(u) + uuidStr := strings.ReplaceAll(uuid.New().String(), "-", "") + formattedDate := strings.ToLower(time.Now().UTC().Format("Mon, 02 Jan 2006 15:04:05")) + "gmt" + bytesToSign := fmt.Sprintf("MSTranslatorAndroidApp%s%s%s", encodedUrl, formattedDate, uuidStr) + 
bytesToSign = strings.ToLower(bytesToSign) + decode, _ := base64.StdEncoding.DecodeString(voiceDecodeKey) + hash := hmac.New(sha256.New, decode) + hash.Write([]byte(bytesToSign)) + secretKey := hash.Sum(nil) + signBase64 := base64.StdEncoding.EncodeToString(secretKey) + return fmt.Sprintf("MSTranslatorAndroidApp::%s::%s::%s", signBase64, formattedDate, uuidStr) +} diff --git a/routes/routes.go b/routes/routes.go deleted file mode 100644 index d8d0083..0000000 --- a/routes/routes.go +++ /dev/null @@ -1,23 +0,0 @@ -package routes - -import ( - "tts/handlers" - - "github.com/gin-gonic/gin" -) - -func SetupRouter() *gin.Engine { - router := gin.Default() - - // 加载模板文件 - router.LoadHTMLGlob("templates/*") - - router.GET("/voices", handlers.GetVoiceList) - router.POST("/tts", handlers.SynthesizeVoicePost) - router.GET("/tts", handlers.SynthesizeVoice) - router.GET("/v1/audio/speech", handlers.SynthesizeVoiceOpenAI) - router.GET("/", handlers.Index) - router.GET("/doc", handlers.ApiDoc) - - return router -} diff --git a/templates/api-doc.html b/templates/api-doc.html deleted file mode 100644 index 5c5f7dd..0000000 --- a/templates/api-doc.html +++ /dev/null @@ -1,37 +0,0 @@ - - - - - TTS - - -

支持接口

-

语音合成

-
- /tts | GET / POST(json) - try -
- - -
-参数列表:
-1. t: 文本内容 (必填)
-2. v: 语音名称 (可选), 默认为 zh-CN-XiaoxiaoMultilingualNeural
-3. r: 语速 (可选), 默认为 0
-4. p: 语调 (可选), 默认为 0
-5. o: 输出格式 (可选), 默认为audio-24khz-48kbitrate-mono-mp3
-
- - -

声音列表

- -
- /voices | GET try -
-
-参数列表:
-1. l: 语言区域 (可选), 使用 contains 匹配,如 l=zh
-2. d: 显示详细信息 (可选) , 默认为 false, 如需显示详细信息, 请添加参数d , 如 /voices?d
-
- - diff --git a/templates/index.html b/templates/index.html deleted file mode 100644 index 9500d00..0000000 --- a/templates/index.html +++ /dev/null @@ -1,121 +0,0 @@ - - - - - - TTS Demo - - - - - -
- Documentation -
-
-

语音合成演示

- - -
- - -
-
- - -
-
- - -
-
- - -
-
- -
-
- - -
- -
- - -
-
- - -
- - -
- - - - - diff --git a/utils/utils.go b/utils/utils.go deleted file mode 100644 index 6f858b5..0000000 --- a/utils/utils.go +++ /dev/null @@ -1,218 +0,0 @@ -package utils - -import ( - "bytes" - "crypto/hmac" - "crypto/sha256" - "encoding/base64" - "encoding/json" - "fmt" - "github.com/google/uuid" - "github.com/sirupsen/logrus" - "html" - "io" - "net/http" - "net/url" - "strings" - "time" -) - -var ( - log = logrus.New() - client = &http.Client{} - voiceListCache []interface{} - cacheDuration = 1 * time.Hour // 缓存持续时间 -) - -func init() { - ticker := time.NewTicker(cacheDuration) - go func() { - for range ticker.C { - voiceListCache = nil - } - }() -} - -const ( - endpointURL = "https://dev.microsofttranslator.com/apps/endpoint?api-version=1.0" - voicesListURL = "https://eastus.api.speech.microsoft.com/cognitiveservices/voices/list" - userAgent = "okhttp/4.5.0" - clientVersion = "4.0.530a 5fe1dc6c" - userId = "0f04d16a175c411e" - homeGeographicRegion = "zh-Hans-CN" - clientTraceId = "aab069b9-70a7-4844-a734-96cd78d94be9" - voiceDecodeKey = "oik6PdDdMnOXemTbwvMn9de/h9lFnfBaCWbGMMZqqoSaQaqUOqjVGm5NqsmjcBI1x+sS9ugjB55HEJWRiFXYFw==" - defaultVoiceName = "zh-CN-XiaoxiaoMultilingualNeural" - defaultRate = "0" - defaultPitch = "0" - defaultOutputFormat = "audio-24khz-48kbitrate-mono-mp3" - defaultStyle = "general" -) - -// GetEndpoint 获取语音合成服务的端点信息 -func GetEndpoint() (map[string]interface{}, error) { - signature := Sign(endpointURL) - headers := map[string]string{ - "Accept-Language": "zh-Hans", - "X-ClientVersion": clientVersion, - "X-UserId": userId, - "X-HomeGeographicRegion": homeGeographicRegion, - "X-ClientTraceId": clientTraceId, - "X-MT-Signature": signature, - "User-Agent": userAgent, - "Content-Type": "application/json; charset=utf-8", - "Content-Length": "0", - "Accept-Encoding": "gzip", - } - req, err := http.NewRequest("POST", endpointURL, nil) - if err != nil { - return nil, err - } - - for k, v := range headers { - req.Header.Set(k, v) - } - - resp, err := 
client.Do(req) - if err != nil { - log.Error("failed to do request: ", err) - return nil, err - } - defer resp.Body.Close() - - var result map[string]interface{} - err = json.NewDecoder(resp.Body).Decode(&result) - if err != nil { - return nil, err - } - - return result, nil -} - -// Sign 生成签名 -func Sign(urlStr string) string { - u := strings.Split(urlStr, "://")[1] - encodedUrl := url.QueryEscape(u) - uuidStr := strings.ReplaceAll(uuid.New().String(), "-", "") - formattedDate := strings.ToLower(time.Now().UTC().Format("Mon, 02 Jan 2006 15:04:05")) + "gmt" - bytesToSign := fmt.Sprintf("MSTranslatorAndroidApp%s%s%s", encodedUrl, formattedDate, uuidStr) - bytesToSign = strings.ToLower(bytesToSign) - decode, _ := base64.StdEncoding.DecodeString(voiceDecodeKey) - hash := hmac.New(sha256.New, decode) - hash.Write([]byte(bytesToSign)) - secretKey := hash.Sum(nil) - signBase64 := base64.StdEncoding.EncodeToString(secretKey) - return fmt.Sprintf("MSTranslatorAndroidApp::%s::%s::%s", signBase64, formattedDate, uuidStr) -} - -// GetVoice 获取语音合成结果 -func GetVoice(text, voiceName, rate, pitch, outputFormat, style string) ([]byte, error) { - if voiceName == "" { - voiceName = defaultVoiceName - } - if rate == "" { - rate = defaultRate - } - if pitch == "" { - pitch = defaultPitch - } - if outputFormat == "" { - outputFormat = defaultOutputFormat - } - - if style == "" { - style = defaultStyle - } - - endpoint, err := GetEndpoint() - if err != nil { - return nil, err - } - - u := fmt.Sprintf("https://%s.tts.speech.microsoft.com/cognitiveservices/v1", endpoint["r"]) - headers := map[string]string{ - "Authorization": endpoint["t"].(string), - "Content-Type": "application/ssml+xml", - "X-Microsoft-OutputFormat": outputFormat, - } - - ssml := GetSsml(text, voiceName, rate, pitch, style) - - req, err := http.NewRequest("POST", u, bytes.NewBufferString(ssml)) - if err != nil { - return nil, err - } - - for k, v := range headers { - req.Header.Set(k, v) - } - - resp, err := 
client.Do(req) - if err != nil { - log.Error("failed to do request: ", err) - return nil, err - } - defer resp.Body.Close() - - return io.ReadAll(resp.Body) -} - -// GetSsml 生成 SSML 格式的文本 -func GetSsml(text, voiceName, rate, pitch, style string) string { - // 对文本进行转义 - text = html.EscapeString(text) - return fmt.Sprintf(` - - - - - %s - - - - - `, voiceName, style, rate, pitch, text) -} - -// VoiceList 获取可用的语音列表 -func VoiceList() ([]interface{}, error) { - // 如果缓存中有值,直接返回缓存的结果 - if voiceListCache != nil { - return voiceListCache, nil - } - - headers := map[string]string{ - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.26", - "X-Ms-Useragent": "SpeechStudio/2021.05.001", - "Content-Type": "application/json", - "Origin": "https://azure.microsoft.com", - "Referer": "https://azure.microsoft.com", - } - - req, err := http.NewRequest("GET", voicesListURL, nil) - if err != nil { - return nil, err - } - - for k, v := range headers { - req.Header.Set(k, v) - } - - resp, err := client.Do(req) - if err != nil { - log.Error("failed to do request: ", err) - return nil, err - } - defer resp.Body.Close() - - var result []interface{} - err = json.NewDecoder(resp.Body).Decode(&result) - if err != nil { - return nil, err - } - - // 将结果存储到缓存中 - voiceListCache = result - - return result, nil -} diff --git a/web/static/css/style.css b/web/static/css/style.css new file mode 100644 index 0000000..5970b68 --- /dev/null +++ b/web/static/css/style.css @@ -0,0 +1,271 @@ +/* 基本样式重置 */ +* { + box-sizing: border-box; + margin: 0; + padding: 0; +} + +body { + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif; + line-height: 1.6; + color: #333; + background-color: #f5f7fa; + padding: 20px; +} + +/* 容器 */ +.container { + max-width: 1000px; + margin: 0 auto; +} + +/* 页眉 */ +header { + text-align: center; + margin-bottom: 30px; + padding: 20px; +} + +header 
h1 { + font-size: 2.5rem; + margin-bottom: 10px; + color: #2c3e50; +} + +header p { + font-size: 1.2rem; + color: #7f8c8d; + margin-bottom: 20px; +} + +/* 导航 */ +nav { + display: flex; + justify-content: center; + margin-top: 20px; +} + +nav a { + text-decoration: none; + color: #3498db; + margin: 0 15px; + padding: 5px 10px; + border-radius: 5px; + transition: all 0.3s ease; +} + +nav a:hover { + background-color: #3498db; + color: #fff; +} + +nav a.active { + background-color: #3498db; + color: #fff; +} + +/* 卡片 */ +.card { + background-color: #fff; + border-radius: 10px; + box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); + padding: 25px; + margin-bottom: 25px; +} + +/* 标题 */ +h2 { + color: #2c3e50; + margin-bottom: 20px; + border-bottom: 1px solid #ecf0f1; + padding-bottom: 10px; +} + +h3 { + color: #3498db; + margin: 20px 0 10px; +} + +/* 输入区域 */ +.input-group { + position: relative; + margin-bottom: 20px; +} + +textarea { + width: 100%; + padding: 15px; + border: 1px solid #ddd; + border-radius: 5px; + resize: none; + font-size: 1rem; + font-family: inherit; +} + +textarea:focus { + outline: none; + border-color: #3498db; + box-shadow: 0 0 0 2px rgba(52, 152, 219, 0.2); +} + +.char-counter { + position: absolute; + bottom: 10px; + right: 10px; + font-size: 0.8rem; + color: #7f8c8d; +} + +/* 设置区域 */ +.settings { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); + gap: 20px; + margin-bottom: 20px; +} + +.setting-group { + display: flex; + flex-direction: column; +} + +label { + margin-bottom: 5px; + font-weight: bold; + color: #2c3e50; +} + +select, input[type="range"] { + padding: 8px; + border: 1px solid #ddd; + border-radius: 5px; + background-color: #fff; +} + +select:focus { + outline: none; + border-color: #3498db; +} + +/* 按钮 */ +.actions { + display: flex; + justify-content: center; + margin-top: 20px; +} + +button { + padding: 10px 20px; + border: none; + border-radius: 5px; + cursor: pointer; + font-size: 1rem; + transition: all 
/**
 * Front-end controller for the text-to-speech page.
 *
 * Once the DOM is ready it:
 *   - fills the voice <select> from `${config.basePath}/voices`,
 *   - mirrors the text length and the rate/pitch slider values into their labels,
 *   - builds a `${config.basePath}/tts` URL from the form and plays it in the
 *     <audio> element,
 *   - offers "download" and "copy link" actions for the last generated URL.
 *
 * NOTE(review): `config` (basePath, defaultVoice) is not defined in this file;
 * presumably it is injected by the page template -- confirm.
 */
document.addEventListener('DOMContentLoaded', function() {
    // Look up the DOM elements used by this page.
    const textInput = document.getElementById('text');
    const voiceSelect = document.getElementById('voice');
    const rateInput = document.getElementById('rate');
    const rateValue = document.getElementById('rateValue');
    const pitchInput = document.getElementById('pitch');
    const pitchValue = document.getElementById('pitchValue');
    const speakButton = document.getElementById('speak');
    const downloadButton = document.getElementById('download');
    const copyLinkButton = document.getElementById('copyLink');
    const audioPlayer = document.getElementById('audioPlayer');
    const resultSection = document.getElementById('resultSection');
    const charCount = document.getElementById('charCount');

    // URL of the most recently generated audio; empty until the first conversion.
    let lastAudioUrl = '';

    // Initialisation.
    initVoicesList();
    initEventListeners();

    // Keep the character counter in sync with the text area.
    textInput.addEventListener('input', function() {
        charCount.textContent = this.value.length;
    });

    // Show the current speaking-rate slider value.
    rateInput.addEventListener('input', function() {
        const value = this.value;
        rateValue.textContent = value + '%';
    });

    // Show the current pitch slider value.
    pitchInput.addEventListener('input', function() {
        const value = this.value;
        pitchValue.textContent = value + '%';
    });

    /**
     * Fetches the available voices and rebuilds the voice <select>, one
     * <optgroup> per locale. The option whose short_name equals
     * config.defaultVoice is pre-selected. Failures are logged, not thrown.
     */
    async function initVoicesList() {
        try {
            const response = await fetch(`${config.basePath}/voices`);
            if (!response.ok) throw new Error('获取语音列表失败');

            const voices = await response.json();

            // Clear the select before rebuilding its options.
            voiceSelect.innerHTML = '';

            // Group the voices by locale.
            const voicesByLocale = {};

            voices.forEach(voice => {
                if (!voicesByLocale[voice.locale]) {
                    voicesByLocale[voice.locale] = [];
                }
                voicesByLocale[voice.locale].push(voice);
            });

            // Create one option group per locale.
            for (const locale in voicesByLocale) {
                const optgroup = document.createElement('optgroup');
                // The group label is the locale_name of the first voice in the group.
                optgroup.label = voicesByLocale[locale][0].locale_name;

                voicesByLocale[locale].forEach(voice => {
                    const option = document.createElement('option');
                    option.value = voice.short_name;
                    option.textContent = `${voice.local_name || voice.display_name} (${voice.gender})`;

                    // Pre-select the configured default voice.
                    if (voice.short_name === config.defaultVoice) {
                        option.selected = true;
                    }

                    optgroup.appendChild(option);
                });

                voiceSelect.appendChild(optgroup);
            }
        } catch (error) {
            console.error('获取语音列表失败:', error);
            // NOTE(review): this leaves the select empty on failure -- confirm
            // whether a placeholder option was intended here.
            voiceSelect.innerHTML = '';
        }
    }

    /**
     * Legacy clipboard fallback: copies `text` through a temporary <textarea>
     * and document.execCommand('copy').
     */
    function copyWithExecCommand(text) {
        const textArea = document.createElement('textarea');
        textArea.value = text;
        document.body.appendChild(textArea);
        textArea.focus();
        textArea.select();

        try {
            // execCommand reports failure through its boolean return value,
            // not only by throwing, so the result has to be checked.
            if (document.execCommand('copy')) {
                alert('链接已复制到剪贴板');
            } else {
                console.error('复制失败:', new Error('execCommand("copy") returned false'));
            }
        } catch (err) {
            console.error('复制失败:', err);
        } finally {
            // Always remove the temporary element, whatever happened above.
            document.body.removeChild(textArea);
        }
    }

    /** Wires up the convert, download and copy-link buttons. */
    function initEventListeners() {
        // Convert button.
        speakButton.addEventListener('click', generateSpeech);

        // Download button: trigger a download of the last audio URL through a
        // temporary anchor element.
        downloadButton.addEventListener('click', function() {
            if (lastAudioUrl) {
                const a = document.createElement('a');
                a.href = lastAudioUrl;
                a.download = 'speech.mp3';
                document.body.appendChild(a);
                a.click();
                document.body.removeChild(a);
            }
        });

        // Copy-link button.
        copyLinkButton.addEventListener('click', function() {
            if (!lastAudioUrl) {
                return;
            }

            // navigator.clipboard is undefined outside secure contexts; calling
            // writeText on it would throw synchronously and skip the fallback,
            // so test for the API before using it.
            if (navigator.clipboard && navigator.clipboard.writeText) {
                navigator.clipboard.writeText(lastAudioUrl).then(() => {
                    alert('链接已复制到剪贴板');
                }).catch(err => {
                    console.error('复制失败:', err);
                    // Compatibility fallback.
                    copyWithExecCommand(lastAudioUrl);
                });
            } else {
                copyWithExecCommand(lastAudioUrl);
            }
        });
    }

    /**
     * Builds the TTS request URL from the form values, points the audio
     * player at it and starts playback. The convert button is disabled until
     * playback has started or failed.
     */
    async function generateSpeech() {
        const text = textInput.value.trim();
        if (!text) {
            alert('请输入要转换的文本');
            return;
        }

        const voice = voiceSelect.value;
        const rate = rateInput.value;
        const pitch = pitchInput.value;

        // Disable the button and show the loading state.
        speakButton.disabled = true;
        speakButton.textContent = '生成中...';

        try {
            // Build the query string.
            const params = new URLSearchParams({
                t: text,
                v: voice,
                r: rate,
                p: pitch
            });

            const url = `${config.basePath}/tts?${params.toString()}`;

            // Point the audio player at the new URL.
            audioPlayer.src = url;
            lastAudioUrl = url;

            // Reveal the result section.
            resultSection.style.display = 'block';

            // play() returns a Promise. Awaiting it routes a rejection into the
            // catch below and keeps the button disabled until playback has
            // actually started.
            await audioPlayer.play();
        } catch (error) {
            console.error('生成语音失败:', error);
            alert('生成语音失败,请重试');
        } finally {
            // Restore the button.
            speakButton.disabled = false;
            speakButton.textContent = '转换为语音';
        }
    }
});
b/web/templates/api-doc.html new file mode 100644 index 0000000..55023e5 --- /dev/null +++ b/web/templates/api-doc.html @@ -0,0 +1,310 @@ + + + + + + API文档 - TTS服务 + + + + +
+
+

TTS服务 API文档

+

快速、高质量的文本转语音API服务

+ +
+ +
+
+

API概述

+

TTS服务API提供了简单而强大的方式将文本转换为自然语音。我们支持多种语言和声音,并允许您调节语速、语调以适应不同场景需求。

+

基础URL: {{.BasePath}}

+

所有API请求均使用HTTP协议,返回标准HTTP状态码表示请求结果。

+
+ +
+

文本转语音 API

+

端点

+ GET {{.BasePath}}/tts + +

参数

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
参数类型必选描述
tstring要转换的文本(需要进行URL编码)
vstring语音名称,使用short_name格式,默认: {{.DefaultVoice}}。可通过/voices接口获取所有可用语音
rstring语速调整,范围: -100%到100%,默认: {{.DefaultRate}}。正值加快语速,负值减慢语速
pstring语调调整,范围: -100%到100%,默认: {{.DefaultPitch}}。正值提高语调,负值降低语调
ostring输出音频格式,默认: {{.DefaultFormat}}。详见下方支持的格式列表
sstring情感风格,可用值取决于所选语音的style_list属性。例如:"cheerful"、"sad"等
+ +

示例请求

+
curl "{{.BasePath}}/tts?t=%E4%BD%A0%E5%A5%BD%EF%BC%8C%E4%B8%96%E7%95%8C&v=zh-CN-XiaoxiaoNeural&r=0%25&p=0%25"
+ +

另一个示例(带情感风格)

+
curl "{{.BasePath}}/tts?t=%E4%BB%8A%E5%A4%A9%E5%A4%A9%E6%B0%94%E7%9C%9F%E5%A5%BD&v=zh-CN-XiaoxiaoNeural&s=cheerful"
+ +

响应

+

返回音频文件,内容类型取决于请求的输出格式。正常响应状态码为200。

+ +

错误响应

+

如果请求参数有误或服务出现问题,将返回对应的HTTP错误码和错误消息。

+ + + + + + + + + + + + + + + + + + + + + +
状态码 | 描述
400 | 参数错误或缺失必要参数
404 | 请求的资源不存在
500 | 服务器内部错误
+
+ +
+

获取可用语音 API

+

端点

+ GET {{.BasePath}}/voices + +

参数

+ + + + + + + + + + + + + + + + + + + + + + + +
参数类型必选描述
localestring筛选特定语言的语音,例如:zh-CN(中文)、en-US(英文)
genderstring筛选特定性别的语音,可选值:Male(男性)、Female(女性)
+ +

示例请求

+
curl "{{.BasePath}}/voices?locale=zh-CN&gender=Female"
+ +

响应

+

返回JSON格式的可用语音列表:

+
[
+  {
+    "name": "Microsoft Server Speech Text to Speech Voice (zh-CN, XiaoxiaoNeural)",
+    "display_name": "Xiaoxiao",
+    "local_name": "晓晓",
+    "short_name": "zh-CN-XiaoxiaoNeural",
+    "gender": "Female",
+    "locale": "zh-CN",
+    "locale_name": "中文(中国)",
+    "style_list": ["cheerful", "sad", "angry", "fearful", "disgruntled"]
+  },
+  ...
+]
+

响应字段说明:

+
    +
  • name:语音的完整名称
  • +
  • display_name:显示用名称(拉丁字符)
  • +
  • local_name:本地化名称
  • +
  • short_name:简短名称(用于API调用的v参数)
  • +
  • gender:性别(Male或Female)
  • +
  • locale:语言代码
  • +
  • locale_name:语言本地化名称
  • +
  • style_list:支持的情感风格列表(如有)
  • +
+
+ +
+

兼容OpenAI接口 API

+

语音合成

+ POST {{.BasePath}}/v1/audio/speech + +

请求体 (JSON)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
参数类型必选描述
modelstring当前仅支持值: "tts-1"
inputstring要转换的文本内容
voicestring声音名称,使用Microsoft语音格式,例如:ja-JP-KeitaNeural、zh-CN-XiaoxiaoNeural
speednumber语速调整,范围: 0.5到2.0,默认: 1.0
+ +

示例请求

+
curl -X POST "{{.BasePath}}/v1/audio/speech" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "tts-1",
+    "input": "你好,世界!",
+    "voice": "zh-CN-XiaoxiaoNeural"
+  }'
+ +

另一个示例(带速度调整)

+
curl -X POST "{{.BasePath}}/v1/audio/speech" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "tts-1",
+    "input": "こんにちは、世界!",
+    "voice": "ja-JP-NanamiNeural",
+    "speed": 1.2
+  }'
+ +

响应

+

返回音频文件,内容类型取决于请求的输出格式。正常响应状态码为200。

+ +

错误响应

+

如果请求有误,将返回JSON格式的错误信息:

+
{
+  "error": {
+    "message": "错误信息描述",
+    "type": "错误类型",
+    "code": "错误代码"
+  }
+}
+
+ +
+

支持的输出格式

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
格式名称 | 描述
audio-16khz-32kbitrate-mono-mp3 | MP3格式,16kHz, 32kbps
audio-16khz-64kbitrate-mono-mp3 | MP3格式,16kHz, 64kbps
audio-16khz-128kbitrate-mono-mp3 | MP3格式,16kHz, 128kbps
audio-24khz-48kbitrate-mono-mp3 | MP3格式,24kHz, 48kbps
audio-24khz-96kbitrate-mono-mp3 | MP3格式,24kHz, 96kbps
audio-24khz-160kbitrate-mono-mp3 | MP3格式,24kHz, 160kbps
riff-16khz-16bit-mono-pcm | WAV格式,16kHz
riff-24khz-16bit-mono-pcm | WAV格式,24kHz
+
+
+ + + +
+ + diff --git a/web/templates/index.html b/web/templates/index.html new file mode 100644 index 0000000..6bb5bb4 --- /dev/null +++ b/web/templates/index.html @@ -0,0 +1,83 @@ + + + + + + 文本转语音 - TTS服务 + + + + +
+
+

文本转语音 (TTS)

+

将文本转换为自然流畅的语音

+ +
+ +
+
+

输入文本

+
+ +
0/5000
+
+ +
+
+ + +
+ +
+ + + 0% +
+ +
+ + + 0% +
+
+ +
+ +
+
+ + +
+ + +
+ + + + + \ No newline at end of file diff --git a/templates/worker.js b/web/templates/worker.js similarity index 100% rename from templates/worker.js rename to web/templates/worker.js