feat: 重构项目以符合 Go 规范，添加 OpenAI 接口适配，优化长文本朗读功能（切割后合并）

2025-03-09 13:02:28 +08:00
parent 539f6d9ef5
commit 8f2fd68ebe
31 changed files with 2487 additions and 647 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,27 @@
+### Go template
+# If you prefer the allow list template instead of the deny list, see community template:
+# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
+#
+# Binaries for programs and plugins
+*.exe
+*.exe~
+*.dll
+*.so
+*.dylib
+
+# Test binary, built with `go test -c`
+*.test
+
+# Output of the go coverage tool, specifically when used with LiteIDE
+*.out
+
+# Dependency directories (remove the comment below to include it)
+# vendor/
+
+# Go workspace file
+go.work
+go.work.sum
+
+# env file
+.env
+
--- a/.idea/git_toolbox_blame.xml
+++ b/.idea/git_toolbox_blame.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="GitToolBoxBlameSettings">
+    <option name="version" value="2" />
+  </component>
+</project>
--- a/cmd/api/main.go
+++ b/cmd/api/main.go
@@ -0,0 +1,58 @@
+package main
+
+import (
+	"flag"
+	"log"
+	"os"
+	"path/filepath"
+
+	"tts/internal/http/server"
+)
+
+func main() {
+	// Parse command-line flags; -config selects the configuration file path.
+	configPath := flag.String("config", "", "配置文件路径")
+	flag.Parse()
+
+	// No path given on the command line: probe the default locations.
+	if *configPath == "" {
+		// Candidate locations, checked in order; the first one that exists wins.
+		possiblePaths := []string{
+			"./configs/config.yaml",
+			"../configs/config.yaml",
+			"/etc/tts/config.yaml",
+		}
+
+		for _, path := range possiblePaths {
+			if _, err := os.Stat(path); err == nil {
+				*configPath = path
+				break
+			}
+		}
+
+		// Still nothing found: fall back to the default location.
+		if *configPath == "" {
+			*configPath = "./configs/config.yaml"
+		}
+	}
+
+	// Resolve the config path to an absolute path.
+	absConfigPath, err := filepath.Abs(*configPath)
+	if err != nil {
+		log.Fatalf("无法获取配置文件的绝对路径: %v", err)
+	}
+
+	// Log which configuration file is being used.
+	log.Printf("使用配置文件: %s", absConfigPath)
+
+	// Build the application from the configuration file.
+	app, err := server.NewApp(absConfigPath)
+	if err != nil {
+		log.Fatalf("初始化应用失败: %v", err)
+	}
+
+	// Start the application; any error returned by Start is fatal.
+	if err := app.Start(); err != nil {
+		log.Fatalf("应用运行出错: %v", err)
+	}
+}
--- a/configs/config.yaml
+++ b/configs/config.yaml
@@ -0,0 +1,27 @@
+server:
+  port: 8080
+  read_timeout: 30
+  write_timeout: 30
+  base_path: ""
+
+tts:
+  region: "eastasia"
+  default_voice: "zh-CN-XiaoxiaoNeural"
+  default_rate: "0"
+  default_pitch: "0"
+  default_format: "audio-24khz-48kbitrate-mono-mp3"
+  max_text_length: 65535
+  request_timeout: 30
+  max_concurrent: 10
+  segment_threshold: 300
+  min_sentence_length: 200
+  max_sentence_length: 300
+
+  # OpenAI 到微软 TTS 中文语音的映射
+  voice_mapping:
+    alloy: "zh-CN-XiaoyiNeural"       # 中性女声
+    echo: "zh-CN-YunxiNeural"         # 年轻男声
+    fable: "zh-CN-XiaochenNeural"     # 儿童声
+    onyx: "zh-CN-YunjianNeural"       # 成熟男声
+    nova: "zh-CN-XiaohanNeural"       # 活力女声
+    shimmer: "zh-CN-XiaomoNeural"     # 温柔女声
--- a/go.mod
+++ b/go.mod
@@ -3,36 +3,12 @@ module tts
 go 1.22

 require (
-	github.com/gin-gonic/gin v1.10.0
 	github.com/google/uuid v1.6.0
 	github.com/sirupsen/logrus v1.9.3
+	gopkg.in/yaml.v3 v3.0.1
 )

 require (
-	github.com/bytedance/sonic v1.11.6 // indirect
-	github.com/bytedance/sonic/loader v0.1.1 // indirect
-	github.com/cloudwego/base64x v0.1.4 // indirect
-	github.com/cloudwego/iasm v0.2.0 // indirect
-	github.com/gabriel-vasile/mimetype v1.4.3 // indirect
-	github.com/gin-contrib/sse v0.1.0 // indirect
-	github.com/go-playground/locales v0.14.1 // indirect
-	github.com/go-playground/universal-translator v0.18.1 // indirect
-	github.com/go-playground/validator/v10 v10.20.0 // indirect
-	github.com/goccy/go-json v0.10.2 // indirect
-	github.com/json-iterator/go v1.1.12 // indirect
-	github.com/klauspost/cpuid/v2 v2.2.7 // indirect
-	github.com/leodido/go-urn v1.4.0 // indirect
-	github.com/mattn/go-isatty v0.0.20 // indirect
-	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
-	github.com/modern-go/reflect2 v1.0.2 // indirect
-	github.com/pelletier/go-toml/v2 v2.2.2 // indirect
-	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
-	github.com/ugorji/go/codec v1.2.12 // indirect
-	golang.org/x/arch v0.8.0 // indirect
-	golang.org/x/crypto v0.23.0 // indirect
-	golang.org/x/net v0.25.0 // indirect
+	github.com/stretchr/testify v1.9.0 // indirect
 	golang.org/x/sys v0.20.0 // indirect
-	golang.org/x/text v0.15.0 // indirect
-	google.golang.org/protobuf v1.34.1 // indirect
-	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
--- a/go.sum
+++ b/go.sum
@@ -1,124 +1,21 @@
-github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM=
-github.com/bytedance/sonic v1.10.0-rc/go.mod h1:ElCzW+ufi8qKqNW0FY314xriJhyJhuoJ3gFZdAHF7NM=
-github.com/bytedance/sonic v1.11.4 h1:8+OMLSSDDm2/qJc6ld5K5Sm62NK9VHcUKk0NzBoMAM4=
-github.com/bytedance/sonic v1.11.4/go.mod h1:YrWEqYtlBPS6LUA0vpuG79a1trsh4Ae41uWUWUreHhE=
-github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
-github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
-github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
-github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
-github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY=
-github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk=
-github.com/chenzhuoyu/iasm v0.9.0/go.mod h1:Xjy2NpN3h7aUqeqM+woSuuvxmIe6+DDsiNLIrkAmYog=
-github.com/cloudwego/base64x v0.1.0 h1:Tg5q9tq1khq9Y9UwfoC6zkHK0FypN2GLDvhqFceOL8U=
-github.com/cloudwego/base64x v0.1.0/go.mod h1:lM8nFiNbg74QgesNo6EAtv8N9tlRjBWExmHoNDa3PkU=
-github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
-github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
-github.com/cloudwego/iasm v0.0.9/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
-github.com/cloudwego/iasm v0.1.1 h1:Py/XoYVR3xFd2pXmvmOnoS5vHTlYT9SnGK28ES8JOIk=
-github.com/cloudwego/iasm v0.1.1/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
-github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
-github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
-github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
-github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
-github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
-github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg=
-github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU=
-github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
-github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
-github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
-github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
-github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
-github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
-github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
-github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
-github.com/go-playground/validator/v10 v10.19.0 h1:ol+5Fu+cSq9JD7SoSqe04GMI92cbn0+wvQ3bZ8b/AU4=
-github.com/go-playground/validator/v10 v10.19.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
-github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
-github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
-github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
-github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
-github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
-github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
-github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
-github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
-github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
-github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
-github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
-github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
-github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
-github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
-github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
-github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
-github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
-github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
-github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
-github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
-github.com/pelletier/go-toml/v2 v2.2.1 h1:9TA9+T8+8CUCO2+WYnDLCgrYi9+omqKXyjDtosvtEhg=
-github.com/pelletier/go-toml/v2 v2.2.1/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
-github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
-github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
 github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
-github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
-github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
-github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
-github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
-github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
-github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
 github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
 github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
-github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
-github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
-github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
-github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
-golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
-golang.org/x/arch v0.7.0 h1:pskyeJh/3AmoQ8CPE95vxHLqp1G1GfGNXTmcl9NEKTc=
-golang.org/x/arch v0.7.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
-golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
-golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
-golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30=
-golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M=
-golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI=
-golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
-golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w=
-golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8=
-golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
-golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
 golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o=
-golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
 golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
-golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
-golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
-golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
-golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
-golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
-google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
-google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
-google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
-nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
-rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
--- a/handlers/handlers.go
+++ b/handlers/handlers.go
@@ -1,119 +0,0 @@
-package handlers
-
-import (
-	"github.com/gin-gonic/gin"
-	"net/http"
-	"strings"
-	"tts/utils"
-)
-
-func GetVoiceList(c *gin.Context) {
-	locale := c.Query("l")
-	voices, err := utils.VoiceList()
-	if err != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
-	}
-
-	if locale != "" {
-		filteredVoices := make([]interface{}, 0)
-		for _, voice := range voices {
-			if strings.Contains(voice.(map[string]interface{})["Locale"].(string), locale) {
-				filteredVoices = append(filteredVoices, voice)
-			}
-		}
-		voices = filteredVoices
-	}
-
-	_, detail := c.GetQuery("d")
-	if detail {
-		c.JSON(http.StatusOK, gin.H{"voices": voices})
-	} else {
-		voiceSimpleList := make([]map[string]string, 0)
-		for _, voice := range voices {
-			localName := voice.(map[string]interface{})["LocalName"].(string)
-			shortName := voice.(map[string]interface{})["ShortName"].(string)
-			voiceSimpleList = append(voiceSimpleList, map[string]string{
-				"LocalName": localName,
-				"ShortName": shortName,
-			})
-		}
-		c.JSON(http.StatusOK, gin.H{"voices": voiceSimpleList})
-	}
-
-}
-
-func SynthesizeVoice(c *gin.Context) {
-	text := c.Query("t")
-	voiceName := c.DefaultQuery("v", "zh-CN-XiaoxiaoMultilingualNeural")
-	rate := c.DefaultQuery("r", "0")
-	pitch := c.DefaultQuery("p", "0")
-	outputFormat := c.DefaultQuery("o", "audio-24khz-48kbitrate-mono-mp3")
-
-	voice, err := utils.GetVoice(text, voiceName, rate, pitch, outputFormat, c.Query("s"))
-	if err != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
-	}
-
-	c.Data(http.StatusOK, "audio/mpeg", voice)
-}
-
-func Index(c *gin.Context) {
-	c.HTML(http.StatusOK, "index.html", gin.H{
-		"title": "TTS",
-	})
-}
-
-func ApiDoc(c *gin.Context) {
-	c.HTML(http.StatusOK, "api-doc.html", gin.H{
-		"title": "TTS",
-	})
-}
-
-type SynthesizeVoiceRequest struct {
-	Text         string `json:"t"`
-	VoiceName    string `json:"v"`
-	Rate         string `json:"r"`
-	Pitch        string `json:"p"`
-	OutputFormat string `json:"o"`
-	Style        string `json:"s"`
-}
-
-type SynthesizeVoiceOpenAIRequest struct {
-	Model string `json:"model"`
-	Input string `json:"input"`
-	Voice string `json:"voice"`
-}
-
-func SynthesizeVoicePost(c *gin.Context) {
-	var request SynthesizeVoiceRequest
-	if err := c.BindJSON(&request); err != nil {
-		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
-		return
-	}
-
-	voice, err := utils.GetVoice(request.Text, request.VoiceName, request.Rate, request.Pitch, request.OutputFormat, request.Style)
-	if err != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
-	}
-
-	c.Data(http.StatusOK, "audio/mpeg", voice)
-}
-
-func SynthesizeVoiceOpenAI(c *gin.Context) {
-	var request SynthesizeVoiceOpenAIRequest
-	if err := c.BindJSON(&request); err != nil {
-		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
-		return
-	}
-
-	voice, err := utils.GetVoice(request.Input, request.Voice, c.Query("r"), c.Query("p"), c.Query("o"), c.Query("s"))
-
-	if err != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
-	}
-	c.Data(http.StatusOK, "audio/mpeg", voice)
-}
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -0,0 +1,129 @@
+package config
+
+import (
+	"fmt"
+	"os"
+	"sync"
+
+	"gopkg.in/yaml.v3"
+)
+
+// Config holds all application configuration.
+type Config struct {
+	Server ServerConfig `yaml:"server"`
+	TTS    TTSConfig    `yaml:"tts"`
+}
+
+// ServerConfig holds the HTTP server configuration.
+type ServerConfig struct {
+	Port         int    `yaml:"port"`
+	ReadTimeout  int    `yaml:"read_timeout"`  // unit: seconds
+	WriteTimeout int    `yaml:"write_timeout"` // unit: seconds
+	BasePath     string `yaml:"base_path"`
+}
+
+// TTSConfig holds the Microsoft TTS API configuration.
+type TTSConfig struct {
+	APIKey            string            `yaml:"api_key"`
+	Region            string            `yaml:"region"`
+	DefaultVoice      string            `yaml:"default_voice"`
+	DefaultRate       string            `yaml:"default_rate"`
+	DefaultPitch      string            `yaml:"default_pitch"`
+	DefaultFormat     string            `yaml:"default_format"`
+	MaxTextLength     int               `yaml:"max_text_length"`
+	RequestTimeout    int               `yaml:"request_timeout"` // unit: seconds
+	MaxConcurrent     int               `yaml:"max_concurrent"`
+	SegmentThreshold  int               `yaml:"segment_threshold"`
+	MinSentenceLength int               `yaml:"min_sentence_length"`
+	MaxSentenceLength int               `yaml:"max_sentence_length"`
+	VoiceMapping      map[string]string `yaml:"voice_mapping"` // maps OpenAI voice names to Azure voice names
+}
+
+var (
+	config  Config
+	once    sync.Once
+	loadErr error // outcome of the one-time load, replayed to every Load caller
+)
+
+// Load reads the configuration once: defaults, then the YAML file at
+// configPath (skipped when empty), then environment overrides. Later calls
+// ignore configPath and return the same result, including the first error.
+func Load(configPath string) (*Config, error) {
+	once.Do(func() {
+		// Start from the built-in defaults.
+		setDefaults()
+
+		// Overlay values from the config file, if one was given.
+		if configPath != "" {
+			if err := loadFromFile(configPath); err != nil {
+				loadErr = fmt.Errorf("加载配置文件失败: %w", err)
+				return
+			}
+		}
+
+		// Apply environment-variable overrides last.
+		overrideFromEnv()
+	})
+
+	if loadErr != nil {
+		return nil, loadErr
+	}
+	return &config, nil
+}
+
+// setDefaults resets the package-level config to the built-in default values.
+func setDefaults() {
+	config = Config{
+		Server: ServerConfig{
+			Port:         8080,
+			ReadTimeout:  30,
+			WriteTimeout: 30,
+			BasePath:     "",
+		},
+		TTS: TTSConfig{
+			DefaultVoice:      "zh-CN-XiaoxiaoNeural",
+			DefaultRate:       "0%",
+			DefaultPitch:      "0%",
+			DefaultFormat:     "audio-24khz-48kbitrate-mono-mp3",
+			MaxTextLength:     5000,
+			RequestTimeout:    30,
+			MaxConcurrent:     10,
+			SegmentThreshold:  500,
+			MinSentenceLength: 200,
+			MaxSentenceLength: 300,
+			VoiceMapping:      make(map[string]string),
+		},
+	}
+}
+
+// loadFromFile reads the file at path and unmarshals its YAML into the package-level config.
+func loadFromFile(path string) error {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return err
+	}
+
+	return yaml.Unmarshal(data, &config)
+}
+
+// overrideFromEnv overlays TTS_SERVER_PORT, TTS_API_KEY and TTS_API_REGION (when non-empty) onto config.
+func overrideFromEnv() {
+	if port := os.Getenv("TTS_SERVER_PORT"); port != "" {
+		// Report an unparsable port on stderr instead of dropping the error silently.
+		if _, err := fmt.Sscanf(port, "%d", &config.Server.Port); err != nil {
+			fmt.Fprintf(os.Stderr, "忽略无效的 TTS_SERVER_PORT %q: %v\n", port, err)
+		}
+	}
+	if apiKey := os.Getenv("TTS_API_KEY"); apiKey != "" {
+		config.TTS.APIKey = apiKey
+	}
+	if region := os.Getenv("TTS_API_REGION"); region != "" {
+		config.TTS.Region = region
+	}
+	// More environment-variable overrides can be added here.
+}
+
+// Get returns a pointer to the package-level config; it does not call Load itself.
+func Get() *Config {
+	return &config
+}
--- a/internal/http/handlers/pages.go
+++ b/internal/http/handlers/pages.go
@@ -0,0 +1,76 @@
+package handlers
+
+import (
+	"html/template"
+	"net/http"
+	"path/filepath"
+
+	"tts/internal/config"
+)
+
+// PagesHandler serves the HTML pages from the parsed templates.
+type PagesHandler struct {
+	templates *template.Template
+	config    *config.Config
+}
+
+// NewPagesHandler parses every *.html file in templatesDir and returns a handler bound to cfg.
+func NewPagesHandler(templatesDir string, cfg *config.Config) (*PagesHandler, error) {
+	// Parse all template files matching templatesDir/*.html.
+	templates, err := template.ParseGlob(filepath.Join(templatesDir, "*.html"))
+	if err != nil {
+		return nil, err
+	}
+
+	return &PagesHandler{
+		templates: templates,
+		config:    cfg,
+	}, nil
+}
+
+// HandleIndex renders index.html for "/" and "/index.html"; any other path gets a 404.
+func (h *PagesHandler) HandleIndex(w http.ResponseWriter, r *http.Request) {
+	// Anything other than the root path (or /index.html) is a 404.
+	if r.URL.Path != "/" && r.URL.Path != "/index.html" {
+		http.NotFound(w, r)
+		return
+	}
+
+	// Template data taken from the configuration.
+	data := map[string]interface{}{
+		"BasePath":     h.config.Server.BasePath,
+		"DefaultVoice": h.config.TTS.DefaultVoice,
+		"DefaultRate":  h.config.TTS.DefaultRate,
+		"DefaultPitch": h.config.TTS.DefaultPitch,
+	}
+
+	// Set the content type before any output is written.
+	w.Header().Set("Content-Type", "text/html; charset=utf-8")
+
+	// Render straight into the response. NOTE(review): a mid-render failure occurs after output has started, so the 500 below may not reach the client cleanly — consider rendering to a buffer first.
+	if err := h.templates.ExecuteTemplate(w, "index.html", data); err != nil {
+		http.Error(w, "模板渲染失败: "+err.Error(), http.StatusInternalServerError)
+		return
+	}
+}
+
+// HandleAPIDoc renders the api-doc.html template with the configured defaults.
+func (h *PagesHandler) HandleAPIDoc(w http.ResponseWriter, r *http.Request) {
+	// Template data taken from the configuration.
+	data := map[string]interface{}{
+		"BasePath":      h.config.Server.BasePath,
+		"DefaultVoice":  h.config.TTS.DefaultVoice,
+		"DefaultRate":   h.config.TTS.DefaultRate,
+		"DefaultPitch":  h.config.TTS.DefaultPitch,
+		"DefaultFormat": h.config.TTS.DefaultFormat,
+	}
+
+	// Set the content type before any output is written.
+	w.Header().Set("Content-Type", "text/html; charset=utf-8")
+
+	// Render straight into the response. NOTE(review): a mid-render failure occurs after output has started, so the 500 below may not reach the client cleanly — consider rendering to a buffer first.
+	if err := h.templates.ExecuteTemplate(w, "api-doc.html", data); err != nil {
+		http.Error(w, "模板渲染失败: "+err.Error(), http.StatusInternalServerError)
+		return
+	}
+}
--- a/internal/http/handlers/tts.go
+++ b/internal/http/handlers/tts.go
@@ -0,0 +1,553 @@
+package handlers
+
+import (
+	"encoding/json"
+	"fmt"
+	"log"
+	"net/http"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"sync"
+	"time"
+	"tts/internal/config"
+	"tts/internal/models"
+	"tts/internal/tts"
+	"unicode/utf8"
+)
+
+// TTSHandler serves TTS requests using a tts.Service and the application config.
+type TTSHandler struct {
+	ttsService tts.Service
+	config     *config.Config
+}
+
+// NewTTSHandler returns a TTSHandler that uses the given service and configuration.
+func NewTTSHandler(service tts.Service, cfg *config.Config) *TTSHandler {
+	return &TTSHandler{
+		ttsService: service,
+		config:     cfg,
+	}
+}
+
+// HandleOpenAITTS serves OpenAI-style TTS requests: POST JSON with model, input, voice and speed.
+func (h *TTSHandler) HandleOpenAITTS(w http.ResponseWriter, r *http.Request) {
+	// Record when the request started.
+	startTime := time.Now()
+
+	// Only POST is supported.
+	if r.Method != http.MethodPost {
+		http.Error(w, "仅支持POST请求", http.StatusMethodNotAllowed)
+		return
+	}
+
+	// Decode the request body.
+	var openaiReq struct {
+		Model string  `json:"model"`
+		Input string  `json:"input"`
+		Voice string  `json:"voice"`
+		Speed float64 `json:"speed"`
+	}
+
+	if err := json.NewDecoder(r.Body).Decode(&openaiReq); err != nil {
+		http.Error(w, "无效的JSON请求: "+err.Error(), http.StatusBadRequest)
+		return
+	}
+
+	// Record how long parsing took.
+	parseTime := time.Since(startTime)
+
+	// The input field is required.
+	if openaiReq.Input == "" {
+		http.Error(w, "input字段不能为空", http.StatusBadRequest)
+		return
+	}
+
+	// Map the OpenAI voice to a Microsoft voice; the default voice is used when no mapping exists.
+	msVoice := h.config.TTS.DefaultVoice
+	if openaiReq.Voice != "" {
+		// Use the configured mapping when there is one for this voice.
+		if mappedVoice, exists := h.config.TTS.VoiceMapping[openaiReq.Voice]; exists {
+			msVoice = mappedVoice
+		}
+	}
+
+	// Convert the speed parameter to the Microsoft rate format; a zero (absent) speed keeps the default rate.
+	msRate := h.config.TTS.DefaultRate
+	if openaiReq.Speed != 0 {
+		// OpenAI speed is a multiplier; the rate is its offset from 1.0 in percent.
+		// OpenAI: 0.5 (slow), 1.0 (normal), 2.0 (fast)
+		// Microsoft: "-50%" (slow), "+0%" (normal), "+100%" (fast). NOTE(review): no "%" is emitted here — presumably the service appends it; confirm.
+		speedPercentage := (openaiReq.Speed - 1.0) * 100
+		if speedPercentage >= 0 {
+			msRate = fmt.Sprintf("+%.0f", speedPercentage)
+		} else {
+			msRate = fmt.Sprintf("%.0f", speedPercentage)
+		}
+	}
+
+	// Build the internal TTS request.
+	req := models.TTSRequest{
+		Text:  openaiReq.Input,
+		Voice: msVoice,
+		Rate:  msRate,
+		Pitch: h.config.TTS.DefaultPitch,
+	}
+
+	log.Printf("OpenAI TTS请求: model=%s, voice=%s → %s, speed=%.2f → %s, 文本长度=%d",
+		openaiReq.Model, openaiReq.Voice, msVoice, openaiReq.Speed, msRate, len(req.Text))
+
+	// Enforce the text length limit (len counts bytes, not characters).
+	if len(req.Text) > h.config.TTS.MaxTextLength {
+		http.Error(w, "文本长度超过限制", http.StatusBadRequest)
+		return
+	}
+
+	// Text longer than the segment threshold is synthesized in segments.
+	segmentThreshold := h.config.TTS.SegmentThreshold
+	if len(req.Text) > segmentThreshold && len(req.Text) <= h.config.TTS.MaxTextLength {
+		log.Printf("文本长度 %d 超过阈值 %d，使用分段处理", len(req.Text), segmentThreshold)
+		// Hand off to the segmented path.
+		h.handleSegmentedTTS(w, r, req)
+		return
+	}
+
+	// Short text: synthesize in a single call.
+	synthStart := time.Now()
+	resp, err := h.ttsService.SynthesizeSpeech(r.Context(), req)
+	synthTime := time.Since(synthStart)
+	log.Printf("TTS合成耗时: %v, 文本长度: %d", synthTime, len(req.Text))
+
+	if err != nil {
+		http.Error(w, "语音合成失败: "+err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	// Write the audio response.
+	w.Header().Set("Content-Type", "audio/mpeg")
+	writeStart := time.Now()
+	w.Write(resp.AudioContent)
+	writeTime := time.Since(writeStart)
+
+	// Log the total time and its breakdown.
+	totalTime := time.Since(startTime)
+	log.Printf("OpenAI TTS请求总耗时: %v (解析: %v, 合成: %v, 写入: %v), 音频大小: %s",
+		totalTime, parseTime, synthTime, writeTime, formatFileSize(len(resp.AudioContent)))
+}
+
+// HandleTTS serves TTS requests: GET query params (t, v, r, p) or POST JSON / form fields (text, voice, rate, pitch).
+func (h *TTSHandler) HandleTTS(w http.ResponseWriter, r *http.Request) {
+	// Record when the request started.
+	startTime := time.Now()
+
+	// Parse the request parameters.
+	var req models.TTSRequest
+
+	switch r.Method {
+	case http.MethodGet:
+		// Read parameters from the URL query string.
+		q := r.URL.Query()
+		req = models.TTSRequest{
+			Text:  q.Get("t"),
+			Voice: q.Get("v"),
+			Rate:  q.Get("r"),
+			Pitch: q.Get("p"),
+		}
+	case http.MethodPost:
+		// JSON body; match the media type by prefix, ignoring case, so "application/json; charset=utf-8" is accepted too.
+		if strings.HasPrefix(strings.ToLower(r.Header.Get("Content-Type")), "application/json") {
+			if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+				log.Printf("JSON解析错误: %v", err)
+				http.Error(w, "无效的JSON请求", http.StatusBadRequest)
+				return
+			}
+		} else {
+			// Any other content type is treated as form data.
+			if err := r.ParseForm(); err != nil {
+				log.Printf("表单解析错误: %v", err)
+				http.Error(w, "无法解析表单数据", http.StatusBadRequest)
+				return
+			}
+			req = models.TTSRequest{
+				Text:  r.FormValue("text"),
+				Voice: r.FormValue("voice"),
+				Rate:  r.FormValue("rate"),
+				Pitch: r.FormValue("pitch"),
+			}
+		}
+	default:
+		log.Printf("不支持的HTTP方法: %s", r.Method)
+		http.Error(w, "仅支持GET和POST请求", http.StatusMethodNotAllowed)
+		return
+	}
+
+	// Record how long parameter parsing took.
+	parseTime := time.Since(startTime)
+	log.Printf("请求参数解析耗时: %v", parseTime)
+
+	// The text parameter is required.
+	if req.Text == "" {
+		log.Print("错误: 未提供文本参数")
+		http.Error(w, "必须提供文本参数", http.StatusBadRequest)
+		return
+	}
+
+	// Fill empty parameters with the configured defaults.
+	if req.Voice == "" {
+		req.Voice = h.config.TTS.DefaultVoice
+	}
+	if req.Rate == "" {
+		req.Rate = h.config.TTS.DefaultRate
+	}
+	if req.Pitch == "" {
+		req.Pitch = h.config.TTS.DefaultPitch
+	}
+
+	// Enforce the text length limit (len counts bytes, not characters).
+	if len(req.Text) > h.config.TTS.MaxTextLength {
+		http.Error(w, "文本长度超过限制", http.StatusBadRequest)
+		return
+	}
+
+	// Text longer than the segment threshold is synthesized in segments.
+	segmentThreshold := h.config.TTS.SegmentThreshold
+	if len(req.Text) > segmentThreshold && len(req.Text) <= h.config.TTS.MaxTextLength {
+		log.Printf("文本长度 %d 超过阈值 %d，使用分段处理", len(req.Text), segmentThreshold)
+		// Above the threshold but within the limit: hand off to the segmented path.
+		h.handleSegmentedTTS(w, r, req)
+		return
+	}
+
+	// Short text: synthesize in a single call.
+	synthStart := time.Now()
+	resp, err := h.ttsService.SynthesizeSpeech(r.Context(), req)
+	synthTime := time.Since(synthStart)
+	log.Printf("TTS合成耗时: %v, 文本长度: %d", synthTime, len(req.Text))
+
+	if err != nil {
+		http.Error(w, "语音合成失败: "+err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	// Write the audio response.
+	w.Header().Set("Content-Type", "audio/mpeg")
+	writeStart := time.Now()
+	w.Write(resp.AudioContent)
+	writeTime := time.Since(writeStart)
+
+	// Log the total time and its breakdown.
+	totalTime := time.Since(startTime)
+	log.Printf("TTS请求总耗时: %v (解析: %v, 合成: %v, 写入: %v), 音频大小: %s",
+		totalTime, parseTime, synthTime, writeTime, formatFileSize(len(resp.AudioContent)))
+}
+
+// handleSegmentedTTS synthesizes long text sentence by sentence (concurrently) and writes the merged audio to w.
+func (h *TTSHandler) handleSegmentedTTS(w http.ResponseWriter, r *http.Request, req models.TTSRequest) {
+	segmentStart := time.Now() // start of the whole segmented request
+	text := req.Text
+
+	// Split the text into sentences, timing the split.
+	splitStart := time.Now()
+	sentences := splitTextBySentences(text)
+	segmentCount := len(sentences)
+	splitTime := time.Since(splitStart)
+
+	// Nothing to synthesize: stop before the per-segment averages below divide by zero.
+	if segmentCount == 0 {
+		http.Error(w, "语音合成失败: 文本分段结果为空", http.StatusInternalServerError)
+		return
+	}
+
+	log.Printf("分割文本耗时: %v, 文本总长度: %d, 分段数: %d, 平均句子长度: %.2f",
+		splitTime, len(text), segmentCount, float64(len(text))/float64(segmentCount))
+
+	// One result slot per sentence, indexed by sentence position.
+	results := make([][]byte, segmentCount)
+	errChan := make(chan error, segmentCount)
+	var wg sync.WaitGroup
+
+	// Bound concurrency with a semaphore; a limit below 1 would leave it unbuffered and block the first acquire forever.
+	maxConcurrent := h.config.TTS.MaxConcurrent
+	if maxConcurrent < 1 {
+		maxConcurrent = 1
+	}
+	semaphore := make(chan struct{}, maxConcurrent)
+
+	// Per-segment synthesis durations.
+	segmentTimes := make([]time.Duration, segmentCount)
+
+	// Start of the synthesis phase.
+	synthesisStart := time.Now()
+
+	// Synthesize every sentence concurrently.
+	for i := 0; i < segmentCount; i++ {
+		wg.Add(1)
+		semaphore <- struct{}{} // acquire a slot
+		go func(index int) {
+			defer wg.Done()
+			defer func() { <-semaphore }() // release the slot
+
+			// Build the request for this sentence.
+			segReq := models.TTSRequest{
+				Text:  sentences[index],
+				Voice: req.Voice,
+				Rate:  req.Rate,
+				Pitch: req.Pitch,
+			}
+
+			log.Printf("开始处理句子 #%d: 长度=%d, 内容='%s'",
+				index+1,
+				utf8.RuneCountInString(sentences[index]),
+				truncateForLog(sentences[index], 20))
+
+			// Record when synthesis of this segment started.
+			segStart := time.Now()
+
+			// Synthesize this segment.
+			resp, err := h.ttsService.SynthesizeSpeech(r.Context(), segReq)
+
+			// Record how long this segment took.
+			segTime := time.Since(segStart)
+			segmentTimes[index] = segTime
+
+			if err != nil {
+				log.Printf("句子 #%d 合成失败，耗时: %v, 错误: %v", index+1, segTime, err)
+				// Never blocks: errChan has one slot per goroutine and each sends at most once.
+				errChan <- fmt.Errorf("句子 %d 合成失败: %w", index+1, err)
+				return
+			}
+
+			log.Printf("句子 #%d 合成成功:长度=%d, 耗时=%v, 音频大小=%s",
+				index+1, utf8.RuneCountInString(sentences[index]), segTime, formatFileSize(len(resp.AudioContent)))
+
+			// Store this segment's audio at its own index.
+			results[index] = resp.AudioContent
+		}(i)
+	}
+
+	// Wait for every goroutine, then close errChan so the receive below cannot block.
+	wg.Wait()
+	close(errChan)
+
+	// Log the total synthesis time across all segments.
+	synthesisTime := time.Since(synthesisStart)
+	log.Printf("所有分段合成总耗时: %v, 平均每段耗时: %v",
+		synthesisTime, synthesisTime/time.Duration(segmentCount))
+
+	// Fail the request if any segment failed; a closed, empty channel yields nil.
+	if err := <-errChan; err != nil {
+		http.Error(w, "语音合成失败: "+err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	// Start timing the merge-and-write phase.
+	writeStart := time.Now()
+	audioData, err := audioMerge(results)
+	if err != nil {
+		log.Printf("合并音频失败: %v", err)
+		http.Error(w, "音频合并失败: "+err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	// Set the response content type.
+	w.Header().Set("Content-Type", "audio/mpeg")
+
+	// Write the merged audio; a write failure is only logged.
+	totalSize := len(audioData)
+	if _, writeErr := w.Write(audioData); writeErr != nil {
+		log.Printf("写入响应失败: %v", writeErr)
+	}
+
+	// Record how long merge and write took.
+	writeTime := time.Since(writeStart)
+
+	// Log the total time and its breakdown.
+	totalTime := time.Since(segmentStart)
+	log.Printf("分段TTS请求总耗时: %v (分割: %v, 合成: %v, 写入: %v), 总音频大小: %s",
+		totalTime, splitTime, synthesisTime, writeTime, formatFileSize(totalSize))
+}
+
+// splitTextBySentences splits text into segments at sentence-ending punctuation (or after MaxSentenceLength runes), then merges segments shorter than MinSentenceLength into their neighbours.
+func splitTextBySentences(text string) []string {
+	// Punctuation marks (and newline) that end a sentence.
+	sentenceEnders := []string{"。", "！", "？", "…", ".", "!", "?", "…", "\n"}
+
+	// Text shorter than 100 runes is returned unchanged as a single segment.
+	if utf8.RuneCountInString(text) < 100 {
+		return []string{text}
+	}
+
+	var sentences []string
+	var currentSentence strings.Builder
+	maxSentenceLength := config.Get().TTS.MaxSentenceLength // upper bound, in runes, for a single segment
+	runeCount := 0                                          // runes accumulated in currentSentence
+
+	for _, char := range text {
+		currentSentence.WriteRune(char)
+		runeCount++
+
+		// Check whether this rune is one of the sentence enders.
+		lastChar := string(char)
+		isSentenceEnder := false
+		for _, ender := range sentenceEnders {
+			if lastChar == ender {
+				isSentenceEnder = true
+				break
+			}
+		}
+
+		// Close the segment at an ender or once it reaches the maximum length (counted in runes, not bytes).
+		if isSentenceEnder || runeCount >= maxSentenceLength {
+			// Flush the current segment into the result.
+			sentence := currentSentence.String()
+			if len(sentence) > 0 {
+				sentences = append(sentences, sentence)
+			}
+			currentSentence.Reset() // start a fresh builder
+			runeCount = 0           // reset the rune counter
+		}
+	}
+
+	// Flush whatever follows the last ender.
+	if currentSentence.Len() > 0 {
+		lastSentence := currentSentence.String()
+		sentences = append(sentences, lastSentence)
+	}
+
+	// Second pass: merge segments that are too short.
+	minSentenceLength := config.Get().TTS.MinSentenceLength // segments with fewer runes than this are merge candidates
+
+	if len(sentences) > 1 {
+		mergedSentences := []string{}
+		var currentMerged strings.Builder
+		currentMergedLength := 0
+
+		for i, sentence := range sentences {
+			sentenceLength := utf8.RuneCountInString(sentence)
+
+			// A short segment that is not the last one is buffered for merging.
+			if sentenceLength < minSentenceLength && i < len(sentences)-1 {
+				// Would appending it push the buffer past the maximum length?
+				if currentMergedLength+sentenceLength > maxSentenceLength {
+					// Yes: emit the buffered text first.
+					if currentMerged.Len() > 0 {
+						mergedSentences = append(mergedSentences, currentMerged.String())
+						currentMerged.Reset()
+						currentMergedLength = 0
+					}
+				}
+
+				// Append the short segment to the merge buffer.
+				currentMerged.WriteString(sentence)
+				currentMergedLength += sentenceLength
+			} else {
+				// The segment is long enough, or it is the last one.
+				if currentMerged.Len() > 0 {
+					// Does the buffer plus this segment still fit within the maximum length?
+					if currentMergedLength+sentenceLength <= maxSentenceLength {
+						// Yes: append this segment to the buffer and emit both as one.
+						currentMerged.WriteString(sentence)
+						mergedSentence := currentMerged.String()
+						mergedSentences = append(mergedSentences, mergedSentence)
+					} else {
+						// No: emit the buffer and this segment separately.
+						mergedSentence := currentMerged.String()
+						mergedSentences = append(mergedSentences, mergedSentence)
+						mergedSentences = append(mergedSentences, sentence)
+					}
+					currentMerged.Reset()
+					currentMergedLength = 0
+				} else {
+					// Nothing is buffered: emit the segment as is.
+					mergedSentences = append(mergedSentences, sentence)
+				}
+			}
+		}
+
+		// Emit text still in the merge buffer. NOTE(review): looks unreachable, since the last segment always takes the else branch above, which resets the buffer; confirm.
+		if currentMerged.Len() > 0 {
+			mergedSentence := currentMerged.String()
+			mergedSentences = append(mergedSentences, mergedSentence)
+			log.Printf("添加最后剩余的合并句子，长度=%d", utf8.RuneCountInString(mergedSentence))
+		}
+
+		return mergedSentences
+	}
+
+	return sentences
+}
+
+// truncateForLog shortens text for log output: line breaks become spaces, and text longer than maxLength runes is cut to its first and last maxLength/2 runes joined by "...".
+func truncateForLog(text string, maxLength int) string {
+	// Replace line breaks so the log entry stays on one line.
+	text = strings.ReplaceAll(text, "\n", " ")
+	text = strings.ReplaceAll(text, "\r", " ")
+
+	runes := []rune(text)
+	if len(runes) <= maxLength {
+		return text
+	}
+	// Keep maxLength/2 runes from each end of the text.
+	halfLength := maxLength / 2
+	return string(runes[:halfLength]) + "..." + string(runes[len(runes)-halfLength:])
+}
+
+// audioMerge concatenates the given MP3 segments in order by running the ffmpeg concat demuxer in a temporary directory and returns the merged bytes; it fails on empty input or when ffmpeg cannot be run.
+func audioMerge(audioSegments [][]byte) ([]byte, error) {
+	if len(audioSegments) == 0 {
+		return nil, fmt.Errorf("没有音频片段可合并")
+	}
+
+	// All intermediate files live in a temporary directory that is removed on return.
+	tempDir, err := os.MkdirTemp("", "audio_merge_")
+	if err != nil {
+		return nil, err
+	}
+	defer os.RemoveAll(tempDir)
+
+	listFile := filepath.Join(tempDir, "concat.txt")
+	lf, err := os.Create(listFile)
+	if err != nil {
+		return nil, err
+	}
+	defer lf.Close() // releases the handle on early returns; after the explicit Close below this is a harmless no-op error
+	for i, seg := range audioSegments {
+		segFile := filepath.Join(tempDir, fmt.Sprintf("seg_%d.mp3", i))
+		if err := os.WriteFile(segFile, seg, 0644); err != nil {
+			return nil, err
+		}
+		if _, err := fmt.Fprintf(lf, "file '%s'\n", segFile); err != nil {
+			return nil, err
+		}
+	}
+	if err := lf.Close(); err != nil { // the list must be completely written before ffmpeg reads it
+		return nil, err
+	}
+	outputFile := filepath.Join(tempDir, "output.mp3")
+	cmd := exec.Command("ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", listFile, "-c", "copy", outputFile)
+	if err := cmd.Run(); err != nil {
+		return nil, err
+	}
+
+	mergedData, err := os.ReadFile(outputFile)
+	if err != nil {
+		return nil, err
+	}
+	log.Printf("使用ffmpeg合并完成，总大小: %s", formatFileSize(len(mergedData)))
+	return mergedData, nil
+}
+
+// formatFileSize renders a byte count as a human-readable string in B, KB, MB or GB (1024-based; two decimals for everything above bytes).
+func formatFileSize(size int) string {
+	switch {
+	case size < 1024:
+		return fmt.Sprintf("%d B", size)
+	case size < 1024*1024:
+		return fmt.Sprintf("%.2f KB", float64(size)/1024.0)
+	case size < 1024*1024*1024:
+		return fmt.Sprintf("%.2f MB", float64(size)/(1024.0*1024.0))
+	default:
+		return fmt.Sprintf("%.2f GB", float64(size)/(1024.0*1024.0*1024.0))
+	}
+}
--- a/internal/http/handlers/voices.go
+++ b/internal/http/handlers/voices.go
@@ -0,0 +1,41 @@
+package handlers
+
+import (
+	"encoding/json"
+	"net/http"
+	"tts/internal/tts"
+)
+
+// VoicesHandler serves voice-list requests using a tts.Service.
+type VoicesHandler struct {
+	ttsService tts.Service
+}
+
+// NewVoicesHandler returns a VoicesHandler backed by the given service.
+func NewVoicesHandler(service tts.Service) *VoicesHandler {
+	return &VoicesHandler{
+		ttsService: service,
+	}
+}
+
+// HandleVoices writes the voice list as JSON, optionally filtered by the "locale" query parameter; a service failure yields a 500 response.
+func (h *VoicesHandler) HandleVoices(w http.ResponseWriter, r *http.Request) {
+	// Optional locale filter from the query string (empty means no filter).
+	locale := r.URL.Query().Get("locale")
+
+	// Fetch the voice list from the service.
+	voices, err := h.ttsService.ListVoices(r.Context(), locale)
+	if err != nil {
+		http.Error(w, "获取语音列表失败: "+err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	// Set the content type before the body is written.
+	w.Header().Set("Content-Type", "application/json")
+
+	// Stream the JSON encoding. NOTE(review): if Encode fails after output has started, this http.Error cannot change the status any more; confirm that is acceptable.
+	if err := json.NewEncoder(w).Encode(voices); err != nil {
+		http.Error(w, "JSON编码失败", http.StatusInternalServerError)
+		return
+	}
+}
--- a/internal/http/middleware/cors.go
+++ b/internal/http/middleware/cors.go
@@ -0,0 +1,22 @@
+package middleware
+
+import "net/http"
+
+// CORS is middleware that adds permissive cross-origin headers to every response and answers OPTIONS preflight requests with 200 without calling next.
+func CORS(next http.Handler) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// CORS response headers: any origin, GET/POST/OPTIONS, Content-Type and Authorization request headers.
+		w.Header().Set("Access-Control-Allow-Origin", "*")
+		w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
+		w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization")
+
+		// A preflight request is answered here with 200 and goes no further.
+		if r.Method == http.MethodOptions {
+			w.WriteHeader(http.StatusOK)
+			return
+		}
+
+		// Everything else continues to the wrapped handler.
+		next.ServeHTTP(w, r)
+	})
+}
--- a/internal/http/middleware/logger.go
+++ b/internal/http/middleware/logger.go
@@ -0,0 +1,46 @@
+package middleware
+
+import (
+	"log"
+	"net/http"
+	"time"
+)
+
+// Logger is HTTP middleware that logs method, request URI, remote address, status code and duration after the wrapped handler returns.
+func Logger(next http.Handler) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		start := time.Now()
+
+		// Wrap the ResponseWriter to capture the status code; 200 is assumed until WriteHeader is called.
+		wrapper := &responseWriterWrapper{
+			ResponseWriter: w,
+			statusCode:     http.StatusOK,
+		}
+
+		// Run the wrapped handler.
+		next.ServeHTTP(wrapper, r)
+
+		// Log the request once it has completed.
+		duration := time.Since(start)
+		log.Printf(
+			"[%s] %s %s %d %s",
+			r.Method,
+			r.RequestURI,
+			r.RemoteAddr,
+			wrapper.statusCode,
+			duration,
+		)
+	})
+}
+
+// responseWriterWrapper embeds an http.ResponseWriter and remembers the status code passed to WriteHeader.
+type responseWriterWrapper struct {
+	http.ResponseWriter
+	statusCode int
+}
+
+// WriteHeader records the status code and forwards it to the embedded ResponseWriter.
+func (w *responseWriterWrapper) WriteHeader(statusCode int) {
+	w.statusCode = statusCode
+	w.ResponseWriter.WriteHeader(statusCode)
+}
--- a/internal/http/server/app.go
+++ b/internal/http/server/app.go
@@ -0,0 +1,83 @@
+package server
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"os"
+	"os/signal"
+	"syscall"
+	"time"
+	"tts/internal/config"
+)
+
+// App is the whole TTS application: the HTTP server plus the configuration it was built from.
+type App struct {
+	server *Server
+	cfg    *config.Config
+}
+
+// NewApp loads the configuration at configPath, initializes the TTS service, builds the routes and returns an App ready to Start; each failing step is returned as a wrapped error.
+func NewApp(configPath string) (*App, error) {
+	// Load the configuration.
+	cfg, err := config.Load(configPath)
+	if err != nil {
+		return nil, fmt.Errorf("加载配置失败: %w", err)
+	}
+
+	// Initialize the services.
+	ttsService, err := InitializeServices(cfg)
+	if err != nil {
+		return nil, fmt.Errorf("初始化服务失败: %w", err)
+	}
+
+	// Build the route handler.
+	handler, err := SetupRoutes(cfg, ttsService)
+	if err != nil {
+		return nil, fmt.Errorf("设置路由失败: %w", err)
+	}
+
+	// Create the HTTP server around the handler.
+	server := New(cfg, handler)
+
+	return &App{
+		server: server,
+		cfg:    cfg,
+	}, nil
+}
+
+// Start runs the HTTP server in a goroutine and blocks until it fails or SIGINT/SIGTERM arrives; on a signal it shuts the server down gracefully with a 10-second timeout.
+func (a *App) Start() error {
+	// Buffered with one slot, so the server goroutine can deliver its result even when nobody is receiving any more.
+	errChan := make(chan error, 1)
+
+	// Channel that receives the termination signals.
+	quit := make(chan os.Signal, 1)
+	signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
+
+	// Run the server in its own goroutine.
+	go func() {
+		log.Printf("启动TTS服务，监听端口 %d...\n", a.cfg.Server.Port)
+		errChan <- a.server.Start()
+	}()
+
+	// Wait for either a server error or a termination signal.
+	select {
+	case err := <-errChan:
+		return err
+	case <-quit:
+		log.Println("接收到退出信号，正在优雅关闭...")
+
+		// Timeout context that bounds the graceful shutdown.
+		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+		defer cancel()
+
+		// Try to shut the server down gracefully.
+		if err := a.server.Shutdown(ctx); err != nil {
+			return fmt.Errorf("服务器关闭出错: %w", err)
+		}
+
+		log.Println("服务器已优雅关闭")
+		return nil
+	}
+}
--- a/internal/http/server/routes.go
+++ b/internal/http/server/routes.go
@@ -0,0 +1,65 @@
+package server
+
+import (
+	"net/http"
+	"tts/internal/config"
+	"tts/internal/http/handlers"
+	"tts/internal/http/middleware"
+	"tts/internal/tts"
+	"tts/internal/tts/microsoft"
+)
+
+// SetupRoutes registers the page, TTS, voice-list, OpenAI-compatible and static-file routes on a new mux, strips cfg.Server.BasePath when it is set, and wraps the result in the Logger and CORS middleware.
+func SetupRoutes(cfg *config.Config, ttsService tts.Service) (http.Handler, error) {
+	// Create a new request multiplexer.
+	mux := http.NewServeMux()
+
+	// Create the API handlers.
+	ttsHandler := handlers.NewTTSHandler(ttsService, cfg)
+	voicesHandler := handlers.NewVoicesHandler(ttsService)
+
+	// Create the page handler; the template directory is relative to the working directory.
+	pagesHandler, err := handlers.NewPagesHandler("./web/templates", cfg)
+	if err != nil {
+		return nil, err
+	}
+
+	// Home page route.
+	mux.HandleFunc("/", pagesHandler.HandleIndex)
+
+	// API documentation route.
+	mux.HandleFunc("/api-doc", pagesHandler.HandleAPIDoc)
+
+	// TTS API route.
+	mux.HandleFunc("/tts", ttsHandler.HandleTTS)
+
+	// Voice-list API route.
+	mux.HandleFunc("/voices", voicesHandler.HandleVoices)
+
+	mux.HandleFunc("/v1/audio/speech", ttsHandler.HandleOpenAITTS)
+	mux.HandleFunc("/audio/speech", ttsHandler.HandleOpenAITTS)
+
+	// Static file server; the directory is relative to the working directory.
+	fs := http.FileServer(http.Dir("./web/static"))
+	mux.Handle("/static/", http.StripPrefix("/static/", fs))
+
+	// Strip the configured base path prefix, if any, before routing.
+	var handler http.Handler = mux
+	if cfg.Server.BasePath != "" {
+		handler = http.StripPrefix(cfg.Server.BasePath, mux)
+	}
+
+	// Apply the middleware; the last one applied (CORS) is the outermost and runs first.
+	handler = middleware.Logger(handler) // logging middleware
+	handler = middleware.CORS(handler)   // CORS middleware
+
+	return handler, nil
+}
+
+// InitializeServices builds the TTS service from cfg; it currently returns a Microsoft client and a nil error.
+func InitializeServices(cfg *config.Config) (tts.Service, error) {
+	// Create the Microsoft TTS client.
+	ttsClient := microsoft.NewClient(cfg)
+
+	return ttsClient, nil
+}
--- a/internal/http/server/server.go
+++ b/internal/http/server/server.go
@@ -0,0 +1,45 @@
+package server
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"time"
+
+	"tts/internal/config"
+)
+
+// Server wraps an http.Server together with the configured base path.
+type Server struct {
+	server   *http.Server
+	basePath string
+}
+
+// New builds a Server that listens on cfg.Server.Port with the given handler; read and write timeouts come from cfg (in seconds) and the idle timeout is fixed at 120 seconds.
+func New(cfg *config.Config, handler http.Handler) *Server {
+	// Configure the underlying HTTP server.
+	httpServer := &http.Server{
+		Addr:         fmt.Sprintf(":%d", cfg.Server.Port),
+		Handler:      handler,
+		ReadTimeout:  time.Duration(cfg.Server.ReadTimeout) * time.Second,
+		WriteTimeout: time.Duration(cfg.Server.WriteTimeout) * time.Second,
+		IdleTimeout:  120 * time.Second,
+	}
+
+	return &Server{
+		server:   httpServer,
+		basePath: cfg.Server.BasePath,
+	}
+}
+
+// Start prints the listen address and blocks in ListenAndServe, returning its error.
+func (s *Server) Start() error {
+	fmt.Printf("服务启动在 %s\n", s.server.Addr)
+	return s.server.ListenAndServe()
+}
+
+// Shutdown gracefully stops the underlying HTTP server, bounded by ctx.
+func (s *Server) Shutdown(ctx context.Context) error {
+	fmt.Println("正在关闭HTTP服务器...")
+	return s.server.Shutdown(ctx)
+}
--- a/internal/models/tts.go
+++ b/internal/models/tts.go
@@ -0,0 +1,16 @@
+package models
+
+// TTSRequest describes one speech-synthesis request.
+type TTSRequest struct {
+	Text  string `json:"text"`  // text to convert to speech
+	Voice string `json:"voice"` // voice ID
+	Rate  string `json:"rate"`  // speaking rate (-100% to +100%)
+	Pitch string `json:"pitch"` // pitch (-100% to +100%)
+}
+
+// TTSResponse describes the result of one speech-synthesis request.
+type TTSResponse struct {
+	AudioContent []byte `json:"audio_content"` // audio data
+	ContentType  string `json:"content_type"`  // MIME type
+	CacheHit     bool   `json:"cache_hit"`     // whether the result came from a cache
+}
--- a/internal/models/voice.go
+++ b/internal/models/voice.go
@@ -0,0 +1,14 @@
+package models
+
+// Voice describes one speech-synthesis voice.
+type Voice struct {
+	Name            string   `json:"name"`                 // unique identifier of the voice
+	DisplayName     string   `json:"display_name"`         // display name of the voice
+	LocalName       string   `json:"local_name"`           // localized name
+	ShortName       string   `json:"short_name"`           // short name, e.g. zh-CN-XiaoxiaoNeural
+	Gender          string   `json:"gender"`               // gender: Female, Male
+	Locale          string   `json:"locale"`               // locale, e.g. zh-CN
+	LocaleName      string   `json:"locale_name"`          // display name of the locale, e.g. 中文(中国)
+	StyleList       []string `json:"style_list,omitempty"` // supported speaking styles
+	SampleRateHertz string   `json:"sample_rate_hertz"`    // sample rate
+}
--- a/internal/tts/microsoft/client.go
+++ b/internal/tts/microsoft/client.go
@@ -0,0 +1,290 @@
+package microsoft
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"html"
+	"io"
+	"log"
+	"net/http"
+	"strings"
+	"sync"
+	"time"
+
+	"tts/internal/config"
+	"tts/internal/models"
+	"tts/internal/utils"
+)
+
+const (
+	userAgent      = "okhttp/4.5.0" // sent as User-Agent on synthesis requests
+	voicesEndpoint = "https://%s.tts.speech.microsoft.com/cognitiveservices/voices/list"
+	ttsEndpoint    = "https://%s.tts.speech.microsoft.com/cognitiveservices/v1" // like voicesEndpoint, %s is filled with endpoint["r"] (presumably the region; confirm)
+	ssmlTemplate   = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts="http://www.w3.org/2001/mstts" xml:lang='%s'>
+    <voice name='%s'>
+        <mstts:express-as style="general" styledegree="1.0" role="default">
+            <prosody rate='%s%%' pitch='%s%%' volume="medium">
+                %s
+            </prosody>
+        </mstts:express-as>
+    </voice>
+</speak>` // placeholders in order: locale, voice, rate, pitch, escaped text
+)
+
+// Client is the Microsoft TTS API implementation; it holds the configured defaults, an HTTP client, and mutex-guarded caches for the voice list and the auth endpoint.
+type Client struct {
+	defaultVoice      string
+	defaultRate       string
+	defaultPitch      string
+	defaultFormat     string
+	maxTextLength     int
+	httpClient        *http.Client
+	voicesCache       []models.Voice
+	voicesCacheMu     sync.RWMutex
+	voicesCacheExpiry time.Time
+
+	// Cached endpoint and authentication information, guarded by endpointMu.
+	endpoint       map[string]interface{}
+	endpointMu     sync.RWMutex
+	endpointExpiry time.Time
+}
+
+func (c *Client) HandleOpenAITTS(w http.ResponseWriter, r *http.Request) {
+	// TODO: implement me; until then any call to this method panics.
+	panic("implement me")
+}
+
+// NewClient builds a Microsoft TTS client from cfg.TTS: default voice, rate, pitch and format, the text length limit, and an HTTP client whose timeout is RequestTimeout seconds.
+func NewClient(cfg *config.Config) *Client {
+	client := &Client{
+		defaultVoice:  cfg.TTS.DefaultVoice,
+		defaultRate:   cfg.TTS.DefaultRate,
+		defaultPitch:  cfg.TTS.DefaultPitch,
+		defaultFormat: cfg.TTS.DefaultFormat,
+		maxTextLength: cfg.TTS.MaxTextLength,
+		httpClient: &http.Client{
+			Timeout: time.Duration(cfg.TTS.RequestTimeout) * time.Second,
+		},
+		voicesCacheExpiry: time.Time{}, // zero time: the voice cache starts empty
+		endpointExpiry:    time.Time{}, // zero time: no endpoint has been fetched yet
+	}
+
+	return client
+}
+
+// getEndpoint returns the cached auth endpoint while it is still valid, otherwise fetches a new one via utils.GetEndpoint and caches it for 45 minutes. NOTE(review): ctx is accepted but not used by the fetch.
+func (c *Client) getEndpoint(ctx context.Context) (map[string]interface{}, error) {
+	c.endpointMu.RLock()
+	if !c.endpointExpiry.IsZero() && time.Now().Before(c.endpointExpiry) && c.endpoint != nil {
+		endpoint := c.endpoint
+		c.endpointMu.RUnlock()
+		return endpoint, nil
+	}
+	c.endpointMu.RUnlock()
+
+	// Fetch fresh endpoint information; no lock is held here, so concurrent callers may each fetch.
+	endpoint, err := utils.GetEndpoint()
+	if err != nil {
+		return nil, err
+	}
+
+	// Update the cache.
+	c.endpointMu.Lock()
+	c.endpoint = endpoint
+	c.endpointExpiry = time.Now().Add(45 * time.Minute) // tokens usually last one hour, so refresh early
+	c.endpointMu.Unlock()
+
+	return endpoint, nil
+}
+
+// ListVoices returns the available voices, served from a one-hour cache when possible; a non-empty locale keeps only voices whose Locale starts with it.
+func (c *Client) ListVoices(ctx context.Context, locale string) ([]models.Voice, error) {
+	// Serve from the cache while it is populated and not expired.
+	c.voicesCacheMu.RLock()
+	if !c.voicesCacheExpiry.IsZero() && time.Now().Before(c.voicesCacheExpiry) && len(c.voicesCache) > 0 {
+		voices := c.voicesCache
+		c.voicesCacheMu.RUnlock()
+
+		// Filter by locale prefix when one was given.
+		if locale != "" {
+			var filtered []models.Voice
+			for _, voice := range voices {
+				if strings.HasPrefix(voice.Locale, locale) {
+					filtered = append(filtered, voice)
+				}
+			}
+			return filtered, nil
+		}
+		return voices, nil
+	}
+	c.voicesCacheMu.RUnlock()
+
+	// The cache is empty or expired: fetch the list from the API.
+	endpoint, err := c.getEndpoint(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	url := fmt.Sprintf(voicesEndpoint, endpoint["r"])
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	// Authenticate with the endpoint token. NOTE(review): this unchecked type assertion panics if "t" is missing or not a string.
+	req.Header.Set("Authorization", endpoint["t"].(string))
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(resp.Body)
+		return nil, fmt.Errorf("API error: %s, status: %d", string(body), resp.StatusCode)
+	}
+
+	var msVoices []MicrosoftVoice
+	if err := json.NewDecoder(resp.Body).Decode(&msVoices); err != nil {
+		return nil, err
+	}
+
+	// Convert to the provider-independent model.
+	voices := make([]models.Voice, len(msVoices))
+	for i, v := range msVoices {
+		voices[i] = models.Voice{
+			Name:            v.Name,
+			DisplayName:     v.DisplayName,
+			LocalName:       v.LocalName,
+			ShortName:       v.ShortName,
+			Gender:          v.Gender,
+			Locale:          v.Locale,
+			LocaleName:      v.LocaleName,
+			StyleList:       v.StyleList,
+			SampleRateHertz: v.SampleRateHertz, // already a string, copied without conversion
+		}
+	}
+
+	// Update the cache.
+	c.voicesCacheMu.Lock()
+	c.voicesCache = voices
+	c.voicesCacheExpiry = time.Now().Add(1 * time.Hour) // cache for one hour
+	c.voicesCacheMu.Unlock()
+
+	// Filter by locale prefix when one was given.
+	if locale != "" {
+		var filtered []models.Voice
+		for _, voice := range voices {
+			if strings.HasPrefix(voice.Locale, locale) {
+				filtered = append(filtered, voice)
+			}
+		}
+		return filtered, nil
+	}
+
+	return voices, nil
+}
+
+// SynthesizeSpeech converts text to speech and returns the whole audio body in memory; ContentType is always "audio/mpeg" and CacheHit is always false.
+func (c *Client) SynthesizeSpeech(ctx context.Context, req models.TTSRequest) (*models.TTSResponse, error) {
+	resp, err := c.createTTSRequest(ctx, req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	// Read the complete audio payload.
+	audio, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	return &models.TTSResponse{
+		AudioContent: audio,
+		ContentType:  "audio/mpeg",
+		CacheHit:     false,
+	}, nil
+}
+
+// createTTSRequest validates req, builds the SSML body, sends it to the TTS endpoint and returns the HTTP response on status 200; the caller must close the response body.
+func (c *Client) createTTSRequest(ctx context.Context, req models.TTSRequest) (*http.Response, error) {
+	// Validate the parameters.
+	if req.Text == "" {
+		return nil, errors.New("文本不能为空")
+	}
+
+	if len(req.Text) > c.maxTextLength { // NOTE(review): len counts bytes, not characters; confirm that is the intended limit
+		return nil, fmt.Errorf("文本长度超过限制 (%d > %d)", len(req.Text), c.maxTextLength)
+	}
+
+	// Fall back to the configured defaults for empty parameters.
+	voice := req.Voice
+	if voice == "" {
+		voice = c.defaultVoice
+	}
+
+	rate := req.Rate
+	if rate == "" {
+		rate = c.defaultRate
+	}
+
+	pitch := req.Pitch
+	if pitch == "" {
+		pitch = c.defaultPitch
+	}
+
+	// Derive the locale from the first two dash-separated parts of the voice name.
+	locale := "zh-CN" // default when the voice name has fewer than two parts
+	parts := strings.Split(voice, "-")
+	if len(parts) >= 2 {
+		locale = parts[0] + "-" + parts[1]
+	}
+
+	// HTML-escape the text so it cannot break the XML of the SSML document.
+
+	escapedText := html.EscapeString(req.Text)
+
+	// Build the SSML document.
+	ssml := fmt.Sprintf(ssmlTemplate, locale, voice, rate, pitch, escapedText)
+
+	// Get the endpoint information (cached by getEndpoint).
+	endpoint, err := c.getEndpoint(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	// Prepare the request.
+	url := fmt.Sprintf(ttsEndpoint, endpoint["r"])
+	reqBody := bytes.NewBufferString(ssml)
+
+	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, reqBody)
+	if err != nil {
+		return nil, err
+	}
+
+	httpReq.Header.Set("Authorization", endpoint["t"].(string)) // NOTE(review): unchecked type assertion; panics if "t" is missing or not a string
+	httpReq.Header.Set("Content-Type", "application/ssml+xml")
+	httpReq.Header.Set("X-Microsoft-OutputFormat", c.defaultFormat)
+	httpReq.Header.Set("User-Agent", userAgent)
+
+	// Send the request.
+	resp, err := c.httpClient.Do(httpReq)
+
+	if err != nil {
+		return nil, err
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		// Read the body so the error message carries the API's explanation, then close it.
+		body, _ := io.ReadAll(resp.Body)
+		resp.Body.Close()
+		log.Printf("TTS API错误: %s, 状态码: %d", string(body), resp.StatusCode)
+		return nil, fmt.Errorf("TTS API错误: %s, 状态码: %d", string(body), resp.StatusCode)
+	}
+
+	return resp, nil
+}
--- a/internal/tts/microsoft/models.go
+++ b/internal/tts/microsoft/models.go
@@ -0,0 +1,45 @@
+package microsoft
+
+// MicrosoftVoice is one voice entry as decoded from the Microsoft TTS voice-list JSON.
+type MicrosoftVoice struct {
+	Name            string   `json:"Name"`
+	DisplayName     string   `json:"DisplayName"`
+	LocalName       string   `json:"LocalName"`
+	ShortName       string   `json:"ShortName"`
+	Gender          string   `json:"Gender"`
+	Locale          string   `json:"Locale"`
+	LocaleName      string   `json:"LocaleName"`
+	StyleList       []string `json:"StyleList,omitempty"`
+	SampleRateHertz string   `json:"SampleRateHertz"`
+	VoiceType       string   `json:"VoiceType"`
+	Status          string   `json:"Status"`
+}
+
+// SSMLRequest holds the fields of an SSML request sent to the Microsoft TTS service.
+type SSMLRequest struct {
+	XMLHeader string
+	Voice     string
+	Language  string
+	Rate      string
+	Pitch     string
+	Text      string
+}
+
+// FormatContentTypeMap maps output format names to the MIME type of the resulting audio.
+var FormatContentTypeMap = map[string]string{
+	"raw-16khz-16bit-mono-pcm":         "audio/pcm",
+	"raw-8khz-8bit-mono-mulaw":         "audio/basic",
+	"riff-8khz-8bit-mono-alaw":         "audio/alaw",
+	"riff-8khz-8bit-mono-mulaw":        "audio/mulaw",
+	"riff-16khz-16bit-mono-pcm":        "audio/wav",
+	"audio-16khz-128kbitrate-mono-mp3": "audio/mp3",
+	"audio-16khz-64kbitrate-mono-mp3":  "audio/mp3",
+	"audio-16khz-32kbitrate-mono-mp3":  "audio/mp3",
+	"raw-24khz-16bit-mono-pcm":         "audio/pcm",
+	"riff-24khz-16bit-mono-pcm":        "audio/wav",
+	"audio-24khz-160kbitrate-mono-mp3": "audio/mp3",
+	"audio-24khz-96kbitrate-mono-mp3":  "audio/mp3",
+	"audio-24khz-48kbitrate-mono-mp3":  "audio/mp3",
+	"ogg-24khz-16bit-mono-opus":        "audio/ogg",
+	"webm-24khz-16bit-mono-opus":       "audio/webm",
+}
--- a/internal/tts/service.go
+++ b/internal/tts/service.go
@@ -0,0 +1,15 @@
+package tts
+
+import (
+	"context"
+	"tts/internal/models"
+)
+
+// Service is the interface a TTS backend implements.
+type Service interface {
+	// ListVoices returns the available voices, filtered by locale.
+	ListVoices(ctx context.Context, locale string) ([]models.Voice, error)
+
+	// SynthesizeSpeech converts the text of req to speech.
+	SynthesizeSpeech(ctx context.Context, req models.TTSRequest) (*models.TTSResponse, error)
+}
--- a/internal/utils/utils.go
+++ b/internal/utils/utils.go
@@ -0,0 +1,87 @@
+package utils
+
+import (
+	"crypto/hmac"
+	"crypto/sha256"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/sirupsen/logrus"
+)
+
+var (
+	log    = logrus.New()   // package-level logrus logger
+	client = &http.Client{} // shared HTTP client; NOTE(review): no Timeout is set, so a stalled request can block indefinitely
+)
+
+const (
+	endpointURL          = "https://dev.microsofttranslator.com/apps/endpoint?api-version=1.0" // POSTed to by GetEndpoint and signed by Sign
+	userAgent            = "okhttp/4.5.0"
+	clientVersion        = "4.0.530a 5fe1dc6c"
+	userId               = "0f04d16a175c411e"
+	homeGeographicRegion = "zh-Hans-CN"
+	clientTraceId        = "aab069b9-70a7-4844-a734-96cd78d94be9"
+	voiceDecodeKey       = "oik6PdDdMnOXemTbwvMn9de/h9lFnfBaCWbGMMZqqoSaQaqUOqjVGm5NqsmjcBI1x+sS9ugjB55HEJWRiFXYFw==" // base64 text; Sign decodes it and uses the bytes as the HMAC-SHA256 key
+)
+
+// GetEndpoint POSTs a signed request to endpointURL and returns the decoded JSON object describing the speech endpoint; a non-200 status or an undecodable body is returned as an error.
+func GetEndpoint() (map[string]interface{}, error) {
+	signature := Sign(endpointURL)
+	headers := map[string]string{
+		"Accept-Language":        "zh-Hans",
+		"X-ClientVersion":        clientVersion,
+		"X-UserId":               userId,
+		"X-HomeGeographicRegion": homeGeographicRegion,
+		"X-ClientTraceId":        clientTraceId,
+		"X-MT-Signature":         signature,
+		"User-Agent":             userAgent,
+		"Content-Type":           "application/json; charset=utf-8",
+		"Content-Length":         "0",
+	} // Accept-Encoding is left to net/http: it still requests gzip, and only then decodes a gzip reply transparently
+	req, err := http.NewRequest(http.MethodPost, endpointURL, nil)
+	if err != nil {
+		return nil, err
+	}
+	for k, v := range headers {
+		req.Header.Set(k, v)
+	}
+
+	resp, err := client.Do(req)
+	if err != nil {
+		log.Error("failed to do request: ", err)
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK { // an error reply must not be cached by callers as if it were a valid endpoint
+		return nil, fmt.Errorf("endpoint request returned status %d", resp.StatusCode)
+	}
+
+	var result map[string]interface{}
+	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+		return nil, err
+	}
+
+	return result, nil
+}
+
+// Sign builds the signature string "MSTranslatorAndroidApp::<base64 HMAC-SHA256>::<date>::<uuid>" for urlStr, keyed with the decoded voiceDecodeKey.
+func Sign(urlStr string) string {
+	u := strings.Split(urlStr, "://")[1] // drops the scheme; NOTE(review): panics if urlStr contains no "://"
+	encodedUrl := url.QueryEscape(u)
+	uuidStr := strings.ReplaceAll(uuid.New().String(), "-", "")
+	formattedDate := strings.ToLower(time.Now().UTC().Format("Mon, 02 Jan 2006 15:04:05")) + "gmt"
+	bytesToSign := fmt.Sprintf("MSTranslatorAndroidApp%s%s%s", encodedUrl, formattedDate, uuidStr)
+	bytesToSign = strings.ToLower(bytesToSign)
+	decode, _ := base64.StdEncoding.DecodeString(voiceDecodeKey) // decode error is ignored; the key is a compile-time constant
+	hash := hmac.New(sha256.New, decode)
+	hash.Write([]byte(bytesToSign))
+	secretKey := hash.Sum(nil)
+	signBase64 := base64.StdEncoding.EncodeToString(secretKey)
+	return fmt.Sprintf("MSTranslatorAndroidApp::%s::%s::%s", signBase64, formattedDate, uuidStr)
+}
--- a/routes/routes.go
+++ b/routes/routes.go
@@ -1,23 +0,0 @@
-package routes
-
-import (
-	"tts/handlers"
-
-	"github.com/gin-gonic/gin"
-)
-
-func SetupRouter() *gin.Engine {
-	router := gin.Default()
-
-	// 加载模板文件
-	router.LoadHTMLGlob("templates/*")
-
-	router.GET("/voices", handlers.GetVoiceList)
-	router.POST("/tts", handlers.SynthesizeVoicePost)
-	router.GET("/tts", handlers.SynthesizeVoice)
-	router.GET("/v1/audio/speech", handlers.SynthesizeVoiceOpenAI)
-	router.GET("/", handlers.Index)
-	router.GET("/doc", handlers.ApiDoc)
-
-	return router
-}
--- a/templates/api-doc.html
+++ b/templates/api-doc.html
@@ -1,37 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <title>TTS</title>
-</head>
-<body>
-<h1> 支持接口 </h1>
-<h2>语音合成</h2>
-<div>
-    <strong>/tts</strong> | GET / POST(json)
-    <a target="_blank" href="/tts?t=岂曰无衣？与子同袍。王于兴师，修我戈矛，与子同仇！岂曰无衣？与子同泽。王于兴师，修我矛戟，与子偕作！岂曰无衣？与子同裳。王于兴师，修我甲兵，与子偕行!&v=zh-CN-XiaoxiaoMultilingualNeural&r=0&p=0&o=audio-24khz-48kbitrate-mono-mp3">try</a>
-</div>
-
-
-<pre>
-参数列表：
-1. t: 文本内容 (必填)
-2. v: 语音名称 (可选), 默认为 zh-CN-XiaoxiaoMultilingualNeural
-3. r: 语速 (可选), 默认为 0
-4. p: 语调 (可选), 默认为 0
-5. o: 输出格式 (可选), 默认为audio-24khz-48kbitrate-mono-mp3
-</pre>
-
-
-<h2>声音列表</h2>
-
-<div>
-    <strong>/voices</strong> | GET <a target="_blank" href="/voices?l=zh">try</a>
-</div>
-<pre>
-参数列表：
-1. l: 语言区域 (可选), 使用 contains 匹配,如 l=zh
-2. d: 显示详细信息 (可选) , 默认为 false, 如需显示详细信息, 请添加参数d , 如 /voices?d
-</pre>
-</body>
-</html>
--- a/templates/index.html
+++ b/templates/index.html
@@ -1,121 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>TTS Demo</title>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
-    <script src="https://cdn.tailwindcss.com"></script>
-    <style>
-        .top-right {
-            position: absolute;
-            top: 20px;
-            right: 20px;
-        }
-    </style>
-</head>
-<body class="bg-gradient-to-r from-blue-100 to-purple-100 min-h-screen flex items-center justify-center p-4">
-<div class="top-right">
-    <a href="/doc" class="hover:underline p-2 rounded">Documentation</a>
-</div>
-<div class="bg-white p-8 rounded-xl shadow-lg w-full max-w-4xl">
-    <h1 class="text-4xl font-bold mb-8 text-center text-gray-800">语音合成演示</h1>
-
-
-    <div id="ttsForm" class="space-y-6">
-        <textarea id="textInput" rows="6" class="w-full p-4 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 text-gray-700 text-lg resize-none" placeholder="请输入要合成的文本">欢迎使用我们的语音合成演示系统。这项技术能够将文字转换成自然流畅的语音。您可以尝试调整语速和语调，体验不同的合成效果。我们提供多种语言和声音选项，满足您的各种需求。无论是阅读文章、语言学习，还是辅助视障人士，语音合成技术都能发挥重要作用。希望这个演示能让您感受到科技的魅力。祝您使用愉快！</textarea>
-
-        <div class="grid grid-cols-2 gap-4">
-            <div>
-                <label for="localeSelect" class="block text-sm font-medium text-gray-700 mb-1">语言</label>
-                <select id="localeSelect" class="w-full p-3 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 text-gray-700">
-                    <option value="zh-CN">中文 (中国)</option>
-                    <option value="en-US">English (US)</option>
-                    <option value="ja-JP">日本語 (日本)</option>
-                </select>
-            </div>
-            <div>
-                <label for="voiceSelect" class="block text-sm font-medium text-gray-700 mb-1">声音</label>
-                <select id="voiceSelect" class="w-full p-3 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 text-gray-700"></select>
-            </div>
-            <div>
-                <label for="styleSelect" class="block text-sm font-medium text-gray-700 mb-1">风格</label>
-                <select id="styleSelect" class="w-full p-3 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 text-gray-700"></select>
-            </div>
-        </div>
-
-        <div class="flex space-x-4">
-            <div class="w-1/2 space-y-2">
-                <label for="rateInput" class="block text-sm font-medium text-gray-700">语速</label>
-                <input type="range" id="rateInput" min="-100" max="100" value="0" class="w-full h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer">
-            </div>
-
-            <div class="w-1/2 space-y-2">
-                <label for="pitchInput" class="block text-sm font-medium text-gray-700">语调</label>
-                <input type="range" id="pitchInput" min="-100" max="100" value="0" class="w-full h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer">
-            </div>
-        </div>
-
-        <button id="synthesizeButton" class="w-full bg-gradient-to-r from-blue-500 to-purple-600 text-white py-3 px-6 rounded-lg hover:from-blue-600 hover:to-purple-700 focus:outline-none focus:ring-2 focus:ring-purple-500 focus:ring-opacity-50 transition duration-300 text-lg font-semibold shadow-md">合成并播放</button>
-    </div>
-
-    <audio id="audioPlayer" controls class="w-full mt-6 hidden"></audio>
-</div>
-
-<script>
-    $(document).ready(function() {
-        let globalVoices = []
-        function updateVoices(locale) {
-            $('#voiceSelect').empty();
-            $.get('/voices?d&l=' + locale, function(voices) {
-                globalVoices = voices.voices
-                globalVoices.forEach(function(voice) {
-                    $('#voiceSelect').append($('<option>', {
-                        value: voice.ShortName,
-                        text: voice.LocalName + ' (' + voice.ShortName + ')'
-                    }));
-                });
-                updateStyles($('#voiceSelect').val());
-            });
-        }
-
-        function updateStyles(voice) {
-            const currentVoice = globalVoices.filter(v => v.ShortName === voice)[0]
-            if (currentVoice) {
-                $('#styleSelect').empty()
-                currentVoice?.StyleList?.forEach(function(style) {
-                    $('#styleSelect').append($('<option>', {
-                        value: style,
-                        text: style
-                    }));
-                });
-            }
-        }
-
-        updateVoices($('#localeSelect').val());
-
-        $('#localeSelect').change(function() {
-            updateVoices($(this).val());
-        });
-
-        $('#voiceSelect').change(function() {
-            updateStyles($(this).val());
-        });
-
-        $('#synthesizeButton').click(function() {
-            var text = $('#textInput').val();
-            var voice = $('#voiceSelect').val();
-            var rate = $('#rateInput').val();
-            var pitch = $('#pitchInput').val();
-            var locale = $('#localeSelect').val();
-            var style = $('#styleSelect').val();
-
-            var url = `/tts?t=${encodeURIComponent(text)}&v=${encodeURIComponent(voice)}&r=${rate}&p=${pitch}&l=${locale}&s=${style}`;
-
-            $('#audioPlayer').attr('src', url).removeClass('hidden')[0].play();
-        });
-    });
-</script>
-</body>
-</html>
-
--- a/utils/utils.go
+++ b/utils/utils.go
@@ -1,218 +0,0 @@
-package utils
-
-import (
-	"bytes"
-	"crypto/hmac"
-	"crypto/sha256"
-	"encoding/base64"
-	"encoding/json"
-	"fmt"
-	"github.com/google/uuid"
-	"github.com/sirupsen/logrus"
-	"html"
-	"io"
-	"net/http"
-	"net/url"
-	"strings"
-	"time"
-)
-
-var (
-	log            = logrus.New()
-	client         = &http.Client{}
-	voiceListCache []interface{}
-	cacheDuration  = 1 * time.Hour // 缓存持续时间
-)
-
-func init() {
-	ticker := time.NewTicker(cacheDuration)
-	go func() {
-		for range ticker.C {
-			voiceListCache = nil
-		}
-	}()
-}
-
-const (
-	endpointURL          = "https://dev.microsofttranslator.com/apps/endpoint?api-version=1.0"
-	voicesListURL        = "https://eastus.api.speech.microsoft.com/cognitiveservices/voices/list"
-	userAgent            = "okhttp/4.5.0"
-	clientVersion        = "4.0.530a 5fe1dc6c"
-	userId               = "0f04d16a175c411e"
-	homeGeographicRegion = "zh-Hans-CN"
-	clientTraceId        = "aab069b9-70a7-4844-a734-96cd78d94be9"
-	voiceDecodeKey       = "oik6PdDdMnOXemTbwvMn9de/h9lFnfBaCWbGMMZqqoSaQaqUOqjVGm5NqsmjcBI1x+sS9ugjB55HEJWRiFXYFw=="
-	defaultVoiceName     = "zh-CN-XiaoxiaoMultilingualNeural"
-	defaultRate          = "0"
-	defaultPitch         = "0"
-	defaultOutputFormat  = "audio-24khz-48kbitrate-mono-mp3"
-	defaultStyle         = "general"
-)
-
-// GetEndpoint 获取语音合成服务的端点信息
-func GetEndpoint() (map[string]interface{}, error) {
-	signature := Sign(endpointURL)
-	headers := map[string]string{
-		"Accept-Language":        "zh-Hans",
-		"X-ClientVersion":        clientVersion,
-		"X-UserId":               userId,
-		"X-HomeGeographicRegion": homeGeographicRegion,
-		"X-ClientTraceId":        clientTraceId,
-		"X-MT-Signature":         signature,
-		"User-Agent":             userAgent,
-		"Content-Type":           "application/json; charset=utf-8",
-		"Content-Length":         "0",
-		"Accept-Encoding":        "gzip",
-	}
-	req, err := http.NewRequest("POST", endpointURL, nil)
-	if err != nil {
-		return nil, err
-	}
-
-	for k, v := range headers {
-		req.Header.Set(k, v)
-	}
-
-	resp, err := client.Do(req)
-	if err != nil {
-		log.Error("failed to do request: ", err)
-		return nil, err
-	}
-	defer resp.Body.Close()
-
-	var result map[string]interface{}
-	err = json.NewDecoder(resp.Body).Decode(&result)
-	if err != nil {
-		return nil, err
-	}
-
-	return result, nil
-}
-
-// Sign 生成签名
-func Sign(urlStr string) string {
-	u := strings.Split(urlStr, "://")[1]
-	encodedUrl := url.QueryEscape(u)
-	uuidStr := strings.ReplaceAll(uuid.New().String(), "-", "")
-	formattedDate := strings.ToLower(time.Now().UTC().Format("Mon, 02 Jan 2006 15:04:05")) + "gmt"
-	bytesToSign := fmt.Sprintf("MSTranslatorAndroidApp%s%s%s", encodedUrl, formattedDate, uuidStr)
-	bytesToSign = strings.ToLower(bytesToSign)
-	decode, _ := base64.StdEncoding.DecodeString(voiceDecodeKey)
-	hash := hmac.New(sha256.New, decode)
-	hash.Write([]byte(bytesToSign))
-	secretKey := hash.Sum(nil)
-	signBase64 := base64.StdEncoding.EncodeToString(secretKey)
-	return fmt.Sprintf("MSTranslatorAndroidApp::%s::%s::%s", signBase64, formattedDate, uuidStr)
-}
-
-// GetVoice 获取语音合成结果
-func GetVoice(text, voiceName, rate, pitch, outputFormat, style string) ([]byte, error) {
-	if voiceName == "" {
-		voiceName = defaultVoiceName
-	}
-	if rate == "" {
-		rate = defaultRate
-	}
-	if pitch == "" {
-		pitch = defaultPitch
-	}
-	if outputFormat == "" {
-		outputFormat = defaultOutputFormat
-	}
-
-	if style == "" {
-		style = defaultStyle
-	}
-
-	endpoint, err := GetEndpoint()
-	if err != nil {
-		return nil, err
-	}
-
-	u := fmt.Sprintf("https://%s.tts.speech.microsoft.com/cognitiveservices/v1", endpoint["r"])
-	headers := map[string]string{
-		"Authorization":            endpoint["t"].(string),
-		"Content-Type":             "application/ssml+xml",
-		"X-Microsoft-OutputFormat": outputFormat,
-	}
-
-	ssml := GetSsml(text, voiceName, rate, pitch, style)
-
-	req, err := http.NewRequest("POST", u, bytes.NewBufferString(ssml))
-	if err != nil {
-		return nil, err
-	}
-
-	for k, v := range headers {
-		req.Header.Set(k, v)
-	}
-
-	resp, err := client.Do(req)
-	if err != nil {
-		log.Error("failed to do request: ", err)
-		return nil, err
-	}
-	defer resp.Body.Close()
-
-	return io.ReadAll(resp.Body)
-}
-
-// GetSsml 生成 SSML 格式的文本
-func GetSsml(text, voiceName, rate, pitch, style string) string {
-	// 对文本进行转义
-	text = html.EscapeString(text)
-	return fmt.Sprintf(`
-   <speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" version="1.0" xml:lang="zh-CN">
-     <voice name="%s">
-       <mstts:express-as style="%s" styledegree="1.0" role="default">
-         <prosody rate="%s%%" pitch="%s%%" volume="medium">
-			%s
-		</prosody>
-       </mstts:express-as>
-     </voice>
-   </speak>
- `, voiceName, style, rate, pitch, text)
-}
-
-// VoiceList 获取可用的语音列表
-func VoiceList() ([]interface{}, error) {
-	// 如果缓存中有值，直接返回缓存的结果
-	if voiceListCache != nil {
-		return voiceListCache, nil
-	}
-
-	headers := map[string]string{
-		"User-Agent":     "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.26",
-		"X-Ms-Useragent": "SpeechStudio/2021.05.001",
-		"Content-Type":   "application/json",
-		"Origin":         "https://azure.microsoft.com",
-		"Referer":        "https://azure.microsoft.com",
-	}
-
-	req, err := http.NewRequest("GET", voicesListURL, nil)
-	if err != nil {
-		return nil, err
-	}
-
-	for k, v := range headers {
-		req.Header.Set(k, v)
-	}
-
-	resp, err := client.Do(req)
-	if err != nil {
-		log.Error("failed to do request: ", err)
-		return nil, err
-	}
-	defer resp.Body.Close()
-
-	var result []interface{}
-	err = json.NewDecoder(resp.Body).Decode(&result)
-	if err != nil {
-		return nil, err
-	}
-
-	// 将结果存储到缓存中
-	voiceListCache = result
-
-	return result, nil
-}
--- a/web/static/css/style.css
+++ b/web/static/css/style.css
@@ -0,0 +1,271 @@
+/* 基本样式重置 */
+* {
+    box-sizing: border-box;
+    margin: 0;
+    padding: 0;
+}
+
+body {
+    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
+    line-height: 1.6;
+    color: #333;
+    background-color: #f5f7fa;
+    padding: 20px;
+}
+
+/* 容器 */
+.container {
+    max-width: 1000px;
+    margin: 0 auto;
+}
+
+/* 页眉 */
+header {
+    text-align: center;
+    margin-bottom: 30px;
+    padding: 20px;
+}
+
+header h1 {
+    font-size: 2.5rem;
+    margin-bottom: 10px;
+    color: #2c3e50;
+}
+
+header p {
+    font-size: 1.2rem;
+    color: #7f8c8d;
+    margin-bottom: 20px;
+}
+
+/* 导航 */
+nav {
+    display: flex;
+    justify-content: center;
+    margin-top: 20px;
+}
+
+nav a {
+    text-decoration: none;
+    color: #3498db;
+    margin: 0 15px;
+    padding: 5px 10px;
+    border-radius: 5px;
+    transition: all 0.3s ease;
+}
+
+nav a:hover {
+    background-color: #3498db;
+    color: #fff;
+}
+
+nav a.active {
+    background-color: #3498db;
+    color: #fff;
+}
+
+/* 卡片 */
+.card {
+    background-color: #fff;
+    border-radius: 10px;
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+    padding: 25px;
+    margin-bottom: 25px;
+}
+
+/* 标题 */
+h2 {
+    color: #2c3e50;
+    margin-bottom: 20px;
+    border-bottom: 1px solid #ecf0f1;
+    padding-bottom: 10px;
+}
+
+h3 {
+    color: #3498db;
+    margin: 20px 0 10px;
+}
+
+/* 输入区域 */
+.input-group {
+    position: relative;
+    margin-bottom: 20px;
+}
+
+textarea {
+    width: 100%;
+    padding: 15px;
+    border: 1px solid #ddd;
+    border-radius: 5px;
+    resize: none;
+    font-size: 1rem;
+    font-family: inherit;
+}
+
+textarea:focus {
+    outline: none;
+    border-color: #3498db;
+    box-shadow: 0 0 0 2px rgba(52, 152, 219, 0.2);
+}
+
+.char-counter {
+    position: absolute;
+    bottom: 10px;
+    right: 10px;
+    font-size: 0.8rem;
+    color: #7f8c8d;
+}
+
+/* 设置区域 */
+.settings {
+    display: grid;
+    grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
+    gap: 20px;
+    margin-bottom: 20px;
+}
+
+.setting-group {
+    display: flex;
+    flex-direction: column;
+}
+
+label {
+    margin-bottom: 5px;
+    font-weight: bold;
+    color: #2c3e50;
+}
+
+select, input[type="range"] {
+    padding: 8px;
+    border: 1px solid #ddd;
+    border-radius: 5px;
+    background-color: #fff;
+}
+
+select:focus {
+    outline: none;
+    border-color: #3498db;
+}
+
+/* 按钮 */
+.actions {
+    display: flex;
+    justify-content: center;
+    margin-top: 20px;
+}
+
+button {
+    padding: 10px 20px;
+    border: none;
+    border-radius: 5px;
+    cursor: pointer;
+    font-size: 1rem;
+    transition: all 0.3s ease;
+}
+
+.primary-button {
+    background-color: #3498db;
+    color: #fff;
+}
+
+.primary-button:hover {
+    background-color: #2980b9;
+}
+
+.secondary-button {
+    background-color: #ecf0f1;
+    color: #2c3e50;
+    margin: 0 5px;
+}
+
+.secondary-button:hover {
+    background-color: #bdc3c7;
+}
+
+/* 音频播放器 */
+.audio-player {
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+}
+
+audio {
+    width: 100%;
+    margin-bottom: 15px;
+}
+
+.audio-controls {
+    display: flex;
+    justify-content: center;
+}
+
+/* 表格 */
+table {
+    width: 100%;
+    border-collapse: collapse;
+    margin: 20px 0;
+}
+
+th, td {
+    padding: 12px 15px;
+    text-align: left;
+    border-bottom: 1px solid #ddd;
+}
+
+th {
+    background-color: #f8f9fa;
+    font-weight: bold;
+}
+
+/* 代码 */
+code, pre {
+    font-family: SFMono-Regular, Menlo, Monaco, Consolas, monospace;
+    background-color: #f8f9fa;
+    border-radius: 3px;
+    padding: 2px 5px;
+    font-size: 0.9rem;
+}
+
+pre {
+    padding: 15px;
+    overflow-x: auto;
+    margin: 15px 0;
+}
+
+pre code {
+    padding: 0;
+    background-color: transparent;
+}
+
+/* 页脚 */
+footer {
+    text-align: center;
+    margin-top: 40px;
+    padding: 20px;
+    color: #7f8c8d;
+    font-size: 0.9rem;
+}
+
+footer a {
+    color: #3498db;
+    text-decoration: none;
+}
+
+footer a:hover {
+    text-decoration: underline;
+}
+
+/* 响应式调整 */
+@media (max-width: 768px) {
+    .settings {
+        grid-template-columns: 1fr;
+    }
+    
+    header h1 {
+        font-size: 2rem;
+    }
+    
+    .card {
+        padding: 15px;
+    }
+}
--- a/web/static/js/app.js
+++ b/web/static/js/app.js
@@ -0,0 +1,176 @@
+document.addEventListener('DOMContentLoaded', function() {
+    // 获取DOM元素
+    const textInput = document.getElementById('text');
+    const voiceSelect = document.getElementById('voice');
+    const rateInput = document.getElementById('rate');
+    const rateValue = document.getElementById('rateValue');
+    const pitchInput = document.getElementById('pitch');
+    const pitchValue = document.getElementById('pitchValue');
+    const speakButton = document.getElementById('speak');
+    const downloadButton = document.getElementById('download');
+    const copyLinkButton = document.getElementById('copyLink');
+    const audioPlayer = document.getElementById('audioPlayer');
+    const resultSection = document.getElementById('resultSection');
+    const charCount = document.getElementById('charCount');
+    
+    // 保存最后一个音频URL
+    let lastAudioUrl = '';
+    
+    // 初始化
+    initVoicesList();
+    initEventListeners();
+    
+    // 更新字符计数
+    textInput.addEventListener('input', function() {
+        charCount.textContent = this.value.length;
+    });
+    
+    // 更新语速值显示
+    rateInput.addEventListener('input', function() {
+        const value = this.value;
+        rateValue.textContent = value + '%';
+    });
+    
+    // 更新语调值显示
+    pitchInput.addEventListener('input', function() {
+        const value = this.value;
+        pitchValue.textContent = value + '%';
+    });
+    
+    // 获取可用语音列表
+    async function initVoicesList() {
+        try {
+            const response = await fetch(`${config.basePath}/voices`);
+            if (!response.ok) throw new Error('获取语音列表失败');
+            
+            const voices = await response.json();
+            
+            // 清空并重建选项
+            voiceSelect.innerHTML = '';
+            
+            // 按语言和名称分组
+            const voicesByLocale = {};
+            
+            voices.forEach(voice => {
+                if (!voicesByLocale[voice.locale]) {
+                    voicesByLocale[voice.locale] = [];
+                }
+                voicesByLocale[voice.locale].push(voice);
+            });
+            
+            // 创建选项组
+            for (const locale in voicesByLocale) {
+                const optgroup = document.createElement('optgroup');
+                optgroup.label = voicesByLocale[locale][0].locale_name;
+                
+                voicesByLocale[locale].forEach(voice => {
+                    const option = document.createElement('option');
+                    option.value = voice.short_name;
+                    option.textContent = `${voice.local_name || voice.display_name} (${voice.gender})`;
+                    
+                    // 如果是默认语音则选中
+                    if (voice.short_name === config.defaultVoice) {
+                        option.selected = true;
+                    }
+                    
+                    optgroup.appendChild(option);
+                });
+                
+                voiceSelect.appendChild(optgroup);
+            }
+        } catch (error) {
+            console.error('获取语音列表失败:', error);
+            voiceSelect.innerHTML = '<option value="">无法加载语音列表</option>';
+        }
+    }
+    
+    // 初始化事件监听器
+    function initEventListeners() {
+        // 转换按钮点击事件
+        speakButton.addEventListener('click', generateSpeech);
+        
+        // 下载按钮点击事件
+        downloadButton.addEventListener('click', function() {
+            if (lastAudioUrl) {
+                const a = document.createElement('a');
+                a.href = lastAudioUrl;
+                a.download = 'speech.mp3';
+                document.body.appendChild(a);
+                a.click();
+                document.body.removeChild(a);
+            }
+        });
+        
+        // 复制链接按钮点击事件
+        copyLinkButton.addEventListener('click', function() {
+            if (lastAudioUrl) {
+                navigator.clipboard.writeText(lastAudioUrl).then(() => {
+                    alert('链接已复制到剪贴板');
+                }).catch(err => {
+                    console.error('复制失败:', err);
+                    // 兼容处理
+                    const textArea = document.createElement('textarea');
+                    textArea.value = lastAudioUrl;
+                    document.body.appendChild(textArea);
+                    textArea.focus();
+                    textArea.select();
+                    
+                    try {
+                        document.execCommand('copy');
+                        alert('链接已复制到剪贴板');
+                    } catch (err) {
+                        console.error('复制失败:', err);
+                    }
+                    
+                    document.body.removeChild(textArea);
+                });
+            }
+        });
+    }
+    
+    // 生成语音
+    async function generateSpeech() {
+        const text = textInput.value.trim();
+        if (!text) {
+            alert('请输入要转换的文本');
+            return;
+        }
+        
+        const voice = voiceSelect.value;
+        const rate = rateInput.value;
+        const pitch = pitchInput.value;
+        
+        // 禁用按钮，显示加载状态
+        speakButton.disabled = true;
+        speakButton.textContent = '生成中...';
+        
+        try {
+            // 构建URL参数
+            const params = new URLSearchParams({
+                t: text,
+                v: voice,
+                r: rate,
+                p: pitch
+            });
+            
+            const url = `${config.basePath}/tts?${params.toString()}`;
+            
+            // 更新音频播放器
+            audioPlayer.src = url;
+            lastAudioUrl = url;
+            
+            // 显示结果区域
+            resultSection.style.display = 'block';
+            
+            // 播放音频
+            audioPlayer.play();
+        } catch (error) {
+            console.error('生成语音失败:', error);
+            alert('生成语音失败，请重试');
+        } finally {
+            // 恢复按钮状态
+            speakButton.disabled = false;
+            speakButton.textContent = '转换为语音';
+        }
+    }
+});
--- a/web/templates/api-doc.html
+++ b/web/templates/api-doc.html
@@ -0,0 +1,310 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>API文档 - TTS服务</title>
+    <link rel="stylesheet" href="{{.BasePath}}/static/css/style.css">
+    <meta name="description" content="TTS服务API文档">
+</head>
+<body>
+    <div class="container">
+        <header>
+            <h1>TTS服务 API文档</h1>
+            <p>快速、高质量的文本转语音API服务</p>
+            <nav>
+                <a href="{{.BasePath}}/">主页</a>
+                <a href="{{.BasePath}}/api-doc" class="active">API文档</a>
+            </nav>
+        </header>
+
+        <main>
+            <section class="card">
+                <h2>API概述</h2>
+                <p>TTS服务API提供了简单而强大的方式将文本转换为自然语音。我们支持多种语言和声音，并允许您调节语速、语调以适应不同场景需求。</p>
+                <p>基础URL: <code>{{.BasePath}}</code></p>
+                <p>所有API请求均使用HTTP协议，返回标准HTTP状态码表示请求结果。</p>
+            </section>
+
+            <section class="card">
+                <h2>文本转语音 API</h2>
+                <h3>端点</h3>
+                <code>GET {{.BasePath}}/tts</code>
+
+                <h3>参数</h3>
+                <table>
+                    <thead>
+                        <tr>
+                            <th>参数</th>
+                            <th>类型</th>
+                            <th>必选</th>
+                            <th>描述</th>
+                        </tr>
+                    </thead>
+                    <tbody>
+                        <tr>
+                            <td><code>t</code></td>
+                            <td>string</td>
+                            <td>是</td>
+                            <td>要转换的文本（需要进行URL编码）</td>
+                        </tr>
+                        <tr>
+                            <td><code>v</code></td>
+                            <td>string</td>
+                            <td>否</td>
+                            <td>语音名称，使用short_name格式，默认: {{.DefaultVoice}}。可通过/voices接口获取所有可用语音</td>
+                        </tr>
+                        <tr>
+                            <td><code>r</code></td>
+                            <td>string</td>
+                            <td>否</td>
+                            <td>语速调整，范围: -100%到100%，默认: {{.DefaultRate}}。正值加快语速，负值减慢语速</td>
+                        </tr>
+                        <tr>
+                            <td><code>p</code></td>
+                            <td>string</td>
+                            <td>否</td>
+                            <td>语调调整，范围: -100%到100%，默认: {{.DefaultPitch}}。正值提高语调，负值降低语调</td>
+                        </tr>
+                        <tr>
+                            <td><code>o</code></td>
+                            <td>string</td>
+                            <td>否</td>
+                            <td>输出音频格式，默认: {{.DefaultFormat}}。详见下方支持的格式列表</td>
+                        </tr>
+                        <tr>
+                            <td><code>s</code></td>
+                            <td>string</td>
+                            <td>否</td>
+                            <td>情感风格，可用值取决于所选语音的style_list属性。例如："cheerful"、"sad"等</td>
+                        </tr>
+                    </tbody>
+                </table>
+
+                <h3>示例请求</h3>
+                <pre><code>curl "{{.BasePath}}/tts?t=%E4%BD%A0%E5%A5%BD%EF%BC%8C%E4%B8%96%E7%95%8C&v=zh-CN-XiaoxiaoNeural&r=0%25&p=0%25"</code></pre>
+
+                <h3>另一个示例（带情感风格）</h3>
+                <pre><code>curl "{{.BasePath}}/tts?t=%E4%BB%8A%E5%A4%A9%E5%A4%A9%E6%B0%94%E7%9C%9F%E5%A5%BD&v=zh-CN-XiaoxiaoNeural&s=cheerful"</code></pre>
+
+                <h3>响应</h3>
+                <p>返回音频文件，内容类型取决于请求的输出格式。正常响应状态码为200。</p>
+
+                <h3>错误响应</h3>
+                <p>如果请求参数有误或服务出现问题，将返回对应的HTTP错误码和错误消息。</p>
+                <table>
+                    <thead>
+                        <tr>
+                            <th>状态码</th>
+                            <th>描述</th>
+                        </tr>
+                    </thead>
+                    <tbody>
+                        <tr>
+                            <td>400</td>
+                            <td>参数错误或缺失必要参数</td>
+                        </tr>
+                        <tr>
+                            <td>404</td>
+                            <td>请求的资源不存在</td>
+                        </tr>
+                        <tr>
+                            <td>500</td>
+                            <td>服务器内部错误</td>
+                        </tr>
+                    </tbody>
+                </table>
+            </section>
+
+            <section class="card">
+                <h2>获取可用语音 API</h2>
+                <h3>端点</h3>
+                <code>GET {{.BasePath}}/voices</code>
+
+                <h3>参数</h3>
+                <table>
+                    <thead>
+                        <tr>
+                            <th>参数</th>
+                            <th>类型</th>
+                            <th>必选</th>
+                            <th>描述</th>
+                        </tr>
+                    </thead>
+                    <tbody>
+                        <tr>
+                            <td><code>locale</code></td>
+                            <td>string</td>
+                            <td>否</td>
+                            <td>筛选特定语言的语音，例如：zh-CN（中文）、en-US（英文）</td>
+                        </tr>
+                        <tr>
+                            <td><code>gender</code></td>
+                            <td>string</td>
+                            <td>否</td>
+                            <td>筛选特定性别的语音，可选值：Male（男性）、Female（女性）</td>
+                        </tr>
+                    </tbody>
+                </table>
+
+                <h3>示例请求</h3>
+                <pre><code>curl "{{.BasePath}}/voices?locale=zh-CN&gender=Female"</code></pre>
+
+                <h3>响应</h3>
+                <p>返回JSON格式的可用语音列表：</p>
+                <pre><code>[
+  {
+    "name": "Microsoft Server Speech Text to Speech Voice (zh-CN, XiaoxiaoNeural)",
+    "display_name": "Xiaoxiao",
+    "local_name": "晓晓",
+    "short_name": "zh-CN-XiaoxiaoNeural",
+    "gender": "Female",
+    "locale": "zh-CN",
+    "locale_name": "中文(中国)",
+    "style_list": ["cheerful", "sad", "angry", "fearful", "disgruntled"]
+  },
+  ...
+]</code></pre>
+                <p>响应字段说明：</p>
+                <ul>
+                    <li><strong>name</strong>：语音的完整名称</li>
+                    <li><strong>display_name</strong>：显示用名称（拉丁字符）</li>
+                    <li><strong>local_name</strong>：本地化名称</li>
+                    <li><strong>short_name</strong>：简短名称（用于API调用的v参数）</li>
+                    <li><strong>gender</strong>：性别（Male或Female）</li>
+                    <li><strong>locale</strong>：语言代码</li>
+                    <li><strong>locale_name</strong>：语言本地化名称</li>
+                    <li><strong>style_list</strong>：支持的情感风格列表（如有）</li>
+                </ul>
+            </section>
+
+            <section class="card">
+                <h2>兼容OpenAI接口 API</h2>
+                <h3>语音合成</h3>
+                <code>POST {{.BasePath}}/v1/audio/speech</code>
+
+                <h3>请求体 (JSON)</h3>
+                <table>
+                    <thead>
+                        <tr>
+                            <th>参数</th>
+                            <th>类型</th>
+                            <th>必选</th>
+                            <th>描述</th>
+                        </tr>
+                    </thead>
+                    <tbody>
+                        <tr>
+                            <td><code>model</code></td>
+                            <td>string</td>
+                            <td>是</td>
+                            <td>当前仅支持值: "tts-1"</td>
+                        </tr>
+                        <tr>
+                            <td><code>input</code></td>
+                            <td>string</td>
+                            <td>是</td>
+                            <td>要转换的文本内容</td>
+                        </tr>
+                        <tr>
+                            <td><code>voice</code></td>
+                            <td>string</td>
+                            <td>是</td>
+                            <td>声音名称，使用Microsoft语音格式，例如：ja-JP-KeitaNeural、zh-CN-XiaoxiaoNeural</td>
+                        </tr>
+                        <tr>
+                            <td><code>speed</code></td>
+                            <td>number</td>
+                            <td>否</td>
+                            <td>语速调整，范围: 0.5到2.0，默认: 1.0</td>
+                        </tr>
+                    </tbody>
+                </table>
+
+                <h3>示例请求</h3>
+                <pre><code>curl -X POST "{{.BasePath}}/v1/audio/speech" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "tts-1",
+    "input": "你好，世界！",
+    "voice": "zh-CN-XiaoxiaoNeural"
+  }'</code></pre>
+
+                <h3>另一个示例（带速度调整）</h3>
+                <pre><code>curl -X POST "{{.BasePath}}/v1/audio/speech" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "tts-1",
+    "input": "こんにちは、世界！",
+    "voice": "ja-JP-NanamiNeural",
+    "speed": 1.2
+  }'</code></pre>
+
+                <h3>响应</h3>
+                <p>返回音频文件，内容类型取决于请求的输出格式。正常响应状态码为200。</p>
+
+                <h3>错误响应</h3>
+                <p>如果请求有误，将返回JSON格式的错误信息：</p>
+                <pre><code>{
+  "error": {
+    "message": "错误信息描述",
+    "type": "错误类型",
+    "code": "错误代码"
+  }
+}</code></pre>
+            </section>
+
+            <section class="card">
+                <h2>支持的输出格式</h2>
+                <table>
+                    <thead>
+                        <tr>
+                            <th>格式名称</th>
+                            <th>描述</th>
+                        </tr>
+                    </thead>
+                    <tbody>
+                        <tr>
+                            <td><code>audio-16khz-32kbitrate-mono-mp3</code></td>
+                            <td>MP3格式，16kHz, 32kbps</td>
+                        </tr>
+                        <tr>
+                            <td><code>audio-16khz-64kbitrate-mono-mp3</code></td>
+                            <td>MP3格式，16kHz, 64kbps</td>
+                        </tr>
+                        <tr>
+                            <td><code>audio-16khz-128kbitrate-mono-mp3</code></td>
+                            <td>MP3格式，16kHz, 128kbps</td>
+                        </tr>
+                        <tr>
+                            <td><code>audio-24khz-48kbitrate-mono-mp3</code></td>
+                            <td>MP3格式，24kHz, 48kbps</td>
+                        </tr>
+                        <tr>
+                            <td><code>audio-24khz-96kbitrate-mono-mp3</code></td>
+                            <td>MP3格式，24kHz, 96kbps</td>
+                        </tr>
+                        <tr>
+                            <td><code>audio-24khz-160kbitrate-mono-mp3</code></td>
+                            <td>MP3格式，24kHz, 160kbps</td>
+                        </tr>
+                        <tr>
+                            <td><code>riff-16khz-16bit-mono-pcm</code></td>
+                            <td>WAV格式，16kHz</td>
+                        </tr>
+                        <tr>
+                            <td><code>riff-24khz-16bit-mono-pcm</code></td>
+                            <td>WAV格式，24kHz</td>
+                        </tr>
+                    </tbody>
+                </table>
+            </section>
+        </main>
+
+        <footer>
+            <p>© 2025 TTS服务 | <a href="{{.BasePath}}/">返回主页</a></p>
+        </footer>
+
+    </div>
+</body>
+</html>
--- a/web/templates/index.html
+++ b/web/templates/index.html
@@ -0,0 +1,83 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>文本转语音 - TTS服务</title>
+    <link rel="stylesheet" href="{{.BasePath}}/static/css/style.css">
+    <meta name="description" content="基于Microsoft Azure语音服务的在线文本转语音工具">
+</head>
+<body>
+    <div class="container">
+        <header>
+            <h1>文本转语音 (TTS)</h1>
+            <p>将文本转换为自然流畅的语音</p>
+            <nav>
+                <a href="{{.BasePath}}/" class="active">主页</a>
+                <a href="{{.BasePath}}/api-doc">API文档</a>
+            </nav>
+        </header>
+
+        <main>
+            <section class="card">
+                <h2>输入文本</h2>
+                <div class="input-group">
+                    <textarea id="text" placeholder="输入要转换的文本..." rows="6" maxlength="5000"></textarea>
+                    <div class="char-counter"><span id="charCount">0</span>/5000</div>
+                </div>
+
+                <div class="settings">
+                    <div class="setting-group">
+                        <label for="voice">语音:</label>
+                        <select id="voice">
+                            <option value="loading">加载中...</option>
+                        </select>
+                    </div>
+
+                    <div class="setting-group">
+                        <label for="rate">语速:</label>
+                        <input type="range" id="rate" min="-50" max="50" value="0">
+                        <span id="rateValue">0%</span>
+                    </div>
+
+                    <div class="setting-group">
+                        <label for="pitch">语调:</label>
+                        <input type="range" id="pitch" min="-50" max="50" value="0">
+                        <span id="pitchValue">0%</span>
+                    </div>
+                </div>
+
+                <div class="actions">
+                    <button id="speak" class="primary-button">转换为语音</button>
+                </div>
+            </section>
+
+            <section class="card" id="resultSection" style="display:none;">
+                <h2>语音输出</h2>
+                <div class="audio-player">
+                    <audio id="audioPlayer" controls></audio>
+                    <div class="audio-controls">
+                        <button id="download" class="secondary-button">下载音频</button>
+                        <button id="copyLink" class="secondary-button">复制链接</button>
+                    </div>
+                </div>
+            </section>
+        </main>
+
+        <footer>
+            <p>© 2025 TTS服务 | <a href="{{.BasePath}}/api-doc">API文档</a></p>
+        </footer>
+    </div>
+
+    <script>
+        // 存储一些全局配置
+        const config = {
+            basePath: "{{.BasePath}}",
+            defaultVoice: "{{.DefaultVoice}}",
+            defaultRate: "{{.DefaultRate}}",
+            defaultPitch: "{{.DefaultPitch}}"
+        };
+    </script>
+    <script src="{{.BasePath}}/static/js/app.js"></script>
+</body>
+</html>
--- a/web/templates/worker.js
+++ b/web/templates/worker.js