From 1cd2ac1624996a33cc6ed07e874fb59d0c465ef2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E9=94=A6=E5=BC=BA?= <1061669148@qq.com>
Date: Sun, 9 Mar 2025 17:21:52 +0800
Subject: [PATCH] feat: add style selection for TTS and update related
 functionality

---
 internal/http/handlers/tts.go    |  2 ++
 internal/models/tts.go           |  1 +
 internal/tts/microsoft/client.go |  9 ++++--
 web/static/js/app.js             | 51 ++++++++++++++++++++++++++++++--
 web/templates/index.html         | 14 +++++++--
 5 files changed, 70 insertions(+), 7 deletions(-)
diff --git a/internal/http/handlers/tts.go b/internal/http/handlers/tts.go
index 8cf45b9..c51dcf9 100644
--- a/internal/http/handlers/tts.go
+++ b/internal/http/handlers/tts.go
@@ -153,6 +153,7 @@ func (h *TTSHandler) HandleTTS(w http.ResponseWriter, r *http.Request) {
 			Voice: q.Get("v"),
 			Rate:  q.Get("r"),
 			Pitch: q.Get("p"),
+			Style: q.Get("s"),
 		}
 	case http.MethodPost:
 		// 从POST JSON体获取
@@ -174,6 +175,7 @@ func (h *TTSHandler) HandleTTS(w http.ResponseWriter, r *http.Request) {
 				Voice: r.FormValue("voice"),
 				Rate:  r.FormValue("rate"),
 				Pitch: r.FormValue("pitch"),
+				Style: r.FormValue("style"),
 			}
 		}
 	default:
diff --git a/internal/models/tts.go b/internal/models/tts.go
index 67ee7a5..622e897 100644
--- a/internal/models/tts.go
+++ b/internal/models/tts.go
@@ -6,6 +6,7 @@ type TTSRequest struct {
 	Voice string `json:"voice"` // 语音ID
 	Rate  string `json:"rate"`  // 语速 (-100% 到 +100%)
 	Pitch string `json:"pitch"` // 语调 (-100% 到 +100%)
+	Style string `json:"style"` // 说话风格
 }
 
 // TTSResponse 表示一个语音合成响应
diff --git a/internal/tts/microsoft/client.go b/internal/tts/microsoft/client.go
index 8e869f7..c41fb2c 100644
--- a/internal/tts/microsoft/client.go
+++ b/internal/tts/microsoft/client.go
@@ -25,7 +25,7 @@ const (
 	ttsEndpoint    = "https://%s.tts.speech.microsoft.com/cognitiveservices/v1"
 	ssmlTemplate   = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts="http://www.w3.org/2001/mstts" xml:lang='%s'>
     <voice name='%s'>
-        <mstts:express-as style="general" styledegree="1.0" role="default">
+        <mstts:express-as style="%s" styledegree="1.0" role="default">
             <prosody rate='%s%%' pitch='%s%%' volume="medium">
                 %s
             </prosody>
@@ -227,6 +227,11 @@ func (c *Client) createTTSRequest(ctx context.Context, req models.TTSRequest) (*
 		voice = c.defaultVoice
 	}
 
+	style := html.EscapeString(req.Style) // escaped: value is interpolated into the SSML style="..." attribute
+	if style == "" {
+		style = "general"
+	}
+
 	rate := req.Rate
 	if rate == "" {
 		rate = c.defaultRate
@@ -249,7 +254,7 @@ func (c *Client) createTTSRequest(ctx context.Context, req models.TTSRequest) (*
 	escapedText := html.EscapeString(req.Text)
 
 	// 准备SSML内容
-	ssml := fmt.Sprintf(ssmlTemplate, locale, voice, rate, pitch, escapedText)
+	ssml := fmt.Sprintf(ssmlTemplate, locale, voice, style, rate, pitch, escapedText)
 
 	// 获取端点信息
 	endpoint, err := c.getEndpoint(ctx)
diff --git a/web/static/js/app.js b/web/static/js/app.js
index d84c5b4..835ba7b 100644
--- a/web/static/js/app.js
+++ b/web/static/js/app.js
@@ -2,6 +2,7 @@ document.addEventListener('DOMContentLoaded', function() {
     // 获取DOM元素
     const textInput = document.getElementById('text');
     const voiceSelect = document.getElementById('voice');
+    const styleSelect = document.getElementById('style');
     const rateInput = document.getElementById('rate');
     const rateValue = document.getElementById('rateValue');
     const pitchInput = document.getElementById('pitch');
@@ -15,6 +16,8 @@ document.addEventListener('DOMContentLoaded', function() {
     
     // 保存最后一个音频URL
     let lastAudioUrl = '';
+    // 存储语音数据
+    let voicesData = [];
     
     // 初始化
     initVoicesList();
@@ -37,13 +40,18 @@ document.addEventListener('DOMContentLoaded', function() {
         pitchValue.textContent = value + '%';
     });
     
+    // 语音选择变化时更新可用风格
+    voiceSelect.addEventListener('change', function() {
+        updateStyleOptions();
+    });
+
     // 获取可用语音列表
     async function initVoicesList() {
         try {
             const response = await fetch(`${config.basePath}/voices`);
             if (!response.ok) throw new Error('获取语音列表失败');
             
-            const voices = await response.json();
+            voicesData = await response.json();
             
             // 清空并重建选项
             voiceSelect.innerHTML = '';
@@ -51,7 +59,7 @@ document.addEventListener('DOMContentLoaded', function() {
             // 按语言和名称分组
             const voicesByLocale = {};
             
-            voices.forEach(voice => {
+            voicesData.forEach(voice => {
                 if (!voicesByLocale[voice.locale]) {
                     voicesByLocale[voice.locale] = [];
                 }
@@ -78,12 +86,49 @@ document.addEventListener('DOMContentLoaded', function() {
                 
                 voiceSelect.appendChild(optgroup);
             }
+
+            // 初始化风格列表
+            updateStyleOptions();
         } catch (error) {
             console.error('获取语音列表失败:', error);
             voiceSelect.innerHTML = '<option value="">无法加载语音列表</option>';
         }
     }
     
+    // Rebuild the style <select> from the style_list of the currently selected voice.
+    function updateStyleOptions() {
+        // Drop whatever options are currently shown.
+        styleSelect.innerHTML = '';
+
+        // Look up the entry in voicesData for the voice that is selected right now.
+        const selectedVoice = voiceSelect.value;
+        const voiceData = voicesData.find(v => v.short_name === selectedVoice);
+
+        if (!voiceData || !voiceData.style_list || voiceData.style_list.length === 0) {
+            // Unknown voice or no styles listed: offer the single "general" fallback.
+            const option = document.createElement('option');
+            option.value = "general";
+            option.textContent = "普通";
+            styleSelect.appendChild(option);
+            return;
+        }
+
+        // One option per style listed for the voice.
+        voiceData.style_list.forEach(style => {
+            const option = document.createElement('option');
+            option.value = style;
+            option.textContent = style;
+
+            // Preselect config.defaultStyle, or "general" when no default is configured.
+            if (style === config.defaultStyle ||
+                (!config.defaultStyle && style === "general")) {
+                option.selected = true;
+            }
+
+            styleSelect.appendChild(option);
+        });
+    }
+
     // 初始化事件监听器
     function initEventListeners() {
         // 转换按钮点击事件
@@ -137,6 +182,7 @@ document.addEventListener('DOMContentLoaded', function() {
         }
         
         const voice = voiceSelect.value;
+        const style = styleSelect.value;
         const rate = rateInput.value;
         const pitch = pitchInput.value;
         
@@ -149,6 +195,7 @@ document.addEventListener('DOMContentLoaded', function() {
             const params = new URLSearchParams({
                 t: text,
                 v: voice,
+                s: style,
                 r: rate,
                 p: pitch
             });
diff --git a/web/templates/index.html b/web/templates/index.html
index 6bb5bb4..a71e2c3 100644
--- a/web/templates/index.html
+++ b/web/templates/index.html
@@ -34,15 +34,22 @@
                         </select>
                     </div>
 
+                    <div class="setting-group">
+                        <label for="style">风格:</label>
+                        <select id="style">
+                            <option value="">加载中...</option>
+                        </select> <!-- placeholder value is empty so an unloaded list never submits a bogus style -->
+                    </div>
+
                     <div class="setting-group">
                         <label for="rate">语速:</label>
-                        <input type="range" id="rate" min="-50" max="50" value="0">
+                        <input type="range" id="rate" min="-100" max="100" value="0">
                         <span id="rateValue">0%</span>
                     </div>
 
                     <div class="setting-group">
                         <label for="pitch">语调:</label>
-                        <input type="range" id="pitch" min="-50" max="50" value="0">
+                        <input type="range" id="pitch" min="-100" max="100" value="0">
                         <span id="pitchValue">0%</span>
                     </div>
                 </div>
@@ -75,7 +82,8 @@
             basePath: "{{.BasePath}}",
             defaultVoice: "{{.DefaultVoice}}",
             defaultRate: "{{.DefaultRate}}",
-            defaultPitch: "{{.DefaultPitch}}"
+            defaultPitch: "{{.DefaultPitch}}",
+            defaultStyle: "{{.DefaultStyle}}"
         };
     </script>
     <script src="{{.BasePath}}/static/js/app.js"></script>