feat: add style selection for TTS and update related functionality

2025-03-09 17:21:52 +08:00
parent 4ec09304f6
commit 1cd2ac1624
5 changed files with 70 additions and 7 deletions
--- a/internal/http/handlers/tts.go
+++ b/internal/http/handlers/tts.go
@@ -153,6 +153,7 @@ func (h *TTSHandler) HandleTTS(w http.ResponseWriter, r *http.Request) {
 			Voice: q.Get("v"),
 			Rate:  q.Get("r"),
 			Pitch: q.Get("p"),
 			Style: q.Get("s"),
 		}
 	case http.MethodPost:
 		// 从POST JSON体获取
@@ -174,6 +175,7 @@ func (h *TTSHandler) HandleTTS(w http.ResponseWriter, r *http.Request) {
 				Voice: r.FormValue("voice"),
 				Rate:  r.FormValue("rate"),
 				Pitch: r.FormValue("pitch"),
 				Style: r.FormValue("style"),
 			}
 		}
 	default:
--- a/internal/models/tts.go
+++ b/internal/models/tts.go
@@ -6,6 +6,7 @@ type TTSRequest struct {
 	Voice string `json:"voice"` // 语音ID
 	Rate  string `json:"rate"`  // 语速 (-100% 到 +100%)
 	Pitch string `json:"pitch"` // 语调 (-100% 到 +100%)
 	Style string `json:"style"` // 说话风格
 }
 // TTSResponse 表示一个语音合成响应
--- a/internal/tts/microsoft/client.go
+++ b/internal/tts/microsoft/client.go
@@ -25,7 +25,7 @@ const (
 	ttsEndpoint    = "https://%s.tts.speech.microsoft.com/cognitiveservices/v1"
 	ssmlTemplate   = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts="http://www.w3.org/2001/mstts" xml:lang='%s'>
    <voice name='%s'>
-        <mstts:express-as style="general" styledegree="1.0" role="default">
+        <mstts:express-as style="%s" styledegree="1.0" role="default">
            <prosody rate='%s%%' pitch='%s%%' volume="medium">
                %s
            </prosody>
@@ -227,6 +227,11 @@ func (c *Client) createTTSRequest(ctx context.Context, req models.TTSRequest) (*
 		voice = c.defaultVoice
 	}
 	style := req.Style
 	if req.Style == "" {
 		style = "general"
 	}
 	rate := req.Rate
 	if rate == "" {
 		rate = c.defaultRate
@@ -249,7 +254,7 @@ func (c *Client) createTTSRequest(ctx context.Context, req models.TTSRequest) (*
 	escapedText := html.EscapeString(req.Text)
 	// 准备SSML内容
-	ssml := fmt.Sprintf(ssmlTemplate, locale, voice, rate, pitch, escapedText)
+	ssml := fmt.Sprintf(ssmlTemplate, locale, voice, style, rate, pitch, escapedText)
 	// 获取端点信息
 	endpoint, err := c.getEndpoint(ctx)
--- a/web/static/js/app.js
+++ b/web/static/js/app.js
@@ -2,6 +2,7 @@ document.addEventListener('DOMContentLoaded', function() {
    // 获取DOM元素
    const textInput = document.getElementById('text');
    const voiceSelect = document.getElementById('voice');
    const styleSelect = document.getElementById('style');
    const rateInput = document.getElementById('rate');
    const rateValue = document.getElementById('rateValue');
    const pitchInput = document.getElementById('pitch');
@@ -15,6 +16,8 @@ document.addEventListener('DOMContentLoaded', function() {
    // 保存最后一个音频URL
    let lastAudioUrl = '';
    // 存储语音数据
    let voicesData = [];
    // 初始化
    initVoicesList();
@@ -37,13 +40,18 @@ document.addEventListener('DOMContentLoaded', function() {
        pitchValue.textContent = value + '%';
    });
    // Refresh the list of available styles whenever a different voice is selected.
    voiceSelect.addEventListener('change', () => updateStyleOptions());
    // 获取可用语音列表
    async function initVoicesList() {
        try {
            const response = await fetch(`${config.basePath}/voices`);
            if (!response.ok) throw new Error('获取语音列表失败');
-            const voices = await response.json();
+            voicesData = await response.json();
            // 清空并重建选项
            voiceSelect.innerHTML = '';
@@ -51,7 +59,7 @@ document.addEventListener('DOMContentLoaded', function() {
            // 按语言和名称分组
            const voicesByLocale = {};
-            voices.forEach(voice => {
+            voicesData.forEach(voice => {
                if (!voicesByLocale[voice.locale]) {
                    voicesByLocale[voice.locale] = [];
                }
@@ -78,12 +86,49 @@ document.addEventListener('DOMContentLoaded', function() {
                voiceSelect.appendChild(optgroup);
            }
            // 初始化风格列表
            updateStyleOptions();
        } catch (error) {
            console.error('获取语音列表失败:', error);
            voiceSelect.innerHTML = '<option value="">无法加载语音列表</option>';
        }
    }
    // Rebuild the style <select> for the currently selected voice.
    // Reads voiceSelect.value, voicesData and config.defaultStyle, and replaces
    // every option of styleSelect. Takes no arguments and returns nothing.
    function updateStyleOptions() {
        // Remove whatever options are currently shown.
        styleSelect.innerHTML = '';
        // Find the data record of the voice that is selected right now.
        const selectedVoice = voiceSelect.value;
        const voiceData = voicesData.find(v => v.short_name === selectedVoice);
        // Normalise "unknown voice", "no style_list" and "empty style_list" to an empty list.
        const styles = (voiceData && voiceData.style_list) || [];
        if (styles.length === 0) {
            // No styles available for this voice: offer a single "general" entry.
            const option = document.createElement('option');
            option.value = "general";
            option.textContent = "普通";
            styleSelect.appendChild(option);
            return;
        }
        // Decide which style to preselect: the configured default when this voice
        // offers it, otherwise "general" when offered. If neither is present the
        // browser keeps the first option selected.
        let preferredStyle = null;
        if (config.defaultStyle && styles.includes(config.defaultStyle)) {
            preferredStyle = config.defaultStyle;
        } else if (styles.includes("general")) {
            preferredStyle = "general";
        }
        // Add one option per available style.
        styles.forEach(style => {
            const option = document.createElement('option');
            option.value = style;
            option.textContent = style;
            if (style === preferredStyle) {
                option.selected = true;
            }
            styleSelect.appendChild(option);
        });
    }
    // 初始化事件监听器
    function initEventListeners() {
        // 转换按钮点击事件
@@ -137,6 +182,7 @@ document.addEventListener('DOMContentLoaded', function() {
        }
        const voice = voiceSelect.value;
        const style = styleSelect.value;
        const rate = rateInput.value;
        const pitch = pitchInput.value;
@@ -149,6 +195,7 @@ document.addEventListener('DOMContentLoaded', function() {
            const params = new URLSearchParams({
                t: text,
                v: voice,
                s: style,
                r: rate,
                p: pitch
            });
--- a/web/templates/index.html
+++ b/web/templates/index.html
@@ -34,15 +34,22 @@
                        </select>
                    </div>
                    <div class="setting-group">
                        <label for="style">风格:</label>
                        <select id="style">
                            <option value="loading">加载中...</option>
                        </select>
                    </div>
                    <div class="setting-group">
                        <label for="rate">语速:</label>
-                        <input type="range" id="rate" min="-50" max="50" value="0">
+                        <input type="range" id="rate" min="-100" max="100" value="0">
                        <span id="rateValue">0%</span>
                    </div>
                    <div class="setting-group">
                        <label for="pitch">语调:</label>
-                        <input type="range" id="pitch" min="-50" max="50" value="0">
+                        <input type="range" id="pitch" min="-100" max="100" value="0">
                        <span id="pitchValue">0%</span>
                    </div>
                </div>
@@ -75,7 +82,8 @@
            basePath: "{{.BasePath}}",
            defaultVoice: "{{.DefaultVoice}}",
            defaultRate: "{{.DefaultRate}}",
-            defaultPitch: "{{.DefaultPitch}}"
+            defaultPitch: "{{.DefaultPitch}}",
            defaultStyle: "{{.DefaultStyle}}"
        };
    </script>
    <script src="{{.BasePath}}/static/js/app.js"></script>