From 1cd2ac1624996a33cc6ed07e874fb59d0c465ef2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E9=94=A6=E5=BC=BA?= <1061669148@qq.com> Date: Sun, 9 Mar 2025 17:21:52 +0800 Subject: [PATCH] feat: add style selection for TTS and update related functionality --- internal/http/handlers/tts.go | 2 ++ internal/models/tts.go | 1 + internal/tts/microsoft/client.go | 9 ++++-- web/static/js/app.js | 51 ++++++++++++++++++++++++++++++-- web/templates/index.html | 14 +++++++-- 5 files changed, 70 insertions(+), 7 deletions(-) diff --git a/internal/http/handlers/tts.go b/internal/http/handlers/tts.go index 8cf45b9..c51dcf9 100644 --- a/internal/http/handlers/tts.go +++ b/internal/http/handlers/tts.go @@ -153,6 +153,7 @@ func (h *TTSHandler) HandleTTS(w http.ResponseWriter, r *http.Request) { Voice: q.Get("v"), Rate: q.Get("r"), Pitch: q.Get("p"), + Style: q.Get("s"), } case http.MethodPost: // 从POST JSON体获取 @@ -174,6 +175,7 @@ func (h *TTSHandler) HandleTTS(w http.ResponseWriter, r *http.Request) { Voice: r.FormValue("voice"), Rate: r.FormValue("rate"), Pitch: r.FormValue("pitch"), + Style: r.FormValue("style"), } } default: diff --git a/internal/models/tts.go b/internal/models/tts.go index 67ee7a5..622e897 100644 --- a/internal/models/tts.go +++ b/internal/models/tts.go @@ -6,6 +6,7 @@ type TTSRequest struct { Voice string `json:"voice"` // 语音ID Rate string `json:"rate"` // 语速 (-100% 到 +100%) Pitch string `json:"pitch"` // 语调 (-100% 到 +100%) + Style string `json:"style"` // 说话风格 } // TTSResponse 表示一个语音合成响应 diff --git a/internal/tts/microsoft/client.go b/internal/tts/microsoft/client.go index 8e869f7..c41fb2c 100644 --- a/internal/tts/microsoft/client.go +++ b/internal/tts/microsoft/client.go @@ -25,7 +25,7 @@ const ( ttsEndpoint = "https://%s.tts.speech.microsoft.com/cognitiveservices/v1" ssmlTemplate = ` - + %s @@ -227,6 +227,11 @@ func (c *Client) createTTSRequest(ctx context.Context, req models.TTSRequest) (* voice = c.defaultVoice } + style := req.Style + if req.Style == "" { + style = "general" + } + rate := req.Rate if rate == "" { rate = c.defaultRate @@ -249,7 +254,7 @@ func (c *Client) createTTSRequest(ctx context.Context, req models.TTSRequest) (* escapedText := html.EscapeString(req.Text) // 准备SSML内容 - ssml := fmt.Sprintf(ssmlTemplate, locale, voice, rate, pitch, escapedText) + ssml := fmt.Sprintf(ssmlTemplate, locale, voice, style, rate, pitch, escapedText) // 获取端点信息 endpoint, err := c.getEndpoint(ctx) diff --git a/web/static/js/app.js b/web/static/js/app.js index d84c5b4..835ba7b 100644 --- a/web/static/js/app.js +++ b/web/static/js/app.js @@ -2,6 +2,7 @@ document.addEventListener('DOMContentLoaded', function() { // 获取DOM元素 const textInput = document.getElementById('text'); const voiceSelect = document.getElementById('voice'); + const styleSelect = document.getElementById('style'); const rateInput = document.getElementById('rate'); const rateValue = document.getElementById('rateValue'); const pitchInput = document.getElementById('pitch'); @@ -15,6 +16,8 @@ document.addEventListener('DOMContentLoaded', function() { // 保存最后一个音频URL let lastAudioUrl = ''; + // 存储语音数据 + let voicesData = []; // 初始化 initVoicesList(); @@ -37,13 +40,18 @@ document.addEventListener('DOMContentLoaded', function() { pitchValue.textContent = value + '%'; }); + // 语音选择变化时更新可用风格 + voiceSelect.addEventListener('change', function() { + updateStyleOptions(); + }); + // 获取可用语音列表 async function initVoicesList() { try { const response = await fetch(`${config.basePath}/voices`); if (!response.ok) throw new Error('获取语音列表失败'); - const voices = await response.json(); + voicesData = await response.json(); // 清空并重建选项 voiceSelect.innerHTML = ''; @@ -51,7 +59,7 @@ document.addEventListener('DOMContentLoaded', function() { // 按语言和名称分组 const voicesByLocale = {}; - voices.forEach(voice => { + voicesData.forEach(voice => { if (!voicesByLocale[voice.locale]) { voicesByLocale[voice.locale] = []; } @@ -78,12 +86,49 @@ document.addEventListener('DOMContentLoaded', function() { voiceSelect.appendChild(optgroup); } + + // 初始化风格列表 + updateStyleOptions(); } catch (error) { console.error('获取语音列表失败:', error); voiceSelect.innerHTML = ''; } } + // 更新风格选项 + function updateStyleOptions() { + // 清空风格选择 + styleSelect.innerHTML = ''; + + // 获取当前选中的语音 + const selectedVoice = voiceSelect.value; + const voiceData = voicesData.find(v => v.short_name === selectedVoice); + + if (!voiceData || !voiceData.style_list || voiceData.style_list.length === 0) { + // 如果没有可用风格,添加默认选项 + const option = document.createElement('option'); + option.value = "general"; + option.textContent = "普通"; + styleSelect.appendChild(option); + return; + } + + // 添加可用风格选项 + voiceData.style_list.forEach(style => { + const option = document.createElement('option'); + option.value = style + option.textContent = style + + // 如果是默认风格则选中 + if (style === config.defaultStyle || + (!config.defaultStyle && style === "general")) { + option.selected = true; + } + + styleSelect.appendChild(option); + }); + } + // 初始化事件监听器 function initEventListeners() { // 转换按钮点击事件 @@ -137,6 +182,7 @@ document.addEventListener('DOMContentLoaded', function() { } const voice = voiceSelect.value; + const style = styleSelect.value; const rate = rateInput.value; const pitch = pitchInput.value; @@ -149,6 +195,7 @@ document.addEventListener('DOMContentLoaded', function() { const params = new URLSearchParams({ t: text, v: voice, + s: style, r: rate, p: pitch }); diff --git a/web/templates/index.html b/web/templates/index.html index 6bb5bb4..a71e2c3 100644 --- a/web/templates/index.html +++ b/web/templates/index.html @@ -34,15 +34,22 @@ +
+ + +
+
- + 0%
- + 0%
@@ -75,7 +82,8 @@ basePath: "{{.BasePath}}", defaultVoice: "{{.DefaultVoice}}", defaultRate: "{{.DefaultRate}}", - defaultPitch: "{{.DefaultPitch}}" + defaultPitch: "{{.DefaultPitch}}", + defaultStyle: "{{.DefaultStyle}}" };