feat: add style selection for TTS (new `s` query / `style` form and JSON parameter, defaulting to "general") and update related functionality

This commit is contained in:
王锦强
2025-03-09 17:21:52 +08:00
parent 4ec09304f6
commit 1cd2ac1624
5 changed files with 70 additions and 7 deletions

View File

@@ -153,6 +153,7 @@ func (h *TTSHandler) HandleTTS(w http.ResponseWriter, r *http.Request) {
Voice: q.Get("v"),
Rate: q.Get("r"),
Pitch: q.Get("p"),
Style: q.Get("s"),
}
case http.MethodPost:
// 从POST JSON体获取
@@ -174,6 +175,7 @@ func (h *TTSHandler) HandleTTS(w http.ResponseWriter, r *http.Request) {
Voice: r.FormValue("voice"),
Rate: r.FormValue("rate"),
Pitch: r.FormValue("pitch"),
Style: r.FormValue("style"),
}
}
default:

View File

@@ -6,6 +6,7 @@ type TTSRequest struct {
Voice string `json:"voice"` // 语音ID
Rate string `json:"rate"` // 语速 (-100% 到 +100%)
Pitch string `json:"pitch"` // 语调 (-100% 到 +100%)
Style string `json:"style"` // 说话风格
}
// TTSResponse 表示一个语音合成响应

View File

@@ -25,7 +25,7 @@ const (
ttsEndpoint = "https://%s.tts.speech.microsoft.com/cognitiveservices/v1"
ssmlTemplate = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts="http://www.w3.org/2001/mstts" xml:lang='%s'>
<voice name='%s'>
<mstts:express-as style="general" styledegree="1.0" role="default">
<mstts:express-as style="%s" styledegree="1.0" role="default">
<prosody rate='%s%%' pitch='%s%%' volume="medium">
%s
</prosody>
@@ -227,6 +227,11 @@ func (c *Client) createTTSRequest(ctx context.Context, req models.TTSRequest) (*
voice = c.defaultVoice
}
style := req.Style
if req.Style == "" {
style = "general"
}
rate := req.Rate
if rate == "" {
rate = c.defaultRate
@@ -249,7 +254,7 @@ func (c *Client) createTTSRequest(ctx context.Context, req models.TTSRequest) (*
escapedText := html.EscapeString(req.Text)
// 准备SSML内容
ssml := fmt.Sprintf(ssmlTemplate, locale, voice, rate, pitch, escapedText)
ssml := fmt.Sprintf(ssmlTemplate, locale, voice, style, rate, pitch, escapedText)
// 获取端点信息
endpoint, err := c.getEndpoint(ctx)