feat: add style selection for TTS and update related functionality
This commit is contained in:
@@ -153,6 +153,7 @@ func (h *TTSHandler) HandleTTS(w http.ResponseWriter, r *http.Request) {
|
|||||||
Voice: q.Get("v"),
|
Voice: q.Get("v"),
|
||||||
Rate: q.Get("r"),
|
Rate: q.Get("r"),
|
||||||
Pitch: q.Get("p"),
|
Pitch: q.Get("p"),
|
||||||
|
Style: q.Get("s"),
|
||||||
}
|
}
|
||||||
case http.MethodPost:
|
case http.MethodPost:
|
||||||
// 从POST JSON体获取
|
// 从POST JSON体获取
|
||||||
@@ -174,6 +175,7 @@ func (h *TTSHandler) HandleTTS(w http.ResponseWriter, r *http.Request) {
|
|||||||
Voice: r.FormValue("voice"),
|
Voice: r.FormValue("voice"),
|
||||||
Rate: r.FormValue("rate"),
|
Rate: r.FormValue("rate"),
|
||||||
Pitch: r.FormValue("pitch"),
|
Pitch: r.FormValue("pitch"),
|
||||||
|
Style: r.FormValue("style"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ type TTSRequest struct {
|
|||||||
Voice string `json:"voice"` // 语音ID
|
Voice string `json:"voice"` // 语音ID
|
||||||
Rate string `json:"rate"` // 语速 (-100% 到 +100%)
|
Rate string `json:"rate"` // 语速 (-100% 到 +100%)
|
||||||
Pitch string `json:"pitch"` // 语调 (-100% 到 +100%)
|
Pitch string `json:"pitch"` // 语调 (-100% 到 +100%)
|
||||||
|
Style string `json:"style"` // 说话风格
|
||||||
}
|
}
|
||||||
|
|
||||||
// TTSResponse 表示一个语音合成响应
|
// TTSResponse 表示一个语音合成响应
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ const (
|
|||||||
ttsEndpoint = "https://%s.tts.speech.microsoft.com/cognitiveservices/v1"
|
ttsEndpoint = "https://%s.tts.speech.microsoft.com/cognitiveservices/v1"
|
||||||
ssmlTemplate = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts="http://www.w3.org/2001/mstts" xml:lang='%s'>
|
ssmlTemplate = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts="http://www.w3.org/2001/mstts" xml:lang='%s'>
|
||||||
<voice name='%s'>
|
<voice name='%s'>
|
||||||
<mstts:express-as style="general" styledegree="1.0" role="default">
|
<mstts:express-as style="%s" styledegree="1.0" role="default">
|
||||||
<prosody rate='%s%%' pitch='%s%%' volume="medium">
|
<prosody rate='%s%%' pitch='%s%%' volume="medium">
|
||||||
%s
|
%s
|
||||||
</prosody>
|
</prosody>
|
||||||
@@ -227,6 +227,11 @@ func (c *Client) createTTSRequest(ctx context.Context, req models.TTSRequest) (*
|
|||||||
voice = c.defaultVoice
|
voice = c.defaultVoice
|
||||||
}
|
}
|
||||||
|
|
||||||
|
style := req.Style
|
||||||
|
if req.Style == "" {
|
||||||
|
style = "general"
|
||||||
|
}
|
||||||
|
|
||||||
rate := req.Rate
|
rate := req.Rate
|
||||||
if rate == "" {
|
if rate == "" {
|
||||||
rate = c.defaultRate
|
rate = c.defaultRate
|
||||||
@@ -249,7 +254,7 @@ func (c *Client) createTTSRequest(ctx context.Context, req models.TTSRequest) (*
|
|||||||
escapedText := html.EscapeString(req.Text)
|
escapedText := html.EscapeString(req.Text)
|
||||||
|
|
||||||
// 准备SSML内容
|
// 准备SSML内容
|
||||||
ssml := fmt.Sprintf(ssmlTemplate, locale, voice, rate, pitch, escapedText)
|
ssml := fmt.Sprintf(ssmlTemplate, locale, voice, style, rate, pitch, escapedText)
|
||||||
|
|
||||||
// 获取端点信息
|
// 获取端点信息
|
||||||
endpoint, err := c.getEndpoint(ctx)
|
endpoint, err := c.getEndpoint(ctx)
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
|||||||
// 获取DOM元素
|
// 获取DOM元素
|
||||||
const textInput = document.getElementById('text');
|
const textInput = document.getElementById('text');
|
||||||
const voiceSelect = document.getElementById('voice');
|
const voiceSelect = document.getElementById('voice');
|
||||||
|
const styleSelect = document.getElementById('style');
|
||||||
const rateInput = document.getElementById('rate');
|
const rateInput = document.getElementById('rate');
|
||||||
const rateValue = document.getElementById('rateValue');
|
const rateValue = document.getElementById('rateValue');
|
||||||
const pitchInput = document.getElementById('pitch');
|
const pitchInput = document.getElementById('pitch');
|
||||||
@@ -15,6 +16,8 @@ document.addEventListener('DOMContentLoaded', function() {
|
|||||||
|
|
||||||
// 保存最后一个音频URL
|
// 保存最后一个音频URL
|
||||||
let lastAudioUrl = '';
|
let lastAudioUrl = '';
|
||||||
|
// 存储语音数据
|
||||||
|
let voicesData = [];
|
||||||
|
|
||||||
// 初始化
|
// 初始化
|
||||||
initVoicesList();
|
initVoicesList();
|
||||||
@@ -37,13 +40,18 @@ document.addEventListener('DOMContentLoaded', function() {
|
|||||||
pitchValue.textContent = value + '%';
|
pitchValue.textContent = value + '%';
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// 语音选择变化时更新可用风格
|
||||||
|
voiceSelect.addEventListener('change', function() {
|
||||||
|
updateStyleOptions();
|
||||||
|
});
|
||||||
|
|
||||||
// 获取可用语音列表
|
// 获取可用语音列表
|
||||||
async function initVoicesList() {
|
async function initVoicesList() {
|
||||||
try {
|
try {
|
||||||
const response = await fetch(`${config.basePath}/voices`);
|
const response = await fetch(`${config.basePath}/voices`);
|
||||||
if (!response.ok) throw new Error('获取语音列表失败');
|
if (!response.ok) throw new Error('获取语音列表失败');
|
||||||
|
|
||||||
const voices = await response.json();
|
voicesData = await response.json();
|
||||||
|
|
||||||
// 清空并重建选项
|
// 清空并重建选项
|
||||||
voiceSelect.innerHTML = '';
|
voiceSelect.innerHTML = '';
|
||||||
@@ -51,7 +59,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
|||||||
// 按语言和名称分组
|
// 按语言和名称分组
|
||||||
const voicesByLocale = {};
|
const voicesByLocale = {};
|
||||||
|
|
||||||
voices.forEach(voice => {
|
voicesData.forEach(voice => {
|
||||||
if (!voicesByLocale[voice.locale]) {
|
if (!voicesByLocale[voice.locale]) {
|
||||||
voicesByLocale[voice.locale] = [];
|
voicesByLocale[voice.locale] = [];
|
||||||
}
|
}
|
||||||
@@ -78,12 +86,49 @@ document.addEventListener('DOMContentLoaded', function() {
|
|||||||
|
|
||||||
voiceSelect.appendChild(optgroup);
|
voiceSelect.appendChild(optgroup);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 初始化风格列表
|
||||||
|
updateStyleOptions();
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('获取语音列表失败:', error);
|
console.error('获取语音列表失败:', error);
|
||||||
voiceSelect.innerHTML = '<option value="">无法加载语音列表</option>';
|
voiceSelect.innerHTML = '<option value="">无法加载语音列表</option>';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 更新风格选项
|
||||||
|
/**
 * Rebuild the style <select> for the voice currently chosen in voiceSelect.
 *
 * The voice is looked up in voicesData by its short_name, and one <option>
 * is added per entry of that voice's style_list. When the voice is not found,
 * or it has no usable (non-empty array) style_list, a single "general"
 * option is offered instead.
 *
 * Pre-selection: config.defaultStyle when the voice offers it; otherwise
 * "general" when the voice offers that; otherwise no option is explicitly
 * marked as selected.
 */
function updateStyleOptions() {
    const FALLBACK_STYLE = 'general';

    // Drop the options that were rendered for the previously selected voice.
    styleSelect.innerHTML = '';

    // Find the data record of the voice that is selected right now.
    const selectedVoice = voiceSelect.value;
    const voiceData = voicesData.find((v) => v.short_name === selectedVoice);

    // Anything that is not a real array is treated as "no styles available",
    // so a malformed record cannot make the loop below throw.
    const styles = voiceData && Array.isArray(voiceData.style_list)
        ? voiceData.style_list
        : [];

    if (styles.length === 0) {
        // No styles for this voice: offer only the neutral default.
        const option = document.createElement('option');
        option.value = FALLBACK_STYLE;
        option.textContent = '普通';
        styleSelect.appendChild(option);
        return;
    }

    // Decide once which style to pre-select. If the configured default is not
    // offered by this voice, prefer the neutral style over whichever style
    // happens to be listed first.
    const preferredStyle = styles.includes(config.defaultStyle)
        ? config.defaultStyle
        : FALLBACK_STYLE;

    styles.forEach((style) => {
        const option = document.createElement('option');
        option.value = style;
        option.textContent = style;

        if (style === preferredStyle) {
            option.selected = true;
        }

        styleSelect.appendChild(option);
    });
}
|
||||||
|
|
||||||
// 初始化事件监听器
|
// 初始化事件监听器
|
||||||
function initEventListeners() {
|
function initEventListeners() {
|
||||||
// 转换按钮点击事件
|
// 转换按钮点击事件
|
||||||
@@ -137,6 +182,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const voice = voiceSelect.value;
|
const voice = voiceSelect.value;
|
||||||
|
const style = styleSelect.value;
|
||||||
const rate = rateInput.value;
|
const rate = rateInput.value;
|
||||||
const pitch = pitchInput.value;
|
const pitch = pitchInput.value;
|
||||||
|
|
||||||
@@ -149,6 +195,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
|||||||
const params = new URLSearchParams({
|
const params = new URLSearchParams({
|
||||||
t: text,
|
t: text,
|
||||||
v: voice,
|
v: voice,
|
||||||
|
s: style,
|
||||||
r: rate,
|
r: rate,
|
||||||
p: pitch
|
p: pitch
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -34,15 +34,22 @@
|
|||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class="setting-group">
|
||||||
|
<label for="style">风格:</label>
|
||||||
|
<select id="style">
|
||||||
|
<option value="loading">加载中...</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div class="setting-group">
|
<div class="setting-group">
|
||||||
<label for="rate">语速:</label>
|
<label for="rate">语速:</label>
|
||||||
<input type="range" id="rate" min="-50" max="50" value="0">
|
<input type="range" id="rate" min="-100" max="100" value="0">
|
||||||
<span id="rateValue">0%</span>
|
<span id="rateValue">0%</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="setting-group">
|
<div class="setting-group">
|
||||||
<label for="pitch">语调:</label>
|
<label for="pitch">语调:</label>
|
||||||
<input type="range" id="pitch" min="-50" max="50" value="0">
|
<input type="range" id="pitch" min="-100" max="100" value="0">
|
||||||
<span id="pitchValue">0%</span>
|
<span id="pitchValue">0%</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -75,7 +82,8 @@
|
|||||||
basePath: "{{.BasePath}}",
|
basePath: "{{.BasePath}}",
|
||||||
defaultVoice: "{{.DefaultVoice}}",
|
defaultVoice: "{{.DefaultVoice}}",
|
||||||
defaultRate: "{{.DefaultRate}}",
|
defaultRate: "{{.DefaultRate}}",
|
||||||
defaultPitch: "{{.DefaultPitch}}"
|
defaultPitch: "{{.DefaultPitch}}",
|
||||||
|
defaultStyle: "{{.DefaultStyle}}"
|
||||||
};
|
};
|
||||||
</script>
|
</script>
|
||||||
<script src="{{.BasePath}}/static/js/app.js"></script>
|
<script src="{{.BasePath}}/static/js/app.js"></script>
|
||||||
|
|||||||
Reference in New Issue
Block a user