feat 添加风格支持及首页
This commit is contained in:
@@ -50,7 +50,7 @@ func SynthesizeVoice(c *gin.Context) {
|
||||
pitch := c.DefaultQuery("p", "0")
|
||||
outputFormat := c.DefaultQuery("o", "audio-24khz-48kbitrate-mono-mp3")
|
||||
|
||||
voice, err := utils.GetVoice(text, voiceName, rate, pitch, outputFormat)
|
||||
voice, err := utils.GetVoice(text, voiceName, rate, pitch, outputFormat, c.Query("s"))
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
@@ -65,12 +65,25 @@ func Index(c *gin.Context) {
|
||||
})
|
||||
}
|
||||
|
||||
func ApiDoc(c *gin.Context) {
|
||||
c.HTML(http.StatusOK, "api-doc.html", gin.H{
|
||||
"title": "TTS",
|
||||
})
|
||||
}
|
||||
|
||||
// SynthesizeVoiceRequest is a JSON request payload using single-letter keys.
// Its fields are the values SynthesizeVoicePost forwards to utils.GetVoice.
type SynthesizeVoiceRequest struct {
	// Text is the content to synthesize (JSON key "t").
	Text string `json:"t"`
	// VoiceName selects the voice (JSON key "v").
	VoiceName string `json:"v"`
	// Rate is the speaking rate (JSON key "r").
	Rate string `json:"r"`
	// Pitch is the voice pitch (JSON key "p").
	Pitch string `json:"p"`
	// OutputFormat is the requested audio output format (JSON key "o").
	OutputFormat string `json:"o"`
	// Style is the speaking style (JSON key "s").
	Style string `json:"s"`
}
|
||||
|
||||
// SynthesizeVoiceOpenAIRequest is the JSON body bound by
// SynthesizeVoiceOpenAI, keyed by "model", "input" and "voice".
type SynthesizeVoiceOpenAIRequest struct {
	// Model is decoded from "model"; the visible handler does not read it.
	Model string `json:"model"`
	// Input is the text to synthesize (JSON key "input").
	Input string `json:"input"`
	// Voice is the voice name passed to the synthesizer (JSON key "voice").
	Voice string `json:"voice"`
}
|
||||
|
||||
func SynthesizeVoicePost(c *gin.Context) {
|
||||
@@ -80,7 +93,7 @@ func SynthesizeVoicePost(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
voice, err := utils.GetVoice(request.Text, request.VoiceName, request.Rate, request.Pitch, request.OutputFormat)
|
||||
voice, err := utils.GetVoice(request.Text, request.VoiceName, request.Rate, request.Pitch, request.OutputFormat, request.Style)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
@@ -88,3 +101,19 @@ func SynthesizeVoicePost(c *gin.Context) {
|
||||
|
||||
c.Data(http.StatusOK, "audio/mpeg", voice)
|
||||
}
|
||||
|
||||
func SynthesizeVoiceOpenAI(c *gin.Context) {
|
||||
var request SynthesizeVoiceOpenAIRequest
|
||||
if err := c.BindJSON(&request); err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
voice, err := utils.GetVoice(request.Input, request.Voice, c.Query("r"), c.Query("p"), c.Query("o"), c.Query("s"))
|
||||
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
c.Data(http.StatusOK, "audio/mpeg", voice)
|
||||
}
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
package routes
|
||||
|
||||
import (
|
||||
"github.com/gin-gonic/gin"
|
||||
"tts/handlers"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func SetupRouter() *gin.Engine {
|
||||
@@ -14,7 +15,9 @@ func SetupRouter() *gin.Engine {
|
||||
router.GET("/voices", handlers.GetVoiceList)
|
||||
router.POST("/tts", handlers.SynthesizeVoicePost)
|
||||
router.GET("/tts", handlers.SynthesizeVoice)
|
||||
router.GET("/v1/audio/speech", handlers.SynthesizeVoiceOpenAI)
|
||||
router.GET("/", handlers.Index)
|
||||
router.GET("/doc", handlers.ApiDoc)
|
||||
|
||||
return router
|
||||
}
|
||||
|
||||
37
templates/api-doc.html
Normal file
37
templates/api-doc.html
Normal file
@@ -0,0 +1,37 @@
|
||||
<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <title>TTS</title>
</head>
<body>
<!-- Static reference page: lists the parameters accepted by /tts and /voices. -->
<h1> 支持接口 </h1>
<h2>语音合成</h2>
<div>
    <strong>/tts</strong> | GET / POST(json)
    <a target="_blank" href="/tts?t=岂曰无衣?与子同袍。王于兴师,修我戈矛,与子同仇!岂曰无衣?与子同泽。王于兴师,修我矛戟,与子偕作!岂曰无衣?与子同裳。王于兴师,修我甲兵,与子偕行!&v=zh-CN-XiaoxiaoMultilingualNeural&r=0&p=0&o=audio-24khz-48kbitrate-mono-mp3">try</a>
</div>

<pre>
    参数列表:
    1. t: 文本内容 (必填)
    2. v: 语音名称 (可选), 默认为 zh-CN-XiaoxiaoMultilingualNeural
    3. r: 语速 (可选), 默认为 0
    4. p: 语调 (可选), 默认为 0
    5. o: 输出格式 (可选), 默认为audio-24khz-48kbitrate-mono-mp3
    6. s: 风格 (可选), 默认为 general
</pre>

<h2>声音列表</h2>

<div>
    <strong>/voices</strong> | GET <a target="_blank" href="/voices?l=zh">try</a>
</div>
<pre>
    参数列表:
    1. l: 语言区域 (可选), 使用 contains 匹配,如 l=zh
    2. d: 显示详细信息 (可选) , 默认为 false, 如需显示详细信息, 请添加参数d , 如 /voices?d
</pre>
</body>
</html>
|
||||
@@ -2,36 +2,120 @@
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>TTS</title>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>TTS Demo</title>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
|
||||
<script src="https://cdn.tailwindcss.com"></script>
|
||||
<style>
|
||||
.top-right {
|
||||
position: absolute;
|
||||
top: 20px;
|
||||
right: 20px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1> 支持接口 </h1>
|
||||
<h2>语音合成</h2>
|
||||
<div>
|
||||
<strong>/tts</strong> | GET / POST(json)
|
||||
<a target="_blank" href="/tts?t=岂曰无衣?与子同袍。王于兴师,修我戈矛,与子同仇!岂曰无衣?与子同泽。王于兴师,修我矛戟,与子偕作!岂曰无衣?与子同裳。王于兴师,修我甲兵,与子偕行!&v=zh-CN-XiaoxiaoMultilingualNeural&r=0&p=0&o=audio-24khz-48kbitrate-mono-mp3">try</a>
|
||||
<body class="bg-gradient-to-r from-blue-100 to-purple-100 min-h-screen flex items-center justify-center p-4">
|
||||
<div class="top-right">
|
||||
<a href="/doc" class="hover:underline p-2 rounded">Documentation</a>
|
||||
</div>
|
||||
<div class="bg-white p-8 rounded-xl shadow-lg w-full max-w-4xl">
|
||||
<h1 class="text-4xl font-bold mb-8 text-center text-gray-800">语音合成演示</h1>
|
||||
|
||||
|
||||
<div id="ttsForm" class="space-y-6">
|
||||
<textarea id="textInput" rows="6" class="w-full p-4 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 text-gray-700 text-lg resize-none" placeholder="请输入要合成的文本">欢迎使用我们的语音合成演示系统。这项技术能够将文字转换成自然流畅的语音。您可以尝试调整语速和语调,体验不同的合成效果。我们提供多种语言和声音选项,满足您的各种需求。无论是阅读文章、语言学习,还是辅助视障人士,语音合成技术都能发挥重要作用。希望这个演示能让您感受到科技的魅力。祝您使用愉快!</textarea>
|
||||
|
||||
<div class="grid grid-cols-2 gap-4">
|
||||
<div>
|
||||
<label for="localeSelect" class="block text-sm font-medium text-gray-700 mb-1">语言</label>
|
||||
<select id="localeSelect" class="w-full p-3 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 text-gray-700">
|
||||
<option value="zh-CN">中文 (中国)</option>
|
||||
<option value="en-US">English (US)</option>
|
||||
<option value="ja-JP">日本語 (日本)</option>
|
||||
</select>
|
||||
</div>
|
||||
<div>
|
||||
<label for="voiceSelect" class="block text-sm font-medium text-gray-700 mb-1">声音</label>
|
||||
<select id="voiceSelect" class="w-full p-3 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 text-gray-700"></select>
|
||||
</div>
|
||||
<div>
|
||||
<label for="styleSelect" class="block text-sm font-medium text-gray-700 mb-1">风格</label>
|
||||
<select id="styleSelect" class="w-full p-3 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 text-gray-700"></select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex space-x-4">
|
||||
<div class="w-1/2 space-y-2">
|
||||
<label for="rateInput" class="block text-sm font-medium text-gray-700">语速</label>
|
||||
<input type="range" id="rateInput" min="-100" max="100" value="0" class="w-full h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer">
|
||||
</div>
|
||||
|
||||
<div class="w-1/2 space-y-2">
|
||||
<label for="pitchInput" class="block text-sm font-medium text-gray-700">语调</label>
|
||||
<input type="range" id="pitchInput" min="-100" max="100" value="0" class="w-full h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<button id="synthesizeButton" class="w-full bg-gradient-to-r from-blue-500 to-purple-600 text-white py-3 px-6 rounded-lg hover:from-blue-600 hover:to-purple-700 focus:outline-none focus:ring-2 focus:ring-purple-500 focus:ring-opacity-50 transition duration-300 text-lg font-semibold shadow-md">合成并播放</button>
|
||||
</div>
|
||||
|
||||
<audio id="audioPlayer" controls class="w-full mt-6 hidden"></audio>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
$(document).ready(function() {
|
||||
let globalVoices = []
|
||||
// Reload the voice <select> with the voices returned by /voices for `locale`,
// then refresh the style list for whichever voice ends up selected.
function updateVoices(locale) {
    // Tag each request so that a slow response for a previously selected
    // locale cannot overwrite (or append to) the list of a newer one.
    const requestId = updateVoices.latestRequest = (updateVoices.latestRequest || 0) + 1;

    $('#voiceSelect').empty();
    $.get('/voices?d&l=' + encodeURIComponent(locale), function(voices) {
        if (requestId !== updateVoices.latestRequest) {
            return;
        }
        // Fall back to an empty list if the response carries no `voices` field.
        globalVoices = (voices && voices.voices) || [];
        globalVoices.forEach(function(voice) {
            $('#voiceSelect').append($('<option>', {
                value: voice.ShortName,
                text: voice.LocalName + ' (' + voice.ShortName + ')'
            }));
        });
        updateStyles($('#voiceSelect').val());
    });
}
|
||||
|
||||
<pre>
|
||||
参数列表:
|
||||
1. t: 文本内容 (必填)
|
||||
2. v: 语音名称 (可选), 默认为 zh-CN-XiaoxiaoMultilingualNeural
|
||||
3. r: 语速 (可选), 默认为 0
|
||||
4. p: 语调 (可选), 默认为 0
|
||||
5. o: 输出格式 (可选), 默认为audio-24khz-48kbitrate-mono-mp3
|
||||
</pre>
|
||||
// Rebuild the style <select> from the StyleList of the voice whose ShortName
// equals `voice`. The list ends up empty when the voice is unknown or has no
// StyleList.
function updateStyles(voice) {
    const styleSelect = $('#styleSelect');
    // Always clear first: otherwise the styles of the previously selected
    // voice stay selectable when the new voice cannot be found.
    styleSelect.empty();

    const currentVoice = globalVoices.find(v => v.ShortName === voice);
    const styles = (currentVoice && currentVoice.StyleList) || [];
    styles.forEach(function(style) {
        styleSelect.append($('<option>', {
            value: style,
            text: style
        }));
    });
}
|
||||
|
||||
updateVoices($('#localeSelect').val());
|
||||
|
||||
<h2>声音列表</h2>
|
||||
$('#localeSelect').change(function() {
|
||||
updateVoices($(this).val());
|
||||
});
|
||||
|
||||
<div>
|
||||
<strong>/voices</strong> | GET <a target="_blank" href="/voices?l=zh">try</a>
|
||||
</div>
|
||||
<pre>
|
||||
参数列表:
|
||||
1. l: 语言区域 (可选), 使用 contains 匹配,如 l=zh
|
||||
2. d: 显示详细信息 (可选) , 默认为 false, 如需显示详细信息, 请添加参数d , 如 /voices?d
|
||||
</pre>
|
||||
$('#voiceSelect').change(function() {
|
||||
updateStyles($(this).val());
|
||||
});
|
||||
|
||||
// Build the /tts URL from the current form values and start playback.
$('#synthesizeButton').click(function() {
    var text = $('#textInput').val();
    // An empty <select> yields null from .val(); send an empty string instead
    // of the literal "null".
    var voice = $('#voiceSelect').val() || '';
    var rate = $('#rateInput').val();
    var pitch = $('#pitchInput').val();
    var locale = $('#localeSelect').val();
    var style = $('#styleSelect').val() || '';

    // Encode every value so special characters cannot break the query string.
    var url = `/tts?t=${encodeURIComponent(text)}&v=${encodeURIComponent(voice)}&r=${encodeURIComponent(rate)}&p=${encodeURIComponent(pitch)}&l=${encodeURIComponent(locale)}&s=${encodeURIComponent(style)}`;

    $('#audioPlayer').attr('src', url).removeClass('hidden')[0].play();
});
|
||||
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
</html>
|
||||
|
||||
|
||||
@@ -47,6 +47,7 @@ const (
|
||||
defaultRate = "0"
|
||||
defaultPitch = "0"
|
||||
defaultOutputFormat = "audio-24khz-48kbitrate-mono-mp3"
|
||||
defaultStyle = "general"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -110,8 +111,7 @@ func Sign(urlStr string) string {
|
||||
}
|
||||
|
||||
// GetVoice 获取语音合成结果
|
||||
// GetVoice 获取语音合成结果
|
||||
func GetVoice(text, voiceName, rate, pitch, outputFormat string) ([]byte, error) {
|
||||
func GetVoice(text, voiceName, rate, pitch, outputFormat, style string) ([]byte, error) {
|
||||
if voiceName == "" {
|
||||
voiceName = defaultVoiceName
|
||||
}
|
||||
@@ -125,6 +125,10 @@ func GetVoice(text, voiceName, rate, pitch, outputFormat string) ([]byte, error)
|
||||
outputFormat = defaultOutputFormat
|
||||
}
|
||||
|
||||
if style == "" {
|
||||
style = defaultStyle
|
||||
}
|
||||
|
||||
endpoint, err := GetEndpoint()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -137,7 +141,7 @@ func GetVoice(text, voiceName, rate, pitch, outputFormat string) ([]byte, error)
|
||||
"X-Microsoft-OutputFormat": outputFormat,
|
||||
}
|
||||
|
||||
ssml := GetSsml(text, voiceName, rate, pitch)
|
||||
ssml := GetSsml(text, voiceName, rate, pitch, style)
|
||||
|
||||
req, err := http.NewRequest("POST", u, bytes.NewBufferString(ssml))
|
||||
if err != nil {
|
||||
@@ -159,18 +163,20 @@ func GetVoice(text, voiceName, rate, pitch, outputFormat string) ([]byte, error)
|
||||
}
|
||||
|
||||
// GetSsml builds the SSML document for one synthesis request.
//
// text is the content to speak. voiceName fills the <voice name> attribute,
// style the <mstts:express-as style> attribute, and rate and pitch the
// <prosody> attributes, where each is suffixed with "%". All five values are
// escaped before being inserted, so markup characters in any of them appear
// as character references rather than as SSML.
func GetSsml(text, voiceName, rate, pitch, style string) string {
	// Escape the attribute values as well as the text: they also originate
	// from request parameters, and an unescaped quote or angle bracket would
	// let a caller break out of the attribute and inject arbitrary SSML.
	esc := html.EscapeString
	return fmt.Sprintf(`
<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" version="1.0" xml:lang="zh-CN">
    <voice name="%s">
        <mstts:express-as style="%s" styledegree="1.0" role="default">
            <prosody rate="%s%%" pitch="%s%%" volume="medium">
                %s
            </prosody>
        </mstts:express-as>
    </voice>
</speak>
`, esc(voiceName), esc(style), esc(rate), esc(pitch), esc(text))
}
|
||||
|
||||
// VoiceList 获取可用的语音列表
|
||||
|
||||
Reference in New Issue
Block a user