feat: add SSML processing capabilities and configuration support
This commit is contained in:
@@ -28,3 +28,30 @@ tts:
|
|||||||
shimmer: "zh-CN-XiaomoNeural" # 温柔女声
|
shimmer: "zh-CN-XiaomoNeural" # 温柔女声
|
||||||
openai:
|
openai:
|
||||||
api_key: ''
|
api_key: ''
|
||||||
|
|
||||||
|
ssml:
|
||||||
|
preserve_tags:
|
||||||
|
- name: break
|
||||||
|
pattern: <break\s+[^>]*/>
|
||||||
|
- name: speak
|
||||||
|
pattern: <speak>|</speak>
|
||||||
|
- name: prosody
|
||||||
|
pattern: <prosody\s+[^>]*>|</prosody>
|
||||||
|
- name: emphasis
|
||||||
|
pattern: <emphasis\s+[^>]*>|</emphasis>
|
||||||
|
- name: voice
|
||||||
|
pattern: <voice\s+[^>]*>|</voice>
|
||||||
|
- name: say-as
|
||||||
|
pattern: <say-as\s+[^>]*>|</say-as>
|
||||||
|
- name: phoneme
|
||||||
|
pattern: <phoneme\s+[^>]*>|</phoneme>
|
||||||
|
- name: audio
|
||||||
|
pattern: <audio\s+[^>]*>|</audio>
|
||||||
|
- name: p
|
||||||
|
pattern: <p>|</p>
|
||||||
|
- name: s
|
||||||
|
pattern: <s>|</s>
|
||||||
|
- name: sub
|
||||||
|
pattern: <sub\s+[^>]*>|</sub>
|
||||||
|
- name: mstts
|
||||||
|
pattern: <mstts:[^>]*>|</mstts:[^>]*>
|
||||||
@@ -2,6 +2,8 @@ package config
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"html"
|
||||||
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
@@ -13,6 +15,7 @@ type Config struct {
|
|||||||
Server ServerConfig `mapstructure:"server"`
|
Server ServerConfig `mapstructure:"server"`
|
||||||
TTS TTSConfig `mapstructure:"tts"`
|
TTS TTSConfig `mapstructure:"tts"`
|
||||||
OpenAI OpenAIConfig `mapstructure:"openai"`
|
OpenAI OpenAIConfig `mapstructure:"openai"`
|
||||||
|
SSML SSMLConfig `mapstructure:"ssml"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// OpenAIConfig 包含OpenAI API配置
|
// OpenAIConfig 包含OpenAI API配置
|
||||||
@@ -89,3 +92,69 @@ func Load(configPath string) (*Config, error) {
|
|||||||
func Get() *Config {
|
func Get() *Config {
|
||||||
return &config
|
return &config
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TagPattern pairs a tag's regular-expression pattern with a name for it.
type TagPattern struct {
	Name    string `mapstructure:"name"`    // tag name, used for logging and debugging; NewSSMLProcessor also keys the compiled pattern by it
	Pattern string `mapstructure:"pattern"` // regular-expression pattern that matches the tag
}
|
||||||
|
|
||||||
|
// SSMLConfig stores the SSML tag configuration (the "ssml" key of Config).
type SSMLConfig struct {
	// PreserveTags holds the regular-expression patterns of every tag that
	// must be preserved.
	PreserveTags []TagPattern `mapstructure:"preserve_tags"`
}
|
||||||
|
|
||||||
|
// SSMLProcessor 处理SSML内容
|
||||||
|
type SSMLProcessor struct {
|
||||||
|
config *SSMLConfig
|
||||||
|
patternCache map[string]*regexp.Regexp
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewSSMLProcessor 从配置对象创建SSMLProcessor
|
||||||
|
func NewSSMLProcessor(config *SSMLConfig) (*SSMLProcessor, error) {
|
||||||
|
processor := &SSMLProcessor{
|
||||||
|
config: config,
|
||||||
|
patternCache: make(map[string]*regexp.Regexp),
|
||||||
|
}
|
||||||
|
|
||||||
|
// 预编译正则表达式
|
||||||
|
for _, tagPattern := range config.PreserveTags {
|
||||||
|
regex, err := regexp.Compile(tagPattern.Pattern)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("编译正则表达式'%s'失败: %w", tagPattern.Name, err)
|
||||||
|
}
|
||||||
|
processor.patternCache[tagPattern.Name] = regex
|
||||||
|
}
|
||||||
|
|
||||||
|
return processor, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// EscapeSSML 转义SSML内容,但保留配置的标签
|
||||||
|
func (p *SSMLProcessor) EscapeSSML(ssml string) string {
|
||||||
|
// 使用占位符替换标签
|
||||||
|
placeholders := make(map[string]string)
|
||||||
|
processedSSML := ssml
|
||||||
|
|
||||||
|
counter := 0
|
||||||
|
|
||||||
|
// 处理所有配置的标签
|
||||||
|
for name, pattern := range p.patternCache {
|
||||||
|
processedSSML = pattern.ReplaceAllStringFunc(processedSSML, func(match string) string {
|
||||||
|
placeholder := fmt.Sprintf("__SSML_PLACEHOLDER_%s_%d__", name, counter)
|
||||||
|
placeholders[placeholder] = match
|
||||||
|
counter++
|
||||||
|
return placeholder
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// 对处理后的文本进行HTML转义
|
||||||
|
escapedContent := html.EscapeString(processedSSML)
|
||||||
|
|
||||||
|
// 恢复所有标签占位符
|
||||||
|
for placeholder, tag := range placeholders {
|
||||||
|
escapedContent = strings.Replace(escapedContent, placeholder, tag, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
return escapedContent
|
||||||
|
}
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"html"
|
|
||||||
"io"
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
@@ -50,10 +49,16 @@ type Client struct {
|
|||||||
endpoint map[string]interface{}
|
endpoint map[string]interface{}
|
||||||
endpointMu sync.RWMutex
|
endpointMu sync.RWMutex
|
||||||
endpointExpiry time.Time
|
endpointExpiry time.Time
|
||||||
|
ssmProcessor *config.SSMLProcessor
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewClient 创建一个新的Microsoft TTS客户端
|
// NewClient 创建一个新的Microsoft TTS客户端
|
||||||
func NewClient(cfg *config.Config) *Client {
|
func NewClient(cfg *config.Config) *Client {
|
||||||
|
// 从Viper配置中创建SSML处理器
|
||||||
|
ssmProcessor, err := config.NewSSMLProcessor(&cfg.SSML)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("创建SSML处理器失败: %v", err)
|
||||||
|
}
|
||||||
client := &Client{
|
client := &Client{
|
||||||
defaultVoice: cfg.TTS.DefaultVoice,
|
defaultVoice: cfg.TTS.DefaultVoice,
|
||||||
defaultRate: cfg.TTS.DefaultRate,
|
defaultRate: cfg.TTS.DefaultRate,
|
||||||
@@ -65,6 +70,7 @@ func NewClient(cfg *config.Config) *Client {
|
|||||||
},
|
},
|
||||||
voicesCacheExpiry: time.Time{}, // 初始时缓存为空
|
voicesCacheExpiry: time.Time{}, // 初始时缓存为空
|
||||||
endpointExpiry: time.Time{}, // 初始时端点为空
|
endpointExpiry: time.Time{}, // 初始时端点为空
|
||||||
|
ssmProcessor: ssmProcessor,
|
||||||
}
|
}
|
||||||
|
|
||||||
return client
|
return client
|
||||||
@@ -245,8 +251,7 @@ func (c *Client) createTTSRequest(ctx context.Context, req models.TTSRequest) (*
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 对文本进行HTML转义,防止XML解析错误
|
// 对文本进行HTML转义,防止XML解析错误
|
||||||
|
escapedText := c.ssmProcessor.EscapeSSML(req.Text)
|
||||||
escapedText := html.EscapeString(req.Text)
|
|
||||||
|
|
||||||
// 准备SSML内容
|
// 准备SSML内容
|
||||||
ssml := fmt.Sprintf(ssmlTemplate, locale, voice, style, rate, pitch, escapedText)
|
ssml := fmt.Sprintf(ssmlTemplate, locale, voice, style, rate, pitch, escapedText)
|
||||||
|
|||||||
Reference in New Issue
Block a user