feat: add SSML processing capabilities and configuration support

This commit is contained in:
王锦强
2025-03-11 12:23:46 +08:00
parent 886377b77d
commit 4225e5771f
3 changed files with 104 additions and 3 deletions

View File

@@ -28,3 +28,30 @@ tts:
shimmer: "zh-CN-XiaomoNeural" # 温柔女声 shimmer: "zh-CN-XiaomoNeural" # 温柔女声
openai: openai:
api_key: '' api_key: ''
# SSML handling. Text matching any pattern under preserve_tags is kept
# verbatim when the request text is escaped; everything else is HTML-escaped.
# Patterns are Go regular expressions; they are single-quoted so that
# backslashes and angle brackets are taken literally by the YAML parser.
ssml:
  preserve_tags:
    - name: break
      # Attributes are optional on <break>, so accept both <break/> and
      # <break time="500ms"/>.
      pattern: '<break(\s+[^>]*)?/>'
    - name: speak
      pattern: '<speak>|</speak>'
    - name: prosody
      pattern: '<prosody\s+[^>]*>|</prosody>'
    - name: emphasis
      # The level attribute is optional, so a bare <emphasis> must match too.
      pattern: '<emphasis(\s+[^>]*)?>|</emphasis>'
    - name: voice
      pattern: '<voice\s+[^>]*>|</voice>'
    - name: say-as
      pattern: '<say-as\s+[^>]*>|</say-as>'
    - name: phoneme
      pattern: '<phoneme\s+[^>]*>|</phoneme>'
    - name: audio
      pattern: '<audio\s+[^>]*>|</audio>'
    - name: p
      pattern: '<p>|</p>'
    - name: s
      pattern: '<s>|</s>'
    - name: sub
      pattern: '<sub\s+[^>]*>|</sub>'
    - name: mstts
      # Matches any opening, closing or self-closing element in the mstts
      # namespace.
      pattern: '<mstts:[^>]*>|</mstts:[^>]*>'

View File

@@ -2,6 +2,8 @@ package config
import ( import (
"fmt" "fmt"
"html"
"regexp"
"strings" "strings"
"sync" "sync"
@@ -13,6 +15,7 @@ type Config struct {
Server ServerConfig `mapstructure:"server"` Server ServerConfig `mapstructure:"server"`
TTS TTSConfig `mapstructure:"tts"` TTS TTSConfig `mapstructure:"tts"`
OpenAI OpenAIConfig `mapstructure:"openai"` OpenAI OpenAIConfig `mapstructure:"openai"`
SSML SSMLConfig `mapstructure:"ssml"`
} }
// OpenAIConfig 包含OpenAI API配置 // OpenAIConfig 包含OpenAI API配置
@@ -89,3 +92,69 @@ func Load(configPath string) (*Config, error) {
func Get() *Config { func Get() *Config {
return &config return &config
} }
// TagPattern describes one SSML tag that should be preserved: a
// regular-expression pattern together with a name for it.
type TagPattern struct {
	// Name labels the tag; it is intended for logging and debugging.
	Name string `mapstructure:"name"`
	// Pattern is the regular expression that matches the tag.
	Pattern string `mapstructure:"pattern"`
}
// SSMLConfig holds the SSML tag configuration.
type SSMLConfig struct {
	// PreserveTags lists the regular-expression patterns of every tag that
	// has to be preserved.
	PreserveTags []TagPattern `mapstructure:"preserve_tags"`
}
// SSMLProcessor processes SSML content.
type SSMLProcessor struct {
	// config is the SSML configuration the processor was created from.
	config *SSMLConfig
	// patternCache holds the compiled regular expressions, keyed by the
	// name of the tag pattern they were compiled from.
	patternCache map[string]*regexp.Regexp
}
// NewSSMLProcessor creates an SSMLProcessor from config, compiling every
// configured tag pattern up front so that later calls do not recompile.
//
// A nil config is treated as an empty one (no tags are preserved). Entries
// with an empty pattern are skipped, since an empty expression can only match
// empty strings and therefore preserves nothing. Entries that share a name
// are merged into a single alternation instead of overwriting each other.
//
// It returns an error when a pattern is not a valid regular expression; the
// error names the offending entry and wraps the compile error.
func NewSSMLProcessor(config *SSMLConfig) (*SSMLProcessor, error) {
	// Substitute an empty configuration rather than panicking on the
	// dereference below.
	if config == nil {
		config = &SSMLConfig{}
	}

	processor := &SSMLProcessor{
		config:       config,
		patternCache: make(map[string]*regexp.Regexp, len(config.PreserveTags)),
	}

	// Precompile the regular expressions.
	for _, tagPattern := range config.PreserveTags {
		if tagPattern.Pattern == "" {
			continue
		}

		// Compile the entry on its own first so that a syntax error is
		// attributed to exactly this entry.
		regex, err := regexp.Compile(tagPattern.Pattern)
		if err != nil {
			return nil, fmt.Errorf("编译正则表达式'%s'失败: %w", tagPattern.Name, err)
		}

		// The cache is keyed by name, so a second entry with the same name
		// would otherwise silently discard the first one. Keep both by
		// combining them into one alternation.
		if existing, ok := processor.patternCache[tagPattern.Name]; ok {
			merged := "(?:" + existing.String() + ")|(?:" + tagPattern.Pattern + ")"
			regex, err = regexp.Compile(merged)
			if err != nil {
				return nil, fmt.Errorf("编译正则表达式'%s'失败: %w", tagPattern.Name, err)
			}
		}

		processor.patternCache[tagPattern.Name] = regex
	}

	return processor, nil
}
// EscapeSSML HTML-escapes ssml while leaving every substring that matches one
// of the processor's compiled tag patterns untouched.
//
// The input is scanned once: all pattern matches are located in the original
// text, the text between them is passed through html.EscapeString, and the
// matches themselves are copied verbatim (attribute values inside a preserved
// tag are therefore not escaped). When matches overlap, the one that starts
// first wins, and among matches starting at the same offset the longest wins,
// so the result does not depend on map iteration order. Empty matches are
// ignored.
//
// No placeholder strings are inserted into the text, so the output cannot be
// corrupted by tag names containing HTML-special characters or by input that
// happens to look like a placeholder.
//
// A nil processor, or one without patterns, escapes the whole input.
func (p *SSMLProcessor) EscapeSSML(ssml string) string {
	if p == nil || len(p.patternCache) == 0 {
		return html.EscapeString(ssml)
	}

	// Collect, per pattern, the ordered list of [start, end) match spans.
	matches := make([][][]int, 0, len(p.patternCache))
	for _, pattern := range p.patternCache {
		if pattern == nil {
			continue
		}
		if found := pattern.FindAllStringIndex(ssml, -1); len(found) > 0 {
			matches = append(matches, found)
		}
	}

	var out strings.Builder
	out.Grow(len(ssml))

	cursor := 0 // everything before cursor has already been written
	for {
		bestStart, bestEnd := -1, -1
		for i, spans := range matches {
			// Discard spans that are empty or that begin inside text already
			// emitted (i.e. they overlap a tag chosen earlier).
			for len(spans) > 0 && (spans[0][0] < cursor || spans[0][0] == spans[0][1]) {
				spans = spans[1:]
			}
			matches[i] = spans
			if len(spans) == 0 {
				continue
			}

			start, end := spans[0][0], spans[0][1]
			if bestStart == -1 || start < bestStart || (start == bestStart && end > bestEnd) {
				bestStart, bestEnd = start, end
			}
		}
		if bestStart == -1 {
			break
		}

		// Escape the plain text leading up to the tag, then copy the tag as is.
		out.WriteString(html.EscapeString(ssml[cursor:bestStart]))
		out.WriteString(ssml[bestStart:bestEnd])
		cursor = bestEnd
	}

	// Escape whatever follows the last preserved tag.
	out.WriteString(html.EscapeString(ssml[cursor:]))
	return out.String()
}

View File

@@ -6,7 +6,6 @@ import (
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"html"
"io" "io"
"log" "log"
"net/http" "net/http"
@@ -50,10 +49,16 @@ type Client struct {
endpoint map[string]interface{} endpoint map[string]interface{}
endpointMu sync.RWMutex endpointMu sync.RWMutex
endpointExpiry time.Time endpointExpiry time.Time
ssmProcessor *config.SSMLProcessor
} }
// NewClient 创建一个新的Microsoft TTS客户端 // NewClient 创建一个新的Microsoft TTS客户端
func NewClient(cfg *config.Config) *Client { func NewClient(cfg *config.Config) *Client {
// 从Viper配置中创建SSML处理器
ssmProcessor, err := config.NewSSMLProcessor(&cfg.SSML)
if err != nil {
log.Fatalf("创建SSML处理器失败: %v", err)
}
client := &Client{ client := &Client{
defaultVoice: cfg.TTS.DefaultVoice, defaultVoice: cfg.TTS.DefaultVoice,
defaultRate: cfg.TTS.DefaultRate, defaultRate: cfg.TTS.DefaultRate,
@@ -65,6 +70,7 @@ func NewClient(cfg *config.Config) *Client {
}, },
voicesCacheExpiry: time.Time{}, // 初始时缓存为空 voicesCacheExpiry: time.Time{}, // 初始时缓存为空
endpointExpiry: time.Time{}, // 初始时端点为空 endpointExpiry: time.Time{}, // 初始时端点为空
ssmProcessor: ssmProcessor,
} }
return client return client
@@ -245,8 +251,7 @@ func (c *Client) createTTSRequest(ctx context.Context, req models.TTSRequest) (*
} }
// 对文本进行HTML转义防止XML解析错误 // 对文本进行HTML转义防止XML解析错误
escapedText := c.ssmProcessor.EscapeSSML(req.Text)
escapedText := html.EscapeString(req.Text)
// 准备SSML内容 // 准备SSML内容
ssml := fmt.Sprintf(ssmlTemplate, locale, voice, style, rate, pitch, escapedText) ssml := fmt.Sprintf(ssmlTemplate, locale, voice, style, rate, pitch, escapedText)