diff --git a/configs/config.yaml b/configs/config.yaml
index 1cd1d85..f8079ee 100644
--- a/configs/config.yaml
+++ b/configs/config.yaml
@@ -28,3 +28,30 @@ tts:
     shimmer: "zh-CN-XiaomoNeural" # 温柔女声
 openai:
   api_key: ''
+
+ssml:
+  preserve_tags:
+    - name: break
+      pattern: <break[^>]*/>
+    - name: speak
+      pattern: <speak>|</speak>
+    - name: prosody
+      pattern: <prosody[^>]*>|</prosody>
+    - name: emphasis
+      pattern: <emphasis[^>]*>|</emphasis>
+    - name: voice
+      pattern: <voice[^>]*>|</voice>
+    - name: say-as
+      pattern: <say-as[^>]*>|</say-as>
+    - name: phoneme
+      pattern: <phoneme[^>]*>|</phoneme>
+    - name: audio
+      pattern: <audio[^>]*>|</audio>
+    - name: p
+      pattern: <p>|</p>
+    - name: s
+      pattern: <s>|</s>
+    - name: sub
+      pattern: <sub[^>]*>|</sub>
+    - name: mstts
+      pattern: <mstts[^>]*>|</mstts[^>]*>
\ No newline at end of file
diff --git a/internal/config/config.go b/internal/config/config.go
index 37c39a5..ce17965 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -2,6 +2,8 @@ package config
 
 import (
 	"fmt"
+	"html"
+	"regexp"
 	"strings"
 	"sync"
 
@@ -13,6 +15,7 @@ type Config struct {
 	Server ServerConfig `mapstructure:"server"`
 	TTS    TTSConfig    `mapstructure:"tts"`
 	OpenAI OpenAIConfig `mapstructure:"openai"`
+	SSML   SSMLConfig   `mapstructure:"ssml"`
 }
 
 // OpenAIConfig 包含OpenAI API配置
@@ -89,3 +92,117 @@ func Load(configPath string) (*Config, error) {
 func Get() *Config {
 	return &config
 }
+
+// TagPattern names one SSML tag and the regular expression that matches it.
+type TagPattern struct {
+	Name    string `mapstructure:"name"`    // tag name; used as the cache key and in error messages
+	Pattern string `mapstructure:"pattern"` // regular expression matching the tag markup
+}
+
+// SSMLConfig holds the settings read from the "ssml" configuration key.
+type SSMLConfig struct {
+	// PreserveTags lists the tag patterns whose matches must survive escaping.
+	PreserveTags []TagPattern `mapstructure:"preserve_tags"`
+}
+
+// SSMLProcessor escapes text for embedding in an SSML document while leaving
+// the configured tags intact.
+type SSMLProcessor struct {
+	config       *SSMLConfig
+	patternCache map[string]*regexp.Regexp // compiled patterns keyed by tag name
+}
+
+// ssmlSegment is a piece of the input plus a flag telling whether it is a
+// preserved tag (copied verbatim) or plain text (HTML-escaped).
+type ssmlSegment struct {
+	text      string
+	preserved bool
+}
+
+// NewSSMLProcessor compiles every pattern in config.PreserveTags and returns a
+// processor that uses them. A nil config is treated as an empty one. If a
+// pattern does not compile, the returned error names the offending tag.
+func NewSSMLProcessor(config *SSMLConfig) (*SSMLProcessor, error) {
+	if config == nil {
+		config = &SSMLConfig{}
+	}
+
+	processor := &SSMLProcessor{
+		config:       config,
+		patternCache: make(map[string]*regexp.Regexp, len(config.PreserveTags)),
+	}
+
+	// Compile up front so EscapeSSML never compiles a pattern per request.
+	for _, tagPattern := range config.PreserveTags {
+		regex, err := regexp.Compile(tagPattern.Pattern)
+		if err != nil {
+			return nil, fmt.Errorf("编译正则表达式'%s'失败: %w", tagPattern.Name, err)
+		}
+		processor.patternCache[tagPattern.Name] = regex
+	}
+
+	return processor, nil
+}
+
+// EscapeSSML returns ssml with HTML special characters escaped everywhere
+// except inside substrings matched by the configured tag patterns, which are
+// copied through unchanged.
+//
+// Patterns are applied in configuration order so the result is deterministic;
+// text claimed by an earlier pattern is not re-examined by later ones, and
+// zero-length matches are ignored.
+func (p *SSMLProcessor) EscapeSSML(ssml string) string {
+	if p == nil || p.config == nil {
+		return html.EscapeString(ssml)
+	}
+
+	segments := []ssmlSegment{{text: ssml}}
+
+	for _, tagPattern := range p.config.PreserveTags {
+		pattern, ok := p.patternCache[tagPattern.Name]
+		if !ok {
+			continue
+		}
+
+		next := make([]ssmlSegment, 0, len(segments))
+		for _, seg := range segments {
+			if seg.preserved {
+				next = append(next, seg)
+				continue
+			}
+			next = appendSplit(next, seg.text, pattern)
+		}
+		segments = next
+	}
+
+	var out strings.Builder
+	out.Grow(len(ssml))
+	for _, seg := range segments {
+		if seg.preserved {
+			out.WriteString(seg.text)
+			continue
+		}
+		out.WriteString(html.EscapeString(seg.text))
+	}
+	return out.String()
+}
+
+// appendSplit cuts text at every non-empty match of pattern and appends the
+// pieces to dst in order, marking the matched pieces as preserved.
+func appendSplit(dst []ssmlSegment, text string, pattern *regexp.Regexp) []ssmlSegment {
+	last := 0
+	for _, loc := range pattern.FindAllStringIndex(text, -1) {
+		if loc[0] == loc[1] {
+			continue
+		}
+		if loc[0] > last {
+			dst = append(dst, ssmlSegment{text: text[last:loc[0]]})
+		}
+		dst = append(dst, ssmlSegment{text: text[loc[0]:loc[1]], preserved: true})
+		last = loc[1]
+	}
+	if last < len(text) {
+		dst = append(dst, ssmlSegment{text: text[last:]})
+	}
+	return dst
+}
diff --git a/internal/tts/microsoft/client.go b/internal/tts/microsoft/client.go
index b6f30e8..e3a19f2 100644
--- a/internal/tts/microsoft/client.go
+++ b/internal/tts/microsoft/client.go
@@ -6,7 +6,6 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
-	"html"
 	"io"
 	"log"
 	"net/http"
@@ -50,10 +49,16 @@ type Client struct {
 	endpoint       map[string]interface{}
 	endpointMu     sync.RWMutex
 	endpointExpiry time.Time
+	ssmProcessor   *config.SSMLProcessor
 }
 
 // NewClient 创建一个新的Microsoft TTS客户端
 func NewClient(cfg *config.Config) *Client {
+	// Build the SSML processor from the loaded configuration; an invalid tag pattern aborts startup.
+	ssmProcessor, err := config.NewSSMLProcessor(&cfg.SSML)
+	if err != nil {
+		log.Fatalf("创建SSML处理器失败: %v", err)
+	}
 	client := &Client{
 		defaultVoice: cfg.TTS.DefaultVoice,
 		defaultRate:  cfg.TTS.DefaultRate,
@@ -65,6 +70,7 @@ func NewClient(cfg *config.Config) *Client {
 		},
 		voicesCacheExpiry: time.Time{}, // 初始时缓存为空
 		endpointExpiry:    time.Time{}, // 初始时端点为空
+		ssmProcessor:      ssmProcessor,
 	}
 
 	return client
@@ -245,8 +251,7 @@ func (c *Client) createTTSRequest(ctx context.Context, req models.TTSRequest) (*
 	}
 
 	// 对文本进行HTML转义,防止XML解析错误
-
-	escapedText := html.EscapeString(req.Text)
+	escapedText := c.ssmProcessor.EscapeSSML(req.Text)
 
 	// 准备SSML内容
 	ssml := fmt.Sprintf(ssmlTemplate, locale, voice, style, rate, pitch, escapedText)