feat: add voice style selection and caching for voice list

2025-03-11 23:38:22 +08:00
parent 869080287e
commit 5daa99252e
1 changed files with 176 additions and 35 deletions
--- a/workers/src/index.js
+++ b/workers/src/index.js
@@ -1,6 +1,10 @@
 const encoder = new TextEncoder();
 let expiredAt = null;
 let endpoint = null;
 // 添加缓存相关变量
 let voiceListCache = null;
 let voiceListCacheTime = null;
 const VOICE_CACHE_DURATION =4 *60 * 60 * 1000; // 4小时，单位毫秒
 // 定义需要保留的 SSML 标签模式
 const preserveTags = [
@@ -200,11 +204,55 @@ async function handleRequest(request) {
                      placeholder="请输入要转换的文本"></textarea>
                  </div>
-                  <div>
+                  <div class="grid grid-cols-1 md:grid-cols-2 gap-6">
-                    <label for="voice" class="block text-sm font-medium text-gray-700">选择语音</label>
+                    <div>
-                    <select id="voice" name="voice"
+                      <label for="voice" class="block text-sm font-medium text-gray-700">选择语音</label>
-                      class="mt-1 block w-full py-2 px-3 border border-gray-300 bg-white rounded-md shadow-sm focus:outline-none focus:ring-ms-blue focus:border-ms-blue sm:text-sm">
+                      <select id="voice" name="voice"
-                    </select>
+                        class="mt-1 block w-full py-2 px-3 border border-gray-300 bg-white rounded-md shadow-sm focus:outline-none focus:ring-ms-blue focus:border-ms-blue sm:text-sm">
                      </select>
                    </div>
                    <div>
                      <label for="style" class="block text-sm font-medium text-gray-700">语音风格</label>
                      <select id="style" name="style"
                        class="mt-1 block w-full py-2 px-3 border border-gray-300 bg-white rounded-md shadow-sm focus:outline-none focus:ring-ms-blue focus:border-ms-blue sm:text-sm">
                        <option value="general" selected>标准</option>
                        <option value="advertisement_upbeat">广告热情</option>
                        <option value="affectionate">亲切</option>
                        <option value="angry">愤怒</option>
                        <option value="assistant">助理</option>
                        <option value="calm">平静</option>
                        <option value="chat">随意</option>
                        <option value="cheerful">愉快</option>
                        <option value="customerservice">客服</option>
                        <option value="depressed">沮丧</option>
                        <option value="disgruntled">不满</option>
                        <option value="documentary-narration">纪录片解说</option>
                        <option value="embarrassed">尴尬</option>
                        <option value="empathetic">共情</option>
                        <option value="envious">羡慕</option>
                        <option value="excited">兴奋</option>
                        <option value="fearful">恐惧</option>
                        <option value="friendly">友好</option>
                        <option value="gentle">温柔</option>
                        <option value="hopeful">希望</option>
                        <option value="lyrical">抒情</option>
                        <option value="narration-professional">专业叙述</option>
                        <option value="narration-relaxed">轻松叙述</option>
                        <option value="newscast">新闻播报</option>
                        <option value="newscast-casual">随意新闻</option>
                        <option value="newscast-formal">正式新闻</option>
                        <option value="poetry-reading">诗朗诵</option>
                        <option value="sad">悲伤</option>
                        <option value="serious">严肃</option>
                        <option value="shouting">大喊</option>
                        <option value="sports_commentary">体育解说</option>
                        <option value="sports_commentary_excited">激动体育解说</option>
                        <option value="whispering">低语</option>
                        <option value="terrified">恐慌</option>
                        <option value="unfriendly">冷漠</option>
                      </select>
                    </div>
                  </div>
                  <div class="grid grid-cols-1 md:grid-cols-2 gap-6">
@@ -274,13 +322,14 @@ async function handleRequest(request) {
            </div>
            <div class="px-4 py-5 sm:p-6">
              <h4 class="text-base font-medium text-gray-900 mb-2">文本转语音 API</h4>
-              <code class="text-sm block bg-gray-50 p-2 rounded mb-2 overflow-auto">/tts?api_key={key}&t={text}&v={voice}&r={rate}&p={pitch}</code>
+              <code class="text-sm block bg-gray-50 p-2 rounded mb-2 overflow-auto">/tts?api_key={key}&t={text}&v={voice}&r={rate}&p={pitch}&s={style}</code>
              <ul class="list-disc pl-5 text-sm space-y-1">
                <li><span class="text-red-600">api_key</span>: API密钥 [必填]</li>
                <li><span class="text-red-600">t</span>: 文本内容 [必填]</li>
                <li>v: 语音名称 [可选]</li>
                <li>r: 语速调整 (-100~100) [可选]</li>
                <li>p: 音调调整 (-100~100) [可选]</li>
                <li>s: 语音风格 (general, cheerful, sad等) [可选]</li>
              </ul>
              <h4 class="text-base font-medium text-gray-900 mt-6 mb-2">OpenAI 兼容接口</h4>
@@ -382,6 +431,33 @@ curl ${baseUrl}/v1/audio/speech \\
      // 存储所有语音数据
      let allVoices = [];
      // 在表单提交事件监听器之前添加语音选择变更事件监听
      document.addEventListener('DOMContentLoaded', function() {
        // 添加对语音选择变化的监听
        document.getElementById('voice').addEventListener('change', function() {
          updateStyleOptions(this.value);
        });
        const tabLinks = document.querySelectorAll('.tab-link');
        const tabContents = document.querySelectorAll('.tab-content');
        tabLinks.forEach(link => {
          link.addEventListener('click', () => {
            tabLinks.forEach(l => {
              l.classList.remove('text-ms-blue', 'border-ms-blue');
              l.classList.add('text-gray-500');
            });
            link.classList.add('text-ms-blue', 'border-ms-blue');
            link.classList.remove('text-gray-500');
            const targetId = link.getAttribute('data-tab');
            tabContents.forEach(tc => {
              tc.style.display = (tc.id === targetId) ? '' : 'none';
            });
          });
        });
      });
      document.getElementById('ttsForm').addEventListener('submit', async function(e) {
        e.preventDefault();
@@ -393,6 +469,7 @@ curl ${baseUrl}/v1/audio/speech \\
        const voice = document.getElementById('voice').value;
        const rate = document.getElementById('rate').value;
        const pitch = document.getElementById('pitch').value;
        const style = document.getElementById('style').value; // 获取选择的风格
        if (!text) {
          showError('请输入要转换的文本', '文本内容不能为空');
@@ -404,7 +481,7 @@ curl ${baseUrl}/v1/audio/speech \\
          return;
        }
-        const url = \`${baseUrl}/tts?api_key=\${apiKey}&t=\${text}&v=\${voice}&r=\${rate}&p=\${pitch}\`;
+        const url = \`${baseUrl}/tts?api_key=\${apiKey}&t=\${text}&v=\${voice}&r=\${rate}&p=\${pitch}&s=\${style}\`;
        try {
          const response = await fetch(url);
@@ -452,6 +529,69 @@ curl ${baseUrl}/v1/audio/speech \\
        errorAlert.scrollIntoView({ behavior: 'smooth', block: 'center' });
      }
      // 更新风格选项的函数
      function updateStyleOptions(voiceName) {
        const styleSelect = document.getElementById('style');
        // 清空现有选项
        styleSelect.innerHTML = '';
        // 默认添加标准风格
        const defaultOption = document.createElement('option');
        defaultOption.value = 'general';
        defaultOption.text = '标准';
        styleSelect.appendChild(defaultOption);
        // 查找选定的语音对象
        const selectedVoice = allVoices.find(v => v.ShortName === voiceName);
        if (selectedVoice && selectedVoice.StyleList && selectedVoice.StyleList.length > 0) {
          // 对风格列表进行排序
          const styles = [...selectedVoice.StyleList].sort();
          // 为每个风格创建选项
          styles.forEach(style => {
            // 跳过已经添加的"general"
            if (style.toLowerCase() === 'general') return;
            const option = document.createElement('option');
            option.value = style;
            // 根据风格名称进行本地化显示
            option.text = getStyleDisplayName(style);
            styleSelect.appendChild(option);
          });
        }
      }
      // 风格名称本地化显示
      function getStyleDisplayName(styleName) {
        const styleMap = {
          'angry': '愤怒',
          'cheerful': '欢快',
          'sad': '悲伤',
          'fearful': '恐惧',
          'disgruntled': '不满',
          'serious': '严肃',
          'affectionate': '深情',
          'gentle': '温柔',
          'embarrassed': '尴尬',
          'assistant': '助手',
          'calm': '平静',
          'chat': '聊天',
          'excited': '兴奋',
          'friendly': '友好',
          'hopeful': '希望',
          'narration-professional': '专业叙述',
          'newscast': '新闻播报',
          'newscast-casual': '随性新闻',
          'poetry-reading': '诗歌朗诵',
          'shouting': '喊叫',
          'sports-commentary': '体育解说',
          'whispering': '低语'
        };
        return styleMap[styleName.toLowerCase()] || styleName;
      }
      // 加载可用语音列表
      async function loadVoices() {
        try {
@@ -500,6 +640,9 @@ curl ${baseUrl}/v1/audio/speech \\
            if (voiceSelect.querySelector(\`option[value="\${defaultVoice}"]\`)) {
              voiceSelect.value = defaultVoice;
            }
            // 加载初始选择语音的风格选项
            updateStyleOptions(voiceSelect.value);
          } else {
            console.error('获取语音列表失败：', response.status);
            showDefaultVoices();
@@ -563,31 +706,19 @@ curl ${baseUrl}/v1/audio/speech \\
        // 默认选择晓晓多语言
        voiceSelect.value = "zh-CN-XiaoxiaoMultilingualNeural";
        // 设置默认风格
        const styleSelect = document.getElementById('style');
        styleSelect.innerHTML = '';
        const defaultOption = document.createElement('option');
        defaultOption.value = 'general';
        defaultOption.text = '标准';
        styleSelect.appendChild(defaultOption);
      }
      // 页面加载完成后加载语音列表
      window.onload = loadVoices;
      document.addEventListener('DOMContentLoaded', function() {
        const tabLinks = document.querySelectorAll('.tab-link');
        const tabContents = document.querySelectorAll('.tab-content');
        tabLinks.forEach(link => {
          link.addEventListener('click', () => {
            tabLinks.forEach(l => {
              l.classList.remove('text-ms-blue', 'border-ms-blue');
              l.classList.add('text-gray-500');
            });
            link.classList.add('text-ms-blue', 'border-ms-blue');
            link.classList.remove('text-gray-500');
            const targetId = link.getAttribute('data-tab');
            tabContents.forEach(tc => {
              tc.style.display = (tc.id === targetId) ? '' : 'none';
            });
          });
        });
      });
    </script>
  </body>
  </html>
@@ -598,6 +729,8 @@ addEventListener('fetch', event => {
  event.respondWith(handleRequest(event.request));
 });
 async function getEndpoint() {
  const endpointUrl = 'https://dev.microsofttranslator.com/apps/endpoint?api-version=1.0';
  const headers = {
@@ -653,6 +786,14 @@ function getSsml(text, voiceName, rate, pitch, style = 'general') {
 }
 function voiceList() {
  // 检查缓存是否有效
  if (voiceListCache && voiceListCacheTime && (Date.now() - voiceListCacheTime) < VOICE_CACHE_DURATION) {
    console.log('使用缓存的语音列表数据，剩余有效期：',
      Math.round((VOICE_CACHE_DURATION - (Date.now() - voiceListCacheTime)) / 60000), '分钟');
    return Promise.resolve(voiceListCache);
  }
  console.log('获取新的语音列表数据');
  const headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.26',
    'X-Ms-Useragent': 'SpeechStudio/2021.05.001',
@@ -663,14 +804,14 @@ function voiceList() {
  return fetch('https://eastus.api.speech.microsoft.com/cognitiveservices/voices/list', {
    headers: headers,
-    cf: {
+  })
-      // Always cache this fetch regardless of content type
+  .then(res => res.json())
-      // for a max of 5 seconds before revalidating the resource
+  .then(data => {
-      cacheTtl: 600,
+    // 更新缓存
-      cacheEverything: true,
+    voiceListCache = data;
-      cacheKey: "mstrans-voice-list",
+    voiceListCacheTime = Date.now();
-    },
+    return data;
-  }).then(res => res.json());
+  });
 }
 async function hmacSha256(key, data) {