mirror of
https://github.com/Cola-Echo/Cola.git
synced 2026-06-06 03:35:50 +00:00
760 lines
23 KiB
JavaScript
760 lines
23 KiB
JavaScript
/**
|
||
* 语音 API 封装
|
||
* TTS (文字转语音) 和 STT (语音转文字)
|
||
*/
|
||
|
||
import { getSettings } from './config.js';
|
||
|
||
/**
|
||
* 获取语音 API 配置
|
||
* @param {Object} contact - 角色对象(可选,用于获取角色独立配置)
|
||
* @returns {Object} 配置对象
|
||
*/
|
||
export function getVoiceApiConfig(contact = null) {
|
||
const settings = getSettings();
|
||
|
||
// 基础配置
|
||
const config = {
|
||
stt: {
|
||
url: settings.sttApiUrl || '',
|
||
key: settings.sttApiKey || '',
|
||
model: settings.sttModel || ''
|
||
},
|
||
tts: {
|
||
url: settings.ttsApiUrl || '',
|
||
key: settings.ttsApiKey || '',
|
||
model: settings.ttsModel || '',
|
||
voice: settings.ttsVoice || '',
|
||
speed: settings.ttsSpeed || 1,
|
||
emotion: settings.ttsEmotion || '默认',
|
||
proxyUrl: settings.ttsProxyUrl || ''
|
||
}
|
||
};
|
||
|
||
// 角色独立 TTS 配置
|
||
if (contact?.useCustomVoice && contact.customTtsVoice) {
|
||
config.tts.voice = contact.customTtsVoice;
|
||
}
|
||
|
||
return config;
|
||
}
|
||
|
||
/**
|
||
* 根据 Blob 类型获取文件名
|
||
*/
|
||
function getAudioFileName(blob) {
|
||
const type = blob.type || 'audio/webm';
|
||
if (type.includes('webm')) return 'audio.webm';
|
||
if (type.includes('ogg')) return 'audio.ogg';
|
||
if (type.includes('mp4')) return 'audio.mp4';
|
||
if (type.includes('mpeg') || type.includes('mp3')) return 'audio.mp3';
|
||
if (type.includes('wav')) return 'audio.wav';
|
||
if (type.includes('flac')) return 'audio.flac';
|
||
return 'audio.webm';
|
||
}
|
||
|
||
/**
|
||
* 将音频 Blob 转换为 WAV 格式(更好的兼容性)
|
||
* 导出供其他模块使用
|
||
*/
|
||
export async function convertToWav(audioBlob) {
|
||
try {
|
||
const audioContext = new (window.AudioContext || window.webkitAudioContext)();
|
||
const arrayBuffer = await audioBlob.arrayBuffer();
|
||
const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
|
||
|
||
// 创建 WAV 文件
|
||
const numChannels = audioBuffer.numberOfChannels;
|
||
const sampleRate = audioBuffer.sampleRate;
|
||
const format = 1; // PCM
|
||
const bitDepth = 16;
|
||
|
||
const bytesPerSample = bitDepth / 8;
|
||
const blockAlign = numChannels * bytesPerSample;
|
||
|
||
const samples = audioBuffer.length;
|
||
const dataSize = samples * blockAlign;
|
||
const buffer = new ArrayBuffer(44 + dataSize);
|
||
const view = new DataView(buffer);
|
||
|
||
// WAV 头部
|
||
const writeString = (offset, str) => {
|
||
for (let i = 0; i < str.length; i++) {
|
||
view.setUint8(offset + i, str.charCodeAt(i));
|
||
}
|
||
};
|
||
|
||
writeString(0, 'RIFF');
|
||
view.setUint32(4, 36 + dataSize, true);
|
||
writeString(8, 'WAVE');
|
||
writeString(12, 'fmt ');
|
||
view.setUint32(16, 16, true);
|
||
view.setUint16(20, format, true);
|
||
view.setUint16(22, numChannels, true);
|
||
view.setUint32(24, sampleRate, true);
|
||
view.setUint32(28, sampleRate * blockAlign, true);
|
||
view.setUint16(32, blockAlign, true);
|
||
view.setUint16(34, bitDepth, true);
|
||
writeString(36, 'data');
|
||
view.setUint32(40, dataSize, true);
|
||
|
||
// 写入音频数据
|
||
const channelData = [];
|
||
for (let i = 0; i < numChannels; i++) {
|
||
channelData.push(audioBuffer.getChannelData(i));
|
||
}
|
||
|
||
let offset = 44;
|
||
for (let i = 0; i < samples; i++) {
|
||
for (let ch = 0; ch < numChannels; ch++) {
|
||
const sample = Math.max(-1, Math.min(1, channelData[ch][i]));
|
||
const intSample = sample < 0 ? sample * 0x8000 : sample * 0x7FFF;
|
||
view.setInt16(offset, intSample, true);
|
||
offset += 2;
|
||
}
|
||
}
|
||
|
||
await audioContext.close();
|
||
return new Blob([buffer], { type: 'audio/wav' });
|
||
} catch (err) {
|
||
console.warn('[可乐] WAV 转换失败,使用原格式:', err);
|
||
return audioBlob;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* STT: 语音转文字
|
||
* @param {Blob} audioBlob - 音频数据
|
||
* @param {Object} options - 选项
|
||
* @returns {Promise<string>} 识别的文字
|
||
*/
|
||
export async function speechToText(audioBlob, options = {}) {
|
||
const config = getVoiceApiConfig();
|
||
|
||
if (!config.stt.url || !config.stt.key) {
|
||
throw new Error('请先配置语音识别 (STT) API');
|
||
}
|
||
|
||
// 自动补全 URL 路径
|
||
let sttUrl = config.stt.url.trim().replace(/\/+$/, '');
|
||
if (!sttUrl.includes('/audio/transcriptions')) {
|
||
sttUrl = sttUrl + '/audio/transcriptions';
|
||
}
|
||
|
||
// 如果不是 WAV 格式,尝试转换以提高兼容性
|
||
let processedBlob = audioBlob;
|
||
if (!audioBlob.type.includes('wav')) {
|
||
console.log('[可乐] 转换音频为 WAV 格式...');
|
||
processedBlob = await convertToWav(audioBlob);
|
||
}
|
||
|
||
// 根据音频类型设置正确的文件名
|
||
const fileName = getAudioFileName(processedBlob);
|
||
|
||
const formData = new FormData();
|
||
formData.append('file', processedBlob, fileName);
|
||
|
||
if (config.stt.model) {
|
||
formData.append('model', config.stt.model);
|
||
}
|
||
|
||
try {
|
||
console.log('[可乐] STT 请求:', {
|
||
url: sttUrl,
|
||
model: config.stt.model,
|
||
originalType: audioBlob.type,
|
||
processedType: processedBlob.type,
|
||
audioSize: processedBlob.size,
|
||
fileName: fileName
|
||
});
|
||
|
||
const response = await fetch(sttUrl, {
|
||
method: 'POST',
|
||
headers: {
|
||
'Authorization': `Bearer ${config.stt.key}`
|
||
},
|
||
body: formData
|
||
});
|
||
|
||
if (!response.ok) {
|
||
const errorText = await response.text();
|
||
console.error('[可乐] STT API 错误:', response.status, errorText);
|
||
// 尝试解析 JSON 错误
|
||
try {
|
||
const errorJson = JSON.parse(errorText);
|
||
const errorMsg = errorJson.error?.message || errorJson.message || errorText;
|
||
throw new Error(errorMsg);
|
||
} catch (parseErr) {
|
||
// 如果不是 JSON 解析错误,而是 throw 的错误,重新抛出
|
||
if (parseErr.message && !parseErr.message.includes('JSON')) {
|
||
throw parseErr;
|
||
}
|
||
throw new Error(`HTTP ${response.status}: ${errorText.substring(0, 200)}`);
|
||
}
|
||
}
|
||
|
||
const result = await response.json();
|
||
console.log('[可乐] STT 响应:', result);
|
||
return result.text || '';
|
||
} catch (err) {
|
||
console.error('[可乐] STT 请求失败:', err);
|
||
throw err;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* TTS: 文字转语音
|
||
* @param {string} text - 要合成的文字
|
||
* @param {Object} contact - 角色对象(用于获取角色独立音色)
|
||
* @param {Object} options - 选项
|
||
* @returns {Promise<Blob>} 音频 Blob
|
||
*/
|
||
export async function textToSpeech(text, contact = null, options = {}) {
|
||
const config = getVoiceApiConfig(contact);
|
||
|
||
if (!config.tts.url || !config.tts.key) {
|
||
throw new Error('请先配置语音合成 (TTS) API');
|
||
}
|
||
|
||
if (!text || !text.trim()) {
|
||
throw new Error('合成文字不能为空');
|
||
}
|
||
|
||
// 自动补全 URL 路径
|
||
let ttsUrl = config.tts.url.trim().replace(/\/+$/, '');
|
||
if (!ttsUrl.includes('/audio/speech')) {
|
||
ttsUrl = ttsUrl + '/audio/speech';
|
||
}
|
||
|
||
// 构建请求体
|
||
const model = (options.model || config.tts.model || '').trim();
|
||
const voice = (options.voice || config.tts.voice || '').trim();
|
||
|
||
// 检查必填字段
|
||
if (!model) {
|
||
throw new Error('请先配置 TTS 模型');
|
||
}
|
||
if (!voice) {
|
||
throw new Error('请先配置 TTS 音色');
|
||
}
|
||
|
||
// 检测是否是 Gemini TTS 模型
|
||
const isGeminiTTS = model.toLowerCase().includes('gemini') && model.toLowerCase().includes('tts');
|
||
// 检测是否是 GSVI 模型 (gsv2p.acgnai.top)
|
||
const isGSVI = model.toLowerCase().includes('gsvi');
|
||
// 检测是否是 MiniMax TTS API
|
||
const isMiniMax = ttsUrl.toLowerCase().includes('minimax') || ttsUrl.includes('/t2a_v2');
|
||
|
||
// MiniMax API 使用完全不同的格式
|
||
if (isMiniMax) {
|
||
// 修正 URL:MiniMax 使用 /v1/t2a_v2 而不是 /audio/speech
|
||
ttsUrl = ttsUrl.replace(/\/audio\/speech$/, '/t2a_v2');
|
||
if (!ttsUrl.includes('/t2a_v2')) {
|
||
ttsUrl = ttsUrl.replace(/\/+$/, '') + '/t2a_v2';
|
||
}
|
||
|
||
// 如果配置了代理 URL,使用代理(解决 CORS 问题)
|
||
if (config.tts.proxyUrl) {
|
||
const proxyBase = config.tts.proxyUrl.trim().replace(/\/+$/, '');
|
||
// 提取 MiniMax URL 的路径部分
|
||
const urlObj = new URL(ttsUrl);
|
||
ttsUrl = proxyBase + urlObj.pathname;
|
||
console.log('[可乐] MiniMax 使用代理:', ttsUrl);
|
||
}
|
||
}
|
||
|
||
// 构建请求体
|
||
let requestBody;
|
||
|
||
if (isMiniMax) {
|
||
// MiniMax API 格式
|
||
const speed = options.speed || config.tts.speed || 1;
|
||
const emotion = options.emotion || config.tts.emotion;
|
||
|
||
requestBody = {
|
||
model: model,
|
||
text: text.trim(),
|
||
stream: false,
|
||
voice_setting: {
|
||
voice_id: voice,
|
||
speed: speed,
|
||
vol: 1,
|
||
pitch: 0
|
||
},
|
||
audio_setting: {
|
||
sample_rate: 32000,
|
||
bitrate: 128000,
|
||
format: 'mp3',
|
||
channel: 1
|
||
}
|
||
};
|
||
|
||
// 添加情绪参数(只有有效值才添加)
|
||
if (emotion && emotion !== '默认') {
|
||
const emotionMap = {
|
||
'高兴': 'happy',
|
||
'悲伤': 'sad',
|
||
'愤怒': 'angry',
|
||
'害怕': 'fearful',
|
||
'厌恶': 'disgusted',
|
||
'惊讶': 'surprised',
|
||
'中性': 'calm',
|
||
'生动': 'fluent',
|
||
'低语': 'whisper'
|
||
};
|
||
// 只有在 emotionMap 中有对应值时才添加
|
||
const mappedEmotion = emotionMap[emotion];
|
||
if (mappedEmotion) {
|
||
requestBody.voice_setting.emotion = mappedEmotion;
|
||
}
|
||
}
|
||
} else {
|
||
requestBody = {
|
||
model: model,
|
||
voice: voice
|
||
};
|
||
|
||
// GSVI 模型只需要基本参数
|
||
if (isGSVI) {
|
||
requestBody.input = text.trim();
|
||
// GSVI API 不需要 language 和 emotion 参数
|
||
} else {
|
||
// OpenAI 标准格式使用 input
|
||
requestBody.input = text.trim();
|
||
|
||
// 非 Gemini TTS 时才添加额外参数
|
||
if (!isGeminiTTS) {
|
||
// 只有非默认语速才添加 speed 参数
|
||
const speed = options.speed || config.tts.speed || 1;
|
||
if (speed !== 1) {
|
||
requestBody.speed = speed;
|
||
}
|
||
|
||
// 扩展参数 (GPT-SoVITS 等支持)
|
||
const emotion = options.emotion || config.tts.emotion;
|
||
if (emotion && emotion !== '默认') {
|
||
requestBody.other_params = {
|
||
text_lang: '中英混合',
|
||
prompt_lang: '中文',
|
||
emotion: emotion
|
||
};
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
try {
|
||
const textContent = requestBody.input || requestBody.text || '';
|
||
console.log('[可乐] TTS 请求:', {
|
||
url: ttsUrl,
|
||
model: model,
|
||
voice: voice,
|
||
isGSVI: isGSVI,
|
||
isGeminiTTS: isGeminiTTS,
|
||
isMiniMax: isMiniMax,
|
||
textLength: textContent.length,
|
||
textFull: textContent // 打印完整文本
|
||
});
|
||
|
||
const response = await fetch(ttsUrl, {
|
||
method: 'POST',
|
||
headers: {
|
||
'Content-Type': 'application/json',
|
||
'Accept': isMiniMax ? 'application/json' : 'audio/mpeg, audio/wav, audio/*',
|
||
'Authorization': `Bearer ${config.tts.key}`
|
||
},
|
||
body: JSON.stringify(requestBody)
|
||
});
|
||
|
||
if (!response.ok) {
|
||
const errorText = await response.text();
|
||
console.error('[可乐] TTS API 错误:');
|
||
console.error(' 状态码:', response.status);
|
||
console.error(' 响应内容:', errorText);
|
||
console.error(' 请求URL:', ttsUrl);
|
||
console.error(' 请求体:', JSON.stringify(requestBody, null, 2));
|
||
|
||
// 尝试解析 JSON 错误
|
||
try {
|
||
const errorJson = JSON.parse(errorText);
|
||
// MiniMax 错误格式: base_resp.status_msg
|
||
const errorMsg = errorJson.base_resp?.status_msg || errorJson.error?.message || errorJson.message || errorJson.error || errorText;
|
||
throw new Error(typeof errorMsg === 'string' ? errorMsg : JSON.stringify(errorMsg));
|
||
} catch (parseErr) {
|
||
if (parseErr.message && !parseErr.message.includes('JSON')) {
|
||
throw parseErr;
|
||
}
|
||
throw new Error(`HTTP ${response.status}: ${errorText.substring(0, 300)}`);
|
||
}
|
||
}
|
||
|
||
// MiniMax API 返回 JSON,需要特殊处理
|
||
if (isMiniMax) {
|
||
const jsonResp = await response.json();
|
||
console.log('[可乐] MiniMax TTS 响应:', {
|
||
status_code: jsonResp.base_resp?.status_code,
|
||
status_msg: jsonResp.base_resp?.status_msg,
|
||
audio_length: jsonResp.extra_info?.audio_length,
|
||
audio_format: jsonResp.extra_info?.audio_format
|
||
});
|
||
|
||
// 检查 MiniMax 错误
|
||
if (jsonResp.base_resp?.status_code !== 0) {
|
||
throw new Error('MiniMax TTS 错误: ' + (jsonResp.base_resp?.status_msg || '未知错误'));
|
||
}
|
||
|
||
if (!jsonResp.data?.audio) {
|
||
throw new Error('MiniMax TTS 未返回音频数据');
|
||
}
|
||
|
||
// 将 hex 编码的音频转换为 Blob
|
||
const hexAudio = jsonResp.data.audio;
|
||
const bytes = new Uint8Array(hexAudio.length / 2);
|
||
for (let i = 0; i < hexAudio.length; i += 2) {
|
||
bytes[i / 2] = parseInt(hexAudio.substr(i, 2), 16);
|
||
}
|
||
|
||
const audioFormat = jsonResp.extra_info?.audio_format || 'mp3';
|
||
const mimeType = `audio/${audioFormat}`;
|
||
return new Blob([bytes], { type: mimeType });
|
||
}
|
||
|
||
const audioBlob = await response.blob();
|
||
console.log('[可乐] TTS 响应:', {
|
||
音频大小: audioBlob.size,
|
||
类型: audioBlob.type,
|
||
响应头ContentType: response.headers.get('content-type')
|
||
});
|
||
|
||
// 先检查是否返回了错误的 JSON(有些 API 错误时返回 JSON)
|
||
const contentType = response.headers.get('content-type') || audioBlob.type;
|
||
if (contentType.includes('application/json') || contentType.includes('text/')) {
|
||
const text = await audioBlob.text();
|
||
console.error('[可乐] TTS 返回了文本而非音频:', text);
|
||
try {
|
||
const errJson = JSON.parse(text);
|
||
const errMsg = errJson.error?.message || errJson.message || errJson.error || JSON.stringify(errJson);
|
||
throw new Error('TTS 错误: ' + errMsg);
|
||
} catch (e) {
|
||
if (e.message.includes('TTS')) throw e;
|
||
throw new Error('TTS 返回了非音频数据: ' + text.substring(0, 100));
|
||
}
|
||
}
|
||
|
||
// 检查是否返回了有效的音频数据
|
||
if (audioBlob.size < 100) {
|
||
console.error('[可乐] TTS 返回的数据太小,可能不是有效音频');
|
||
throw new Error('TTS 返回的音频数据无效');
|
||
}
|
||
|
||
// 修复:如果 blob 类型为空或不是音频类型,手动指定 MIME 类型
|
||
// 某些 TTS API(如 GPT-SoVITS)返回的音频没有正确的 Content-Type
|
||
let finalBlob = audioBlob;
|
||
if (!audioBlob.type || audioBlob.type === '' || !audioBlob.type.startsWith('audio/')) {
|
||
// 尝试从 Content-Type 头获取类型,或使用默认的 audio/wav
|
||
let mimeType = 'audio/wav';
|
||
const headerType = response.headers.get('content-type');
|
||
if (headerType && headerType.startsWith('audio/')) {
|
||
mimeType = headerType.split(';')[0].trim();
|
||
} else if (headerType && headerType.includes('octet-stream')) {
|
||
// application/octet-stream 通常是 wav 格式
|
||
mimeType = 'audio/wav';
|
||
}
|
||
|
||
console.log('[可乐] TTS blob 类型为空,手动指定为:', mimeType);
|
||
const arrayBuffer = await audioBlob.arrayBuffer();
|
||
finalBlob = new Blob([arrayBuffer], { type: mimeType });
|
||
}
|
||
|
||
return finalBlob;
|
||
} catch (err) {
|
||
console.error('[可乐] TTS 请求失败:', err);
|
||
// 检查是否是网络错误
|
||
if (err.message?.includes('Failed to fetch') || err.message?.includes('NetworkError')) {
|
||
throw new Error('网络连接失败,请检查 API 地址是否正确,或尝试使用代理');
|
||
}
|
||
throw err;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 播放音频
|
||
* @param {Blob|string} audio - 音频 Blob 或 URL
|
||
* @returns {Promise<HTMLAudioElement>} Audio 元素
|
||
*/
|
||
export function playAudio(audio) {
|
||
return new Promise((resolve, reject) => {
|
||
const audioEl = new Audio();
|
||
|
||
if (audio instanceof Blob) {
|
||
audioEl.src = URL.createObjectURL(audio);
|
||
} else {
|
||
audioEl.src = audio;
|
||
}
|
||
|
||
audioEl.onended = () => {
|
||
if (audio instanceof Blob) {
|
||
URL.revokeObjectURL(audioEl.src);
|
||
}
|
||
resolve(audioEl);
|
||
};
|
||
|
||
audioEl.onerror = (err) => {
|
||
if (audio instanceof Blob) {
|
||
URL.revokeObjectURL(audioEl.src);
|
||
}
|
||
reject(err);
|
||
};
|
||
|
||
audioEl.play().catch(reject);
|
||
});
|
||
}
|
||
|
||
/**
|
||
* 录音类
|
||
*/
|
||
export class AudioRecorder {
|
||
constructor() {
|
||
this.mediaRecorder = null;
|
||
this.audioChunks = [];
|
||
this.stream = null;
|
||
this.isRecording = false;
|
||
this.mimeType = 'audio/webm';
|
||
}
|
||
|
||
/**
|
||
* 开始录音
|
||
* @returns {Promise<void>}
|
||
*/
|
||
async start() {
|
||
if (this.isRecording) return;
|
||
|
||
try {
|
||
this.stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
||
|
||
// 选择最佳支持的音频格式
|
||
this.mimeType = getSupportedMimeType();
|
||
console.log('[可乐] 录音使用格式:', this.mimeType);
|
||
|
||
this.mediaRecorder = new MediaRecorder(this.stream, {
|
||
mimeType: this.mimeType
|
||
});
|
||
this.audioChunks = [];
|
||
|
||
this.mediaRecorder.ondataavailable = (e) => {
|
||
if (e.data.size > 0) {
|
||
this.audioChunks.push(e.data);
|
||
}
|
||
};
|
||
|
||
this.mediaRecorder.start(100); // 每100ms收集一次数据
|
||
this.isRecording = true;
|
||
console.log('[可乐] 开始录音');
|
||
} catch (err) {
|
||
console.error('[可乐] 无法获取麦克风权限:', err);
|
||
throw new Error('无法获取麦克风权限,请检查浏览器设置');
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 停止录音
|
||
* @returns {Promise<Blob>} 录音数据
|
||
*/
|
||
stop() {
|
||
return new Promise((resolve, reject) => {
|
||
if (!this.isRecording || !this.mediaRecorder) {
|
||
reject(new Error('没有正在进行的录音'));
|
||
return;
|
||
}
|
||
|
||
const mimeType = this.mimeType;
|
||
|
||
this.mediaRecorder.onstop = () => {
|
||
const audioBlob = new Blob(this.audioChunks, { type: mimeType });
|
||
this.cleanup();
|
||
console.log('[可乐] 录音结束,格式:', mimeType, '大小:', audioBlob.size);
|
||
resolve(audioBlob);
|
||
};
|
||
|
||
this.mediaRecorder.stop();
|
||
this.isRecording = false;
|
||
});
|
||
}
|
||
|
||
/**
|
||
* 取消录音
|
||
*/
|
||
cancel() {
|
||
if (this.mediaRecorder && this.isRecording) {
|
||
this.mediaRecorder.stop();
|
||
}
|
||
this.cleanup();
|
||
this.isRecording = false;
|
||
}
|
||
|
||
/**
|
||
* 清理资源
|
||
*/
|
||
cleanup() {
|
||
if (this.stream) {
|
||
this.stream.getTracks().forEach(track => track.stop());
|
||
this.stream = null;
|
||
}
|
||
this.mediaRecorder = null;
|
||
this.audioChunks = [];
|
||
}
|
||
|
||
/**
|
||
* 检查浏览器是否支持录音
|
||
* @returns {boolean}
|
||
*/
|
||
static isSupported() {
|
||
const hasGetUserMedia = !!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia);
|
||
const hasMediaRecorder = typeof MediaRecorder !== 'undefined';
|
||
const isSecureContext = window.isSecureContext;
|
||
|
||
console.log('[可乐] 录音支持检测:', {
|
||
getUserMedia: hasGetUserMedia,
|
||
MediaRecorder: hasMediaRecorder,
|
||
isSecureContext: isSecureContext,
|
||
protocol: location.protocol
|
||
});
|
||
|
||
return hasGetUserMedia && hasMediaRecorder;
|
||
}
|
||
|
||
/**
|
||
* 获取不支持录音的原因
|
||
* @returns {string}
|
||
*/
|
||
static getUnsupportedReason() {
|
||
if (!window.isSecureContext) {
|
||
return '需要 HTTPS 安全连接才能使用录音功能';
|
||
}
|
||
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
|
||
return '浏览器不支持 getUserMedia API';
|
||
}
|
||
if (typeof MediaRecorder === 'undefined') {
|
||
return '浏览器不支持 MediaRecorder API(iOS Safari 需要 iOS 14.3+)';
|
||
}
|
||
return '未知原因';
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 获取 MediaRecorder 支持的音频格式
|
||
*/
|
||
function getSupportedMimeType() {
|
||
const types = [
|
||
'audio/webm;codecs=opus',
|
||
'audio/webm',
|
||
'audio/ogg;codecs=opus',
|
||
'audio/ogg',
|
||
'audio/mp4',
|
||
'audio/mpeg'
|
||
];
|
||
|
||
for (const type of types) {
|
||
if (MediaRecorder.isTypeSupported(type)) {
|
||
return type;
|
||
}
|
||
}
|
||
return 'audio/webm';
|
||
}
|
||
|
||
/**
|
||
* 测试 STT API
|
||
* @returns {Promise<boolean>}
|
||
*/
|
||
export async function testSttApi() {
|
||
const config = getVoiceApiConfig();
|
||
|
||
if (!config.stt.url || !config.stt.key) {
|
||
throw new Error('请先填写 STT API 地址和密钥');
|
||
}
|
||
|
||
console.log('[可乐] 开始 STT 测试...');
|
||
console.log('[可乐] STT 配置:', {
|
||
url: config.stt.url,
|
||
model: config.stt.model,
|
||
keyLength: config.stt.key?.length || 0
|
||
});
|
||
|
||
// 创建测试音频 (1.5秒,包含一些变化的音调模拟语音)
|
||
const audioContext = new (window.AudioContext || window.webkitAudioContext)();
|
||
const oscillator = audioContext.createOscillator();
|
||
const gainNode = audioContext.createGain();
|
||
const destination = audioContext.createMediaStreamDestination();
|
||
|
||
oscillator.connect(gainNode);
|
||
gainNode.connect(destination);
|
||
|
||
// 模拟语音的频率变化
|
||
oscillator.frequency.setValueAtTime(200, audioContext.currentTime);
|
||
oscillator.frequency.linearRampToValueAtTime(400, audioContext.currentTime + 0.5);
|
||
oscillator.frequency.linearRampToValueAtTime(300, audioContext.currentTime + 1);
|
||
oscillator.frequency.linearRampToValueAtTime(350, audioContext.currentTime + 1.5);
|
||
|
||
// 音量包络
|
||
gainNode.gain.setValueAtTime(0.3, audioContext.currentTime);
|
||
gainNode.gain.linearRampToValueAtTime(0.5, audioContext.currentTime + 0.3);
|
||
gainNode.gain.linearRampToValueAtTime(0.3, audioContext.currentTime + 1.2);
|
||
gainNode.gain.linearRampToValueAtTime(0, audioContext.currentTime + 1.5);
|
||
|
||
oscillator.start();
|
||
|
||
const mimeType = getSupportedMimeType();
|
||
console.log('[可乐] 录制音频格式:', mimeType);
|
||
|
||
const recorder = new MediaRecorder(destination.stream, { mimeType });
|
||
const chunks = [];
|
||
|
||
return new Promise((resolve, reject) => {
|
||
recorder.ondataavailable = e => {
|
||
if (e.data.size > 0) {
|
||
chunks.push(e.data);
|
||
}
|
||
};
|
||
|
||
recorder.onstop = async () => {
|
||
oscillator.stop();
|
||
audioContext.close();
|
||
|
||
const blob = new Blob(chunks, { type: mimeType });
|
||
console.log('[可乐] 测试音频大小:', blob.size, 'bytes');
|
||
|
||
if (blob.size < 100) {
|
||
reject(new Error('测试音频生成失败'));
|
||
return;
|
||
}
|
||
|
||
try {
|
||
// speechToText 会自动转换为 WAV 格式
|
||
const result = await speechToText(blob);
|
||
console.log('[可乐] STT 测试结果:', result);
|
||
resolve(true);
|
||
} catch (err) {
|
||
reject(err);
|
||
}
|
||
};
|
||
|
||
recorder.start(100);
|
||
// 录制 1.5 秒
|
||
setTimeout(() => recorder.stop(), 1500);
|
||
});
|
||
}
|
||
|
||
/**
|
||
* 测试 TTS API
|
||
* @returns {Promise<Blob>}
|
||
*/
|
||
export async function testTtsApi() {
|
||
const config = getVoiceApiConfig();
|
||
|
||
if (!config.tts.url || !config.tts.key) {
|
||
throw new Error('请先填写 TTS API 地址和密钥');
|
||
}
|
||
|
||
return await textToSpeech('测试语音合成');
|
||
}
|