migrate
This commit is contained in:
1077
public/scripts/extensions/tts/alltalk.js
Normal file
1077
public/scripts/extensions/tts/alltalk.js
Normal file
File diff suppressed because it is too large
Load Diff
220
public/scripts/extensions/tts/azure.js
Normal file
220
public/scripts/extensions/tts/azure.js
Normal file
@@ -0,0 +1,220 @@
|
||||
import { getRequestHeaders } from '../../../script.js';
|
||||
import { POPUP_RESULT, POPUP_TYPE, callGenericPopup } from '../../popup.js';
|
||||
import { SECRET_KEYS, findSecret, secret_state, writeSecret } from '../../secrets.js';
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
export { AzureTtsProvider };
|
||||
|
||||
/**
 * TTS provider for Azure speech synthesis.
 *
 * Voice listing and audio generation go through the app's own
 * `/api/azure/list` and `/api/azure/generate` endpoints; the request bodies
 * built here carry only the region, text and voice. The API key is read and
 * written exclusively through the secrets helpers.
 */
class AzureTtsProvider {
    //########//
    // Config //
    //########//

    /** Active settings object; assigned by loadSettings(). */
    settings;
    /** Voice objects cached by getVoice(); cleared when the region changes or no key is stored. */
    voices = [];
    separator = ' . ';
    /** Audio element reused for every voice preview. */
    audioElement = document.createElement('audio');

    defaultSettings = {
        region: '',
        voiceMap: {},
    };

    /** Markup for the provider's settings panel (API key button and region input). */
    get settingsHtml() {
        let html = `
        <div class="azure_tts_settings">
            <div class="flex-container alignItemsBaseline">
                <h4 for="azure_tts_key" class="flex1 margin0">
                    <a href="https://portal.azure.com/" target="_blank">Azure TTS Key</a>
                </h4>
                <div id="azure_tts_key" class="menu_button menu_button_icon">
                    <i class="fa-solid fa-key"></i>
                    <span>Click to set</span>
                </div>
            </div>
            <label for="azure_tts_region">Region:</label>
            <input id="azure_tts_region" type="text" class="text_pole" placeholder="e.g. westus" />
            <hr>
        </div>
        `;
        return html;
    }

    /** Stores the region typed into the settings panel, drops the cached voices and saves. */
    onSettingsChange() {
        // Update dynamically
        this.settings.region = String($('#azure_tts_region').val());
        // Reset voices: the cached list belongs to the previous region
        this.voices = [];
        saveTtsProviderSettings();
    }

    /**
     * Applies saved settings and wires up the settings panel.
     * @param {object} settings Saved provider settings; every key must exist in defaultSettings.
     * @throws {string} If `settings` contains a key that defaultSettings does not define.
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Only accept keys defined in defaultSettings
        this.settings = this.defaultSettings;

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        $('#azure_tts_region').val(this.settings.region).on('input', () => this.onSettingsChange());
        // Coerce to a real boolean: with an undefined state jQuery would *toggle* the
        // class instead of setting it, so a missing key could end up highlighted.
        $('#azure_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.AZURE_TTS]);
        $('#azure_tts_key').on('click', async () => {
            const popupText = 'Azure TTS API Key';
            const savedKey = secret_state[SECRET_KEYS.AZURE_TTS] ? await findSecret(SECRET_KEYS.AZURE_TTS) : '';

            const key = await callGenericPopup(popupText, POPUP_TYPE.INPUT, savedKey, {
                customButtons: [{
                    text: 'Remove Key',
                    appendAtEnd: true,
                    result: POPUP_RESULT.NEGATIVE,
                    action: async () => {
                        await writeSecret(SECRET_KEYS.AZURE_TTS, '');
                        $('#azure_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.AZURE_TTS]);
                        toastr.success('API Key removed');
                        await this.onRefreshClick();
                    },
                }],
            });

            // Nothing entered (or the popup was dismissed): leave the stored key alone
            if (!key) {
                return;
            }

            await writeSecret(SECRET_KEYS.AZURE_TTS, String(key));

            toastr.success('API Key saved');
            $('#azure_tts_key').addClass('success');
            await this.onRefreshClick();
        });

        try {
            await this.checkReady();
            console.debug('Azure: Settings loaded');
        } catch {
            // Not being ready (e.g. the voice list request failed) must not block loading
            console.debug('Azure: Settings loaded, but not ready');
        }
    }

    /**
     * Perform a simple readiness check by trying to fetch voiceIds.
     * The fetched list is discarded; without a stored key the voice cache is cleared instead.
     */
    async checkReady() {
        if (secret_state[SECRET_KEYS.AZURE_TTS]) {
            await this.fetchTtsVoiceObjects();
        } else {
            this.voices = [];
        }
    }

    /** Re-runs the readiness check. */
    async onRefreshClick() {
        await this.checkReady();
    }

    //#################//
    // TTS Interfaces //
    //#################//

    /**
     * Looks up a voice object by name, fetching the voice list first if the cache is empty.
     * @param {string} voiceName Voice name to look for
     * @returns {Promise<object>} The matching voice object
     * @throws {string} If no voice has that name
     */
    async getVoice(voiceName) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(voice => voice.name == voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generates speech for the given text.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} The fetch response returned by fetchTtsGeneration()
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Requests the voice list for the configured region.
     * @returns {Promise<object[]>} Voice objects sorted by locale, then short name;
     * an empty array when the key or the region is missing.
     * @throws {Error} If the list request returns a non-OK status
     */
    async fetchTtsVoiceObjects() {
        if (!secret_state[SECRET_KEYS.AZURE_TTS]) {
            console.warn('Azure TTS API Key not set');
            return [];
        }

        if (!this.settings.region) {
            console.warn('Azure TTS region not set');
            return [];
        }

        const response = await fetch('/api/azure/list', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                region: this.settings.region,
            }),
        });

        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        let responseJson = await response.json();
        responseJson = responseJson
            .sort((a, b) => a.Locale.localeCompare(b.Locale) || a.ShortName.localeCompare(b.ShortName))
            .map(x => ({ name: x.ShortName, voice_id: x.ShortName, preview_url: false, lang: x.Locale }));
        return responseJson;
    }

    /**
     * Preview TTS for a given voice ID.
     * @param {string} id Voice ID
     */
    async previewTtsVoice(id) {
        // Stop and rewind whatever preview is currently playing
        this.audioElement.pause();
        this.audioElement.currentTime = 0;
        const voice = await this.getVoice(id);
        const text = getPreviewString(voice.lang);
        const response = await this.fetchTtsGeneration(text, id);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        this.audioElement.src = url;
        this.audioElement.play();
        // Release the blob URL once playback has finished
        this.audioElement.onended = () => URL.revokeObjectURL(url);
    }

    /**
     * Requests synthesized audio from the server.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} The successful fetch response
     * @throws {Error} If the key or region is missing, or the request returns a non-OK status
     */
    async fetchTtsGeneration(text, voiceId) {
        if (!secret_state[SECRET_KEYS.AZURE_TTS]) {
            throw new Error('Azure TTS API Key not set');
        }

        if (!this.settings.region) {
            throw new Error('Azure TTS region not set');
        }

        const response = await fetch('/api/azure/generate', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                text: text,
                voice: voiceId,
                region: this.settings.region,
            }),
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }
}
|
771
public/scripts/extensions/tts/coqui.js
Normal file
771
public/scripts/extensions/tts/coqui.js
Normal file
@@ -0,0 +1,771 @@
|
||||
/*
|
||||
TODO:
|
||||
- Hide the voice map; it's just confusing
|
||||
- Delete useless call
|
||||
*/
|
||||
|
||||
import { doExtrasFetch, extension_settings, getApiUrl, modules } from '../../extensions.js';
|
||||
import { initVoiceMap } from './index.js';
|
||||
import { POPUP_TYPE, callGenericPopup } from '../../popup.js';
|
||||
|
||||
export { CoquiTtsProvider };
|
||||
|
||||
// Prefix used for this module's log lines, toast titles and error messages.
const DEBUG_PREFIX = '<Coqui TTS module> ';

// Checked by onAddClick() to ignore clicks; nothing visible in this part of the file sets it.
let inApiCall = false;

// Model catalogues, each initialized only once.
let coquiApiModels = {};
let coquiApiModelsFull = {};
let coquiLocalModels = [];
let coquiLocalModelsReceived = false;
|
||||
/*
|
||||
coquiApiModels format [language][dataset][name]:coqui-api-model-id, example:
|
||||
{
|
||||
"en": {
|
||||
"vctk": {
|
||||
"vits": "tts_models/en/vctk/vits"
|
||||
}
|
||||
},
|
||||
"ja": {
|
||||
"kokoro": {
|
||||
"tacotron2-DDC": "tts_models/ja/kokoro/tacotron2-DDC"
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
// Display names for model language codes. Codes without an entry are shown
// verbatim in the language dropdown.
const languageLabels = {
    multilingual: 'Multilingual',
    en: 'English',
    fr: 'French',
    es: 'Spanish',
    ja: 'Japanese',
};
|
||||
|
||||
/**
 * Guard for every operation that needs the Coqui module of the Extras API.
 * @throws {Error} If `coqui-tts` is not among the loaded Extras modules.
 */
function throwIfModuleMissing() {
    if (!modules.includes('coqui-tts')) {
        const message = 'Coqui TTS module not loaded. Add coqui-tts to enable-modules and restart the Extras API.';
        // Error() takes (message, options); passing the text as a second argument
        // silently dropped it, so the prefix and the text are joined into one message.
        throw new Error(DEBUG_PREFIX + message);
    }
}
|
||||
|
||||
/** Puts the per-model language and speaker dropdowns back on their "none" entry. */
function resetModelSettings() {
    const dropdowns = [
        '#coqui_api_model_settings_language',
        '#coqui_api_model_settings_speaker',
    ];
    for (const selector of dropdowns) {
        $(selector).val('none');
    }
}
|
||||
|
||||
class CoquiTtsProvider {
|
||||
//#############################//
|
||||
// Extension UI and Settings //
|
||||
//#############################//
|
||||
|
||||
settings;
|
||||
|
||||
defaultSettings = {
|
||||
voiceMap: {},
|
||||
customVoices: {},
|
||||
voiceIds: [],
|
||||
voiceMapDict: {},
|
||||
};
|
||||
|
||||
get settingsHtml() {
|
||||
let html = `
|
||||
<div class="flex wide100p flexGap10 alignitemscenter">
|
||||
<div>
|
||||
<div style="flex: 50%;">
|
||||
<small>To use CoquiTTS, select the origin, language, and model, then click Add Voice. The voice will then be available to add to a character. Voices are saved globally. </small><br>
|
||||
<label for="coqui_voicename_select">Select Saved Voice:</label>
|
||||
<select id="coqui_voicename_select">
|
||||
<!-- Populated by JS -->
|
||||
</select>
|
||||
<div class="tts_block">
|
||||
<input id="coqui_remove_voiceId_mapping" class="menu_button" type="button" value="Remove Voice" />
|
||||
<input id="coqui_add_voiceId_mapping" class="menu_button" type="button" value="Add Voice" />
|
||||
</div>
|
||||
<label for="coqui_model_origin">Models:</label>
|
||||
<select id="coqui_model_origin">gpu_mode
|
||||
<option value="none">Select Origin</option>
|
||||
<option value="coqui-api">Coqui API (Tested)</option>
|
||||
<option value="coqui-api-full">Coqui API (Experimental)</option>
|
||||
<option value="local">My Models</option>
|
||||
</select>
|
||||
|
||||
<div id="coqui_api_model_div">
|
||||
<select id="coqui_api_language">
|
||||
<!-- Populated by JS and request -->
|
||||
</select>
|
||||
|
||||
<select id="coqui_api_model_name">
|
||||
<!-- Populated by JS and request -->
|
||||
</select>
|
||||
|
||||
<div id="coqui_api_model_settings">
|
||||
<select id="coqui_api_model_settings_language">
|
||||
<!-- Populated by JS and request -->
|
||||
</select>
|
||||
<select id="coqui_api_model_settings_speaker">
|
||||
<!-- Populated by JS and request -->
|
||||
</select>
|
||||
</div>
|
||||
<span id="coqui_api_model_install_status">Model installed on extras server</span>
|
||||
<input id="coqui_api_model_install_button" class="menu_button" type="button" value="Install" />
|
||||
</div>
|
||||
|
||||
<div id="coqui_local_model_div">
|
||||
<select id="coqui_local_model_name">
|
||||
<!-- Populated by JS and request -->
|
||||
</select>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
`;
|
||||
return html;
|
||||
}
|
||||
|
||||
async loadSettings(settings) {
|
||||
// Only accept keys defined in defaultSettings
|
||||
this.settings = this.defaultSettings;
|
||||
|
||||
for (const key in settings) {
|
||||
if (key in this.settings) {
|
||||
this.settings[key] = settings[key];
|
||||
} else {
|
||||
throw DEBUG_PREFIX + `Invalid setting passed to extension: ${key}`;
|
||||
}
|
||||
}
|
||||
|
||||
await initLocalModels();
|
||||
this.updateCustomVoices(); // Overide any manual modification
|
||||
|
||||
$('#coqui_api_model_div').hide();
|
||||
$('#coqui_local_model_div').hide();
|
||||
|
||||
$('#coqui_api_language').show();
|
||||
$('#coqui_api_model_name').hide();
|
||||
$('#coqui_api_model_settings').hide();
|
||||
$('#coqui_api_model_install_status').hide();
|
||||
$('#coqui_api_model_install_button').hide();
|
||||
|
||||
let that = this;
|
||||
$('#coqui_model_origin').on('change', function () { that.onModelOriginChange(); });
|
||||
$('#coqui_api_language').on('change', function () { that.onModelLanguageChange(); });
|
||||
$('#coqui_api_model_name').on('change', function () { that.onModelNameChange(); });
|
||||
|
||||
$('#coqui_remove_voiceId_mapping').on('click', function () { that.onRemoveClick(); });
|
||||
$('#coqui_add_voiceId_mapping').on('click', function () { that.onAddClick(); });
|
||||
|
||||
// Load coqui-api settings from json file
|
||||
await fetch('/scripts/extensions/tts/coqui_api_models_settings.json')
|
||||
.then(response => response.json())
|
||||
.then(json => {
|
||||
coquiApiModels = json;
|
||||
console.debug(DEBUG_PREFIX,'initialized coqui-api model list to', coquiApiModels);
|
||||
/*
|
||||
$('#coqui_api_language')
|
||||
.find('option')
|
||||
.remove()
|
||||
.end()
|
||||
.append('<option value="none">Select model language</option>')
|
||||
.val('none');
|
||||
|
||||
for(let language in coquiApiModels) {
|
||||
$("#coqui_api_language").append(new Option(languageLabels[language],language));
|
||||
console.log(DEBUG_PREFIX,"added language",language);
|
||||
}*/
|
||||
});
|
||||
|
||||
// Load coqui-api FULL settings from json file
|
||||
await fetch('/scripts/extensions/tts/coqui_api_models_settings_full.json')
|
||||
.then(response => response.json())
|
||||
.then(json => {
|
||||
coquiApiModelsFull = json;
|
||||
console.debug(DEBUG_PREFIX,'initialized coqui-api full model list to', coquiApiModelsFull);
|
||||
/*
|
||||
$('#coqui_api_full_language')
|
||||
.find('option')
|
||||
.remove()
|
||||
.end()
|
||||
.append('<option value="none">Select model language</option>')
|
||||
.val('none');
|
||||
|
||||
for(let language in coquiApiModelsFull) {
|
||||
$("#coqui_api_full_language").append(new Option(languageLabels[language],language));
|
||||
console.log(DEBUG_PREFIX,"added language",language);
|
||||
}*/
|
||||
});
|
||||
}
|
||||
|
||||
// Perform a simple readiness check by trying to fetch voiceIds
|
||||
async checkReady(){
|
||||
throwIfModuleMissing();
|
||||
await this.fetchTtsVoiceObjects();
|
||||
}
|
||||
|
||||
updateCustomVoices() {
|
||||
// Takes voiceMapDict and converts it to a string to save to voiceMap
|
||||
this.settings.customVoices = {};
|
||||
for (let voiceName in this.settings.voiceMapDict) {
|
||||
const voiceId = this.settings.voiceMapDict[voiceName];
|
||||
this.settings.customVoices[voiceName] = voiceId['model_id'];
|
||||
|
||||
if (voiceId['model_language'] != null)
|
||||
this.settings.customVoices[voiceName] += '[' + voiceId['model_language'] + ']';
|
||||
|
||||
if (voiceId['model_speaker'] != null)
|
||||
this.settings.customVoices[voiceName] += '[' + voiceId['model_speaker'] + ']';
|
||||
}
|
||||
|
||||
// Update UI select list with voices
|
||||
$('#coqui_voicename_select').empty();
|
||||
$('#coqui_voicename_select')
|
||||
.find('option')
|
||||
.remove()
|
||||
.end()
|
||||
.append('<option value="none">Select Voice</option>')
|
||||
.val('none');
|
||||
for (const voiceName in this.settings.voiceMapDict) {
|
||||
$('#coqui_voicename_select').append(new Option(voiceName, voiceName));
|
||||
}
|
||||
|
||||
this.onSettingsChange();
|
||||
}
|
||||
|
||||
onSettingsChange() {
|
||||
console.debug(DEBUG_PREFIX, 'Settings changes', this.settings);
|
||||
extension_settings.tts.Coqui = this.settings;
|
||||
}
|
||||
|
||||
async onRefreshClick() {
|
||||
this.checkReady();
|
||||
}
|
||||
|
||||
async onAddClick() {
|
||||
if (inApiCall) {
|
||||
return; //TODO: block dropdown
|
||||
}
|
||||
|
||||
// Ask user for voiceId name to save voice
|
||||
const voiceName = await callGenericPopup('Name of Coqui voice to add to voice select dropdown:', POPUP_TYPE.INPUT);
|
||||
|
||||
const model_origin = $('#coqui_model_origin').val();
|
||||
const model_language = $('#coqui_api_language').val();
|
||||
const model_name = $('#coqui_api_model_name').val();
|
||||
let model_setting_language = $('#coqui_api_model_settings_language').val();
|
||||
let model_setting_speaker = $('#coqui_api_model_settings_speaker').val();
|
||||
|
||||
|
||||
if (!voiceName) {
|
||||
toastr.error('Voice name empty, please enter one.', DEBUG_PREFIX + ' voice mapping voice name', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
this.updateCustomVoices(); // Overide any manual modification
|
||||
return;
|
||||
}
|
||||
|
||||
if (model_origin == 'none') {
|
||||
toastr.error('Origin not selected, please select one.', DEBUG_PREFIX + ' voice mapping origin', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
this.updateCustomVoices(); // Overide any manual modification
|
||||
return;
|
||||
}
|
||||
|
||||
if (model_origin == 'local') {
|
||||
const model_id = $('#coqui_local_model_name').val();
|
||||
|
||||
if (model_name == 'none') {
|
||||
toastr.error('Model not selected, please select one.', DEBUG_PREFIX + ' voice mapping model', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
this.updateCustomVoices(); // Overide any manual modification
|
||||
return;
|
||||
}
|
||||
|
||||
this.settings.voiceMapDict[voiceName] = { model_type: 'local', model_id: 'local/' + model_id };
|
||||
console.debug(DEBUG_PREFIX, 'Registered new voice map: ', voiceName, ':', this.settings.voiceMapDict[voiceName]);
|
||||
this.updateCustomVoices(); // Overide any manual modification
|
||||
return;
|
||||
}
|
||||
|
||||
if (model_language == 'none') {
|
||||
toastr.error('Language not selected, please select one.', DEBUG_PREFIX + ' voice mapping language', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
this.updateCustomVoices(); // Overide any manual modification
|
||||
return;
|
||||
}
|
||||
|
||||
if (model_name == 'none') {
|
||||
toastr.error('Model not selected, please select one.', DEBUG_PREFIX + ' voice mapping model', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
this.updateCustomVoices(); // Overide any manual modification
|
||||
return;
|
||||
}
|
||||
|
||||
if (model_setting_language == 'none')
|
||||
model_setting_language = null;
|
||||
|
||||
if (model_setting_speaker == 'none')
|
||||
model_setting_speaker = null;
|
||||
|
||||
const tokens = $('#coqui_api_model_name').val().split('/');
|
||||
const model_dataset = tokens[0];
|
||||
const model_label = tokens[1];
|
||||
const model_id = 'tts_models/' + model_language + '/' + model_dataset + '/' + model_label;
|
||||
|
||||
let modelDict = coquiApiModels;
|
||||
if (model_origin == 'coqui-api-full')
|
||||
modelDict = coquiApiModelsFull;
|
||||
|
||||
if (model_setting_language == null & 'languages' in modelDict[model_language][model_dataset][model_label]) {
|
||||
toastr.error('Model language not selected, please select one.', DEBUG_PREFIX + ' voice mapping model language', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
return;
|
||||
}
|
||||
|
||||
if (model_setting_speaker == null & 'speakers' in modelDict[model_language][model_dataset][model_label]) {
|
||||
toastr.error('Model speaker not selected, please select one.', DEBUG_PREFIX + ' voice mapping model speaker', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
return;
|
||||
}
|
||||
|
||||
console.debug(DEBUG_PREFIX, 'Current custom voices: ', this.settings.customVoices);
|
||||
|
||||
this.settings.voiceMapDict[voiceName] = { model_type: 'coqui-api', model_id: model_id, model_language: model_setting_language, model_speaker: model_setting_speaker };
|
||||
|
||||
console.debug(DEBUG_PREFIX, 'Registered new voice map: ', voiceName, ':', this.settings.voiceMapDict[voiceName]);
|
||||
|
||||
this.updateCustomVoices();
|
||||
initVoiceMap(); // Update TTS extension voiceMap
|
||||
|
||||
let successMsg = voiceName + ':' + model_id;
|
||||
if (model_setting_language != null)
|
||||
successMsg += '[' + model_setting_language + ']';
|
||||
if (model_setting_speaker != null)
|
||||
successMsg += '[' + model_setting_speaker + ']';
|
||||
toastr.info(successMsg, DEBUG_PREFIX + ' voice map updated', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
async getVoice(voiceName) {
|
||||
let match = await this.fetchTtsVoiceObjects();
|
||||
match = match.filter(
|
||||
voice => voice.name == voiceName,
|
||||
)[0];
|
||||
if (!match) {
|
||||
throw `TTS Voice name ${voiceName} not found in CoquiTTS Provider voice list`;
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
async onRemoveClick() {
|
||||
const voiceName = $('#coqui_voicename_select').val();
|
||||
|
||||
if (voiceName === 'none') {
|
||||
toastr.error('Voice not selected, please select one.', DEBUG_PREFIX + ' voice mapping voiceId', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
return;
|
||||
}
|
||||
|
||||
// Todo erase from voicemap
|
||||
delete (this.settings.voiceMapDict[voiceName]);
|
||||
this.updateCustomVoices();
|
||||
initVoiceMap(); // Update TTS extension voiceMap
|
||||
}
|
||||
|
||||
async onModelOriginChange() {
|
||||
throwIfModuleMissing();
|
||||
resetModelSettings();
|
||||
const model_origin = $('#coqui_model_origin').val();
|
||||
|
||||
if (model_origin == 'none') {
|
||||
$('#coqui_local_model_div').hide();
|
||||
$('#coqui_api_model_div').hide();
|
||||
}
|
||||
|
||||
// show coqui model selected list (SAFE)
|
||||
if (model_origin == 'coqui-api') {
|
||||
$('#coqui_local_model_div').hide();
|
||||
|
||||
$('#coqui_api_language')
|
||||
.find('option')
|
||||
.remove()
|
||||
.end()
|
||||
.append('<option value="none">Select model language</option>')
|
||||
.val('none');
|
||||
|
||||
for(let language in coquiApiModels) {
|
||||
let languageLabel = language;
|
||||
if (language in languageLabels)
|
||||
languageLabel = languageLabels[language];
|
||||
$('#coqui_api_language').append(new Option(languageLabel,language));
|
||||
console.log(DEBUG_PREFIX,'added language',languageLabel,'(',language,')');
|
||||
}
|
||||
|
||||
$('#coqui_api_model_div').show();
|
||||
}
|
||||
|
||||
// show coqui model full list (UNSAFE)
|
||||
if (model_origin == 'coqui-api-full') {
|
||||
$('#coqui_local_model_div').hide();
|
||||
|
||||
$('#coqui_api_language')
|
||||
.find('option')
|
||||
.remove()
|
||||
.end()
|
||||
.append('<option value="none">Select model language</option>')
|
||||
.val('none');
|
||||
|
||||
for(let language in coquiApiModelsFull) {
|
||||
let languageLabel = language;
|
||||
if (language in languageLabels)
|
||||
languageLabel = languageLabels[language];
|
||||
$('#coqui_api_language').append(new Option(languageLabel,language));
|
||||
console.log(DEBUG_PREFIX,'added language',languageLabel,'(',language,')');
|
||||
}
|
||||
|
||||
$('#coqui_api_model_div').show();
|
||||
}
|
||||
|
||||
|
||||
// show local model list
|
||||
if (model_origin == 'local') {
|
||||
$('#coqui_api_model_div').hide();
|
||||
$('#coqui_local_model_div').show();
|
||||
}
|
||||
}
|
||||
|
||||
async onModelLanguageChange() {
|
||||
throwIfModuleMissing();
|
||||
resetModelSettings();
|
||||
$('#coqui_api_model_settings').hide();
|
||||
const model_origin = $('#coqui_model_origin').val();
|
||||
const model_language = $('#coqui_api_language').val();
|
||||
console.debug(model_language);
|
||||
|
||||
if (model_language == 'none') {
|
||||
$('#coqui_api_model_name').hide();
|
||||
return;
|
||||
}
|
||||
|
||||
$('#coqui_api_model_name').show();
|
||||
$('#coqui_api_model_name')
|
||||
.find('option')
|
||||
.remove()
|
||||
.end()
|
||||
.append('<option value="none">Select model</option>')
|
||||
.val('none');
|
||||
|
||||
let modelDict = coquiApiModels;
|
||||
if (model_origin == 'coqui-api-full')
|
||||
modelDict = coquiApiModelsFull;
|
||||
|
||||
for(let model_dataset in modelDict[model_language])
|
||||
for(let model_name in modelDict[model_language][model_dataset]) {
|
||||
const model_id = model_dataset + '/' + model_name;
|
||||
const model_label = model_name + ' (' + model_dataset + ' dataset)';
|
||||
$('#coqui_api_model_name').append(new Option(model_label, model_id));
|
||||
}
|
||||
}
|
||||
|
||||
async onModelNameChange() {
|
||||
throwIfModuleMissing();
|
||||
resetModelSettings();
|
||||
$('#coqui_api_model_settings').hide();
|
||||
const model_origin = $('#coqui_model_origin').val();
|
||||
|
||||
// No model selected
|
||||
if ($('#coqui_api_model_name').val() == 'none') {
|
||||
$('#coqui_api_model_install_button').off('click');
|
||||
$('#coqui_api_model_install_button').hide();
|
||||
return;
|
||||
}
|
||||
|
||||
// Get languages and speakers options
|
||||
const model_language = $('#coqui_api_language').val();
|
||||
const tokens = $('#coqui_api_model_name').val().split('/');
|
||||
const model_dataset = tokens[0];
|
||||
const model_name = tokens[1];
|
||||
|
||||
let modelDict = coquiApiModels;
|
||||
if (model_origin == 'coqui-api-full')
|
||||
modelDict = coquiApiModelsFull;
|
||||
|
||||
const model_settings = modelDict[model_language][model_dataset][model_name];
|
||||
|
||||
if ('languages' in model_settings) {
|
||||
$('#coqui_api_model_settings').show();
|
||||
$('#coqui_api_model_settings_language').show();
|
||||
$('#coqui_api_model_settings_language')
|
||||
.find('option')
|
||||
.remove()
|
||||
.end()
|
||||
.append('<option value="none">Select language</option>')
|
||||
.val('none');
|
||||
|
||||
for (let i = 0; i < model_settings['languages'].length; i++) {
|
||||
const language_label = JSON.stringify(model_settings['languages'][i]).replaceAll('"', '');
|
||||
$('#coqui_api_model_settings_language').append(new Option(language_label, i));
|
||||
}
|
||||
}
|
||||
else {
|
||||
$('#coqui_api_model_settings_language').hide();
|
||||
}
|
||||
|
||||
if ('speakers' in model_settings) {
|
||||
$('#coqui_api_model_settings').show();
|
||||
$('#coqui_api_model_settings_speaker').show();
|
||||
$('#coqui_api_model_settings_speaker')
|
||||
.find('option')
|
||||
.remove()
|
||||
.end()
|
||||
.append('<option value="none">Select speaker</option>')
|
||||
.val('none');
|
||||
|
||||
for (let i = 0; i < model_settings['speakers'].length; i++) {
|
||||
const speaker_label = JSON.stringify(model_settings['speakers'][i]).replaceAll('"', '');
|
||||
$('#coqui_api_model_settings_speaker').append(new Option(speaker_label, i));
|
||||
}
|
||||
}
|
||||
else {
|
||||
$('#coqui_api_model_settings_speaker').hide();
|
||||
}
|
||||
|
||||
$('#coqui_api_model_install_status').text('Requesting model to extras server...');
|
||||
$('#coqui_api_model_install_status').show();
|
||||
|
||||
// Check if already installed and propose to do it otherwise
|
||||
const model_id = modelDict[model_language][model_dataset][model_name]['id'];
|
||||
console.debug(DEBUG_PREFIX,'Check if model is already installed',model_id);
|
||||
let result = await CoquiTtsProvider.checkmodel_state(model_id);
|
||||
result = await result.json();
|
||||
const model_state = result['model_state'];
|
||||
|
||||
console.debug(DEBUG_PREFIX, ' Model state:', model_state);
|
||||
|
||||
if (model_state == 'installed') {
|
||||
$('#coqui_api_model_install_status').text('Model already installed on extras server');
|
||||
$('#coqui_api_model_install_button').hide();
|
||||
}
|
||||
else {
|
||||
let action = 'download';
|
||||
if (model_state == 'corrupted') {
|
||||
action = 'repare';
|
||||
//toastr.error("Click install button to reinstall the model "+$("#coqui_api_model_name").find(":selected").text(), DEBUG_PREFIX+" corrupted model install", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
$('#coqui_api_model_install_status').text('Model found but incomplete try install again (maybe still downloading)'); // (remove and download again)
|
||||
}
|
||||
else {
|
||||
toastr.info('Click download button to install the model ' + $('#coqui_api_model_name').find(':selected').text(), DEBUG_PREFIX + ' model not installed', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
$('#coqui_api_model_install_status').text('Model not found on extras server');
|
||||
}
|
||||
|
||||
const onModelNameChange_pointer = this.onModelNameChange;
|
||||
|
||||
$('#coqui_api_model_install_button').off('click').on('click', async function () {
|
||||
try {
|
||||
$('#coqui_api_model_install_status').text('Downloading model...');
|
||||
$('#coqui_api_model_install_button').hide();
|
||||
//toastr.info("For model "+model_id, DEBUG_PREFIX+" Started "+action, { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
let apiResult = await CoquiTtsProvider.installModel(model_id, action);
|
||||
apiResult = await apiResult.json();
|
||||
|
||||
console.debug(DEBUG_PREFIX, 'Response:', apiResult);
|
||||
|
||||
if (apiResult['status'] == 'done') {
|
||||
$('#coqui_api_model_install_status').text('Model installed and ready to use!');
|
||||
$('#coqui_api_model_install_button').hide();
|
||||
onModelNameChange_pointer();
|
||||
}
|
||||
|
||||
if (apiResult['status'] == 'downloading') {
|
||||
toastr.error('Check extras console for progress', DEBUG_PREFIX + ' already downloading', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
$('#coqui_api_model_install_status').text('Already downloading a model, check extras console!');
|
||||
$('#coqui_api_model_install_button').show();
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(error);
|
||||
toastr.error(error, DEBUG_PREFIX + ' error with model download', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
onModelNameChange_pointer();
|
||||
}
|
||||
// will refresh model status
|
||||
});
|
||||
|
||||
$('#coqui_api_model_install_button').show();
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
//#############################//
|
||||
// API Calls //
|
||||
//#############################//
|
||||
|
||||
/*
|
||||
Check model installation state, return one of ["installed", "corrupted", "absent"]
|
||||
*/
|
||||
static async checkmodel_state(model_id) {
|
||||
throwIfModuleMissing();
|
||||
const url = new URL(getApiUrl());
|
||||
url.pathname = '/api/text-to-speech/coqui/coqui-api/check-model-state';
|
||||
|
||||
const apiResult = await doExtrasFetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Cache-Control': 'no-cache',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
'model_id': model_id,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!apiResult.ok) {
|
||||
toastr.error(apiResult.statusText, DEBUG_PREFIX + ' Check model state request failed');
|
||||
throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
|
||||
}
|
||||
|
||||
return apiResult;
|
||||
}
|
||||
|
||||
static async installModel(model_id, action) {
|
||||
throwIfModuleMissing();
|
||||
const url = new URL(getApiUrl());
|
||||
url.pathname = '/api/text-to-speech/coqui/coqui-api/install-model';
|
||||
|
||||
const apiResult = await doExtrasFetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Cache-Control': 'no-cache',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
'model_id': model_id,
|
||||
'action': action,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!apiResult.ok) {
|
||||
toastr.error(apiResult.statusText, DEBUG_PREFIX + ' Install model ' + model_id + ' request failed');
|
||||
throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
|
||||
}
|
||||
|
||||
return apiResult;
|
||||
}
|
||||
|
||||
/*
|
||||
Retrieve user custom models
|
||||
*/
|
||||
static async getLocalModelList() {
|
||||
throwIfModuleMissing();
|
||||
const url = new URL(getApiUrl());
|
||||
url.pathname = '/api/text-to-speech/coqui/local/get-models';
|
||||
|
||||
const apiResult = await doExtrasFetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Cache-Control': 'no-cache',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
'model_id': 'model_id',
|
||||
'action': 'action',
|
||||
}),
|
||||
});
|
||||
|
||||
if (!apiResult.ok) {
|
||||
toastr.error(apiResult.statusText, DEBUG_PREFIX + ' Get local model list request failed');
|
||||
throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
|
||||
}
|
||||
|
||||
return apiResult;
|
||||
}
|
||||
|
||||
|
||||
// Expect voiceId format to be like:
|
||||
// tts_models/multilingual/multi-dataset/your_tts[2][1]
|
||||
// tts_models/en/ljspeech/glow-tts
|
||||
// tts_models/ja/kokoro/tacotron2-DDC
|
||||
async generateTts(text, voiceId) {
|
||||
throwIfModuleMissing();
|
||||
voiceId = this.settings.customVoices[voiceId];
|
||||
|
||||
const url = new URL(getApiUrl());
|
||||
url.pathname = '/api/text-to-speech/coqui/generate-tts';
|
||||
|
||||
let language = 'none';
|
||||
let speaker = 'none';
|
||||
const tokens = voiceId.replaceAll(']', '').replaceAll('"', '').split('[');
|
||||
const model_id = tokens[0];
|
||||
|
||||
console.debug(DEBUG_PREFIX, 'Preparing TTS request for', tokens);
|
||||
|
||||
// First option
|
||||
if (tokens.length > 1) {
|
||||
const option1 = tokens[1];
|
||||
|
||||
if (model_id.includes('multilingual'))
|
||||
language = option1;
|
||||
else
|
||||
speaker = option1;
|
||||
}
|
||||
|
||||
// Second option
|
||||
if (tokens.length > 2)
|
||||
speaker = tokens[2];
|
||||
|
||||
const apiResult = await doExtrasFetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Cache-Control': 'no-cache',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
'text': text,
|
||||
'model_id': model_id,
|
||||
'language_id': parseInt(language),
|
||||
'speaker_id': parseInt(speaker),
|
||||
}),
|
||||
});
|
||||
|
||||
if (!apiResult.ok) {
|
||||
toastr.error(apiResult.statusText, 'TTS Generation Failed');
|
||||
throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
|
||||
}
|
||||
|
||||
return apiResult;
|
||||
}
|
||||
|
||||
// Dirty hack to say not implemented
|
||||
async fetchTtsVoiceObjects() {
|
||||
const voiceIds = Object
|
||||
.keys(this.settings.voiceMapDict)
|
||||
.map(voice => ({ name: voice, voice_id: voice, preview_url: false }));
|
||||
return voiceIds;
|
||||
}
|
||||
|
||||
// Do nothing
|
||||
previewTtsVoice(id) {
|
||||
return;
|
||||
}
|
||||
|
||||
async fetchTtsFromHistory(history_item_id) {
|
||||
return Promise.resolve(history_item_id);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Fetch the list of local Coqui models once and fill the
 * `#coqui_local_model_name` dropdown with it.
 *
 * Does nothing when `modules` lacks 'coqui-tts' or when the list has already
 * been received (`coquiLocalModelsReceived`). On success it stores the list in
 * `coquiLocalModels` and sets `coquiLocalModelsReceived` to true.
 *
 * @returns {Promise<void>}
 */
async function initLocalModels() {
    if (!modules.includes('coqui-tts')) {
        return;
    }

    // Only the first successful call populates the dropdown.
    if (coquiLocalModelsReceived) {
        return;
    }

    const response = await CoquiTtsProvider.getLocalModelList();
    const payload = await response.json();

    coquiLocalModels = payload['models_list'];

    $('#coqui_local_model_name').show();

    // Drop any existing options and start over with the placeholder entry selected.
    const dropdown = $('#coqui_local_model_name');
    dropdown.find('option').remove();
    dropdown.append('<option value="none">Select model</option>').val('none');

    for (const modelName of coquiLocalModels) {
        $('#coqui_local_model_name').append(new Option(modelName, modelName));
    }

    coquiLocalModelsReceived = true;
}
|
190
public/scripts/extensions/tts/coqui_api_models_settings.json
Normal file
190
public/scripts/extensions/tts/coqui_api_models_settings.json
Normal file
@@ -0,0 +1,190 @@
|
||||
{
|
||||
"multilingual": {
|
||||
"multi-dataset": {
|
||||
"your_tts": {
|
||||
"id": "tts_models/multilingual/multi-dataset/your_tts",
|
||||
"languages": [
|
||||
"en",
|
||||
"fr-fr",
|
||||
"pt-br"
|
||||
],
|
||||
"speakers": [
|
||||
"female-en-5",
|
||||
"female-en-5\n",
|
||||
"female-pt-4\n",
|
||||
"male-en-2",
|
||||
"male-en-2\n",
|
||||
"male-pt-3\n"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"en": {
|
||||
"ljspeech": {
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/en/ljspeech/tacotron2-DDC"
|
||||
},
|
||||
"glow-tts": {
|
||||
"id": "tts_models/en/ljspeech/glow-tts"
|
||||
},
|
||||
"speedy-speech": {
|
||||
"id": "tts_models/en/ljspeech/speedy-speech"
|
||||
},
|
||||
"vits": {
|
||||
"id": "tts_models/en/ljspeech/vits"
|
||||
}
|
||||
},
|
||||
"vctk": {
|
||||
"vits": {
|
||||
"id": "tts_models/en/vctk/vits",
|
||||
"speakers": [
|
||||
"ED\n",
|
||||
"p225",
|
||||
"p226",
|
||||
"p227",
|
||||
"p228",
|
||||
"p229",
|
||||
"p230",
|
||||
"p231",
|
||||
"p232",
|
||||
"p233",
|
||||
"p234",
|
||||
"p236",
|
||||
"p237",
|
||||
"p238",
|
||||
"p239",
|
||||
"p240",
|
||||
"p241",
|
||||
"p243",
|
||||
"p244",
|
||||
"p245",
|
||||
"p246",
|
||||
"p247",
|
||||
"p248",
|
||||
"p249",
|
||||
"p250",
|
||||
"p251",
|
||||
"p252",
|
||||
"p253",
|
||||
"p254",
|
||||
"p255",
|
||||
"p256",
|
||||
"p257",
|
||||
"p258",
|
||||
"p259",
|
||||
"p260",
|
||||
"p261",
|
||||
"p262",
|
||||
"p263",
|
||||
"p264",
|
||||
"p265",
|
||||
"p266",
|
||||
"p267",
|
||||
"p268",
|
||||
"p269",
|
||||
"p270",
|
||||
"p271",
|
||||
"p272",
|
||||
"p273",
|
||||
"p274",
|
||||
"p275",
|
||||
"p276",
|
||||
"p277",
|
||||
"p278",
|
||||
"p279",
|
||||
"p280",
|
||||
"p281",
|
||||
"p282",
|
||||
"p283",
|
||||
"p284",
|
||||
"p285",
|
||||
"p286",
|
||||
"p287",
|
||||
"p288",
|
||||
"p292",
|
||||
"p293",
|
||||
"p294",
|
||||
"p295",
|
||||
"p297",
|
||||
"p298",
|
||||
"p299",
|
||||
"p300",
|
||||
"p301",
|
||||
"p302",
|
||||
"p303",
|
||||
"p304",
|
||||
"p305",
|
||||
"p306",
|
||||
"p307",
|
||||
"p308",
|
||||
"p310",
|
||||
"p311",
|
||||
"p312",
|
||||
"p313",
|
||||
"p314",
|
||||
"p316",
|
||||
"p317",
|
||||
"p318",
|
||||
"p323",
|
||||
"p326",
|
||||
"p329",
|
||||
"p330",
|
||||
"p333",
|
||||
"p334",
|
||||
"p335",
|
||||
"p336",
|
||||
"p339",
|
||||
"p340",
|
||||
"p341",
|
||||
"p343",
|
||||
"p345",
|
||||
"p347",
|
||||
"p351",
|
||||
"p360",
|
||||
"p361",
|
||||
"p362",
|
||||
"p363",
|
||||
"p364",
|
||||
"p374",
|
||||
"p376"
|
||||
]
|
||||
}
|
||||
},
|
||||
"jenny": {
|
||||
"jenny": {
|
||||
"id": "tts_models/en/jenny/jenny"
|
||||
}
|
||||
}
|
||||
},
|
||||
"es": {
|
||||
"mai": {
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/es/mai/tacotron2-DDC"
|
||||
}
|
||||
},
|
||||
"css10": {
|
||||
"vits": {
|
||||
"id": "tts_models/es/css10/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"fr": {
|
||||
"mai": {
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/fr/mai/tacotron2-DDC"
|
||||
}
|
||||
},
|
||||
"css10": {
|
||||
"vits": {
|
||||
"id": "tts_models/fr/css10/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ja": {
|
||||
"kokoro": {
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/ja/kokoro/tacotron2-DDC"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@@ -0,0 +1,870 @@
|
||||
{
|
||||
"multilingual": {
|
||||
"multi-dataset": {
|
||||
"your_tts": {
|
||||
"id": "tts_models/multilingual/multi-dataset/your_tts",
|
||||
"languages": [
|
||||
"en",
|
||||
"fr-fr",
|
||||
"pt-br"
|
||||
],
|
||||
"speakers": [
|
||||
"female-en-5",
|
||||
"female-en-5\n",
|
||||
"female-pt-4\n",
|
||||
"male-en-2",
|
||||
"male-en-2\n",
|
||||
"male-pt-3\n"
|
||||
]
|
||||
},
|
||||
"bark": {
|
||||
"id": "tts_models/multilingual/multi-dataset/bark"
|
||||
}
|
||||
}
|
||||
},
|
||||
"bg": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/bg/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"cs": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/cs/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"da": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/da/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"et": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/et/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ga": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/ga/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"en": {
|
||||
"ek1": {
|
||||
"tacotron2": {
|
||||
"id": "tts_models/en/ek1/tacotron2"
|
||||
}
|
||||
},
|
||||
"ljspeech": {
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/en/ljspeech/tacotron2-DDC"
|
||||
},
|
||||
"tacotron2-DDC_ph": {
|
||||
"id": "tts_models/en/ljspeech/tacotron2-DDC_ph"
|
||||
},
|
||||
"glow-tts": {
|
||||
"id": "tts_models/en/ljspeech/glow-tts"
|
||||
},
|
||||
"speedy-speech": {
|
||||
"id": "tts_models/en/ljspeech/speedy-speech"
|
||||
},
|
||||
"tacotron2-DCA": {
|
||||
"id": "tts_models/en/ljspeech/tacotron2-DCA"
|
||||
},
|
||||
"vits": {
|
||||
"id": "tts_models/en/ljspeech/vits"
|
||||
},
|
||||
"vits--neon": {
|
||||
"id": "tts_models/en/ljspeech/vits--neon"
|
||||
},
|
||||
"fast_pitch": {
|
||||
"id": "tts_models/en/ljspeech/fast_pitch"
|
||||
},
|
||||
"overflow": {
|
||||
"id": "tts_models/en/ljspeech/overflow"
|
||||
},
|
||||
"neural_hmm": {
|
||||
"id": "tts_models/en/ljspeech/neural_hmm"
|
||||
}
|
||||
},
|
||||
"vctk": {
|
||||
"vits": {
|
||||
"id": "tts_models/en/vctk/vits",
|
||||
"speakers": [
|
||||
"ED\n",
|
||||
"p225",
|
||||
"p226",
|
||||
"p227",
|
||||
"p228",
|
||||
"p229",
|
||||
"p230",
|
||||
"p231",
|
||||
"p232",
|
||||
"p233",
|
||||
"p234",
|
||||
"p236",
|
||||
"p237",
|
||||
"p238",
|
||||
"p239",
|
||||
"p240",
|
||||
"p241",
|
||||
"p243",
|
||||
"p244",
|
||||
"p245",
|
||||
"p246",
|
||||
"p247",
|
||||
"p248",
|
||||
"p249",
|
||||
"p250",
|
||||
"p251",
|
||||
"p252",
|
||||
"p253",
|
||||
"p254",
|
||||
"p255",
|
||||
"p256",
|
||||
"p257",
|
||||
"p258",
|
||||
"p259",
|
||||
"p260",
|
||||
"p261",
|
||||
"p262",
|
||||
"p263",
|
||||
"p264",
|
||||
"p265",
|
||||
"p266",
|
||||
"p267",
|
||||
"p268",
|
||||
"p269",
|
||||
"p270",
|
||||
"p271",
|
||||
"p272",
|
||||
"p273",
|
||||
"p274",
|
||||
"p275",
|
||||
"p276",
|
||||
"p277",
|
||||
"p278",
|
||||
"p279",
|
||||
"p280",
|
||||
"p281",
|
||||
"p282",
|
||||
"p283",
|
||||
"p284",
|
||||
"p285",
|
||||
"p286",
|
||||
"p287",
|
||||
"p288",
|
||||
"p292",
|
||||
"p293",
|
||||
"p294",
|
||||
"p295",
|
||||
"p297",
|
||||
"p298",
|
||||
"p299",
|
||||
"p300",
|
||||
"p301",
|
||||
"p302",
|
||||
"p303",
|
||||
"p304",
|
||||
"p305",
|
||||
"p306",
|
||||
"p307",
|
||||
"p308",
|
||||
"p310",
|
||||
"p311",
|
||||
"p312",
|
||||
"p313",
|
||||
"p314",
|
||||
"p316",
|
||||
"p317",
|
||||
"p318",
|
||||
"p323",
|
||||
"p326",
|
||||
"p329",
|
||||
"p330",
|
||||
"p333",
|
||||
"p334",
|
||||
"p335",
|
||||
"p336",
|
||||
"p339",
|
||||
"p340",
|
||||
"p341",
|
||||
"p343",
|
||||
"p345",
|
||||
"p347",
|
||||
"p351",
|
||||
"p360",
|
||||
"p361",
|
||||
"p362",
|
||||
"p363",
|
||||
"p364",
|
||||
"p374",
|
||||
"p376"
|
||||
]
|
||||
},
|
||||
"fast_pitch": {
|
||||
"id": "tts_models/en/vctk/fast_pitch",
|
||||
"speakers": [
|
||||
"VCTK_p225",
|
||||
"VCTK_p226",
|
||||
"VCTK_p227",
|
||||
"VCTK_p228",
|
||||
"VCTK_p229",
|
||||
"VCTK_p230",
|
||||
"VCTK_p231",
|
||||
"VCTK_p232",
|
||||
"VCTK_p233",
|
||||
"VCTK_p234",
|
||||
"VCTK_p236",
|
||||
"VCTK_p237",
|
||||
"VCTK_p238",
|
||||
"VCTK_p239",
|
||||
"VCTK_p240",
|
||||
"VCTK_p241",
|
||||
"VCTK_p243",
|
||||
"VCTK_p244",
|
||||
"VCTK_p245",
|
||||
"VCTK_p246",
|
||||
"VCTK_p247",
|
||||
"VCTK_p248",
|
||||
"VCTK_p249",
|
||||
"VCTK_p250",
|
||||
"VCTK_p251",
|
||||
"VCTK_p252",
|
||||
"VCTK_p253",
|
||||
"VCTK_p254",
|
||||
"VCTK_p255",
|
||||
"VCTK_p256",
|
||||
"VCTK_p257",
|
||||
"VCTK_p258",
|
||||
"VCTK_p259",
|
||||
"VCTK_p260",
|
||||
"VCTK_p261",
|
||||
"VCTK_p262",
|
||||
"VCTK_p263",
|
||||
"VCTK_p264",
|
||||
"VCTK_p265",
|
||||
"VCTK_p266",
|
||||
"VCTK_p267",
|
||||
"VCTK_p268",
|
||||
"VCTK_p269",
|
||||
"VCTK_p270",
|
||||
"VCTK_p271",
|
||||
"VCTK_p272",
|
||||
"VCTK_p273",
|
||||
"VCTK_p274",
|
||||
"VCTK_p275",
|
||||
"VCTK_p276",
|
||||
"VCTK_p277",
|
||||
"VCTK_p278",
|
||||
"VCTK_p279",
|
||||
"VCTK_p280",
|
||||
"VCTK_p281",
|
||||
"VCTK_p282",
|
||||
"VCTK_p283",
|
||||
"VCTK_p284",
|
||||
"VCTK_p285",
|
||||
"VCTK_p286",
|
||||
"VCTK_p287",
|
||||
"VCTK_p288",
|
||||
"VCTK_p292",
|
||||
"VCTK_p293",
|
||||
"VCTK_p294",
|
||||
"VCTK_p295",
|
||||
"VCTK_p297",
|
||||
"VCTK_p298",
|
||||
"VCTK_p299",
|
||||
"VCTK_p300",
|
||||
"VCTK_p301",
|
||||
"VCTK_p302",
|
||||
"VCTK_p303",
|
||||
"VCTK_p304",
|
||||
"VCTK_p305",
|
||||
"VCTK_p306",
|
||||
"VCTK_p307",
|
||||
"VCTK_p308",
|
||||
"VCTK_p310",
|
||||
"VCTK_p311",
|
||||
"VCTK_p312",
|
||||
"VCTK_p313",
|
||||
"VCTK_p314",
|
||||
"VCTK_p316",
|
||||
"VCTK_p317",
|
||||
"VCTK_p318",
|
||||
"VCTK_p323",
|
||||
"VCTK_p326",
|
||||
"VCTK_p329",
|
||||
"VCTK_p330",
|
||||
"VCTK_p333",
|
||||
"VCTK_p334",
|
||||
"VCTK_p335",
|
||||
"VCTK_p336",
|
||||
"VCTK_p339",
|
||||
"VCTK_p340",
|
||||
"VCTK_p341",
|
||||
"VCTK_p343",
|
||||
"VCTK_p345",
|
||||
"VCTK_p347",
|
||||
"VCTK_p351",
|
||||
"VCTK_p360",
|
||||
"VCTK_p361",
|
||||
"VCTK_p362",
|
||||
"VCTK_p363",
|
||||
"VCTK_p364",
|
||||
"VCTK_p374",
|
||||
"VCTK_p376"
|
||||
]
|
||||
}
|
||||
},
|
||||
"sam": {
|
||||
"tacotron-DDC": {
|
||||
"id": "tts_models/en/sam/tacotron-DDC"
|
||||
}
|
||||
},
|
||||
"blizzard2013": {
|
||||
"capacitron-t2-c50": {
|
||||
"id": "tts_models/en/blizzard2013/capacitron-t2-c50"
|
||||
},
|
||||
"capacitron-t2-c150_v2": {
|
||||
"id": "tts_models/en/blizzard2013/capacitron-t2-c150_v2"
|
||||
}
|
||||
},
|
||||
"multi-dataset": {
|
||||
"tortoise-v2": {
|
||||
"id": "tts_models/en/multi-dataset/tortoise-v2"
|
||||
}
|
||||
},
|
||||
"jenny": {
|
||||
"jenny": {
|
||||
"id": "tts_models/en/jenny/jenny"
|
||||
}
|
||||
}
|
||||
},
|
||||
"es": {
|
||||
"mai": {
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/es/mai/tacotron2-DDC"
|
||||
}
|
||||
},
|
||||
"css10": {
|
||||
"vits": {
|
||||
"id": "tts_models/es/css10/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"fr": {
|
||||
"mai": {
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/fr/mai/tacotron2-DDC"
|
||||
}
|
||||
},
|
||||
"css10": {
|
||||
"vits": {
|
||||
"id": "tts_models/fr/css10/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"uk": {
|
||||
"mai": {
|
||||
"glow-tts": {
|
||||
"id": "tts_models/uk/mai/glow-tts"
|
||||
},
|
||||
"vits": {
|
||||
"id": "tts_models/uk/mai/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"zh-CN": {
|
||||
"baker": {
|
||||
"tacotron2-DDC-GST": {
|
||||
"id": "tts_models/zh-CN/baker/tacotron2-DDC-GST"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nl": {
|
||||
"mai": {
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/nl/mai/tacotron2-DDC"
|
||||
}
|
||||
},
|
||||
"css10": {
|
||||
"vits": {
|
||||
"id": "tts_models/nl/css10/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"de": {
|
||||
"thorsten": {
|
||||
"tacotron2-DCA": {
|
||||
"id": "tts_models/de/thorsten/tacotron2-DCA"
|
||||
},
|
||||
"vits": {
|
||||
"id": "tts_models/de/thorsten/vits"
|
||||
},
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/de/thorsten/tacotron2-DDC"
|
||||
}
|
||||
},
|
||||
"css10": {
|
||||
"vits-neon": {
|
||||
"id": "tts_models/de/css10/vits-neon"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ja": {
|
||||
"kokoro": {
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/ja/kokoro/tacotron2-DDC"
|
||||
}
|
||||
}
|
||||
},
|
||||
"tr": {
|
||||
"common-voice": {
|
||||
"glow-tts": {
|
||||
"id": "tts_models/tr/common-voice/glow-tts"
|
||||
}
|
||||
}
|
||||
},
|
||||
"it": {
|
||||
"mai_female": {
|
||||
"glow-tts": {
|
||||
"id": "tts_models/it/mai_female/glow-tts"
|
||||
},
|
||||
"vits": {
|
||||
"id": "tts_models/it/mai_female/vits"
|
||||
}
|
||||
},
|
||||
"mai_male": {
|
||||
"glow-tts": {
|
||||
"id": "tts_models/it/mai_male/glow-tts"
|
||||
},
|
||||
"vits": {
|
||||
"id": "tts_models/it/mai_male/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ewe": {
|
||||
"openbible": {
|
||||
"vits": {
|
||||
"id": "tts_models/ewe/openbible/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"hau": {
|
||||
"openbible": {
|
||||
"vits": {
|
||||
"id": "tts_models/hau/openbible/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"lin": {
|
||||
"openbible": {
|
||||
"vits": {
|
||||
"id": "tts_models/lin/openbible/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"tw_akuapem": {
|
||||
"openbible": {
|
||||
"vits": {
|
||||
"id": "tts_models/tw_akuapem/openbible/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"tw_asante": {
|
||||
"openbible": {
|
||||
"vits": {
|
||||
"id": "tts_models/tw_asante/openbible/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"yor": {
|
||||
"openbible": {
|
||||
"vits": {
|
||||
"id": "tts_models/yor/openbible/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"hu": {
|
||||
"css10": {
|
||||
"vits": {
|
||||
"id": "tts_models/hu/css10/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"el": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/el/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"fi": {
|
||||
"css10": {
|
||||
"vits": {
|
||||
"id": "tts_models/fi/css10/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"hr": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/hr/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"lt": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/lt/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"lv": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/lv/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"mt": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/mt/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"pl": {
|
||||
"mai_female": {
|
||||
"vits": {
|
||||
"id": "tts_models/pl/mai_female/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"pt": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/pt/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ro": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/ro/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"sk": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/sk/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"sl": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/sl/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"sv": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/sv/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ca": {
|
||||
"custom": {
|
||||
"vits": {
|
||||
"id": "tts_models/ca/custom/vits",
|
||||
"speakers": [
|
||||
"00236e350cc84b94a6684f182acf96e68963d7fa1164d4fa56da20f46f210b2dd3ecf189e97fb3c94113a54c12dc20550508f5b7b9b37e1873898d58a308feb5",
|
||||
"00459",
|
||||
"00762",
|
||||
"00983a845f95493fb27125b114c635f3b40060efaee167d32d8a3dd040c877713446c7bd3e6944641227bdb4165ecb8d684ec2ef66c817e65e77c52cc50e62ed",
|
||||
"01591",
|
||||
"02452",
|
||||
"02689",
|
||||
"02992",
|
||||
"02f7d61edf5063ca42953b1068539f1572985aa9448555cfd8d7667121eeedc72c912d95cf33abf61a1f9620f2a01be4251a53aa5440d15849003fb31210d830",
|
||||
"03115",
|
||||
"03386",
|
||||
"03655",
|
||||
"03944",
|
||||
"04247",
|
||||
"04484",
|
||||
"04787",
|
||||
"04910",
|
||||
"05147",
|
||||
"056d7638d714a7dc1efe1c47d390d0659fbfdfc7df5249e8bfe10ba346cc76d5cda93fc8ecbeadffd4924c4f9cfb6b32c1739c8af1e2d58d7cec88b2cf18795f",
|
||||
"05739",
|
||||
"06008",
|
||||
"06042",
|
||||
"06279",
|
||||
"06311",
|
||||
"06582",
|
||||
"06705",
|
||||
"06942",
|
||||
"06c6d2e093624103c268e2cba37466147fd564bff1312a78d1c5be9ba168af4cf4819c7a91d5321d7aa9bd20ad6c702ca2cb005496dd20c45d293200b2b8a7b9",
|
||||
"07140",
|
||||
"07245",
|
||||
"07803",
|
||||
"08001",
|
||||
"08106",
|
||||
"085503e68b0772f1b3aa4de86a57bb26e3750660e7929a14a653c729787a110cc8b3704f8ea09842f72be46b6ffbb35bdb3732308b31dceefc3b33e5ad3f7975",
|
||||
"08664",
|
||||
"08935",
|
||||
"08967",
|
||||
"09204",
|
||||
"09598",
|
||||
"09901",
|
||||
"0befb1084ad00d656f45a87ad83f074c61e3b3767cf6f5463fd5bc199ab7fd4733c5f02e3a100359e953977cc2a2689bd4824ef6e3178a7108cb45a0204fb3cb",
|
||||
"0c6bf67821762116d753c9b48ebed8a2ccfa0a956d5dbf19feb0ac0bc2096154ca288ae7c5e324a3092db395cc24c64c6a4e4fb0e01429b7343cafc7ac1b2e13",
|
||||
"0d0a943d348b4f0948da443c4d020b2e690731955ce8c318c0fb72663cfec3cd3458488ff9ff9cee6d221c85771b8eb83cc087dda37d4109bbb1614039e5f565",
|
||||
"0da83aed14276e120e2581be32891bb088a22c272feb6f03b4bac1b827cccdbc8fee277a885f58e98931819e0d6171526c5fa7b2e788f68a2852e4d5314f613b",
|
||||
"0ff19536d6147f61b24d50c0c993a7a687df4d253c2052e4fa30b1624c87e60075649d888f51ce71318fb8789cb378879091aba020256d66ac19f024833c3e63",
|
||||
"125d9d1721de26a9b89d0e4f4d386e07458d287ebb2f338879e8886847abe6b3209f79e2bc335bcfc437350184df5a7d9e1a08ffb5239674edfd1cf95a9d1e24",
|
||||
"1378866a4d2b6965c03eed8e48e03fffd089638acdf1fa82ed20a9856406e083f0c0e1f5043c4d3bf67dbc383f7cd28b602eff1d8bd8bf8c1a0191dc98540322",
|
||||
"14bc32c10eb26503a4e799c3a762bfe5949d7a232074e854ecfad8139acaa4257c563a502e06a263f2d5fa8337114a9741d4a634a0f914adae74e5f9a80f145f",
|
||||
"151fcb1168f41a51c49e20b426605109e1d9dd50be2926aa9878040fe325eba2f2c470357a735e9e24b7412e78e2550a8fbd0ea77fed80e4d8f50a21f2064948",
|
||||
"1610e29603954ee12f408eaeb83a5c99781b5efe5f64fdf2e3c5e0ba9756b5b11bfeefda40a787842f7d5da653135043e3f43e7f786499cb51ad6181ef8acc9d",
|
||||
"1887c37f4187a4c3213ba4b58d0ef15f903a3720ee94d5ce59c33db193f60db416a4d9607f213ae44ca3eafc7217fb9871e2ee1837ad4cc0f34794e5c543f9f5",
|
||||
"1add23d44d2d913f0ea6e061fe292b0563653fa15b4ecc9cce0c2b83dc5a743d6d3dd4625efa112f751852c348a667a9456ef3486ae7e8c5954dedc69f998ea2",
|
||||
"1b7fc0c4e437188bdf1b03ed21d45b780b525fd0dc3900b9759d0755e34bc25e31d64e69c5bd547ed0eda67d104fc0d658b8ec78277810830167c53ef8ced24b",
|
||||
"1b8354b1fe9255578225b3d2255d5e781eed7d13ab61e84bc08be5d6465ec468c533563137ca756fcb7d3759af0ac2c0b4d00873782c7bf47ea72fd9be2f9e8c",
|
||||
"1be6c773da6334cc73e23312689bc8a5915529c905e1d5289dbfe00332a7dcb9ae97efe209635e2e5040783777409155926d231a5a5f76357494671512d2b1a4",
|
||||
"1c7af1cc1357fd63bd9ffe915745e20c34588438e1e0d85fdc8c9de4b3bd41d3d61b318b6a69862c6d64dd41f15ef3d994a6bb6c9a9dac69c891308b09ab16a5",
|
||||
"1c7f19a7fa0b166c700bac583b6858ce7adbe19566d66e530953273aae59776757aebaeb30c20a58d74bc50ce1345516af5bbd36168f443fed809bf54c02f63c",
|
||||
"1c80e9d982aa0c12db8498e8275b2281e638e8e5c684a752e19f8f9842979b70a3624785d0c30e0e57112950cae5e892bf554c295c74cf8c82c8ec33c732d8c6",
|
||||
"2256cc5ee6c617347af9a1abd97dfe80f55e6691eb0a042321d46a1bd7ce0baf1c7a4c8ea3fe4184f8638b1c3d9e83b6aa193bd6f9b49d5358523f1fe324cd92",
|
||||
"238532dddf77923ce93cf2e9ed809d088094106a1aad327e8a7b229ce24a339771e59478f7d5162efc1da6f347b44cbb2273ac9154aa3a76c7a8fc458470cc2b",
|
||||
"241ca4fdf2124f550657446301fb8dfc8bdef46d3888ce39bf9d8622c2bbec7e06b198f5e33fadbf30e477fdb37435cea36d10341af1a7d3a80d0ad1caa94bf9",
|
||||
"2421aa51a089ecfe45250bf284d5690a9994a9eb03f2ba4f43d2ad73fe78783ae5f3d3088d772e01fd1d747b2ecd6bb1bfae5feb10a72130d3952ba7304d5c53",
|
||||
"24d967d0e8b84beb3652417724be81ab83c7834afaa7b7d3d7d9591b1a2a7bb75f9b25be548a200570ebd6cc34e91306b675af510ef91cd34a77060b65b9faaa",
|
||||
"25911630ab15956e81427d3e990cf37f79490e305914a15ca7dd7b95dd4d4feb15fd94549cc005376801ce68d637eab6e19ee36017dba5c01bd0f206e5e8dc3b",
|
||||
"26099adbc4db8fcf000e2c7d1da3399662281f9af03831808d29c602431af4fc13f21b38c5c42c5ac4f77ece48448eef99f735d92cdaed857d19da7dd2b888ad",
|
||||
"28e2fe1944a593c44c3de0dc52971f040f0b8901fced2057025bdbafa1fe3b042be19618044ae085d7364e3cb38601e9ce4030329f15af7a0898f9d4c2c5014f",
|
||||
"2b59e9f830e5ea00c500b63eff4e72553e0c2608f5741d35c226e733400412014d7697bd6efb67bc61b19fd61e40f9ed70fde2589fe0b5498915eebc1c8b5d93",
|
||||
"2bc2a177bf56dcc98e05501e7bc6eebd3d1662114764299a4f9e6b060a48095b8ec95d20a5814f71343d65ee3cd2e7f42a80faa51a148005242cc5073e605ba4",
|
||||
"2ce84c6ea6aae52c449b6d34cbc095b2f2c3e6fa20d0e48b2f7d223724ae01375e92a8ad106b029f0562ee735de36d9bca6cd167257c3f68796bd8b1a0ab600f",
|
||||
"2d84f39c2cca33dd28fee650caa022c9a06407462342fae8dc256af7904cdd114af5b4cc883181407b8dcf2dc4a93d45c62c83a317d84e876cf710a521f20d80",
|
||||
"2e6ccdf9f0a7bf0df6fd0572bbb53f25378fc5333b352bc885b3a0d01e5dd672156cf697c127cc998ac546d122c547c421970a6f23dccb60bf2c841146fa6576",
|
||||
"2f92b4704080216aa80b1b39cfa223feeb9ed7c909f5b77be1b6e45fdc8827463bc4c4cd98263f02b57e653ecc2ccc7192aedd92990113433077e7ed44eb1e0a",
|
||||
"2fb95c3b786fa65215534207266e034b294317b2327ee0928be3436258e42db8f4479e86e6006979ec4438dbaa9daa05be21ddc66717d30dc43e36ead349965e",
|
||||
"30b1f81c579755895581259d79a8a5a3ca45b908b0bd14ad1c6418f39aa1e2f47cb4749c69b5440cdb92e3bafb772e19e7bc2b16d196b061addd173a1309e491",
|
||||
"31535cb2ece4710d08fdbeefb6f8f75ed093fee4cf8573bd601d960f8c6156f0fd0a85712761691e86e31160b993ee0eacb10c4c8aed000cc394cf7c7d207a7e",
|
||||
"31e6f3a011661320b2e59b6f8be43f6db2243e9feabc2b9787c1413788e13eb0e5810bed983bf7ff66e46417d183a91ed50b3b9be9d89e4f51aada72293b9881",
|
||||
"32550810ba55b9a67a25d308f0ede521f12cbf6076472ff5bd60a8f5e951c481b784e2f04194fb96116c4f001d84b3993b2c580879671de46333d5f212ff2ca5",
|
||||
"336f82b4645b80c99137018e69bb6f8138a9c8dc05a510e36922503120648625674e1414cd90d0cf46f28cbd5993ae0eaedc9994b72e8eb5242737ddefc0bfb2",
|
||||
"35b962b08846ed7d8a4cc47582a4e607f5ff4136042ce0b1adb55d1e8d58e2dc1abc5807b3601a7f7be1ada5939e1771e128fc916c1b5d39ed3619e251707952",
|
||||
"3637902e0d19f0080313c14d2c9dcde800ec6b71d493459c2f3b2cebf186f028ea289dd59ba1fd4705e53891216f7f4c36dbcb8938aeeaf142317b441b20a837",
|
||||
"3723bd65a05afc7411c2bfca904742062b7b0c081ba126e68c65d28eaa6122f69196f4959fc1795fe03f8e49ec7364863911f9b659684a06b3a126c6f1729551",
|
||||
"373d86f9fa3a127372dd913b7571ed318bfea42173b2b7daebde93c742f3224fb7fe5306085e836d20bfee4201bfe070b4c6b36510f5c9f379f6a3b610f36cd0",
|
||||
"379d321bff71ebcd34792e8f4552d341f30a006b4765f8c6de4fa98d3ca416def88adbcb0253f5849f51793b3d7ca7e53700ec70b5a97e84ccd1f35a2a1fb6e5",
|
||||
"37c12c700c95dc0028b3b82c4cf1fb922d68680c35fe84585bf22674e71c4dc53bd9233ce8b71bd31e9c9b0e000d01d195a3572b9055a73fbec891b1ade250cf",
|
||||
"3a4a32c7cff18f1896e7cbff2c19b4e6f91a95c1e7aab616722600ceb36a86b07fb0e1e0c70cc285dfb6192b53cb67826698b7f3f652549e27a969bde0177fa7",
|
||||
"404ecea5ae8e5f4ec3d2c48494cf7f1d559268542d8f1f7928da2fcde55c9fa3f491ba632f555ec69e8c9e819072df450add7e5886cf5527f446b11544af7d05",
|
||||
"41e5e21b3a3b0c8df01ba5b3c3e6224cc4082f41ca87679344b0273e2216cc272e19426c160f5a9580915c057a3e4000788be6cc7a6f5f346cad5068c7884ce7",
|
||||
"464d9ac63f7958200bc09a141171355bf4f3631d66dc4bbfabd497619a8f055c034c0752987944b2102e02d4b435bcd3ce0527962871112049e1d26865b776e9",
|
||||
"4869d94d4936ab700c5e5bc7b666177b53220082f5f221774b5625d7275cd4f117482dcd1498674b7f885fa41d86f99b8d00b6a6f641829780946651f561fc22",
|
||||
"496b66c9cb705a46cdfef9eeaf29c9d738a4b70b601270985a7df5a06f9e1d6c56be0982995c8cc06902d0ee89bae201c37a91f568331ffe28ad2d150e183fed",
|
||||
"49a7654071536ed5882b8b6e6d2e3558ef796ecd8aab8ceaa24ad8bc9f3420b528ef1413696584c11facd6d5bccd37fe8e274b8c3d139dc251ffb11c3a503aaa",
|
||||
"4b6c7e4e9bde35c471cbf5e2e93b2eb8bbba52b710acecf99910af08b3b35365f24d883ddfdd9825918c31477a5f3fc48f075080c4e97e80fecd6e1936bc92eb",
|
||||
"4bce212aca40bd1834bf741e47954526a8817ecbff8fedda854dbfc2d033a2567bd34b84fa02c3d07855f3dcf413590ae75ad6edf261d66bffb84d77803a7b76",
|
||||
"4cedaa8d96436fd0d2ebdb61d616790a3cb3737d0a93d2ae41d588137c0d3339999d991b7b3c452704be1f5f512ce5a08c0971898fad0ad77f18fd623411cd7f",
|
||||
"4d7e2548403c7e04d809030aa25015c9706e773517e1f72b81bdda22213aeb8f542cc62156bc5ef1c1622e99227fedbcc9c1b3e5e147b854e3b629f8f78bd158",
|
||||
"4de9f262eee7ee7d24ef8933af4610a1c5b97ff055c4fd0f97868e338a017308d460f4b003b74bd2aba7789153593f3b986b814fd93f2e4dfa5b55594fb17c55",
|
||||
"4e5e58a6ec7d9cac969f99b817f981ab7f8d2cbd9ab9dd0a37e45c70a8a8ca3b8e1c43b2013082062ffc1f4f3b268ea78ebb88d613d026a6312f40a6867a1d0b",
|
||||
"4ec8f1e81d7abd9d2dcb3dbd4be86b615f643386f3b1098c37a02a103fe6b36239c05bff6746ce568ca81765b285c1c271af4fb1fd99120341cae2851b776bbb",
|
||||
"4f57d1abde3364d91128e682ba724e6d3bc2ed6b112d2cd679739e478ea6bd671c527edff64c6a7b5c1173f68e02a410f09c2256356fde7d517908310c118382",
|
||||
"503dbbe83f0154e9bba4bc685bf1c1fbdd27293d0e4f837947910e4d320bc4d5bed1ade67a45b541013189a2c133f6f9f6cbc3566fad220c0635f286feec74c5",
|
||||
"51795e8ea8faa28e88f02559f6bdd47d9a0735589d47dd0f2e057b8b01fd3667fd9fd29e2613f200174af1d4b2d3d0860704cebebf2b6e79f1724d6782d7a270",
|
||||
"52cfac480c0cbc60068305d983adbf98814d2cfddb8be0ccfeb7c7f95bdaf31a5f70da944cc2453e6a5fbb9bb4092e36b662e838762855fb016f55e6e3d957c1",
|
||||
"537e815df93312978a9ba479ff2dffc9975c875950a203e8a1b7ffe4cb06625964f59dde1a06b87921a2a91702cc6bab04e159aec7cb2e2fc576cbe25838df2b",
|
||||
"547dd49c2cbe113b60c9df4a8e8b83a532f0da054cea8f1d23db66cc2638f7b5edfee820b4764646be10dbcd05caa5d71483477718a73ce8dfc752204807d9e5",
|
||||
"54f344faa37da0c9ab1ab563735c532ab81fcd1c8431cd8eca4ab7a8774f194e1ffe922547ea42bd1fad36e7493761992eca4821138ded1a9580e9fa38685291",
|
||||
"56071bfe30e977f201fa4d6808f8d7c2f3e6788ba68d12e2eb18386ac2507bb2bbe3c14bab90370066bbf6e2af42afcf1e45b362dba958d38fbc69cdea3874ea",
|
||||
"57e5f7cc5fac058f7c772eb41f8d49bd0fe3070c41eef445b1c073abf9b1cec451aa22764490b7da4c5bcacf4ee453c3153158cb1569f2f9447807cb14dc1126",
|
||||
"5a9a6481f1365def2919871790a95fdccbae145640f3b4b5e11d1d1370ed35c5a4c31e402b3b438892a6bfd9dfbbe2fc97056d2cc24f2ac412b3a5e1adb7003a",
|
||||
"5ba168675a3f2ea8d6d51896c5db84ee59ca65359b1b97e6d79543a6c918fe427f8b6cec79037c452eb086debe1d57049c25481d61a873f0503703266bf0cb84",
|
||||
"5da56ed896575439b7bbca20981f0b50618958d94f08b8f47d13774dca3990d4c571be1f4aa2786bf8fcbd1a594336be49cc26d972d5fb0c0682ab4d5b59d19e",
|
||||
"5ebf04dfec6c9b10a6fe7fd03725901973565a13530c20d02b4332670cce9beab185dd0b0f61f4a87f9c3dacc307d06e062d640f6550d9443a4c06a114e5bca5",
|
||||
"620b0d4c3be90f5f77f0cf9f976e5d7f067689884dc857f2b26a6edb40ef4fd2826213b5028900b168e853d036f1741600b236e04d8dcae5fa26cd2b8975ac04",
|
||||
"6323ec0401b28c1b06afa76760b478535101ae48c6c9367491087143287d9ff76b9c00f39dc838cdb20d65eab16622dc85143f5845791bf85705cb4f20975bbe",
|
||||
"633e7303eae41420e558e186308510783f5c234e9c639c0e6f5b6d37fca6bab766c5d475b2f330910bac93cef6982124e73a1b6bab1a2e99a2e5d797f8547c6d",
|
||||
"6688b60c24d068e19487c0b88a8b0a256854d8090ebebfa9a462fe49a77b8e9f303aa02042069cb0d6f227932cb48863758d0b57a18d53125ad39953bac543aa",
|
||||
"6745c47d0bd557c3dbce201697e8a2fbaff9ca52744d6007a636a237b82d1167795a0c0e2e5eb71b7460ed16e3fdcdac1dba1b7a2910d5168416e236c93ccb76",
|
||||
"6892c6ba9f66d0d7aa0445139081dc82a76d9ef8c7bb049a8eaf090f76c06cb4f1db05739038d7e04167569bb6d0fea55fc15343f7c77cd5a3e2d4c5ed068290",
|
||||
"689a213fd2d66b9d3634c9165b316e49ec53ac96131be42226d462ef1bc3ba38651e94698fd6e6f5c6d6c834d2b9a6732be54a8d6273c1025511d795326ffd3e",
|
||||
"696e8808717101399ab7ff16382db411adfadcd60c6a525539b0f8f88d84b448662fbca212b175379ff78ce7b2e64aa4b4e96d1820ade8eb2f742295f744db7c",
|
||||
"6bdec6b6f7e6b5a187feb6537101d90cca1043e34d53e347f2f0b14e701585361fc4a4cd81577b6d4588844fc8bdba8af66155d9eb6c2eefd461e23d0b2b87e4",
|
||||
"6e5948f904b3048511677d23d3cc9bd678739b234170302e1556c1bd1db8cee4243bf5e012a1320b4c50f6276e05cc5f620c461f640ac7413c23524f63f4aac3",
|
||||
"7115c00371f891d0094a716083b978948431509a16d5a9598e78ec12712db46d46f1674312cd31339e2d6118cca5f7a3f82ec25dce861a059ee31d832cd6dcda",
|
||||
"71b67ba5ec75978632136441a25426dbd48d4c0a55c1a5fc91f0f952b6bac06ab0d9709f0a7bb5a05393499135b76e4d722c7065fb636a227ba58c7fb86438eb",
|
||||
"72a3d5bde83f60653937232cf4d29218ff5988533855fdbc804d9bea7e94eb14a8afbe36a8f8ee576a3ed2345632d4ad36df52efdd9adbdb60da6f890074c6b6",
|
||||
"73d3685f3e78183724e3362f6c4288d522b54a8d2722197dc5ff5006974c1529dc562c1cbb05f023da4922cfc04340eb83b887c5343041febed1ddc44b22f9ed",
|
||||
"74a679bf6c4a1b5856a25780496812416383f0567afcbd9b411ae9a0abab47d466741bd925b03decd7da586c6ea9589c8f40208ac2a22fa4413d4ea6e1a6f0dd",
|
||||
"7638395f7d47fbf631633e2b899044e82e7ce0e07305114921cb0696551966b09993766782aaad70fa40a0f7362be31940381653c659fb73d3e1fd1fa45c257d",
|
||||
"76383f56d9979837d4b3348f9f28877dd1ba58c1bd0ea839bdabf021428c2edfba46ff25558004c5183a73575eb126d4e0746a40e22ab15154d5d6f238a48ca5",
|
||||
"77cd12af0a3d1d8cb64dd577bd2d50ac057d816694e8bc04089a6adb90e53ad6cbe9fe6aaf52596450e0c8178d8f9b88a545b27adcbc89bddf4d7c4bc4dd31fd",
|
||||
"7834da277192e9434b0c039272ee6b3f1b225d2f975aa175fee762fb0d5f16b1edba2e0dbf11d8aa2bab5984482f703f88f0e9d1786a7687710ea2688f307ccd",
|
||||
"79a830901c1bb0e27663dbe14d13df91d887daf0eabb6d3eee7f09768212afcd9cdde458d13042a9d2aa099f390c79ab94f2c1ca47fe0321f6c18973e437cdd6",
|
||||
"7b7593f44cc6f9f7b21495bca6f3d564f73f36b97ee15d51a783da8141463834022996c55e494800d21304079aefa8a5fe64350c9273e0d36453b097b2dcc5f4",
|
||||
"7c7d917d97412c24b76af336086469a43013d1d6b27298aa82c4e99b3f3b6c5a82014428a6a14b080a834382d9b0f178e405fb10170bcd340957955087698e19",
|
||||
"7d19dccf48114d3ec00c45fe80581300faca042157d6c9458ec439c300d8c7b1190aa70eecf19f8b1d5af8c7291f3da08fc635a7fd6acc7c5b203d1e226589f5",
|
||||
"7d8d6fa22ff724d823b82499686732b7fbf32f7c1f35dd5733ec3b65fa9625cf2d49bba86e6e0132252bea64074f35ff96a77bfa44441aed3fc1765b13cbc526",
|
||||
"7e36be2204fe367a3798e1b2ff988779890591e5c997b1f6025ec8ee1fef3eb19e81b74bb8657874f5a990d5062d6c849621ce363c4a9c2c5a63c0966be6140c",
|
||||
"7ff908cc2a18ec5a80e74fb4a2f12b406f0b7456ac797d35091d618c7ee991baa88edf62200817aa27732b03d9109cbdc6603092822b2e13a575953045b1cd0a",
|
||||
"8154716e77acd0f5e912887facffc7b2c9889891e863a39fcfed1e5637e47328a4a3bf40bbac1e740629d3013304ada88cf24dbf3735a7aa2d4b855f813c8fd1",
|
||||
"8162d651b6211f06f655a69cd7fdd383d6b4287e9ba132b9898ef9ac8687349e777626333d23bed93f9264aae965efb14ed650cb64fd0ad90494aff903eaef11",
|
||||
"8348c81a253096a9def0b472a8499fc03ef8c6c6d3cc9b4a018f142501ebd04c2479008b88895e033eb83978e7d71e52a91a2e324ca869ed8f2724dfdcef269e",
|
||||
"84b101db8d076398c1d624a8b38b22fbddbfa8fcc43ade44619f5a9b6e70daf1c963d6dc09ea039cca94ec56ccfd04a1689e806c970c0bd32cc9e56b73c7bd7b",
|
||||
"853fb95e0f017c203e08312e3ccf45c0419928e08313b1bb0444aa4ff089550546e67fdd3434a22cae3f67603437051e49be5c4d8fc5583b1aa6a1ae36f0a911",
|
||||
"85c9e13ccfc0d67de10281b04257d8ac0c256d2f9415e54148fb59954c0d43f66d3cbea43ea6389f8407a8bda8b1b1becd30e41dfbb3dd9bebbe69816d096fa9",
|
||||
"85ea0b349a8df04283c62efb571d2947e7264b566883e300501086733b08efa42ced215bc47951c8198626f86ca8c0df730cdc35f4d99ffff958599884b68e51",
|
||||
"88673d4f24d039e89c15d9ede6b653e41e42ca8bd7a8cb7e92a4f235e9b56cbad6200f8dc313c644e9a7d8d1dbc2b7e988da93bc0765499701bca6bc86d8fe3d",
|
||||
"88ec4ff5a1b0ffdabfe62d068286c851ee64c428883e56f32af14b59756d5846be9d46e5a777c4c22f2dec9596a1a44ed3aa75f1fb0231923cbd4ab59f1f9c47",
|
||||
"892bf89bd3a008a7d982de0d278349e654c713efacf965e88e46a12398375cd8502711378e378c39c33b2f995f47799760c6e6e05948b93c0d2b9fd427854ffc",
|
||||
"894bd433b4b06514195a604961c871649e108d210a41d5cbebe76f78cb6270b7708d1c59d6cb88807f882bb154be1c9058a753e2b6a95c3f4ac9e27a02036f12",
|
||||
"896256329fbeb5b8116349c31d8a39a7d36d5f970d48558e1db5417d611e240e4dbf473f6e49137f7aa6116394b7deabb0bbec4a014896cdc9484ee91458117d",
|
||||
"897c3401b4a35d8fad5966bf8c4dce6d94837c76e46e8131a8bd70527f6e1d8c9f59053d0a56425d7dee71939280ac3c38df14e976f613cb906d7187d6141297",
|
||||
"89e6f6a865ab743936a9b29d53b67bf4b68660ccbe834d4a11fa9011edb535e3b7b4d7a238c84971d4cb5f06ef7398bfecc4f2b786200fee67d7307f242da565",
|
||||
"8b707d4f8f32c80709d880fb257873915033c7d5bce9589a80ca9437618262c55dbe8eec2e8c82469bc335a84a8f16f89afcc53b8329dced5407a513927efc4c",
|
||||
"8e98d00c5d110856943461cd85305b0a817abb457c2afc8e89edb32e502d0060081c8e667d9fceb63a2f8efbcb6e193e9b0231afcc05ecb2303d7f742f304396",
|
||||
"90bb7c91281bb6625a0700c1ee2f3cee488cb9c1864ccf2e24699c5d957b1b7b686574d11acb37572fdf18a15f272fd44009b6cfce9b6cdf9025dd5002869d30",
|
||||
"911c26cf828319df5123a9cf38641704961a6b894aa6ee2b0d13409996a93d89f4868b91e0eb1efea907a70a14cf3a3bd8935033aeb03bd8555f2dea857a48bd",
|
||||
"92862e616dce7469bafc507ab8fbb47bb6f5ca8b96b05e9fbf39a259d1d4c4cac97b0472f713db2e5fbff0d3e587e7b34bedff80cc2a70c446becf9b488370d9",
|
||||
"92a15e2cbd0c89fbce36b05e3b282255097bb5492fc11f0d2b0a08c4311621a41ec35df201de51523b62189a3b44bb3eb1cbdf64e80f4a543d0d9f9a99f9bd3f",
|
||||
"97679def7032179662646816abc12f74fc693fb02c43675a2d5407e58be6dacd1eb483d1bf46f66c5103de3a649211c29e1127dca473e13b02dcd5e7df719cc0",
|
||||
"97e29f9edfe712b059203de5af236569e2c41fae8cddfc7b486204d6e30c411ec605c757fa5a1a151646092bd5d71de18a5f2d8b6fb74b9a28a7c7226a4f641b",
|
||||
"9b5f9ebc961424b8a6b7def59a86ad6fa6e45fc9ad5fb251c15d4d09202e6d3f63bb37b80faa4fdfe3997182079988d78556a9ebf7db535951a1e3cba0c0f6c9",
|
||||
"9b847b5006ea1b47dc0ec366d09aec4a67aec747c55af554c094994fe8c8625b09cfd5322958c816bea74f725abb3d1403f2e9336007db3b257949401b1fef03",
|
||||
"9cdf4ab91c8ef6148dfd724f2a2c644cc00df44f5eea5035e760ac59ec79078ffaf3d97a9c5a9747c04895a3dc666339f82cd17e40095b9fd055df3ff07da6d6",
|
||||
"9fb127fbe4659174b52ef61778a705cc5a96c8f136445bd28c10ac79398ab9ea291852b627e285e828fe37aa23d05b13cf202f3f0cb4c272aba94dd1806802c9",
|
||||
"9fe6ba948da2f4e4aa0e1b0d3e1aec1f093335f8097d7dd3d6b5217cd539f5c41735ef7a615d8210f2e6b777b7198f151264ade172be7dbbf5d442bf91843e8f",
|
||||
"a1afb2eae49546bf59e6f9a1968287add54dd6e336ec795037090a435f736b6d8ba2076e05e27034979a8caaeafce05fc6d9d5541f4e5a4321e64106dabd1549",
|
||||
"a2b06b54679145e65ce10a8356285efcadbacd41be817d2e0858ddba59e638775b79f76cb9e4ac5859627b67ebf227c55b51cc48e6d0d7ef41c9845d96ded68a",
|
||||
"a2b503bc78bd0b68fcdc3e3b68e3c68cf3da8d2d48d91f09313c7cdc11b43dd4d4de3a8a2c4b526809adf9879427c4818db72cffdbc2f0015a9fa5ade83bd400",
|
||||
"a359c15185b6d2a402dacfb7b3dc2e3ce5fd80a1add892b2dcf8e23bebe57f16680eebf7a851c3a870d3ba9932c4e42bad937c4676931d849c62f021ba812860",
|
||||
"a35dea43a67cbd18b705cf2b28114652686eb409c1ae1e56c04256fd902ba9ab52c7343bb8b162522bf3442da42431246644432c70f819ba8617a723abcce836",
|
||||
"a4b1eb406ff2c349437a5634148365fd0eecad5a264036e3af171d0f6769a7129590a0a3e09592038baa8bc1292af2bbdbfb74e3b1a685844e263532a87baef6",
|
||||
"a4b8fa949865e0aa45147a27f0a034a26e34745d624dcf0603dd25fd1ce279eaf2d073a853d67e6432447d5e06708d71a9cddac0d2918876d2d3498af3ae0892",
|
||||
"a6bc3c6beffd4335228c3b4857365215f0c4bc5197a5b0eca95334af33dba19ebf8d513f6c75359d7cb678b051d96579d73ebdaa5b6906e3b6eab35005bee13e",
|
||||
"aabfdbdc21150ac70f9bb1a34f4d7de570a72ba7e1afd8c08d64c85e00c12e6ca1f2ffe60dbd16a871987bd7aa47182baf57e7f68daaf0bac7fc3b907c8ef4c0",
|
||||
"af506d21ee140905c125e61c19d04599354fe84fe211502c9c766951387f6ff79e80db0658392af173f37ef7c92d7815ac9214e8ba4c6ade3e7a7ef014e5cc08",
|
||||
"b04a1d5062f2921f39074e4f5c00675269195834a0a9c0bcce10b1427bf8a6499bdd7d8c6717f220aa4ec9f590bb04b290673018528a60dd819ce9798b0a33b4",
|
||||
"b0a3c5148905a3e7e18c773684026e4ccd8811c3c62f6fcfc23135686a8db9c2caa6de7b14775e29b7cdeb360ae25ea626381c7689ade892c3fb72f82e2daa89",
|
||||
"b1a0cbb91459433ff6de32b189783a734c2ada4c04d7dd164de449ce79c749d382aff10aa9ed7b4449af3390da51585123ef88719ecf7cfea9c24223023a23dd",
|
||||
"b47a96b489f4dd851c364dee278699905f1ed933ba3a98a6660160463a8decef830bb91ac0a1b4f9b742df2dfbdc9625ec27133a69f6cf3cb81ed298183764e7",
|
||||
"b52e493e5049e86223385546f3407f5924fd75311a0a11af38423b7bb7c02c3f085fd1d9188515c7b43c59fbf168c23126456dba98dc9c0d29b7a3edee159015",
|
||||
"b5419f6ea89dc32431a7671df1ebf934647bba5b27db54235fb1e47d691b70c3160bf8019653d5faad616b169adfea5d8e7077e9820d9294144354133d45ee16",
|
||||
"b570d19edbda421e0975056b5fdf4cefbc3825b840aacaa337567ec1aa151a81633eb645a86c8c1c22b23e7f916c60c20cb115de29b670511fb9413611e8cc3b",
|
||||
"baff09432cffceac6ecd395a8ed5c947fdafe6c30c1c0f3b83c4ddfaa2ca9d57b21876153ac2b82067d7d37bc6789e2f68558f1f26fbcd53fd6a500124f80655",
|
||||
"bc0b544f1c13cc1d0fe15b0eab96e89e6d4dfc8919de1fb757ef97a7d5de9efff5e520def5a8471b75480fd49d410d222ed9332089bd527946c74070e8ad1934",
|
||||
"bc3886ba087d3fd637a4fa85adf33170e23b369c0c6eca422ddb26c73c04ae467e2b95ed73bead19013001af65bf2cf0d686a6e702b458a77068184c8b17dfb5",
|
||||
"bd609b6955a6a35a5580a6e19e173b02fa6d4ed880b6cba8fb5d2fd91309dc753326a824a47ee6148b3d6a01b9b49ce7c1122b1e30b6ea181bd257bbc38c2940",
|
||||
"bet",
|
||||
"bf64f21ff129fae4bf3ff795c39df0a4a6dc40ece1d71747a913dd84af2e4cac4e1b84213e23cb1397b3299f26b1b6302a3cdbd41da8baea2505febd6e1803ce",
|
||||
"bfe8d96ce71f9cce7bd16b5282041c66773405f1a11f4f0c8d3b6e81646f262bdac0cb3ee8f54e13175ba9ed7da38407e8a9aeff20972271f0c62c0b19f8b644",
|
||||
"c088e98f02d33581ac0d79c37a101e4273e0750a5691cffd96a09c38742617dae948cbc4affbff4ece1d611e44ea5539f0597eef33ef39f7f0e3ec2a5edf75eb",
|
||||
"c1bafe50eb70a1b65188fac549c6bbe7f641b672fbe9fd08cb64ed1f176efbedeca88f5c295d508e2dbf9b495fe0040bbbfbc4776af0d6cad6576a997db3e4cc",
|
||||
"c1e166044d7731207ce8b838011eae84814857a8ddb63b8a393d2497bdcd7e96d045aa229a7978533646cf9f9ea99a619943599d47a1558073690601fb486ad5",
|
||||
"c21ee36416076c1929dd93af7e936e371d4fe263662a2deb8fd6b0e5cd5b8cd86437b4afb2faa8813bd7b8689c7f56a63729a1e666684d8303f469faad669e54",
|
||||
"c3f1018eb1f7b5e5c0210deab309d06d3e8e9e15ec7dd41d2dbcf863c39e36955b2034fe44af5a4983285b8fc6c0d92b092f95383f8989c1d75a40a4bcdd3d83",
|
||||
"c4d740361d5f6bdcf408abc029d8adceb35f06c332c46fc290d187d96562992a8d6caa562eaa21643c346d44c9e706cd991ba986e53cfe37b41a0e048d14d6e0",
|
||||
"c5d4c712e06053bc35bc6cef173daaaae7fd47db5ac812b95a2f0f08374432ffeaa2b49a0f10cb60f38405d2459489df0e43fb73b48bdb6caadcb4405915c33e",
|
||||
"c777d3358a0aff067b64f254ac462fa223a1650af20ce2af341de610eebbb55a128a1dc43c91da7a1844848b5920b7dd5c5e0a1e8651d6442a2418709dad8c87",
|
||||
"c96c4e97012d25add2fe69513a5b1f941fc36c837737780c443203c72182b808a129982ebd64aaffb8eda4ba3c8787fd98ca55fd33f060f63917567446417574",
|
||||
"c9774fae6c0a30b456a21005abf026799f370a12fbcbc098e81bac2456955320ec6e712f1d6f9d59a50d615f81c6284785292180364598987a7990ae83c0f0c9",
|
||||
"cb557116fa7b3b6da35024b539795d9e255c111c06edbf0e77ba728dd352353182c96918c649fb9327bbb4fea1bb25affcade9b5069676b191611062941356e7",
|
||||
"cc3b30ba0f733abfe64667838f620c4f542db4665fa68e4d945b75ac0d2c435e6529e6541c4ac8ca18dec753b10e3a5c4614cfbc658dc951ab6cab357e6ef363",
|
||||
"ccd85fb40538f948396a4c2bf381ea591927a7cde9330ecab883cad5bd59db56f0c983362f9d0a8e88a67d3f2bae2182bc8ea94b4e3adc721c782ca5c801e2af",
|
||||
"cd1226e73c8275de15f2edb3744a413277fc76a4ebb7842fb743215c14b405b96c4e64bc8324feafe58937da218a1b0aeb9451d5781672ced1ad68c31eb54ba1",
|
||||
"cdc5df38351edbdf7afdb3aaf0b4f53253cedbf3f43d662548a432f86389505fd6f2f64f51f951355f4fcfc5718a98dd782e1472246556c87f0bbaacebb38cb8",
|
||||
"ce31dc5dfa61834e3ab67925ff5f24baf04b4aee6e35cd8ffa524f87b2e2e094999f85c68cc7a1c0e9b19016d050c1755406d02f7116ef85afa355c65a9a5855",
|
||||
"cefa12e7ac99a5d11df487ab6521837b11165246d1c3cdb2108770532cb1429c2dcba5262a4dbd9a37686bb76ad1c48ddecf473d807c2e552534b24bb78ee30d",
|
||||
"cf5b890eb74b4ac647d011a989a92a413c23c0db580c87057fc5afba2d83dd861f2a8640fb952381d090328d6278dbe56713d516020ce95cfb6d4fecf63b89e7",
|
||||
"cf8c583b1282449a97b72e317e56d5a4d1432e5420148a21ba8fd8bb2a172c7832379f30cd6582bd6674b548deb8517c8915c5c4b423bd3e73903f71b8862380",
|
||||
"d0cd44fcdae652efb0dd428cd1b8f1911e6eb2ca3469a1f2d6f9faf97a9d05e30f28387dfb81bfb4c97eba64187a0c047c85bf06998ccaec58781f3982626bb6",
|
||||
"d15bfc3278de168872744ebec8fc7a07678bd04b7557e89749eeedc7087fe0a36cb8b094e978e979d67feba46c4a2741f0fab18010796b5ff436836a5fc67e88",
|
||||
"d3d64ab67746fcb7b4a37d6b6b80c9d4b11afd9e15d81a60b3fde53e4f99267a63b50cfb2184c7c84c9f0dd4345c0d929160a7df52698a82603c112e0bf8ab8e",
|
||||
"d647b73602a3a0c1b06f282a612c29eefc6a7e372bc8af212a41f481843c23a975b41ca402f06ecb7dc660d4dd22a814f7659b48da7dfd28c02a319032394da1",
|
||||
"d98d182c89b465adb0fdd1cc5c2bcb22b81fcc4eb941977b667de22927ccc9a7876033008118957d803c83afb95595986bcc076e77483dd55dca91ce253ba010",
|
||||
"dafd89491990553f5e22021f96344b3bc92be6a419c919ba78860876f226e51e668dbabcb11cf9500f3bd05582b387907ea007b5e8f37c78fb71ac819b9bc20e",
|
||||
"db6932752693a1b2e7ef9af4adbf6fc8a299f21965ff9ff52b141563a471600df9308a89562af7b664b7fe14da134b4f44beafcf910f8794652e16dc475796b5",
|
||||
"db8eecd1ac9b20918e31f04331e46007f367c1f6365c9c4abb7af70eb1d2ea12174375fb95d1d11c46e03c81976de6d68f70693e1ea7f2096aecf06307a17d29",
|
||||
"dbe9efadf636bdd82f3ac2b3710653421e7cefca01b74012824b73f7368469fc4dd7e788b047920d4b3b7e4a486c732872ca11a75a89d1323337191ac2bc899e",
|
||||
"dca1aa77f919ef1000d91291ba68800340332c299e3c4c6bcabb41fd2305f36db353211d6ac691c37d16889e3c3ffc1efb7c621e8040cb77b7249e264af44768",
|
||||
"dee065b956b99b10db4763759d64c41791af1a7e77f1864f90a2b0847a12633dcf9bc108db7eaf73cc8d0e750f5c37383a56cd77cc2276d3960104c6bebe6346",
|
||||
"df52eb2c24a6c35b977a1d0fab336ab5c21cd84f78f685d5f0bea9ebaa7c078c0ca69717455e29f17bcd9282a1af9cbbe2d3e608c62cecf868419da081e2d810",
|
||||
"dfc8721858bd56b846473eb6123420a2735fc69cd77a92a1d2c623c51eab3ac664d61a890d305c6fe77ec48f2759248744e9d56689f6c22317bbaa316c848fbc",
|
||||
"e249989b0c397ac03583594a3911c9e9222ccce620921170bb39b8ab6fdaf136b164f3c9fcd8b4f750fc469c9cd69f144c2ca2dd918fcb778148fbf9751a869b",
|
||||
"e364856fe22a5c80cc8d13ee445473a0eb7204bad6972fc4c116ea1551b50da43a01577ef0487f2afb7aaee4b4155d61b1ff2b83dc502363929de76af0226818",
|
||||
"e37d85b60af58cc03e9b36e09dee5e8308368f44f91b28455e7f645a13fe29902e7f7d594ccb600e02caf4202a05d15477d4ea5191c7b97038ea06d73ce93c33",
|
||||
"e41b679ec1446821bf0a80fa7003fb90ac66b79d09c00dccf702a1b254f9ea85a68b0643ecd81d999413d5814b06b9998afd9876062067f51a63747533921d08",
|
||||
"e61565e75d632748413d51997cabb00613355f0a94cf6b2f929fdfa351490d2afc9bad72c7fa67595d9d9c7adc9454e8d1b05527991a17258424b14ec4e9a1d5",
|
||||
"e6a64aa839b95caeb74d810677a33b747e23907213719dd9706af7364b4cacf204b09f9b26686a70cd6d416a6b590f87103cc683685529968ea0edd75107f649",
|
||||
"e751d2f83310990aedc7392b54f827afac1873e9f8861e625814a8d1d15776160864742d557796d07a612479b2886287b417273cc9f7718889216c2ec3b3b7ed",
|
||||
"e7847a5814b865bc043600fee7d810b9815da389278fdfdd412114ab8f87b1536f4b63f3f7c3d3eeb097486abc152043eefdae6fd12c2f8743dac1cb668ab136",
|
||||
"e82ba384934ac4780595261c43eeceb3df29a047087870f5da13c7acae782b4b97857b98852ce235428b4bc24aa4ddcdcd7297acf683421201eff1c3fbcab84e",
|
||||
"e9da05b6d590dcf94addabd168c543be41a2275ddf44f6f44db1e3698f0bf7dd67f2e93b66679e0a0d42a2f39f3bc6a389f0e6b362431d0cb197fe46f9dd6606",
|
||||
"ea8456e0667e1cce6273cb333b7e6982f9aa0f260c7c103e04eb0076a73fe3497070b1a8f0c45b097dc3100a30254095a1c63e9514367655e9a378344ed25d1d",
|
||||
"eb415e110eaff48bdbc03b5ab719f64593f222b4a1d872b552e4fc48d338e532d1954f76e94813e44a6cd030425b4076cd7b9bf388b870a31344545d092dfa1a",
|
||||
"eb5078bcb64f9595d6d8589ad60502b2870f16942fbb4cbd2483c817c7fa460faeda90b82bcf531ac96be8c1d6825953ab85ab0bd46ea477615e71e50386ffe0",
|
||||
"ed5c9e654bfb28e9d4131b3805597ee9fa14fe72c6e2a6d503ec2e47faf396bbfa15ce49e6fe83bd97da1d441138545d388a329ae888c1f1ea44fc62996d787f",
|
||||
"edba91511ccf8ab01de2e2cef34c47d8430f8a2f4c62cd66c42ecb62da52d396e909aef7da067eedc58e1eb58a1fc3697939371e6a36c931af5987a50509854b",
|
||||
"ee216d2d13cba1a951445b061771ab0c97eb3c250003e16008debd85fa0317a508f923db79c796dc29de18c83baad5b15651f80db1cf7aee854e6da28853b742",
|
||||
"eli",
|
||||
"eva",
|
||||
"f1812dbb566edaa2ac92121641e5ae504d647bec835a02ed5d7c7f90424d0e8fe202846a599c2f74c49ec9b86181d3d6c50ac0688baa9b4c28608d592becdfdb",
|
||||
"f26a63e5171e2935e13015fbb755f04bff87fb1767ac91aa3481b9fe13b54cc75f772b41dfdc634829dd9b44c7b08798ed114046ef981d454889c41d4f6408d9",
|
||||
"f2f359ea473c07070fd1e50d2fcfe3dc4f624f01678c35920b079660b2d5b9c1743259ae6129992cd3b99ec2cdda94a45e8710888488b196c6cd9c853e86e454",
|
||||
"f35ce011f75fc01d153a94339aad24ae4fd5f181af55916a5ca0153cd5220ed199b98459eb88e9f4f3a4f8fbcf5c272bafdca35ddaca0827c4b480f79e7db1d6",
|
||||
"f4df4a067fec667827901fb55acb16acc4650f24eeaa588af1a103e5009e9166f753c7cd313d0d3dec79abb82a13c43fd2059db5ac0307b78369ca318001c4e7",
|
||||
"f56a47b89ebd2d22f869e2260b55f70d7ae0d499fc3fd4dbcb0e6e507f12513f29c004b9426e428696df0d434e4ad467f143bc620a2f661a54608de9e2c265d5",
|
||||
"f61bdd3abb2d03f07e33bfb0b9fba46069468cefd9eda04e77cdc5c2f13a417716d3e60ca91c39de1a480b72112ef0e6143e927fad45410ee252cfce9034f0b1",
|
||||
"f62196a11f50362b35eb1ed830b03c18bb187e4d07014a3d1b238756fe836f254afa923184170512a0c6d990032b4b1edb25dd2b74f6fc15f6ef6b51b6f82dd4",
|
||||
"f8e4bf2dd4f93dd473b055ebf2dfa6081703014fddca40a0efb6bd5dcb702244a30a2d3edcd6597ea4118c20258da575a0bc69a895356519d8400a5ad3b2bf58",
|
||||
"f980d152d5c14c6e7557f13fe26305ed0105dbb23177d455372e5529a5d3333e203070e87352d985a136f5ce3976a16b97070a4343fb4cb9d0760d9bcd5c7677",
|
||||
"fa8641fb64db60e7299f070f6497678dee0bfdeefcc22a51ca328da34b33fdd6c31b882d97fc32cfcdeee4fcb72b05d7eae43b10b531db161b7e8dcfc2775ebf",
|
||||
"fdde8cdd2fa5689aec75121e3c0778ca8c37238fd6a64706d85a4156d7735c482f1db74cefd023e94587b64a56d4a06e3b7fecf5c85978a4c777c9eaa5c633fd",
|
||||
"jan",
|
||||
"mar",
|
||||
"ona",
|
||||
"pau",
|
||||
"pep",
|
||||
"pol",
|
||||
"teo"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"fa": {
|
||||
"custom": {
|
||||
"glow-tts": {
|
||||
"id": "tts_models/fa/custom/glow-tts"
|
||||
}
|
||||
}
|
||||
},
|
||||
"bn": {
|
||||
"custom": {
|
||||
"vits-male": {
|
||||
"id": "tts_models/bn/custom/vits-male"
|
||||
},
|
||||
"vits-female": {
|
||||
"id": "tts_models/bn/custom/vits-female"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
206
public/scripts/extensions/tts/cosyvoice.js
Normal file
206
public/scripts/extensions/tts/cosyvoice.js
Normal file
@@ -0,0 +1,206 @@
|
||||
import { saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { CosyVoiceProvider };
|
||||
|
||||
/**
 * TTS provider that talks to a CosyVoice HTTP server.
 *
 * The server is expected to expose `GET <endpoint>/speakers` (voice list) and
 * `POST <endpoint>/` (speech generation); both are called from this class.
 */
class CosyVoiceProvider {
    //########//
    // Config //
    //########//

    /** Active settings object; assigned by loadSettings(). */
    settings;
    /** Readiness flag. It is not updated anywhere in this class. */
    ready = false;
    /** Voice objects cached from the last successful /speakers request. */
    voices = [];
    separator = '. ';
    audioElement = document.createElement('audio');

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text (currently returned unchanged)
     */
    processText(text) {
        return text;
    }

    audioFormats = ['wav', 'ogg', 'silk', 'mp3', 'flac'];

    languageLabels = {
        'Auto': 'auto',
    };

    langKey2LangCode = {
        'zh': 'zh-CN',
        'en': 'en-US',
        'ja': 'ja-JP',
        'ko': 'ko-KR',
    };

    modelTypes = {
        CosyVoice: 'CosyVoice',
    };

    /** Defaults; also the whitelist of keys loadSettings() accepts. */
    defaultSettings = {
        provider_endpoint: 'http://localhost:9880',
        format: 'wav',
        lang: 'auto',
        streaming: false,
    };

    /**
     * Markup for the provider settings panel.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        let html = `

        <label for="tts_endpoint">Provider Endpoint:</label>
        <input id="tts_endpoint" type="text" class="text_pole" maxlength="250" height="300" value="${this.defaultSettings.provider_endpoint}"/>
        <span>Windows users Use <a target="_blank" href="https://github.com/v3ucn/CosyVoice_For_Windows">CosyVoice_For_Windows</a>(Unofficial).</span><br/>
        <span>Macos Users Use <a target="_blank" href="https://github.com/v3ucn/CosyVoice_for_MacOs">CosyVoice_for_MacOs</a>(Unofficial).</span><br/>
        <br/>

        `;

        return html;
    }

    /**
     * Used when provider settings are updated from the UI: reads the endpoint
     * input, persists the settings and re-applies them.
     */
    onSettingsChange() {
        this.settings.provider_endpoint = String($('#tts_endpoint').val());

        saveTtsProviderSettings();
        this.changeTTSSettings();
    }

    /**
     * Populate the provider state and UI from stored settings.
     * Keys that are not present in defaultSettings are ignored.
     * @param {object} settings Stored provider settings (may be empty)
     * @returns {Promise<void>}
     */
    async loadSettings(settings) {
        const incoming = settings ?? {};

        if (Object.keys(incoming).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Work on a copy so that the defaults themselves are never overwritten.
        this.settings = { ...this.defaultSettings };

        // Only accept keys defined in defaultSettings (own keys, so inherited
        // names such as "constructor" cannot slip through).
        for (const [key, value] of Object.entries(incoming)) {
            if (Object.hasOwn(this.settings, key)) {
                this.settings[key] = value;
            } else {
                console.debug(`Ignoring non-user-configurable setting: ${key}`);
            }
        }

        // Set initial values from the settings
        $('#tts_endpoint').val(this.settings.provider_endpoint);

        await this.checkReady();

        console.info('CosyVoice: Settings loaded');
    }

    /**
     * Perform a simple readiness check by trying to fetch voiceIds.
     * Uses allSettled, so a failing request does not reject this method.
     * @returns {Promise<void>}
     */
    async checkReady() {
        await Promise.allSettled([this.fetchTtsVoiceObjects(), this.changeTTSSettings()]);
    }

    /** Refresh hook; this provider has nothing to refresh. */
    async onRefreshClick() {
        return;
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Look up a voice by name, fetching the voice list first if the cache is empty.
     * @param {string} voiceName Voice name to get
     * @returns {Promise<object>} Matching voice object
     * @throws {Error} When no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        const match = this.voices.find(voice => voice.name == voiceName);
        if (!match) {
            throw new Error(`TTS Voice name ${voiceName} not found`);
        }
        return match;
    }

    /**
     * Generate TTS for a given text.
     * @param {string} text Text to generate TTS for
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Fetch response
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the voice list from `<endpoint>/speakers` and cache it in `this.voices`.
     * @returns {Promise<any>} Parsed JSON body of the response
     * @throws {Error} On a non-OK HTTP status
     */
    async fetchTtsVoiceObjects() {
        const response = await fetch(`${this.settings.provider_endpoint}/speakers`);
        console.info(response);

        if (!response.ok) {
            // Read the body as text: an error body is not guaranteed to be JSON,
            // and interpolating a parsed object would only print "[object Object]".
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const responseJson = await response.json();
        this.voices = responseJson;

        return responseJson;
    }

    /**
     * Each time a parameter is changed, we change the configuration.
     * Intentionally empty for this provider.
     */
    async changeTTSSettings() {
    }

    /**
     * Fetch TTS generation from the API.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice ID to use; sent as the `speaker` field
     * @param {string|null} [lang=null] Currently unused
     * @param {boolean} [forceNoStreaming=false] When true, never request streaming
     *        even if it is enabled in the settings
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} On a non-OK HTTP status
     */
    async fetchTtsGeneration(inputText, voiceId, lang = null, forceNoStreaming = false) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);

        const streaming = Boolean(this.settings.streaming) && !forceNoStreaming;

        const params = {
            text: inputText,
            speaker: voiceId,
        };

        if (streaming) {
            params['streaming'] = 1;
        }

        const url = `${this.settings.provider_endpoint}/`;

        const response = await fetch(
            url,
            {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify(params), // Convert parameter objects to JSON strings
            },
        );

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }

    /**
     * Interface not used by this provider.
     * @param {any} history_item_id History item identifier
     * @returns {Promise<any>} The identifier, unchanged
     */
    async fetchTtsFromHistory(history_item_id) {
        return Promise.resolve(history_item_id);
    }
}
|
270
public/scripts/extensions/tts/edge.js
Normal file
270
public/scripts/extensions/tts/edge.js
Normal file
@@ -0,0 +1,270 @@
|
||||
import { getRequestHeaders } from '../../../script.js';
|
||||
import { getApiUrl } from '../../extensions.js';
|
||||
import { doExtrasFetch, modules } from '../../extensions.js';
|
||||
import { getPreviewString } from './index.js';
|
||||
import { saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { EdgeTtsProvider };
|
||||
|
||||
/** Backends through which Edge TTS requests can be routed. */
const EDGE_TTS_PROVIDER = {
    extras: 'extras',
    plugin: 'plugin',
};

/**
 * TTS provider for Microsoft Edge voices, reachable either through the
 * Extras API or through a server plugin (see EDGE_TTS_PROVIDER).
 */
class EdgeTtsProvider {
    //########//
    // Config //
    //########//

    /** Active settings object; assigned by loadSettings(). */
    settings;
    /** Voice objects cached by getVoice(). */
    voices = [];
    separator = ' . ';
    audioElement = document.createElement('audio');

    /** Defaults; also the whitelist of keys loadSettings() accepts. */
    defaultSettings = {
        voiceMap: {},
        rate: 0,
        provider: EDGE_TTS_PROVIDER.extras,
    };

    /**
     * Markup for the provider settings panel.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        let html = `Microsoft Edge TTS<br>
        <label for="edge_tts_provider">Provider</label>
        <select id="edge_tts_provider">
            <option value="${EDGE_TTS_PROVIDER.extras}">Extras</option>
            <option value="${EDGE_TTS_PROVIDER.plugin}">Plugin</option>
        </select>
        <label for="edge_tts_rate">Rate: <span id="edge_tts_rate_output"></span></label>
        <input id="edge_tts_rate" type="range" value="${this.defaultSettings.rate}" min="-100" max="100" step="1" />
        `;
        return html;
    }

    /**
     * Read rate and provider from the UI controls, update the rate label and
     * persist the settings.
     */
    onSettingsChange() {
        this.settings.rate = Number($('#edge_tts_rate').val());
        $('#edge_tts_rate_output').text(this.settings.rate);
        this.settings.provider = String($('#edge_tts_provider').val());
        saveTtsProviderSettings();
    }

    /**
     * Populate the provider state and UI from stored settings, bind the UI
     * handlers and run the readiness check.
     * @param {object} settings Stored provider settings (may be empty)
     * @returns {Promise<void>}
     * @throws {Error} When `settings` contains a key not present in defaultSettings,
     *         or when the readiness check fails
     */
    async loadSettings(settings) {
        const incoming = settings ?? {};

        if (Object.keys(incoming).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Work on a copy so that the defaults themselves are never overwritten.
        this.settings = { ...this.defaultSettings };

        // Only accept keys defined in defaultSettings
        for (const [key, value] of Object.entries(incoming)) {
            if (!Object.hasOwn(this.settings, key)) {
                throw new Error(`Invalid setting passed to TTS Provider: ${key}`);
            }
            this.settings[key] = value;
        }

        $('#edge_tts_rate').val(this.settings.rate || 0);
        $('#edge_tts_rate_output').text(this.settings.rate || 0);
        $('#edge_tts_rate').on('input', () => { this.onSettingsChange(); });
        $('#edge_tts_provider').val(this.settings.provider || EDGE_TTS_PROVIDER.extras);
        $('#edge_tts_provider').on('change', () => { this.onSettingsChange(); });
        await this.checkReady();

        console.debug('EdgeTTS: Settings loaded');
    }

    /**
     * Perform a simple readiness check by trying to fetch voiceIds
     * @returns {Promise<void>}
     */
    async checkReady() {
        await this.throwIfModuleMissing();
        await this.fetchTtsVoiceObjects();
    }

    /** Refresh hook; this provider has nothing to refresh. */
    async onRefreshClick() {
        return;
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Get a voice from the TTS provider.
     * @param {string} voiceName Voice name to get
     * @returns {Promise<Object>} Voice object
     * @throws {Error} When no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        const match = this.voices.find(voice => voice.name == voiceName);
        if (!match) {
            throw new Error(`TTS Voice name ${voiceName} not found`);
        }
        return match;
    }

    /**
     * Generate TTS for a given text.
     * @param {string} text Text to generate TTS for
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Fetch response
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the voice list from the configured backend, sorted by locale and
     * then by short name.
     * @returns {Promise<Array<{name: string, voice_id: string, preview_url: boolean, lang: string}>>} Voice objects
     * @throws {Error} When the backend is unavailable or answers with a non-OK status
     */
    async fetchTtsVoiceObjects() {
        await this.throwIfModuleMissing();

        const url = this.getVoicesUrl();
        const response = await this.doFetch(url);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const rawVoices = await response.json();
        return rawVoices
            .sort((a, b) => a.Locale.localeCompare(b.Locale) || a.ShortName.localeCompare(b.ShortName))
            .map(x => ({ name: x.ShortName, voice_id: x.ShortName, preview_url: false, lang: x.Locale }));
    }

    /**
     * Preview TTS for a given voice ID.
     * @param {string} id Voice ID
     * @returns {Promise<void>}
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;
        const voice = await this.getVoice(id);
        const text = getPreviewString(voice.lang);
        const response = await this.fetchTtsGeneration(text, id);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        this.audioElement.src = url;
        // Register the cleanup before playback starts so the object URL is
        // always released once the clip finishes.
        this.audioElement.onended = () => URL.revokeObjectURL(url);
        this.audioElement.play();
    }

    /**
     * Fetch TTS generation from the API.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} When the backend is unavailable or answers with a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId) {
        await this.throwIfModuleMissing();

        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const url = this.getGenerateUrl();
        const response = await this.doFetch(url,
            {
                method: 'POST',
                headers: getRequestHeaders(),
                body: JSON.stringify({
                    'text': inputText,
                    'voice': voiceId,
                    'rate': Number(this.settings.rate),
                }),
            },
        );
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    /**
     * Perform a fetch request using the configured provider.
     * @param {string} url URL string
     * @param {any} options Request options
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} When the configured provider is not a known backend
     */
    doFetch(url, options) {
        if (this.settings.provider === EDGE_TTS_PROVIDER.extras) {
            return doExtrasFetch(url, options);
        }

        if (this.settings.provider === EDGE_TTS_PROVIDER.plugin) {
            return fetch(url, options);
        }

        throw new Error('Invalid TTS Provider');
    }

    /**
     * Get the URL for the TTS generation endpoint.
     * @returns {string} URL string
     * @throws {Error} When the configured provider is not a known backend
     */
    getGenerateUrl() {
        if (this.settings.provider === EDGE_TTS_PROVIDER.extras) {
            const url = new URL(getApiUrl());
            url.pathname = '/api/edge-tts/generate';
            return url.toString();
        }

        if (this.settings.provider === EDGE_TTS_PROVIDER.plugin) {
            return '/api/plugins/edge-tts/generate';
        }

        throw new Error('Invalid TTS Provider');
    }

    /**
     * Get the URL for the TTS voices endpoint.
     * @returns {string} URL string
     * @throws {Error} When the configured provider is not a known backend
     */
    getVoicesUrl() {
        if (this.settings.provider === EDGE_TTS_PROVIDER.extras) {
            const url = new URL(getApiUrl());
            url.pathname = '/api/edge-tts/list';
            return url.toString();
        }

        if (this.settings.provider === EDGE_TTS_PROVIDER.plugin) {
            return '/api/plugins/edge-tts/list';
        }

        throw new Error('Invalid TTS Provider');
    }

    /**
     * Verify that the backend selected in the settings is usable.
     * @returns {Promise<void>}
     * @throws {Error} When the Extras module list lacks `edge-tts`, or when the
     *         server plugin probe fails
     */
    async throwIfModuleMissing() {
        if (this.settings.provider === EDGE_TTS_PROVIDER.extras && !modules.includes('edge-tts')) {
            const message = 'Edge TTS module not loaded. Add edge-tts to enable-modules and restart the Extras API.';
            // toastr.error(message)
            throw new Error(message);
        }

        // isPluginAvailable() is async: without `await` the returned Promise is
        // always truthy and this check could never fail.
        if (this.settings.provider === EDGE_TTS_PROVIDER.plugin && !(await this.isPluginAvailable())) {
            const message = 'Edge TTS Server plugin not loaded. Install it from https://github.com/ChuQuadrant/ChuQuadrant-EdgeTTS-Plugin and restart the ChuQuadrant server.';
            // toastr.error(message)
            throw new Error(message);
        }
    }

    /**
     * Probe the server plugin endpoint.
     * @returns {Promise<boolean>} True when the probe answers with an OK status;
     *          false on a non-OK status or when the request itself fails
     */
    async isPluginAvailable() {
        try {
            const result = await fetch('/api/plugins/edge-tts/probe', {
                method: 'POST',
                headers: getRequestHeaders(),
            });
            return result.ok;
        } catch (e) {
            return false;
        }
    }
}
|
||||
|
390
public/scripts/extensions/tts/elevenlabs.js
Normal file
390
public/scripts/extensions/tts/elevenlabs.js
Normal file
@@ -0,0 +1,390 @@
|
||||
import { saveTtsProviderSettings } from './index.js';
|
||||
export { ElevenLabsTtsProvider };
|
||||
|
||||
/**
 * TTS provider that talks to the ElevenLabs REST API (https://api.elevenlabs.io/v1).
 *
 * Besides speech generation it reuses matching items from the account's
 * generation history and offers an "Instant Voice Cloning" upload form.
 */
class ElevenLabsTtsProvider {
    //########//
    // Config //
    //########//

    settings;
    voices = [];
    separator = ' ... ... ... ';

    defaultSettings = {
        stability: 0.75,
        similarity_boost: 0.75,
        style_exaggeration: 0.00,
        speaker_boost: true,
        apiKey: '',
        model: 'eleven_monolingual_v1',
        voiceMap: {},
    };

    /**
     * Markup of the provider settings panel.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        let html = `
        <div class="elevenlabs_tts_settings">
            <label for="elevenlabs_tts_api_key">API Key</label>
            <input id="elevenlabs_tts_api_key" type="text" class="text_pole" placeholder="<API Key>"/>
            <label for="elevenlabs_tts_model">Model</label>
            <select id="elevenlabs_tts_model" class="text_pole">
                <option value="eleven_monolingual_v1">English v1</option>
                <option value="eleven_multilingual_v1">Multilingual v1</option>
                <option value="eleven_multilingual_v2">Multilingual v2</option>
                <option value="eleven_turbo_v2">Turbo v2</option>
            </select>
            <input id="eleven_labs_connect" class="menu_button" type="button" value="Connect" />
            <label for="elevenlabs_tts_stability">Stability: <span id="elevenlabs_tts_stability_output"></span></label>
            <input id="elevenlabs_tts_stability" type="range" value="${this.defaultSettings.stability}" min="0" max="1" step="0.01" />
            <label for="elevenlabs_tts_similarity_boost">Similarity Boost: <span id="elevenlabs_tts_similarity_boost_output"></span></label>
            <input id="elevenlabs_tts_similarity_boost" type="range" value="${this.defaultSettings.similarity_boost}" min="0" max="1" step="0.01" />
            <div id="elevenlabs_tts_v2_options" style="display: none;">
                <label for="elevenlabs_tts_style_exaggeration">Style Exaggeration: <span id="elevenlabs_tts_style_exaggeration_output"></span></label>
                <input id="elevenlabs_tts_style_exaggeration" type="range" value="${this.defaultSettings.style_exaggeration}" min="0" max="1" step="0.01" />
                <label for="elevenlabs_tts_speaker_boost">Speaker Boost:</label>
                <input id="elevenlabs_tts_speaker_boost" style="display: inline-grid" type="checkbox" />
            </div>
            <hr>
            <div id="elevenlabs_tts_voice_cloning">
                <span>Instant Voice Cloning</span><br>
                <input id="elevenlabs_tts_voice_cloning_name" type="text" class="text_pole" placeholder="Voice Name"/>
                <input id="elevenlabs_tts_voice_cloning_description" type="text" class="text_pole" placeholder="Voice Description"/>
                <input id="elevenlabs_tts_voice_cloning_labels" type="text" class="text_pole" placeholder="Labels"/>
                <div class="menu_button menu_button_icon" id="upload_audio_file">
                    <i class="fa-solid fa-file-import"></i>
                    <span>Upload Audio Files</span>
                </div>
                <input id="elevenlabs_tts_audio_files" type="file" name="audio_files" accept="audio/*" style="display: none;" multiple>
                <div id="elevenlabs_tts_selected_files_list"></div>
                <input id="elevenlabs_tts_clone_voice_button" class="menu_button menu_button_icon" type="button" value="Clone Voice">
            </div>
            <hr>
        </div>
        `;
        return html;
    }

    /**
     * Whether the style exaggeration / speaker boost options apply.
     * @returns {boolean} True only for the 'eleven_multilingual_v2' model
     */
    shouldInvolveExtendedSettings() {
        return this.settings.model === 'eleven_multilingual_v2';
    }

    /**
     * Copy the current values of the settings controls into this.settings,
     * refresh the percentage read-outs and persist the provider settings.
     */
    onSettingsChange() {
        // Update dynamically
        this.settings.stability = $('#elevenlabs_tts_stability').val();
        this.settings.similarity_boost = $('#elevenlabs_tts_similarity_boost').val();
        this.settings.style_exaggeration = $('#elevenlabs_tts_style_exaggeration').val();
        this.settings.speaker_boost = $('#elevenlabs_tts_speaker_boost').is(':checked');
        this.settings.model = $('#elevenlabs_tts_model').find(':selected').val();
        $('#elevenlabs_tts_stability_output').text(Math.round(this.settings.stability * 100) + '%');
        $('#elevenlabs_tts_similarity_boost_output').text(Math.round(this.settings.similarity_boost * 100) + '%');
        $('#elevenlabs_tts_style_exaggeration_output').text(Math.round(this.settings.style_exaggeration * 100) + '%');
        $('#elevenlabs_tts_v2_options').toggle(this.shouldInvolveExtendedSettings());
        saveTtsProviderSettings();
    }

    /**
     * Apply stored settings, populate the settings UI and wire up its events.
     * @param {object} settings Previously saved provider settings (may be empty)
     * @returns {Promise<void>}
     * @throws {string} When `settings` contains a key that is not part of
     * defaultSettings (a plain string is thrown, as before, for compatibility)
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Start from a copy of the defaults so that loading or editing settings
        // never mutates defaultSettings itself.
        this.settings = { ...this.defaultSettings, voiceMap: { ...this.defaultSettings.voiceMap } };

        // Migrate old settings
        if (settings['multilingual'] !== undefined) {
            settings.model = settings.multilingual ? 'eleven_multilingual_v1' : 'eleven_monolingual_v1';
            delete settings['multilingual'];
        }

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        $('#elevenlabs_tts_stability').val(this.settings.stability);
        $('#elevenlabs_tts_similarity_boost').val(this.settings.similarity_boost);
        $('#elevenlabs_tts_style_exaggeration').val(this.settings.style_exaggeration);
        $('#elevenlabs_tts_speaker_boost').prop('checked', this.settings.speaker_boost);
        $('#elevenlabs_tts_api_key').val(this.settings.apiKey);
        $('#elevenlabs_tts_model').val(this.settings.model);
        $('#eleven_labs_connect').on('click', () => { this.onConnectClick(); });
        $('#elevenlabs_tts_similarity_boost').on('input', this.onSettingsChange.bind(this));
        $('#elevenlabs_tts_stability').on('input', this.onSettingsChange.bind(this));
        $('#elevenlabs_tts_style_exaggeration').on('input', this.onSettingsChange.bind(this));
        $('#elevenlabs_tts_speaker_boost').on('change', this.onSettingsChange.bind(this));
        $('#elevenlabs_tts_model').on('change', this.onSettingsChange.bind(this));
        $('#elevenlabs_tts_stability_output').text(Math.round(this.settings.stability * 100) + '%');
        $('#elevenlabs_tts_similarity_boost_output').text(Math.round(this.settings.similarity_boost * 100) + '%');
        $('#elevenlabs_tts_style_exaggeration_output').text(Math.round(this.settings.style_exaggeration * 100) + '%');
        $('#elevenlabs_tts_v2_options').toggle(this.shouldInvolveExtendedSettings());

        // A failed readiness check (e.g. no API key yet) is expected here and
        // must not prevent the settings UI from being set up.
        try {
            await this.checkReady();
            console.debug('ElevenLabs: Settings loaded');
        } catch {
            console.debug('ElevenLabs: Settings loaded, but not ready');
        }

        this.setupVoiceCloningMenu();
    }

    /**
     * Perform a simple readiness check by trying to fetch the voice list.
     * @returns {Promise<void>}
     * @throws {Error} When the voice list request fails
     */
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    /** Refresh hook; this provider has nothing to refresh. */
    async onRefreshClick() {
    }

    /**
     * Handler of the "Connect" button: validate and store the API key,
     * reporting failures as an error toast instead of rejecting.
     * @returns {Promise<void>}
     */
    async onConnectClick() {
        // Update on Apply click
        return await this.updateApiKey().catch((error) => {
            toastr.error(`ElevenLabs: ${error}`);
        });
    }

    /**
     * Wire up the "Instant Voice Cloning" form: file picker, selected file
     * list and the "Clone Voice" button (only shown while files are selected).
     */
    setupVoiceCloningMenu() {
        const audioFilesInput = document.getElementById('elevenlabs_tts_audio_files');
        const selectedFilesListElement = document.getElementById('elevenlabs_tts_selected_files_list');
        const cloneVoiceButton = document.getElementById('elevenlabs_tts_clone_voice_button');
        const uploadAudioFileButton = document.getElementById('upload_audio_file');
        const voiceCloningNameInput = document.getElementById('elevenlabs_tts_voice_cloning_name');
        const voiceCloningDescriptionInput = document.getElementById('elevenlabs_tts_voice_cloning_description');
        const voiceCloningLabelsInput = document.getElementById('elevenlabs_tts_voice_cloning_labels');

        const updateCloneVoiceButtonVisibility = () => {
            cloneVoiceButton.style.display = audioFilesInput.files.length > 0 ? 'inline-block' : 'none';
        };

        const clearSelectedFiles = () => {
            audioFilesInput.value = '';
            selectedFilesListElement.innerHTML = '';
            updateCloneVoiceButtonVisibility();
        };

        // The real file input is hidden; the styled button forwards clicks to it.
        uploadAudioFileButton.addEventListener('click', () => {
            audioFilesInput.click();
        });

        audioFilesInput.addEventListener('change', () => {
            selectedFilesListElement.innerHTML = '';
            for (const file of audioFilesInput.files) {
                const listItem = document.createElement('div');
                listItem.textContent = file.name;
                selectedFilesListElement.appendChild(listItem);
            }
            updateCloneVoiceButtonVisibility();
        });

        cloneVoiceButton.addEventListener('click', async () => {
            const voiceName = voiceCloningNameInput.value.trim();
            const voiceDescription = voiceCloningDescriptionInput.value.trim();
            const voiceLabels = voiceCloningLabelsInput.value.trim();

            if (!voiceName) {
                toastr.error('Please provide a name for the cloned voice.');
                return;
            }

            try {
                await this.addVoice(voiceName, voiceDescription, voiceLabels);
                toastr.success('Voice cloned successfully. Hit reload to see the new voice in the voice listing.');
                clearSelectedFiles();
                voiceCloningNameInput.value = '';
                voiceCloningDescriptionInput.value = '';
                voiceCloningLabelsInput.value = '';
            } catch (error) {
                toastr.error(`Failed to clone voice: ${error.message}`);
            }
        });

        updateCloneVoiceButtonVisibility();
    }

    /**
     * Read the API key from the settings UI, validate it by requesting the
     * voice list, then persist the settings.
     * @returns {Promise<void>}
     * @throws {string} 'TTS API key validation failed' when the request fails
     */
    async updateApiKey() {
        // Using this call to validate API key
        this.settings.apiKey = $('#elevenlabs_tts_api_key').val();

        await this.fetchTtsVoiceObjects().catch(error => {
            console.debug('ElevenLabs: API key validation request failed', error);
            throw 'TTS API key validation failed';
        });
        // Never write the key itself to the console: it is a secret.
        console.debug('ElevenLabs: API key validated and saved');
        $('#tts_status').text('');
        this.onSettingsChange();
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Look up a voice object by its display name, fetching the voice list
     * on first use.
     * @param {string} voiceName Voice name as shown in the ElevenLabs account
     * @returns {Promise<object>} Matching voice object
     * @throws {string} When no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(elevenVoice => elevenVoice.name == voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found in ElevenLabs account`;
        }
        return match;
    }

    /**
     * Produce audio for the given text, reusing an identical item from the
     * generation history when one exists.
     * @param {string} text Text to speak
     * @param {string} voiceId ElevenLabs voice ID
     * @returns {Promise<Response>} Fetch response carrying the audio
     */
    async generateTts(text, voiceId) {
        const historyId = await this.findTtsGenerationInHistory(text, voiceId);

        let response;
        if (historyId) {
            console.debug(`Found existing TTS generation with id ${historyId}`);
            response = await this.fetchTtsFromHistory(historyId);
        } else {
            console.debug('No existing TTS generation found, requesting new generation');
            response = await this.fetchTtsGeneration(text, voiceId);
        }
        return response;
    }

    //###################//
    //  Helper Functions //
    //###################//

    /**
     * Search the generation history for an item with the same text and voice.
     * @param {string} message Text to look for
     * @param {string} voiceId ElevenLabs voice ID
     * @returns {Promise<string>} History item ID, or '' when nothing matches
     */
    async findTtsGenerationInHistory(message, voiceId) {
        const ttsHistory = await this.fetchTtsHistory();
        for (const history of ttsHistory) {
            const text = history.text;
            const itemId = history.history_item_id;
            if (message === text && history.voice_id == voiceId) {
                console.info(`Existing TTS history item ${itemId} found: ${text} `);
                return itemId;
            }
        }
        return '';
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the voices available to the configured API key.
     * @returns {Promise<object[]>} The `voices` array of the API response
     * @throws {Error} On a non-OK HTTP response
     */
    async fetchTtsVoiceObjects() {
        const headers = {
            'xi-api-key': this.settings.apiKey,
        };
        const response = await fetch('https://api.elevenlabs.io/v1/voices', {
            headers: headers,
        });
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        const responseJson = await response.json();
        return responseJson.voices;
    }

    /**
     * Fetch the default voice settings from the API.
     * @returns {Promise<object>} Parsed JSON response
     * @throws {Error} On a non-OK HTTP response
     */
    async fetchTtsVoiceSettings() {
        const headers = {
            'xi-api-key': this.settings.apiKey,
        };
        const response = await fetch(
            'https://api.elevenlabs.io/v1/voices/settings/default',
            {
                headers: headers,
            },
        );
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response.json();
    }

    /**
     * Request a new speech generation.
     * @param {string} text Text to speak
     * @param {string} voiceId ElevenLabs voice ID
     * @returns {Promise<Response>} Fetch response carrying the audio
     * @throws {Error} On a non-OK HTTP response (an error toast is shown too)
     */
    async fetchTtsGeneration(text, voiceId) {
        let model = this.settings.model ?? 'eleven_monolingual_v1';
        console.info(`Generating new TTS for voice_id ${voiceId}, model ${model}`);
        const request = {
            model_id: model,
            text: text,
            voice_settings: {
                // Slider values are stored as strings; the API gets numbers.
                stability: Number(this.settings.stability),
                similarity_boost: Number(this.settings.similarity_boost),
            },
        };
        if (this.shouldInvolveExtendedSettings()) {
            request.voice_settings.style = Number(this.settings.style_exaggeration);
            request.voice_settings.use_speaker_boost = Boolean(this.settings.speaker_boost);
        }
        const response = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`, {
            method: 'POST',
            headers: {
                'xi-api-key': this.settings.apiKey,
                'Content-Type': 'application/json',
            },
            body: JSON.stringify(request),
        });
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    /**
     * Download the audio of an existing history item.
     * @param {string} history_item_id History item ID
     * @returns {Promise<Response>} Fetch response carrying the audio
     * @throws {Error} On a non-OK HTTP response
     */
    async fetchTtsFromHistory(history_item_id) {
        console.info(`Fetched existing TTS with history_item_id ${history_item_id}`);
        const response = await fetch(
            `https://api.elevenlabs.io/v1/history/${history_item_id}/audio`,
            {
                headers: {
                    'xi-api-key': this.settings.apiKey,
                },
            },
        );
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    /**
     * Fetch the generation history of the account.
     * @returns {Promise<object[]>} The `history` array of the API response
     * @throws {Error} On a non-OK HTTP response
     */
    async fetchTtsHistory() {
        const headers = {
            'xi-api-key': this.settings.apiKey,
        };
        const response = await fetch('https://api.elevenlabs.io/v1/history', {
            headers: headers,
        });
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        const responseJson = await response.json();
        return responseJson.history;
    }

    /**
     * Create a cloned voice from the audio files currently selected in the
     * `audio_files` file input(s).
     * @param {string} name Name of the new voice
     * @param {string} description Description of the new voice
     * @param {string} labels Labels value, sent as entered
     * @returns {Promise<object>} Parsed JSON response of the API
     * @throws {Error} On a non-OK HTTP response
     */
    async addVoice(name, description, labels) {
        const fileInputs = document.querySelectorAll('input[type="file"][name="audio_files"]');
        const formData = new FormData();

        formData.append('name', name);
        formData.append('description', description);
        formData.append('labels', labels);

        // The input allows multiple files, so every selected sample is
        // uploaded, not just the first one.
        for (const input of fileInputs) {
            for (const file of input.files ?? []) {
                formData.append('files', file);
            }
        }

        const response = await fetch('https://api.elevenlabs.io/v1/voices/add', {
            method: 'POST',
            headers: {
                'xi-api-key': this.settings.apiKey,
            },
            body: formData,
        });

        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return await response.json();
    }
}
|
140
public/scripts/extensions/tts/google-translate.js
Normal file
140
public/scripts/extensions/tts/google-translate.js
Normal file
@@ -0,0 +1,140 @@
|
||||
import { getRequestHeaders } from '../../../script.js';
|
||||
import { splitRecursive } from '../../utils.js';
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
export { GoogleTranslateTtsProvider };
|
||||
|
||||
/**
 * TTS provider that uses the server's Google Translate endpoints
 * ('/api/google/list-voices' and '/api/google/generate-voice').
 */
class GoogleTranslateTtsProvider {
    settings;
    voices = [];
    separator = ' . ';
    audioElement = document.createElement('audio');

    // Object URL of the preview clip currently assigned to audioElement.
    #previewUrl = null;

    defaultSettings = {
        region: '',
        voiceMap: {},
    };

    /**
     * This provider has no settings UI of its own.
     * @returns {string} Empty string
     */
    get settingsHtml() {
        return '';
    }

    /** Drop the cached voice list and persist the provider settings. */
    onSettingsChange() {
        this.voices = [];
        saveTtsProviderSettings();
    }

    /**
     * Apply stored settings and run a readiness check.
     * @param {object} settings Previously saved provider settings (may be empty)
     * @returns {Promise<void>}
     * @throws {string} When `settings` contains a key that is not part of
     * defaultSettings (a plain string is thrown, as before, for compatibility)
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Start from a copy of the defaults so that loading settings never
        // mutates defaultSettings itself.
        this.settings = { ...this.defaultSettings, voiceMap: { ...this.defaultSettings.voiceMap } };

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        // A failed readiness check must not abort loading the settings.
        try {
            await this.checkReady();
            console.debug('Google Translate TTS: Settings loaded');
        } catch {
            console.debug('Google Translate TTS: Settings loaded, but not ready');
        }
    }

    /**
     * Perform a simple readiness check by trying to fetch the voice list.
     * @returns {Promise<void>}
     * @throws {Error} When the voice list request fails
     */
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    /**
     * Refresh hook: re-run the readiness check.
     * @returns {Promise<void>}
     */
    async onRefreshClick() {
        await this.checkReady();
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Look up a voice by its name or its voice ID, fetching the voice list
     * on first use.
     * @param {string} voiceName Voice name or voice ID
     * @returns {Promise<object>} Matching voice object
     * @throws {string} When no voice matches
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(
            voice => voice.name == voiceName || voice.voice_id == voiceName,
        );
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate speech for the given text.
     * @param {string} text Text to speak
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} Fetch response carrying the audio
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the voice list from the server and convert it to voice objects,
     * sorted by display name. The response is treated as a map of
     * voice ID -> display name.
     * @returns {Promise<object[]>} Voice objects ({ name, voice_id, preview_url, lang })
     * @throws {Error} On a non-OK HTTP response
     */
    async fetchTtsVoiceObjects() {
        const response = await fetch('/api/google/list-voices', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({}),
        });

        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        const voiceMap = await response.json();
        return Object.entries(voiceMap)
            .sort((a, b) => a[1].localeCompare(b[1]))
            .map(([voiceId, name]) => ({ name: name, voice_id: voiceId, preview_url: false, lang: voiceId }));
    }

    /**
     * Preview TTS for a given voice ID.
     * @param {string} id Voice ID
     * @returns {Promise<void>}
     * @throws {Error|string} When the voice is unknown or generation fails
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;
        const voice = await this.getVoice(id);
        const text = getPreviewString(voice.lang);
        // fetchTtsGeneration throws on a non-OK response, so no extra check here.
        const response = await this.fetchTtsGeneration(text, id);

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);

        // A preview that was interrupted never fires 'ended', so the URL of
        // the previous clip has to be released here to avoid leaking it.
        this.#releasePreviewUrl();
        this.#previewUrl = url;

        this.audioElement.src = url;
        this.audioElement.onended = () => {
            if (this.#previewUrl === url) {
                this.#releasePreviewUrl();
            }
        };
        this.audioElement.play();
    }

    /** Revoke the object URL of the current preview clip, if there is one. */
    #releasePreviewUrl() {
        if (this.#previewUrl !== null) {
            URL.revokeObjectURL(this.#previewUrl);
            this.#previewUrl = null;
        }
    }

    /**
     * Request speech generation from the server. The text is split with
     * splitRecursive(text, 200) before it is sent.
     * @param {string} text Text to speak
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} Fetch response carrying the audio
     * @throws {Error} On a non-OK HTTP response (an error toast is shown too)
     */
    async fetchTtsGeneration(text, voiceId) {
        const response = await fetch('/api/google/generate-voice', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                text: splitRecursive(text, 200),
                voice: voiceId,
            }),
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }
}
|
224
public/scripts/extensions/tts/gpt-sovits-v2.js
Normal file
224
public/scripts/extensions/tts/gpt-sovits-v2.js
Normal file
@@ -0,0 +1,224 @@
|
||||
import { saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { GptSovitsV2Provider };
|
||||
|
||||
/**
 * TTS provider for a GPT-SoVITS-V2 (unofficial) API server reachable at
 * `settings.provider_endpoint`.
 */
class GptSovitsV2Provider {
    //########//
    // Config //
    //########//

    settings;
    ready = false;
    voices = [];
    separator = '. ';
    audioElement = document.createElement('audio');

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        return text;
    }

    audioFormats = ['wav', 'ogg', 'silk', 'mp3', 'flac'];

    languageLabels = {
        'Auto': 'auto',
    };

    langKey2LangCode = {
        'zh': 'zh-CN',
        'en': 'en-US',
        'ja': 'ja-JP',
        'ko': 'ko-KR',
    };

    defaultSettings = {
        provider_endpoint: 'http://localhost:9880',
        format: 'wav',
        lang: 'auto',
        streaming: false,
        text_lang: 'zh',
        prompt_lang: 'zh',
    };

    /**
     * Markup of the provider settings panel.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        let html = `

        <label for="tts_endpoint">Provider Endpoint:</label>
        <input id="tts_endpoint" type="text" class="text_pole" maxlength="250" height="300" value="${this.defaultSettings.provider_endpoint}"/>
        <span>Use <a target="_blank" href="https://github.com/v3ucn/GPT-SoVITS-V2">GPT-SoVITS-V2</a>(Unofficial).</span><br/>
        <label for="text_lang">Text Lang(Inference text language):</label>
        <input id="text_lang" type="text" class="text_pole" maxlength="250" height="300" value="${this.defaultSettings.text_lang}"/>
        <label for="prompt_lang">Prompt Lang(Reference audio text language):</label>
        <input id="prompt_lang" type="text" class="text_pole" maxlength="250" height="300" value="${this.defaultSettings.prompt_lang}"/>
        <br/>

        `;

        return html;
    }

    /**
     * Copy the values of the settings inputs into this.settings and persist them.
     */
    onSettingsChange() {
        // Used when provider settings are updated from UI
        this.settings.provider_endpoint = $('#tts_endpoint').val();
        this.settings.text_lang = $('#text_lang').val();
        this.settings.prompt_lang = $('#prompt_lang').val();

        saveTtsProviderSettings();
        this.changeTTSSettings();
    }

    /**
     * Apply stored settings, populate the settings UI and run a readiness check.
     * Keys that are not part of defaultSettings are ignored.
     * @param {object} settings Previously saved provider settings (may be empty)
     * @returns {Promise<void>}
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Start from a copy of the defaults so that loading or editing settings
        // never mutates defaultSettings itself.
        this.settings = { ...this.defaultSettings };

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                console.debug(`Ignoring non-user-configurable setting: ${key}`);
            }
        }

        // Set initial values from the settings
        $('#tts_endpoint').val(this.settings.provider_endpoint);
        $('#text_lang').val(this.settings.text_lang);
        $('#prompt_lang').val(this.settings.prompt_lang);

        await this.checkReady();

        console.info('GPT-SoVITS-V2: Settings loaded');
    }

    /**
     * Perform a simple readiness check by trying to fetch the voice list.
     * Uses Promise.allSettled, so failures of the individual requests do not
     * reject this method.
     * @returns {Promise<void>}
     */
    async checkReady() {
        await Promise.allSettled([this.fetchTtsVoiceObjects(), this.changeTTSSettings()]);
    }

    /** Refresh hook; this provider has nothing to refresh. */
    async onRefreshClick() {
        return;
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Look up a voice object by name, fetching the voice list on first use.
     * @param {string} voiceName Voice name
     * @returns {Promise<object>} Matching voice object
     * @throws {string} When no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        const match = this.voices.find(v => v.name == voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate speech for the given text.
     * @param {string} text Text to speak
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} Fetch response carrying the audio
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the speaker list from `${provider_endpoint}/speakers` and cache
     * it in this.voices.
     * @returns {Promise<object[]>} Parsed JSON response
     * @throws {Error} On a non-OK HTTP response
     */
    async fetchTtsVoiceObjects() {
        const response = await fetch(`${this.settings.provider_endpoint}/speakers`);
        console.info(response);

        if (!response.ok) {
            // Read the body as text: an error body is not guaranteed to be
            // JSON, and an object would be rendered as "[object Object]".
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        const responseJson = await response.json();

        this.voices = responseJson;

        return responseJson;
    }

    /**
     * Hook called whenever a parameter changes; currently a no-op.
     * @returns {Promise<void>}
     */
    async changeTTSSettings() {
    }

    /**
     * Fetch TTS generation from the API.
     *
     * The request always asks for 'ogg' output in streaming mode, and derives
     * the reference audio path and prompt text from the voice ID (the prompt
     * text is the voice ID with any "[...]" groups removed).
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice ID to use (model_type&speaker_id))
     * @param {string|null} [lang] Currently unused
     * @param {boolean} [forceNoStreaming] Currently unused
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} On a non-OK HTTP response (an error toast is shown too)
     */
    async fetchTtsGeneration(inputText, voiceId, lang = null, forceNoStreaming = false) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);

        function replaceSpeaker(text) {
            return text.replace(/\[.*?\]/gu, '');
        }

        let prompt_text = replaceSpeaker(voiceId);

        const params = {
            text: inputText,
            prompt_text: prompt_text,
            ref_audio_path: './参考音频/' + voiceId + '.wav',
            text_lang: this.settings.text_lang,
            prompt_lang: this.settings.prompt_lang,
            text_split_method: 'cut5',
            batch_size: 1,
            media_type: 'ogg',
            streaming_mode: 'true',
        };

        const url = `${this.settings.provider_endpoint}/`;

        const response = await fetch(
            url,
            {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify(params), // Convert parameter objects to JSON strings
            },
        );
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    /**
     * Interface not used by this provider.
     * @param {string} history_item_id History item ID
     * @returns {Promise<string>} The ID that was passed in
     */
    async fetchTtsFromHistory(history_item_id) {
        return Promise.resolve(history_item_id);
    }
}
|
267
public/scripts/extensions/tts/gsvi.js
Normal file
267
public/scripts/extensions/tts/gsvi.js
Normal file
@@ -0,0 +1,267 @@
|
||||
|
||||
import { saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { GSVITtsProvider };
|
||||
|
||||
class GSVITtsProvider {
|
||||
//########//
|
||||
// Config //
|
||||
//########//
|
||||
|
||||
settings;
|
||||
ready = false;
|
||||
separator = '. ';
|
||||
|
||||
characterList = {};
|
||||
voices = [];
|
||||
/**
|
||||
* Perform any text processing before passing to TTS engine.
|
||||
* @param {string} text Input text
|
||||
* @returns {string} Processed text
|
||||
*/
|
||||
processText(text) {
|
||||
text = text.replace('<br>', '\n'); // Replace <br> with newline
|
||||
return text;
|
||||
}
|
||||
|
||||
languageLabels = {
|
||||
'Multilingual': '多语种混合',
|
||||
'Chinese': '中文',
|
||||
'English': '英文',
|
||||
'Japanese': '日文',
|
||||
'Chinese-English': '中英混合',
|
||||
'Japanese-English': '日英混合',
|
||||
};
|
||||
defaultSettings = {
|
||||
provider_endpoint: 'http://127.0.0.1:5000',
|
||||
|
||||
language: '多语种混合',
|
||||
|
||||
cha_name: '',
|
||||
character_emotion: 'default',
|
||||
|
||||
speed: 1,
|
||||
|
||||
top_k: 6,
|
||||
top_p: 0.85,
|
||||
temperature: 0.75,
|
||||
batch_size: 10,
|
||||
|
||||
stream: false,
|
||||
stream_chunk_size: 100,
|
||||
};
|
||||
|
||||
// Added new methods to obtain characters and emotions
|
||||
async fetchCharacterList() {
|
||||
const response = await fetch(this.settings.provider_endpoint + '/character_list');
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}: ${await response.text()}`);
|
||||
}
|
||||
const characterList = await response.json();
|
||||
this.characterList = characterList;
|
||||
this.voices = Object.keys(characterList);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
get settingsHtml() {
|
||||
let html = `
|
||||
<label for="gsvi_api_language">Text Language</label>
|
||||
<select id="gsvi_api_language">`;
|
||||
|
||||
for (let language in this.languageLabels) {
|
||||
if (this.languageLabels[language] == this.settings?.language) {
|
||||
html += `<option value="${this.languageLabels[language]}" selected="selected">${language}</option>`;
|
||||
continue;
|
||||
}
|
||||
|
||||
html += `<option value="${this.languageLabels[language]}">${language}</option>`;
|
||||
}
|
||||
|
||||
html += `
|
||||
</select>
|
||||
<label>GSVI Settings:</label><br/>
|
||||
<label for="gsvi_tts_endpoint">Provider Endpoint:</label>
|
||||
<input id="gsvi_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
|
||||
|
||||
|
||||
<label for="gsvi_speed">Speed: <span id="gsvi_tts_speed_output">${this.defaultSettings.speed}</span></label>
|
||||
<input id="gsvi_speed" type="range" value="${this.defaultSettings.speed}" min="0.5" max="2" step="0.01" />
|
||||
|
||||
<label for="gsvi_top_k">Top K: <span id="gsvi_top_k_output">${this.defaultSettings.top_k}</span></label>
|
||||
<input id="gsvi_top_k" type="range" value="${this.defaultSettings.top_k}" min="0" max="100" step="1" />
|
||||
|
||||
<label for="gsvi_top_p">Top P: <span id="gsvi_top_p_output">${this.defaultSettings.top_p}</span></label>
|
||||
<input id="gsvi_top_p" type="range" value="${this.defaultSettings.top_p}" min="0" max="1" step="0.01" />
|
||||
|
||||
<label for="gsvi_temperature">Temperature: <span id="gsvi_tts_temperature_output">${this.defaultSettings.temperature}</span></label>
|
||||
<input id="gsvi_temperature" type="range" value="${this.defaultSettings.temperature}" min="0.01" max="1" step="0.01" />
|
||||
|
||||
<label for="gsvi_batch_size">Batch Size: <span id="gsvi_batch_size_output">${this.defaultSettings.batch_size}</span></label>
|
||||
<input id="gsvi_batch_size" type="range" value="${this.defaultSettings.batch_size}" min="1" max="35" step="1" />
|
||||
|
||||
<label for="gsvi_tts_streaming" class="checkbox_label">
|
||||
<input id="gsvi_tts_streaming" type="checkbox" ${this.defaultSettings.stream ? 'checked' : ''}/>
|
||||
<span>Streaming</span>
|
||||
</label>
|
||||
|
||||
<label for="gsvi_stream_chunk_size">Stream Chunk Size: <span id="gsvi_stream_chunk_size_output">${this.defaultSettings.stream_chunk_size}</span></label>
|
||||
<input id="gsvi_stream_chunk_size" type="range" value="${this.defaultSettings.stream_chunk_size}" min="100" max="400" step="1" />
|
||||
<p>
|
||||
For more information, visit the
|
||||
<a href="https://github.com/X-T-E-R/GPT-SoVITS-Inference" target="_blank">GSVI project page</a>.
|
||||
</p>
|
||||
`;
|
||||
|
||||
return html;
|
||||
}
|
||||
|
||||
onSettingsChange() {
|
||||
// Update provider settings based on input fields
|
||||
this.settings.provider_endpoint = $('#gsvi_tts_endpoint').val();
|
||||
this.settings.language = $('#gsvi_api_language').val();
|
||||
|
||||
|
||||
// Update the rest of TTS settings based on input fields
|
||||
this.settings.speed = parseFloat($('#gsvi_speed').val());
|
||||
this.settings.temperature = parseFloat($('#gsvi_temperature').val());
|
||||
this.settings.top_k = parseInt($('#gsvi_top_k').val(), 10);
|
||||
this.settings.top_p = parseFloat($('#gsvi_top_p').val());
|
||||
this.settings.batch_size = parseInt($('#gsvi_batch_size').val(), 10);
|
||||
this.settings.stream = $('#gsvi_tts_streaming').is(':checked');
|
||||
this.settings.stream_chunk_size = parseInt($('#gsvi_stream_chunk_size').val(), 10);
|
||||
|
||||
// Update UI to reflect changes
|
||||
|
||||
$('#gsvi_tts_speed_output').text(this.settings.speed);
|
||||
$('#gsvi_tts_temperature_output').text(this.settings.temperature);
|
||||
$('#gsvi_top_k_output').text(this.settings.top_k);
|
||||
$('#gsvi_top_p_output').text(this.settings.top_p);
|
||||
$('#gsvi_stream_chunk_size_output').text(this.settings.stream_chunk_size);
|
||||
$('#gsvi_batch_size_output').text(this.settings.batch_size);
|
||||
|
||||
|
||||
|
||||
|
||||
// Persist settings changes
|
||||
saveTtsProviderSettings();
|
||||
|
||||
}
|
||||
|
||||
async loadSettings(settings) {
|
||||
// Populate Provider UI given input settings
|
||||
if (Object.keys(settings).length === 0) {
|
||||
console.info('Using default TTS Provider settings');
|
||||
}
|
||||
|
||||
// Only accept keys defined in defaultSettings
|
||||
this.settings = { ...this.defaultSettings, ...settings };
|
||||
|
||||
// Fetch character and emotion list
|
||||
// Set initial values from the settings
|
||||
$('#gsvi_tts_endpoint').val(this.settings.provider_endpoint);
|
||||
$('#gsvi_api_language').val(this.settings.language);
|
||||
|
||||
$('#gsvi_speed').val(this.settings.speed);
|
||||
$('#gsvi_temperature').val(this.settings.temperature);
|
||||
$('#gsvi_top_k').val(this.settings.top_k);
|
||||
$('#gsvi_top_p').val(this.settings.top_p);
|
||||
$('#gsvi_batch_size').val(this.settings.batch_size);
|
||||
$('#gsvi_tts_streaming').prop('checked', this.settings.stream);
|
||||
$('#gsvi_stream_chunk_size').val(this.settings.stream_chunk_size);
|
||||
|
||||
// Update UI to reflect initial settings
|
||||
$('#gsvi_tts_speed_output').text(this.settings.speed);
|
||||
$('#gsvi_tts_temperature_output').text(this.settings.temperature);
|
||||
$('#gsvi_top_k_output').text(this.settings.top_k);
|
||||
$('#gsvi_top_p_output').text(this.settings.top_p);
|
||||
$('#gsvi_stream_chunk_size_output').text(this.settings.stream_chunk_size);
|
||||
|
||||
// Register event listeners to update settings on user interaction
|
||||
// (Similar to before, ensure event listeners for character and emotion selection are included)
|
||||
// Register input/change event listeners to update settings on user interaction
|
||||
$('#gsvi_tts_endpoint').on('input', () => { this.onSettingsChange(); });
|
||||
$('#gsvi_api_language').on('change', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#gsvi_speed').on('input', () => { this.onSettingsChange(); });
|
||||
$('#gsvi_temperature').on('input', () => { this.onSettingsChange(); });
|
||||
$('#gsvi_top_k').on('input', () => { this.onSettingsChange(); });
|
||||
$('#gsvi_top_p').on('input', () => { this.onSettingsChange(); });
|
||||
$('#gsvi_batch_size').on('input', () => { this.onSettingsChange(); });
|
||||
$('#gsvi_tts_streaming').on('change', () => { this.onSettingsChange(); });
|
||||
$('#gsvi_stream_chunk_size').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
await this.checkReady();
|
||||
console.debug('GSVI: Settings loaded');
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Perform a simple readiness check by trying to fetch voiceIds
|
||||
async checkReady() {
|
||||
await Promise.allSettled([this.fetchCharacterList()]);
|
||||
}
|
||||
|
||||
async onRefreshClick() {
|
||||
return;
|
||||
}
|
||||
|
||||
//#################//
|
||||
// TTS Interfaces //
|
||||
//#################//
|
||||
|
||||
async getVoice(voiceName) {
|
||||
if (this.voices.length == 0) {
|
||||
this.fetchCharacterList();
|
||||
}
|
||||
if (!this.voices.includes(voiceName)) {
|
||||
throw `TTS Voice name ${voiceName} not found`;
|
||||
}
|
||||
return { name: voiceName, voice_id: voiceName, preview_url: false, lang: 'zh-CN' };
|
||||
}
|
||||
|
||||
async generateTts(text, voiceId) {
|
||||
const response = await this.fetchTtsGeneration(text, voiceId);
|
||||
return response;
|
||||
}
|
||||
|
||||
//###########//
|
||||
// API CALLS //
|
||||
//###########//
|
||||
async fetchTtsVoiceObjects() {
|
||||
if (this.voices.length == 0) {
|
||||
await this.fetchCharacterList();
|
||||
}
|
||||
console.log(this.voices);
|
||||
const voices = this.voices.map(x => ({ name: x, voice_id: x, preview_url: false, lang: 'zh-CN' }));
|
||||
return voices;
|
||||
}
|
||||
|
||||
|
||||
async fetchTtsGeneration(inputText, voiceId) {
|
||||
console.info(`Generating new TTS for voice_id ${voiceId}`);
|
||||
|
||||
|
||||
const params = new URLSearchParams();
|
||||
params.append('text', inputText);
|
||||
params.append('cha_name', voiceId);
|
||||
params.append('text_language', this.settings.language);
|
||||
params.append('batch_size', this.settings.batch_size.toString());
|
||||
params.append('speed', this.settings.speed.toString());
|
||||
params.append('top_k', this.settings.top_k.toString());
|
||||
params.append('top_p', this.settings.top_p.toString());
|
||||
params.append('temperature', this.settings.temperature.toString());
|
||||
params.append('stream', this.settings.stream.toString());
|
||||
|
||||
|
||||
return `${this.settings.provider_endpoint}/tts?${params.toString()}`;
|
||||
|
||||
}
|
||||
|
||||
// Interface not used by GSVI TTS
|
||||
async fetchTtsFromHistory(history_item_id) {
|
||||
return Promise.resolve(history_item_id);
|
||||
}
|
||||
|
||||
}
|
1252
public/scripts/extensions/tts/index.js
Normal file
1252
public/scripts/extensions/tts/index.js
Normal file
File diff suppressed because it is too large
Load Diff
113
public/scripts/extensions/tts/kokoro-worker.js
Normal file
113
public/scripts/extensions/tts/kokoro-worker.js
Normal file
@@ -0,0 +1,113 @@
|
||||
// kokoro-worker.js
|
||||
/**
 * TTS engine instance; null until initializeTts has created it.
 * @type {import('./lib/kokoro.web.js').KokoroTTS}
 */
let tts = null;

/**
 * Whether the engine finished initializing successfully.
 * @type {boolean}
 */
let ready = false;

/**
 * Voice names reported by the engine.
 * @type {string[]}
 */
let voices = [];

/**
 * Dispatches messages from the main thread. Supported actions:
 * 'initialize' (answers with 'initialized'), 'generateTts' (answers with
 * 'generatedTts', echoing the request ID) and 'checkReady' (answers with
 * 'readyStatus'). Messages with any other action are ignored.
 */
self.onmessage = async function (e) {
    const { action, data } = e.data;

    if (action === 'initialize') {
        try {
            const result = await initializeTts(data);
            self.postMessage({ action: 'initialized', success: result, voices });
        } catch (error) {
            self.postMessage({ action: 'initialized', success: false, error: error.message });
        }
        return;
    }

    if (action === 'generateTts') {
        try {
            const audioBlob = await generateTts(data.text, data.voice, data.speakingRate);
            // The blob is handed over as an object URL
            const blobUrl = URL.createObjectURL(audioBlob);
            self.postMessage({
                action: 'generatedTts',
                success: true,
                blobUrl,
                requestId: data.requestId,
            });
        } catch (error) {
            self.postMessage({
                action: 'generatedTts',
                success: false,
                error: error.message,
                requestId: data.requestId,
            });
        }
        return;
    }

    if (action === 'checkReady') {
        self.postMessage({ action: 'readyStatus', ready });
    }
};
|
||||
|
||||
/**
 * Loads the Kokoro library, creates the engine for the requested model and
 * records the voices it offers. Sets `ready` according to the outcome.
 * @param {{modelId: string, dtype: string, device: string}} settings Engine settings
 * @returns {Promise<boolean>} true once the engine is usable
 * @throws Rethrows any failure after logging it and clearing `ready`
 */
async function initializeTts(settings) {
    try {
        const kokoro = await import('./lib/kokoro.web.js');
        const { modelId, dtype, device } = settings;

        console.log('Worker: Initializing Kokoro TTS with settings:', { modelId, dtype, device });

        // Create TTS instance
        tts = await kokoro.KokoroTTS.from_pretrained(modelId, { dtype, device });

        // Get available voices
        voices = Object.keys(tts.voices);

        // The engine is only usable if it exposes generate()
        const canGenerate = typeof tts.generate === 'function';
        if (!canGenerate) {
            throw new Error('TTS instance does not have generate method');
        }

        console.log('Worker: TTS initialized successfully');
        ready = true;
        return true;
    } catch (error) {
        console.error('Worker: Kokoro TTS initialization failed:', error);
        ready = false;
        throw error;
    }
}
|
||||
|
||||
/**
 * Generates audio for a piece of text with the initialized engine.
 * @param {string} text Text to speak; must contain non-whitespace characters
 * @param {string} voiceId Voice to use
 * @param {number} [speakingRate] Speed passed to the engine; a falsy value falls back to 1.0
 * @returns {Promise<Blob>} Result of calling toBlob() on the generated audio
 * @throws {Error} When the engine is not initialized, the text is empty, or generation fails
 */
async function generateTts(text, voiceId, speakingRate) {
    const engineAvailable = ready && tts;
    if (!engineAvailable) {
        throw new Error('TTS engine not initialized');
    }

    if (!text.trim()) {
        throw new Error('Empty text');
    }

    const options = {
        voice: voiceId,
        speed: speakingRate || 1.0,
    };

    try {
        const audio = await tts.generate(text, options);
        return audio.toBlob();
    } catch (error) {
        console.error('Worker: TTS generation failed:', error);
        throw error;
    }
}
|
352
public/scripts/extensions/tts/kokoro.js
Normal file
352
public/scripts/extensions/tts/kokoro.js
Normal file
@@ -0,0 +1,352 @@
|
||||
import { debounce_timeout } from '../../constants.js';
|
||||
import { debounceAsync, splitRecursive } from '../../utils.js';
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
|
||||
/**
 * TTS provider that runs the Kokoro model inside a dedicated module worker
 * (`kokoro-worker.js`) and talks to it through messages.
 *
 * Requests sent to the worker are tracked in `pendingRequests`: the key
 * 'initialization' holds the callbacks of the current engine start-up, numeric
 * keys hold the callbacks of individual generation requests.
 */
export class KokoroTtsProvider {
    constructor() {
        this.settings = {
            modelId: 'onnx-community/Kokoro-82M-v1.0-ONNX',
            dtype: 'q8',
            device: 'wasm',
            voiceMap: {},
            defaultVoice: 'af_heart',
            speakingRate: 1.0,
        };
        this.ready = false;
        this.voices = [
            'af_heart',
            'af_alloy',
            'af_aoede',
            'af_bella',
            'af_jessica',
            'af_kore',
            'af_nicole',
            'af_nova',
            'af_river',
            'af_sarah',
            'af_sky',
            'am_adam',
            'am_echo',
            'am_eric',
            'am_fenrir',
            'am_liam',
            'am_michael',
            'am_onyx',
            'am_puck',
            'am_santa',
            'bf_emma',
            'bf_isabella',
            'bm_george',
            'bm_lewis',
            'bf_alice',
            'bf_lily',
            'bm_daniel',
            'bm_fable',
        ];
        this.worker = null;
        this.separator = ' ... ... ... ';
        this.pendingRequests = new Map();
        this.nextRequestId = 1;

        // Update display values immediately but only reinitialize TTS after a delay
        this.initTtsDebounced = debounceAsync(this.initializeWorker.bind(this), debounce_timeout.relaxed);
    }

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        // Tildes are replaced with full stops
        text = text.replace(/~/g, '.');
        return text;
    }

    /**
     * Copies the known keys of the stored settings into `this.settings`,
     * pushes them into the settings controls and wires the controls to
     * `onSettingsChange`.
     * @param {object} settings Previously saved provider settings
     * @returns {Promise<void>}
     */
    async loadSettings(settings) {
        if (settings.modelId !== undefined) this.settings.modelId = settings.modelId;
        if (settings.dtype !== undefined) this.settings.dtype = settings.dtype;
        if (settings.device !== undefined) this.settings.device = settings.device;
        if (settings.voiceMap !== undefined) this.settings.voiceMap = settings.voiceMap;
        if (settings.defaultVoice !== undefined) this.settings.defaultVoice = settings.defaultVoice;
        if (settings.speakingRate !== undefined) this.settings.speakingRate = settings.speakingRate;

        $('#kokoro_model_id').val(this.settings.modelId).on('input', this.onSettingsChange.bind(this));
        $('#kokoro_dtype').val(this.settings.dtype).on('change', this.onSettingsChange.bind(this));
        $('#kokoro_device').val(this.settings.device).on('change', this.onSettingsChange.bind(this));
        $('#kokoro_speaking_rate').val(this.settings.speakingRate).on('input', this.onSettingsChange.bind(this));
        $('#kokoro_speaking_rate_output').text(this.settings.speakingRate + 'x');
    }

    /**
     * Rejects every request that is still waiting for a worker answer and
     * forgets it. Used whenever the worker they were sent to goes away.
     * @param {Error} error Reason passed to each pending request
     */
    rejectPendingRequests(error) {
        const waiting = [...this.pendingRequests.values()];
        this.pendingRequests.clear();
        for (const request of waiting) {
            request.reject(error);
        }
    }

    /**
     * (Re)starts the worker and initializes the engine with the current
     * settings. An existing worker is terminated first and everything that was
     * still waiting on it is rejected, so no caller stays pending on a worker
     * that can no longer answer.
     * @returns {Promise<boolean>} Resolves with true when the engine is ready;
     * rejects when creation or initialization fails, times out, or is
     * superseded by a newer call
     */
    initializeWorker() {
        const INIT_TIMEOUT_MS = 600000; // 600 second timeout

        return new Promise((resolve, reject) => {
            try {
                // Terminate the existing worker if it exists
                if (this.worker) {
                    this.worker.terminate();
                    // Cleared before settling old requests so that their
                    // callbacks cannot touch the state of the new worker.
                    this.worker = null;
                    $('#kokoro_status_text').text('Initializing...').removeAttr('style');
                }
                this.rejectPendingRequests(new Error('Worker was restarted'));

                // Create a new worker
                const worker = new Worker(new URL('./kokoro-worker.js', import.meta.url), { type: 'module' });
                this.worker = worker;

                let timeoutId = null;

                // Single exit point of this initialization attempt
                const settle = (success, error) => {
                    clearTimeout(timeoutId);
                    // Only the attempt that owns the current worker may change
                    // the provider state; a stale attempt must not overwrite it.
                    if (this.worker === worker) {
                        this.ready = success;
                        this.updateStatusDisplay();
                    }
                    if (error) {
                        console.error('Worker initialization failed:', error);
                        reject(error);
                    } else {
                        resolve(success);
                    }
                };

                const initRequest = {
                    resolve: (result) => settle(result, null),
                    reject: (error) => settle(false, error),
                };

                // Fails this attempt, but only while it is still the tracked one
                const failIfCurrent = (error) => {
                    if (this.pendingRequests.get('initialization') === initRequest) {
                        this.pendingRequests.delete('initialization');
                        initRequest.reject(error);
                    }
                };

                timeoutId = setTimeout(() => {
                    failIfCurrent(new Error('Worker initialization timed out'));
                }, INIT_TIMEOUT_MS);

                this.pendingRequests.set('initialization', initRequest);

                // Set up message handling; messages of a replaced worker are ignored
                worker.onmessage = (event) => {
                    if (this.worker === worker) {
                        this.handleWorkerMessage(event);
                    }
                };
                // A worker that fails to load never posts 'initialized'
                worker.onerror = (event) => {
                    failIfCurrent(new Error(event?.message || 'Worker failed to start'));
                };

                // Initialize the worker with the current settings
                worker.postMessage({
                    action: 'initialize',
                    data: {
                        modelId: this.settings.modelId,
                        dtype: this.settings.dtype,
                        device: this.settings.device,
                    },
                });
            } catch (error) {
                console.error('Failed to create worker:', error);
                this.ready = false;
                this.updateStatusDisplay();
                reject(error);
            }
        });
    }

    /**
     * Routes a worker message to the request that is waiting for it.
     * @param {MessageEvent} event Message posted by the worker
     */
    handleWorkerMessage(event) {
        const { action, success, ready, error, requestId, blobUrl } = event.data;

        switch (action) {
            case 'initialized': {
                const initRequest = this.pendingRequests.get('initialization');
                if (!initRequest) {
                    break;
                }
                this.pendingRequests.delete('initialization');
                if (success) {
                    initRequest.resolve(true);
                } else {
                    initRequest.reject(new Error(error || 'Initialization failed'));
                }
            } break;
            case 'generatedTts': {
                const request = this.pendingRequests.get(requestId);
                if (!request) {
                    // Nobody is waiting any more; still release the audio
                    if (success && blobUrl) {
                        URL.revokeObjectURL(blobUrl);
                    }
                    break;
                }
                this.pendingRequests.delete(requestId);
                if (!success) {
                    request.reject(new Error(error || 'TTS generation failed'));
                    break;
                }
                // The worker hands the audio over as an object URL
                fetch(blobUrl)
                    .then(response => response.blob())
                    .then(audioBlob => {
                        request.resolve(new Response(audioBlob, {
                            headers: {
                                'Content-Type': 'audio/wav',
                            },
                        }));
                    })
                    .catch(fetchError => {
                        request.reject(new Error('Failed to fetch TTS audio blob: ' + fetchError));
                    })
                    // Clean up the blob URL whether or not the fetch worked
                    .finally(() => URL.revokeObjectURL(blobUrl));
            } break;
            case 'readyStatus':
                this.ready = ready;
                this.updateStatusDisplay();
                break;
        }
    }

    /**
     * Shows 'Ready' (green) or 'Failed' (red) in the status line.
     */
    updateStatusDisplay() {
        const statusText = this.ready ? 'Ready' : 'Failed';
        const statusColor = this.ready ? 'green' : 'red';
        $('#kokoro_status_text').text(statusText).css('color', statusColor);
    }

    /**
     * Starts the worker when there is none; otherwise asks the worker for its
     * status and returns the last known ready flag (the answer arrives later
     * through handleWorkerMessage).
     * @returns {Promise<boolean>}
     */
    async checkReady() {
        if (!this.worker) {
            return await this.initializeWorker();
        }

        this.worker.postMessage({ action: 'checkReady' });
        return this.ready;
    }

    /**
     * Restarts the worker.
     * @returns {Promise<boolean>}
     */
    async onRefreshClick() {
        return await this.initializeWorker();
    }

    /**
     * Markup of the provider settings panel, pre-filled from `this.settings`.
     * @returns {string}
     */
    get settingsHtml() {
        return `
            <div class="kokoro_tts_settings">
                <label for="kokoro_model_id">Model ID:</label>
                <input id="kokoro_model_id" type="text" class="text_pole" value="${this.settings.modelId}" />

                <label for="kokoro_dtype">Data Type:</label>
                <select id="kokoro_dtype" class="text_pole">
                    <option value="q8" ${this.settings.dtype === 'q8' ? 'selected' : ''}>q8 (Recommended)</option>
                    <option value="fp32" ${this.settings.dtype === 'fp32' ? 'selected' : ''}>fp32 (High Precision)</option>
                    <option value="fp16" ${this.settings.dtype === 'fp16' ? 'selected' : ''}>fp16</option>
                    <option value="q4" ${this.settings.dtype === 'q4' ? 'selected' : ''}>q4 (Low Memory)</option>
                    <option value="q4f16" ${this.settings.dtype === 'q4f16' ? 'selected' : ''}>q4f16</option>
                </select>

                <label for="kokoro_device">Device:</label>
                <select id="kokoro_device" class="text_pole">
                    <option value="wasm" ${this.settings.device === 'wasm' ? 'selected' : ''}>WebAssembly (CPU)</option>
                    <option value="webgpu" ${this.settings.device === 'webgpu' ? 'selected' : ''}>WebGPU (GPU Acceleration)</option>
                </select>

                <label for="kokoro_speaking_rate">Speaking Rate: <span id="kokoro_speaking_rate_output">${this.settings.speakingRate}x</span></label>
                <input id="kokoro_speaking_rate" type="range" value="${this.settings.speakingRate}" min="0.5" max="2.0" step="0.1" />

                <hr>
                <div>
                    Status: <span id="kokoro_status_text">Initializing...</span>
                </div>
            </div>
        `;
    }

    /**
     * Reads the settings controls into `this.settings`, refreshes the speaking
     * rate label, schedules an engine restart when needed and saves.
     * @returns {Promise<void>}
     */
    async onSettingsChange() {
        const engineBefore = [this.settings.modelId, this.settings.dtype, this.settings.device];

        this.settings.modelId = $('#kokoro_model_id').val().toString();
        this.settings.dtype = $('#kokoro_dtype').val().toString();
        this.settings.device = $('#kokoro_device').val().toString();
        this.settings.speakingRate = parseFloat($('#kokoro_speaking_rate').val().toString());

        // Update UI display
        $('#kokoro_speaking_rate_output').text(this.settings.speakingRate + 'x');

        // The speaking rate travels with every generation request, so a
        // running engine only has to be reloaded when a setting that is part
        // of its initialization changed.
        const engineAfter = [this.settings.modelId, this.settings.dtype, this.settings.device];
        const engineChanged = engineAfter.some((value, index) => value !== engineBefore[index]);
        if (engineChanged || !this.ready) {
            // Reinitialize TTS engine with debounce. A failure is already
            // logged and shown by initializeWorker, so it is not reported again.
            Promise.resolve(this.initTtsDebounced()).catch(() => {});
        }

        saveTtsProviderSettings();
    }

    /**
     * Lists the built-in voices, making sure the engine was asked to start.
     * @returns {Promise<Array<{name: string, voice_id: string, preview_url: null, lang: string}>>}
     */
    async fetchTtsVoiceObjects() {
        if (!this.ready) {
            await this.checkReady();
        }
        return this.voices.map(voice => this.getVoice(voice));
    }

    /**
     * Speaks the preview sentence with the given voice and resolves when
     * playback has finished (or could not be performed).
     * @param {string} voiceId Voice to preview
     * @returns {Promise<void>}
     */
    async previewTtsVoice(voiceId) {
        if (!this.ready) {
            await this.checkReady();
        }

        const voice = this.getVoice(voiceId);
        const previewText = getPreviewString(voice.lang);
        for await (const response of this.generateTts(previewText, voiceId)) {
            const audio = await response.blob();
            const url = URL.createObjectURL(audio);
            try {
                await new Promise(resolve => {
                    const audioElement = new Audio();
                    audioElement.onended = () => resolve();
                    // Without these a failed playback would never resolve
                    audioElement.onerror = () => resolve();
                    audioElement.src = url;
                    Promise.resolve(audioElement.play()).catch(playError => {
                        console.warn('Kokoro preview playback failed:', playError);
                        resolve();
                    });
                });
            } finally {
                URL.revokeObjectURL(url);
            }
        }
    }

    /**
     * @param {string} voiceId Voice ID
     * @returns {string} Name shown for the voice (the ID itself)
     */
    getVoiceDisplayName(voiceId) {
        return voiceId;
    }

    /**
     * Builds the voice object for a name. Unknown names fall back to the
     * configured default voice ('af_heart' when none is configured).
     * @param {string} voiceName Requested voice
     * @returns {{name: string, voice_id: string, preview_url: null, lang: string}}
     */
    getVoice(voiceName) {
        const defaultVoice = this.settings.defaultVoice || 'af_heart';
        const actualVoiceName = this.voices.includes(voiceName) ? voiceName : defaultVoice;
        return {
            name: actualVoiceName,
            voice_id: actualVoiceName,
            preview_url: null,
            // Names starting with 'b' are reported as British English
            lang: actualVoiceName.startsWith('b') ? 'en-GB' : 'en-US',
        };
    }

    /**
     * Generate TTS audio for the given text using the specified voice.
     * The text is split into chunks that are generated one after another.
     * @param {string} text Text to generate
     * @param {string} voiceId Voice ID
     * @returns {AsyncGenerator<Response>} Audio response generator
     */
    async* generateTts(text, voiceId) {
        if (!this.ready || !this.worker) {
            console.log('TTS not ready, initializing...');
            await this.initializeWorker();
        }

        if (!this.ready || !this.worker) {
            throw new Error('Failed to initialize TTS engine');
        }

        if (text.trim().length === 0) {
            throw new Error('Empty text');
        }

        const voice = this.getVoice(voiceId);

        const chunkSize = 400;
        const chunks = splitRecursive(text, chunkSize, ['\n\n', '\n', '.', '?', '!', ',', ' ', '']);

        for (const chunk of chunks) {
            // The worker may have been disposed while the previous chunk was consumed
            const worker = this.worker;
            if (!worker) {
                throw new Error('TTS worker is not available');
            }

            // Every chunk gets its own ID so answers can never be mixed up
            const requestId = this.nextRequestId++;

            yield await new Promise((resolve, reject) => {
                // Store the promise callbacks
                this.pendingRequests.set(requestId, { resolve, reject });

                // Send the request to the worker
                worker.postMessage({
                    action: 'generateTts',
                    data: {
                        text: chunk,
                        voice: voice.voice_id,
                        speakingRate: this.settings.speakingRate || 1.0,
                        requestId,
                    },
                });
            });
        }
    }

    /**
     * Stops the worker and rejects everything that was still waiting for it.
     */
    dispose() {
        // Clean up the worker when the provider is disposed
        if (this.worker) {
            this.worker.terminate();
            this.worker = null;
        }
        this.ready = false;
        this.rejectPendingRequests(new Error('TTS provider was disposed'));
    }
}
|
8
public/scripts/extensions/tts/lib/README.md
Normal file
8
public/scripts/extensions/tts/lib/README.md
Normal file
@@ -0,0 +1,8 @@
|
||||
# kokoro-js
|
||||
|
||||
* Author: hexgrad
|
||||
* NPM: <https://www.npmjs.com/package/kokoro-js>
|
||||
* Version: 1.2.0
|
||||
* License: Apache-2.0
|
||||
|
||||
Last updated: 2025-03-10
|
1
public/scripts/extensions/tts/lib/kokoro.web.js
Normal file
1
public/scripts/extensions/tts/lib/kokoro.web.js
Normal file
File diff suppressed because one or more lines are too long
15
public/scripts/extensions/tts/manifest.json
Normal file
15
public/scripts/extensions/tts/manifest.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"display_name": "TTS",
|
||||
"loading_order": 10,
|
||||
"requires": [],
|
||||
"optional": [
|
||||
"silero-tts",
|
||||
"edge-tts",
|
||||
"coqui-tts"
|
||||
],
|
||||
"js": "index.js",
|
||||
"css": "style.css",
|
||||
"author": "Ouoertheo#7264",
|
||||
"version": "1.0.0",
|
||||
"homePage": "None"
|
||||
}
|
209
public/scripts/extensions/tts/novel.js
Normal file
209
public/scripts/extensions/tts/novel.js
Normal file
@@ -0,0 +1,209 @@
|
||||
import { getRequestHeaders } from '../../../script.js';
|
||||
import { POPUP_TYPE, callGenericPopup } from '../../popup.js';
|
||||
import { splitRecursive } from '../../utils.js';
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
import { initVoiceMap } from './index.js';
|
||||
|
||||
export { NovelTtsProvider };
|
||||
|
||||
/**
 * TTS provider backed by the `/api/novelai/generate-voice` endpoint.
 * Besides a fixed list of example voices it manages user-defined custom voice
 * names, which are stored in the provider settings.
 */
class NovelTtsProvider {
    //########//
    // Config //
    //########//

    settings;
    voices = [];
    separator = ' . ';
    audioElement = document.createElement('audio');

    defaultSettings = {
        voiceMap: {},
        customVoices: [],
    };

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        // Novel reads tilde as a word. Replace with full stop
        text = text.replace(/~/g, '.');
        // Novel reads asterisk as a word. Remove it
        text = text.replace(/\*/g, '');
        return text;
    }

    /**
     * Markup of the provider settings panel.
     * @returns {string}
     */
    get settingsHtml() {
        let html = `
        <div class="novel_tts_hints">
            <div>Use NovelAI's TTS engine.</div>
            <div>
                The default Voice IDs are only examples. Add custom voices and Novel will create a new random voice for it.
                Feel free to try different options!
            </div>
            <i>Hint: Save an API key in the NovelAI API settings to use it here.</i>
        </div>
        <label for="tts-novel-custom-voices-add">Custom Voices</label>
        <div class="tts_custom_voices">
            <select id="tts-novel-custom-voices-select"></select>
            <i id="tts-novel-custom-voices-add" class="tts-button fa-solid fa-plus fa-xl success" title="Add"></i>
            <i id="tts-novel-custom-voices-delete" class="tts-button fa-solid fa-xmark fa-xl failure" title="Delete"></i>
        </div>
        `;
        return html;
    }

    /**
     * Asks for a name and adds it as a custom voice. Nothing is added when the
     * prompt is cancelled, left blank, or the name already exists.
     * @returns {Promise<void>}
     */
    async addCustomVoice() {
        const input = await callGenericPopup('Custom Voice name:', POPUP_TYPE.INPUT);
        // A cancelled popup does not yield a usable string
        const voiceName = typeof input === 'string' ? input.trim() : '';
        if (!voiceName || this.settings.customVoices.includes(voiceName)) {
            return;
        }

        this.settings.customVoices.push(voiceName);
        this.populateCustomVoices();
        initVoiceMap(); // Update TTS extension voiceMap
        saveTtsProviderSettings();
    }

    /**
     * Removes the custom voice that is selected in the dropdown.
     */
    deleteCustomVoice() {
        const selected = $('#tts-novel-custom-voices-select').find(':selected').val();
        const voiceIndex = this.settings.customVoices.indexOf(selected);

        if (voiceIndex !== -1) {
            this.settings.customVoices.splice(voiceIndex, 1);
        }
        this.populateCustomVoices();
        initVoiceMap(); // Update TTS extension voiceMap
        saveTtsProviderSettings();
    }

    /**
     * Rebuilds the dropdown of custom voices from the settings.
     */
    populateCustomVoices() {
        let voiceSelect = $('#tts-novel-custom-voices-select');
        voiceSelect.empty();
        this.settings.customVoices.forEach(voice => {
            // Names are user input: set them as value/text, never as markup
            voiceSelect.append($('<option>').val(voice).text(voice));
        });
    }

    /**
     * Wires the add/delete buttons, applies the stored settings and fills the
     * custom voice dropdown.
     * @param {object} settings Previously saved provider settings
     * @returns {Promise<void>}
     * @throws {string} When `settings` contains a key that is not in defaultSettings
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }
        $('#tts-novel-custom-voices-add').on('click', () => (this.addCustomVoice()));
        $('#tts-novel-custom-voices-delete').on('click', () => (this.deleteCustomVoice()));

        // Only accept keys defined in defaultSettings
        this.settings = this.defaultSettings;

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        this.populateCustomVoices();
        await this.checkReady();
        console.debug('NovelTTS: Settings loaded');
    }

    // Perform a simple readiness check by trying to fetch voiceIds
    // Doesn't really do much for Novel, not seeing a good way to test this at the moment.
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    /**
     * Refresh hook of the provider interface. Nothing to refresh here.
     * @returns {Promise<void>}
     */
    async onRefreshClick() {
        return;
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Builds the voice object for a name. Any non-empty name is accepted.
     * @param {string} voiceName Voice name
     * @returns {Promise<{name: string, voice_id: string, lang: string, preview_url: boolean}>}
     * @throws {string} When no name is given
     */
    async getVoice(voiceName) {
        if (!voiceName) {
            throw 'TTS Voice name not provided';
        }

        return { name: voiceName, voice_id: voiceName, lang: 'en-US', preview_url: false };
    }

    /**
     * Starts generation for the given text.
     * @param {string} text Text to speak
     * @param {string} voiceId Voice to use
     * @returns {Promise<AsyncGenerator<Response>>} Resolves to the generator
     * returned by fetchTtsGeneration (one response per text chunk)
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Lists the example voices followed by the custom voices from the settings.
     * @returns {Promise<Array<{name: string, voice_id: string, lang: string, preview_url: boolean}>>}
     */
    async fetchTtsVoiceObjects() {
        let voices = [
            { name: 'Ligeia', voice_id: 'Ligeia', lang: 'en-US', preview_url: false },
            { name: 'Aini', voice_id: 'Aini', lang: 'en-US', preview_url: false },
            { name: 'Orea', voice_id: 'Orea', lang: 'en-US', preview_url: false },
            { name: 'Claea', voice_id: 'Claea', lang: 'en-US', preview_url: false },
            { name: 'Lim', voice_id: 'Lim', lang: 'en-US', preview_url: false },
            { name: 'Aurae', voice_id: 'Aurae', lang: 'en-US', preview_url: false },
            { name: 'Naia', voice_id: 'Naia', lang: 'en-US', preview_url: false },
            { name: 'Aulon', voice_id: 'Aulon', lang: 'en-US', preview_url: false },
            { name: 'Elei', voice_id: 'Elei', lang: 'en-US', preview_url: false },
            { name: 'Ogma', voice_id: 'Ogma', lang: 'en-US', preview_url: false },
            { name: 'Raid', voice_id: 'Raid', lang: 'en-US', preview_url: false },
            { name: 'Pega', voice_id: 'Pega', lang: 'en-US', preview_url: false },
            { name: 'Lam', voice_id: 'Lam', lang: 'en-US', preview_url: false },
        ];

        // Add in custom voices to the map
        let addVoices = this.settings.customVoices.map(voice =>
            ({ name: voice, voice_id: voice, lang: 'en-US', preview_url: false }),
        );
        voices = voices.concat(addVoices);

        return voices;
    }

    /**
     * Speaks the preview sentence with the given voice on the shared audio
     * element and resolves when playback has finished (or failed).
     * @param {string} id Voice to preview
     * @returns {Promise<void>}
     * @throws {Error} When the generation request fails
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        const text = getPreviewString('en-US');

        // fetchTtsGeneration is an async generator: its responses have to be
        // iterated. Awaiting the generator itself does not yield a Response.
        // Failed requests already throw inside the generator.
        for await (const response of this.fetchTtsGeneration(text, id)) {
            const audio = await response.blob();
            const url = URL.createObjectURL(audio);
            try {
                await new Promise(resolve => {
                    this.audioElement.onended = () => resolve();
                    this.audioElement.onerror = () => resolve();
                    this.audioElement.src = url;
                    Promise.resolve(this.audioElement.play()).catch(playError => {
                        console.warn('NovelTTS: preview playback failed:', playError);
                        resolve();
                    });
                });
            } finally {
                URL.revokeObjectURL(url);
            }
        }
    }

    /**
     * Requests speech for the text, split into chunks of at most 1000
     * characters, and yields one response per chunk.
     * @param {string} inputText Text to speak
     * @param {string} voiceId Voice to use
     * @returns {AsyncGenerator<Response>}
     * @throws {Error} When a request does not succeed
     */
    async* fetchTtsGeneration(inputText, voiceId) {
        const MAX_LENGTH = 1000;
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const chunks = splitRecursive(inputText, MAX_LENGTH);
        for (const chunk of chunks) {
            const response = await fetch('/api/novelai/generate-voice',
                {
                    method: 'POST',
                    headers: getRequestHeaders(),
                    body: JSON.stringify({
                        'text': chunk,
                        'voice': voiceId,
                    }),
                },
            );
            if (!response.ok) {
                toastr.error(response.statusText, 'TTS Generation Failed');
                throw new Error(`HTTP ${response.status}: ${await response.text()}`);
            }
            yield response;
        }
    }
}
|
193
public/scripts/extensions/tts/openai-compatible.js
Normal file
193
public/scripts/extensions/tts/openai-compatible.js
Normal file
@@ -0,0 +1,193 @@
|
||||
import { getRequestHeaders } from '../../../script.js';
|
||||
import { callGenericPopup, POPUP_RESULT, POPUP_TYPE } from '../../popup.js';
|
||||
import { findSecret, SECRET_KEYS, secret_state, writeSecret } from '../../secrets.js';
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { OpenAICompatibleTtsProvider };
|
||||
|
||||
/**
 * TTS provider for servers exposing an OpenAI-compatible speech API.
 *
 * Voices are not discovered remotely: they are built from the user-editable
 * `available_voices` setting. Generation requests are POSTed to
 * `/api/openai/custom/generate-voice` with the configured endpoint, model,
 * voice and speed in the JSON body.
 */
class OpenAICompatibleTtsProvider {
    /** Active provider settings; assigned in loadSettings(). */
    settings;
    /** Cache of voice objects used by getVoice(); cleared whenever the settings change. */
    voices = [];
    separator = ' . ';

    /** Audio element used only for voice previews. */
    audioElement = document.createElement('audio');

    defaultSettings = {
        voiceMap: {},
        model: 'tts-1',
        speed: 1,
        available_voices: ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'],
        provider_endpoint: 'http://127.0.0.1:8000/v1/audio/speech',
    };

    /**
     * Markup for the provider settings panel, pre-filled with the default values.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        return `
        <label for="openai_compatible_tts_endpoint">Provider Endpoint:</label>
        <div class="flex-container alignItemsCenter">
            <div class="flex1">
                <input id="openai_compatible_tts_endpoint" type="text" class="text_pole" maxlength="500" value="${this.defaultSettings.provider_endpoint}"/>
            </div>
            <div id="openai_compatible_tts_key" class="menu_button menu_button_icon">
                <i class="fa-solid fa-key"></i>
                <span>API Key</span>
            </div>
        </div>
        <label for="openai_compatible_model">Model:</label>
        <input id="openai_compatible_model" type="text" class="text_pole" maxlength="500" value="${this.defaultSettings.model}"/>
        <label for="openai_compatible_tts_voices">Available Voices (comma separated):</label>
        <input id="openai_compatible_tts_voices" type="text" class="text_pole" value="${this.defaultSettings.available_voices.join()}"/>
        <label for="openai_compatible_tts_speed">Speed: <span id="openai_compatible_tts_speed_output"></span></label>
        <input type="range" id="openai_compatible_tts_speed" value="1" min="0.25" max="4" step="0.05">`;
    }

    /**
     * Apply saved settings on top of the defaults, populate the settings UI and
     * register the UI event handlers.
     * @param {object} settings Saved provider settings; only keys present in defaultSettings are accepted
     * @throws {string} When `settings` contains a key that is not defined in defaultSettings
     */
    async loadSettings(settings) {
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Work on a copy so that applying saved values never overwrites the defaults.
        this.settings = {
            ...this.defaultSettings,
            voiceMap: { ...this.defaultSettings.voiceMap },
            available_voices: [...this.defaultSettings.available_voices],
        };

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        $('#openai_compatible_tts_endpoint').val(this.settings.provider_endpoint);
        $('#openai_compatible_tts_endpoint').on('input', () => { this.onSettingsChange(); });

        // Show the loaded model, not the default one.
        $('#openai_compatible_model').val(this.settings.model);
        $('#openai_compatible_model').on('input', () => { this.onSettingsChange(); });

        $('#openai_compatible_tts_voices').val(this.settings.available_voices.join());
        $('#openai_compatible_tts_voices').on('input', () => { this.onSettingsChange(); });

        $('#openai_compatible_tts_speed').val(this.settings.speed);
        $('#openai_compatible_tts_speed').on('input', () => {
            this.onSettingsChange();
        });

        $('#openai_compatible_tts_speed_output').text(this.settings.speed);

        // toggleClass only honours the state argument when it is a real boolean,
        // otherwise it flips the class; coerce explicitly.
        $('#openai_compatible_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.CUSTOM_OPENAI_TTS]);
        $('#openai_compatible_tts_key').on('click', async () => {
            const popupText = 'OpenAI-compatible TTS API Key';
            const savedKey = secret_state[SECRET_KEYS.CUSTOM_OPENAI_TTS] ? await findSecret(SECRET_KEYS.CUSTOM_OPENAI_TTS) : '';

            const key = await callGenericPopup(popupText, POPUP_TYPE.INPUT, savedKey, {
                customButtons: [{
                    text: 'Remove Key',
                    appendAtEnd: true,
                    result: POPUP_RESULT.NEGATIVE,
                    action: async () => {
                        await writeSecret(SECRET_KEYS.CUSTOM_OPENAI_TTS, '');
                        $('#openai_compatible_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.CUSTOM_OPENAI_TTS]);
                        toastr.success('API Key removed');
                        await this.onRefreshClick();
                    },
                }],
            });

            // Popup cancelled, key removed, or nothing entered.
            if (!key) {
                return;
            }

            await writeSecret(SECRET_KEYS.CUSTOM_OPENAI_TTS, String(key));

            toastr.success('API Key saved');
            $('#openai_compatible_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.CUSTOM_OPENAI_TTS]);
            await this.onRefreshClick();
        });

        await this.checkReady();

        console.debug('OpenAI Compatible TTS: Settings loaded');
    }

    /**
     * Read the current values from the settings UI into `this.settings` and persist them.
     */
    onSettingsChange() {
        // Update dynamically
        this.settings.provider_endpoint = String($('#openai_compatible_tts_endpoint').val());
        this.settings.model = String($('#openai_compatible_model').val());
        // Tolerate "a, b" style input and stray commas.
        this.settings.available_voices = String($('#openai_compatible_tts_voices').val())
            .split(',')
            .map(voice => voice.trim())
            .filter(voice => voice.length > 0);
        this.settings.speed = Number($('#openai_compatible_tts_speed').val());
        $('#openai_compatible_tts_speed_output').text(this.settings.speed);
        // The voice list may have changed; make getVoice() rebuild its cache.
        this.voices = [];
        saveTtsProviderSettings();
    }

    /** Readiness check: succeeds when the voice list can be built. */
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    /** Nothing to refresh for this provider. */
    async onRefreshClick() {
        return;
    }

    /**
     * Look up a voice object by its name.
     * @param {string} voiceName Voice name to look for
     * @returns {Promise<object>} Matching voice object
     * @throws {string} When no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(oaicVoice => oaicVoice.name === voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate speech for the given text.
     * @param {string} text Text to speak
     * @param {string} voiceId Voice to use
     * @returns {Promise<Response>} Fetch response carrying the audio
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    /**
     * Build voice objects from the `available_voices` setting.
     * @returns {Promise<{name: string, voice_id: string, lang: string}[]>} Voice objects
     */
    async fetchTtsVoiceObjects() {
        return this.settings.available_voices.map(v => {
            return { name: v, voice_id: v, lang: 'en-US' };
        });
    }

    /**
     * Generate and play a short preview for the given voice.
     * @param {string} voiceId Voice to preview
     */
    async previewTtsVoice(voiceId) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        const text = getPreviewString('en-US');
        const response = await this.fetchTtsGeneration(text, voiceId);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        this.audioElement.src = url;
        // Release the blob once playback has finished.
        this.audioElement.onended = () => URL.revokeObjectURL(url);
        this.audioElement.play();
    }

    /**
     * Request speech generation from the backend.
     * @param {string} inputText Text to speak
     * @param {string} voiceId Voice to use
     * @returns {Promise<Response>} Successful fetch response
     * @throws {Error} When the backend answers with a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const response = await fetch('/api/openai/custom/generate-voice', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                provider_endpoint: this.settings.provider_endpoint,
                model: this.settings.model,
                input: inputText,
                voice: voiceId,
                response_format: 'mp3',
                speed: this.settings.speed,
            }),
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }
}
|
148
public/scripts/extensions/tts/openai.js
Normal file
148
public/scripts/extensions/tts/openai.js
Normal file
@@ -0,0 +1,148 @@
|
||||
import { getRequestHeaders } from '../../../script.js';
|
||||
import { saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { OpenAITtsProvider };
|
||||
|
||||
/**
 * TTS provider backed by OpenAI's speech API.
 * Generation requests are POSTed to `/api/openai/generate-voice`.
 */
class OpenAITtsProvider {
    /** Fixed list of voices offered by this provider, each with a hosted preview sample. */
    static voices = [
        { name: 'Alloy', voice_id: 'alloy', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/alloy.wav' },
        { name: 'Echo', voice_id: 'echo', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/echo.wav' },
        { name: 'Fable', voice_id: 'fable', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/fable.wav' },
        { name: 'Onyx', voice_id: 'onyx', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/onyx.wav' },
        { name: 'Nova', voice_id: 'nova', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/nova.wav' },
        { name: 'Shimmer', voice_id: 'shimmer', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/shimmer.wav' },
    ];

    /** Active provider settings; assigned in loadSettings(). */
    settings;
    voices = [];
    separator = ' . ';
    audioElement = document.createElement('audio');

    defaultSettings = {
        voiceMap: {},
        customVoices: [],
        model: 'tts-1',
        speed: 1,
    };

    /**
     * Markup for the provider settings panel.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        return `
        <div>Use OpenAI's TTS engine.</div>
        <small>Hint: Save an API key in the OpenAI API settings to use it here.</small>
        <div>
            <label for="openai-tts-model">Model:</label>
            <select id="openai-tts-model">
                <optgroup label="Latest">
                    <option value="tts-1">tts-1</option>
                    <option value="tts-1-hd">tts-1-hd</option>
                </optgroup>
                <optgroup label="Snapshots">
                    <option value="tts-1-1106">tts-1-1106</option>
                    <option value="tts-1-hd-1106">tts-1-hd-1106</option>
                </optgroup>
            </select>
        </div>
        <div>
            <label for="openai-tts-speed">Speed: <span id="openai-tts-speed-output"></span></label>
            <input type="range" id="openai-tts-speed" value="1" min="0.25" max="4" step="0.05">
        </div>`;
    }

    /**
     * Apply saved settings on top of the defaults, populate the settings UI and
     * register the UI event handlers.
     * @param {object} settings Saved provider settings; only keys present in defaultSettings are accepted
     * @throws {string} When `settings` contains a key that is not defined in defaultSettings
     */
    async loadSettings(settings) {
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Work on a copy so that applying saved values never overwrites the defaults.
        this.settings = {
            ...this.defaultSettings,
            voiceMap: { ...this.defaultSettings.voiceMap },
            customVoices: [...this.defaultSettings.customVoices],
        };

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        $('#openai-tts-model').val(this.settings.model);
        $('#openai-tts-model').on('change', () => {
            this.onSettingsChange();
        });

        $('#openai-tts-speed').val(this.settings.speed);
        $('#openai-tts-speed').on('input', () => {
            this.onSettingsChange();
        });

        $('#openai-tts-speed-output').text(this.settings.speed);

        await this.checkReady();
        console.debug('OpenAI TTS: Settings loaded');
    }

    /**
     * Read the current values from the settings UI into `this.settings` and persist them.
     */
    onSettingsChange() {
        // Update dynamically
        this.settings.model = String($('#openai-tts-model').find(':selected').val());
        this.settings.speed = Number($('#openai-tts-speed').val());
        $('#openai-tts-speed-output').text(this.settings.speed);
        saveTtsProviderSettings();
    }

    /** Readiness check: succeeds when the voice list can be obtained. */
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    /** Nothing to refresh for this provider. */
    async onRefreshClick() {
        return;
    }

    /**
     * Look up a voice by its display name or by its voice id.
     * @param {string} voiceName Voice name or id
     * @returns {Promise<object>} Matching voice object
     * @throws {string} When no name is given or no voice matches
     */
    async getVoice(voiceName) {
        if (!voiceName) {
            throw 'TTS Voice name not provided';
        }

        const voice = OpenAITtsProvider.voices.find(voice => voice.voice_id === voiceName || voice.name === voiceName);

        if (!voice) {
            throw `TTS Voice not found: ${voiceName}`;
        }

        return voice;
    }

    /**
     * Generate speech for the given text.
     * @param {string} text Text to speak
     * @param {string} voiceId Voice to use
     * @returns {Promise<Response>} Fetch response carrying the audio
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    /**
     * @returns {Promise<object[]>} The static voice list
     */
    async fetchTtsVoiceObjects() {
        return OpenAITtsProvider.voices;
    }

    /** No-op: every voice object already carries a preview_url. */
    async previewTtsVoice(_) {
        return;
    }

    /**
     * Request speech generation from the backend.
     * @param {string} inputText Text to speak
     * @param {string} voiceId Voice to use
     * @returns {Promise<Response>} Successful fetch response
     * @throws {Error} When the backend answers with a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const response = await fetch('/api/openai/generate-voice', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                'text': inputText,
                'voice': voiceId,
                'model': this.settings.model,
                'speed': this.settings.speed,
            }),
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }
}
|
81
public/scripts/extensions/tts/readme.md
Normal file
81
public/scripts/extensions/tts/readme.md
Normal file
@@ -0,0 +1,81 @@
|
||||
# Provider Requirements.
|
||||
Because I don't know how, or if you can, and/or maybe I am just too lazy to implement interfaces in JS, here are the requirements of a provider that the extension needs to operate.
|
||||
|
||||
### class YourTtsProvider
|
||||
#### Required
|
||||
Exported for use in extension index.js, and added to providers list in index.js
|
||||
1. generateTts(text, voiceId)
|
||||
2. fetchTtsVoiceObjects()
|
||||
3. onRefreshClick()
|
||||
4. checkReady()
|
||||
5. loadSettings(settingsObject)
|
||||
6. settings field
|
||||
7. settingsHtml field
|
||||
|
||||
#### Optional
|
||||
1. previewTtsVoice()
|
||||
2. separator field
|
||||
3. processText(text)
|
||||
4. dispose()
|
||||
|
||||
# Requirement Descriptions
|
||||
### generateTts(text, voiceId)
|
||||
Must return `audioData.type in ['audio/mpeg', 'audio/wav', 'audio/x-wav', 'audio/wave', 'audio/webm']`
|
||||
Must take text to be rendered and the voiceId to identify the voice to be used
|
||||
|
||||
### fetchTtsVoiceObjects()
|
||||
Required.
|
||||
Used by the TTS extension to get a list of voice objects from the provider.
|
||||
Must return a list of voice objects representing the available voices.
|
||||
1. name: a friendly user facing name to assign to characters. Shows in dropdown list next to user.
|
||||
2. voice_id: the provider specific id of the voice used in fetchTtsGeneration() call
|
||||
3. preview_url: a URL to a local audio file that will be used to sample voices
|
||||
4. lang: OPTIONAL language string
|
||||
|
||||
### getVoice(voiceName)
|
||||
Required.
|
||||
Must return a single voice object matching the provided voiceName. The voice object must have the following at least:
|
||||
1. name: a friendly user facing name to assign to characters. Shows in dropdown list next to user.
|
||||
2. voice_id: the provider specific id of the voice used in fetchTtsGeneration() call
|
||||
3. preview_url: a URL to a local audio file that will be used to sample voices
|
||||
4. lang: OPTIONAL language indicator
|
||||
|
||||
### onRefreshClick()
|
||||
Required.
|
||||
Users click this button to reconnect/reinit the selected provider.
|
||||
Responds to the user clicking the refresh button, which is intended to re-initialize the Provider into a working state, like retrying connections or checking if everything is loaded.
|
||||
|
||||
### checkReady()
|
||||
Required.
|
||||
Return without error to let TTS extension know that the provider is ready.
|
||||
Return an error to block the main TTS extension from initializing the provider and UI. The error will be put in the TTS extension UI directly.
|
||||
|
||||
### loadSettings(settingsObject)
|
||||
Required.
|
||||
Handle the input settings from the TTS extension on provider load.
|
||||
Put code in here to load your provider settings.
|
||||
|
||||
### settings field
|
||||
Required, used for storing any provider state that needs to be saved.
|
||||
Anything stored in this field is automatically persisted under extension_settings[providerName] by the main extension in `saveTtsProviderSettings()`, as well as loaded when the provider is selected in `loadTtsProvider(provider)`.
|
||||
TTS extension doesn't expect any specific contents.
|
||||
|
||||
### settingsHtml field
|
||||
Required, injected into the TTS extension UI. Besides adding it, not relied on by TTS extension directly.
|
||||
|
||||
### previewTtsVoice()
|
||||
Optional.
|
||||
Function to handle playing previews of voice samples if no direct preview_url is available in fetchTtsVoiceObjects() response
|
||||
|
||||
### separator field
|
||||
Optional.
|
||||
Used when narrate quoted text is enabled.
|
||||
Defines the string of characters used to introduce separation between the groups of extracted quoted text sent to the provider. The provider will use this to introduce pauses by default using `...`
|
||||
|
||||
### processText(text)
|
||||
Optional.
|
||||
A function applied to the input text before passing it to the TTS generator. Can be async.
|
||||
|
||||
### dispose()
|
||||
Optional.
|
||||
Function to handle cleanup of provider resources when the provider is switched.
|
344
public/scripts/extensions/tts/sbvits2.js
Normal file
344
public/scripts/extensions/tts/sbvits2.js
Normal file
@@ -0,0 +1,344 @@
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { SBVits2TtsProvider };
|
||||
|
||||
/**
 * TTS provider for a Style-Bert-VITS2 API server.
 *
 * Voices are read from `<endpoint>/models/info` and speech is requested from
 * `<endpoint>/voice`, with all synthesis parameters passed as query parameters.
 */
class SBVits2TtsProvider {
    //########//
    // Config //
    //########//

    /** Active provider settings; assigned in loadSettings(). */
    settings;
    ready = false;
    /** Voice objects from the last successful fetchTtsVoiceObjects() call. */
    voices = [];
    separator = '. ';
    /** Audio element used only for voice previews. */
    audioElement = document.createElement('audio');

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        // backup for auto_split: newlines are swapped for <br> here and
        // restored in fetchTtsGeneration() right before the request is built.
        text = text.replace(/\n+/g, '<br>');
        return text;
    }

    /** Display name -> language key sent to the API. */
    languageLabels = {
        'Chinese': 'ZH',
        'English': 'EN',
        'Japanese': 'JP',
    };

    /** Language key -> locale code used to pick the preview sentence. */
    langKey2LangCode = {
        'ZH': 'zh-CN',
        'EN': 'en-US',
        'JP': 'ja-JP',
    };

    defaultSettings = {
        provider_endpoint: 'http://localhost:5000',
        sdp_ratio: 0.2,
        noise: 0.6,
        noisew: 0.8,
        length: 1,
        language: 'JP',
        auto_split: true,
        split_interval: 0.5,
        assist_text: '',
        assist_text_weight: 1,
        style: 'Neutral',
        style_weight: 1,
        reference_audio_path: '',
    };

    /**
     * Markup for the provider settings panel, pre-filled with the default values.
     * The language option matching the current setting (if any) is pre-selected.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        let html = `
        <label for="sbvits_api_language">Language</label>
        <select id="sbvits_api_language">`;

        for (const [label, langKey] of Object.entries(this.languageLabels)) {
            if (langKey == this.settings?.language) {
                html += `<option value="${langKey}" selected="selected">${label}</option>`;
                continue;
            }

            html += `<option value="${langKey}">${label}</option>`;
        }

        html += `
        </select>
        <label>SBVits2 Settings:</label><br/>
        <label for="sbvits_tts_endpoint">Provider Endpoint:</label>
        <input id="sbvits_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
        <span>Use <a target="_blank" href="https://github.com/litagin02/Style-Bert-VITS2">Style-Bert-VITS2 API Server</a>.</span><br/>

        <label for="sbvits_sdp_ratio">sdp_ratio: <span id="sbvits_sdp_ratio_output">${this.defaultSettings.sdp_ratio}</span></label>
        <input id="sbvits_sdp_ratio" type="range" value="${this.defaultSettings.sdp_ratio}" min="0.0" max="1" step="0.01" />

        <label for="sbvits_noise">noise: <span id="sbvits_noise_output">${this.defaultSettings.noise}</span></label>
        <input id="sbvits_noise" type="range" value="${this.defaultSettings.noise}" min="0.1" max="2" step="0.01" />

        <label for="sbvits_noisew">noisew: <span id="sbvits_noisew_output">${this.defaultSettings.noisew}</span></label>
        <input id="sbvits_noisew" type="range" value="${this.defaultSettings.noisew}" min="0.1" max="2" step="0.01" />

        <label for="sbvits_length">length: <span id="sbvits_length_output">${this.defaultSettings.length}</span></label>
        <input id="sbvits_length" type="range" value="${this.defaultSettings.length}" min="0.0" max="5" step="0.01" />

        <label for="sbvits_auto_split" class="checkbox_label">
            <input id="sbvits_auto_split" type="checkbox" ${this.defaultSettings.auto_split ? 'checked' : ''} />
            Enable Text Splitting
        </label>

        <label for="sbvits_split_interval">split_interval: <span id="sbvits_split_interval_output">${this.defaultSettings.split_interval}</span></label>
        <input id="sbvits_split_interval" type="range" value="${this.defaultSettings.split_interval}" min="0.0" max="5" step="0.01" />

        <label for="sbvits_assist_text">assist_text:</label>
        <input id="sbvits_assist_text" type="text" class="text_pole" maxlength="512" value="${this.defaultSettings.assist_text}"/>

        <label for="sbvits_assist_text_weight">assist_text_weight: <span id="sbvits_assist_text_weight_output">${this.defaultSettings.assist_text_weight}</span></label>
        <input id="sbvits_assist_text_weight" type="range" value="${this.defaultSettings.assist_text_weight}" min="0.0" max="1" step="0.01" />

        <label for="sbvits_style_weight">style_weight: <span id="sbvits_style_weight_output">${this.defaultSettings.style_weight}</span></label>
        <input id="sbvits_style_weight" type="range" value="${this.defaultSettings.style_weight}" min="0.0" max="20" step="0.01" />

        <label for="sbvits_reference_audio_path">reference_audio_path:</label>
        <input id="sbvits_reference_audio_path" type="text" class="text_pole" maxlength="512" value="${this.defaultSettings.reference_audio_path}"/>
        `;

        return html;
    }

    /**
     * Read the current values from the settings UI into `this.settings`,
     * refresh the value labels next to the sliders and persist the settings.
     */
    onSettingsChange() {
        // Used when provider settings are updated from UI
        this.settings.provider_endpoint = $('#sbvits_tts_endpoint').val();
        this.settings.language = $('#sbvits_api_language').val();
        this.settings.assist_text = $('#sbvits_assist_text').val();
        this.settings.reference_audio_path = $('#sbvits_reference_audio_path').val();

        // Slider values arrive as strings; store numbers to match the defaults.
        this.settings.sdp_ratio = Number($('#sbvits_sdp_ratio').val());
        this.settings.noise = Number($('#sbvits_noise').val());
        this.settings.noisew = Number($('#sbvits_noisew').val());
        this.settings.length = Number($('#sbvits_length').val());
        this.settings.auto_split = $('#sbvits_auto_split').is(':checked');
        this.settings.split_interval = Number($('#sbvits_split_interval').val());
        this.settings.assist_text_weight = Number($('#sbvits_assist_text_weight').val());
        this.settings.style_weight = Number($('#sbvits_style_weight').val());

        this.updateSliderOutputs();

        saveTtsProviderSettings();
        this.changeTTSSettings();
    }

    /** Mirror the numeric settings into the labels shown next to their sliders. */
    updateSliderOutputs() {
        $('#sbvits_sdp_ratio_output').text(this.settings.sdp_ratio);
        $('#sbvits_noise_output').text(this.settings.noise);
        $('#sbvits_noisew_output').text(this.settings.noisew);
        $('#sbvits_length_output').text(this.settings.length);
        $('#sbvits_split_interval_output').text(this.settings.split_interval);
        $('#sbvits_assist_text_weight_output').text(this.settings.assist_text_weight);
        $('#sbvits_style_weight_output').text(this.settings.style_weight);
    }

    /**
     * Apply saved settings on top of the defaults, populate the settings UI and
     * register the UI event handlers. Keys not present in defaultSettings are ignored.
     * @param {object} settings Saved provider settings
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Work on a copy so that applying saved values never overwrites the defaults.
        this.settings = { ...this.defaultSettings };

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                console.debug(`Ignoring non-user-configurable setting: ${key}`);
            }
        }

        // Set initial values from the settings
        $('#sbvits_tts_endpoint').val(this.settings.provider_endpoint);
        $('#sbvits_api_language').val(this.settings.language);
        $('#sbvits_assist_text').val(this.settings.assist_text);
        $('#sbvits_reference_audio_path').val(this.settings.reference_audio_path);
        $('#sbvits_sdp_ratio').val(this.settings.sdp_ratio);
        $('#sbvits_noise').val(this.settings.noise);
        $('#sbvits_noisew').val(this.settings.noisew);
        $('#sbvits_length').val(this.settings.length);
        $('#sbvits_auto_split').prop('checked', this.settings.auto_split);
        $('#sbvits_split_interval').val(this.settings.split_interval);
        $('#sbvits_assist_text_weight').val(this.settings.assist_text_weight);
        $('#sbvits_style_weight').val(this.settings.style_weight);

        // Update the UI to reflect changes
        this.updateSliderOutputs();

        // Register input/change event listeners to update settings on user interaction
        $('#sbvits_tts_endpoint').on('input', () => { this.onSettingsChange(); });
        $('#sbvits_api_language').on('change', () => { this.onSettingsChange(); });
        $('#sbvits_assist_text').on('input', () => { this.onSettingsChange(); });
        $('#sbvits_reference_audio_path').on('input', () => { this.onSettingsChange(); });
        $('#sbvits_sdp_ratio').on('change', () => { this.onSettingsChange(); });
        $('#sbvits_noise').on('change', () => { this.onSettingsChange(); });
        $('#sbvits_noisew').on('change', () => { this.onSettingsChange(); });
        $('#sbvits_length').on('change', () => { this.onSettingsChange(); });
        $('#sbvits_auto_split').on('change', () => { this.onSettingsChange(); });
        $('#sbvits_split_interval').on('change', () => { this.onSettingsChange(); });
        $('#sbvits_assist_text_weight').on('change', () => { this.onSettingsChange(); });
        $('#sbvits_style_weight').on('change', () => { this.onSettingsChange(); });

        await this.checkReady();

        console.info('SBVits2: Settings loaded');
    }

    /**
     * Perform a simple readiness check by trying to fetch the voices.
     * Uses allSettled, so a failing request does not reject this call.
     */
    async checkReady() {
        await Promise.allSettled([this.fetchTtsVoiceObjects(), this.changeTTSSettings()]);
    }

    /** Nothing to refresh for this provider. */
    async onRefreshClick() {
        return;
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Get a voice from the TTS provider.
     * @param {string} voiceName Voice name to get
     * @returns {Promise<Object>} Voice object
     * @throws {string} When no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(v => v.name == voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate speech for the given text.
     * @param {string} text Text to speak
     * @param {string} voiceId Voice ID to use (model_id-speaker_id-style)
     * @returns {Promise<Response>} Fetch response carrying the audio
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the model list from the server and expand it into one voice object
     * per (model, speaker, style) combination.
     * @returns {Promise<{name: string, voice_id: string, preview_url: boolean}[]>} Voice objects
     * @throws {Error} When the server answers with a non-OK status
     */
    async fetchTtsVoiceObjects() {
        const response = await fetch(`${this.settings.provider_endpoint}/models/info`);
        if (!response.ok) {
            // Read the body as text: it may not be JSON, and an object would
            // only show up as "[object Object]" in the message.
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        const data = await response.json();
        const voices = Object.keys(data).flatMap(key => {
            const config = data[key];
            const spk2id = config.spk2id;
            const style2id = config.style2id;

            return Object.entries(spk2id).flatMap(([speaker, speaker_id]) => {
                return Object.keys(style2id).map(style => {
                    return {
                        name: `${speaker} (${style})`,
                        voice_id: `${key}-${speaker_id}-${style}`,
                        preview_url: false,
                    };
                });
            });
        });

        this.voices = voices; // Assign to the class property
        return voices; // Also return this list
    }

    // Each time a parameter is changed, we change the configuration.
    // Currently a no-op: all parameters are sent with every generation request.
    async changeTTSSettings() {
    }

    /**
     * Fetch TTS generation from the API.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice ID to use (model_id-speaker_id-style)
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} When the server answers with a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);

        // The first two '-' separated segments are the model and speaker ids;
        // everything after that is the style name, which may itself contain '-'.
        const [model_id, speaker_id, ...rest] = voiceId.split('-');
        const style = rest.join('-');
        const params = new URLSearchParams();
        // restore for auto_split
        inputText = inputText.replaceAll('<br>', '\n');
        params.append('text', inputText);
        params.append('model_id', model_id);
        params.append('speaker_id', speaker_id);
        params.append('sdp_ratio', this.settings.sdp_ratio);
        params.append('noise', this.settings.noise);
        params.append('noisew', this.settings.noisew);
        params.append('length', this.settings.length);
        params.append('language', this.settings.language);
        params.append('auto_split', this.settings.auto_split);
        params.append('split_interval', this.settings.split_interval);
        if (this.settings.assist_text) {
            params.append('assist_text', this.settings.assist_text);
            params.append('assist_text_weight', this.settings.assist_text_weight);
        }
        params.append('style', style);
        params.append('style_weight', this.settings.style_weight);
        if (this.settings.reference_audio_path) {
            params.append('reference_audio_path', this.settings.reference_audio_path);
        }
        const url = `${this.settings.provider_endpoint}/voice?${params.toString()}`;

        const response = await fetch(
            url,
            {
                method: 'POST',
                headers: {
                },
            },
        );
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    /**
     * Preview TTS for a given voice ID, using a sample sentence in the
     * currently selected language.
     * @param {string} id Voice ID
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;
        // The setting is stored under `language`; fall back to Japanese for unknown keys.
        const lang_code = this.langKey2LangCode[this.settings.language] ?? 'ja-JP';
        const text = getPreviewString(lang_code);
        const response = await this.fetchTtsGeneration(text, id);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        this.audioElement.src = url;
        // Release the blob once playback has finished.
        this.audioElement.onended = () => URL.revokeObjectURL(url);
        this.audioElement.play();
    }

    // Interface not used
    async fetchTtsFromHistory(history_item_id) {
        return Promise.resolve(history_item_id);
    }
}
|
87
public/scripts/extensions/tts/settings.html
Normal file
87
public/scripts/extensions/tts/settings.html
Normal file
@@ -0,0 +1,87 @@
|
||||
<div id="tts_settings">
|
||||
<div class="inline-drawer">
|
||||
<div class="inline-drawer-toggle inline-drawer-header">
|
||||
<b>TTS</b>
|
||||
<div class="inline-drawer-icon fa-solid fa-circle-chevron-down down"></div>
|
||||
</div>
|
||||
<div class="inline-drawer-content">
|
||||
<div id="tts_status">
|
||||
</div>
|
||||
<span data-i18n="Select TTS Provider">Select TTS Provider</span> <br>
|
||||
<div class="tts_block">
|
||||
<select id="tts_provider" class="flex1">
|
||||
</select>
|
||||
<input id="tts_refresh" class="menu_button" type="submit" value="Reload" />
|
||||
</div>
|
||||
<div>
|
||||
<label class="checkbox_label" for="tts_enabled">
|
||||
<input type="checkbox" id="tts_enabled" name="tts_enabled">
|
||||
<small data-i18n="tts_enabled">Enabled</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_narrate_user">
|
||||
<input type="checkbox" id="tts_narrate_user">
|
||||
<small data-i18n="Narrate user messages">Narrate user messages</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_auto_generation">
|
||||
<input type="checkbox" id="tts_auto_generation">
|
||||
<small data-i18n="Auto Generation">Auto Generation</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_periodic_auto_generation" data-i18n="[title]Requires auto generation to be enabled." title="Requires auto generation to be enabled.">
|
||||
<input type="checkbox" id="tts_periodic_auto_generation">
|
||||
<small data-i18n="Narrate by paragraphs (when streaming)">Narrate by paragraphs (when streaming)</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_narrate_by_paragraphs">
|
||||
<input type="checkbox" id="tts_narrate_by_paragraphs">
|
||||
<small data-i18n="Narrate by paragraphs (when not streaming)">Narrate by paragraphs (when not streaming)</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_narrate_quoted">
|
||||
<input type="checkbox" id="tts_narrate_quoted">
|
||||
<small data-i18n="Only narrate quotes">Only narrate "quotes"</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_narrate_dialogues">
|
||||
<input type="checkbox" id="tts_narrate_dialogues">
|
||||
<small data-i18n="Ignore text, even quotes, inside asterisk">Ignore *text, even "quotes", inside asterisks*</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_narrate_translated_only">
|
||||
<input type="checkbox" id="tts_narrate_translated_only">
|
||||
<small data-i18n="Narrate only the translated text">Narrate only the translated text</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_skip_codeblocks">
|
||||
<input type="checkbox" id="tts_skip_codeblocks">
|
||||
<small data-i18n="Skip codeblocks">Skip codeblocks</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_skip_tags">
|
||||
<input type="checkbox" id="tts_skip_tags">
|
||||
<small data-i18n="Skip tagged blocks">Skip &lt;tagged&gt; blocks</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_pass_asterisks">
|
||||
<input type="checkbox" id="tts_pass_asterisks">
|
||||
<small data-i18n="Pass Asterisks to TTS Engine">Pass Asterisks to TTS Engine</small>
|
||||
</label>
|
||||
</div>
|
||||
<div id="playback_rate_block" class="range-block">
|
||||
<hr>
|
||||
<div class="range-block-title justifyLeft">
|
||||
<small data-i18n="Audio Playback Speed">Audio Playback Speed</small>
|
||||
</div>
|
||||
<div class="range-block-range-and-counter">
|
||||
<div class="range-block-range">
|
||||
<input type="range" id="playback_rate" name="volume" min="0" max="3" step="0.05">
|
||||
</div>
|
||||
<div class="range-block-counter">
|
||||
<input type="number" min="0" max="3" step="0.05" data-for="playback_rate" id="playback_rate_counter">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="tts_voicemap_block">
|
||||
</div>
|
||||
<hr>
|
||||
<form id="tts_provider_settings" class="inline-drawer-content">
|
||||
</form>
|
||||
<div class="tts_buttons">
|
||||
<input id="tts_voices" class="menu_button" type="submit" value="Available voices" />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
176
public/scripts/extensions/tts/silerotts.js
Normal file
176
public/scripts/extensions/tts/silerotts.js
Normal file
@@ -0,0 +1,176 @@
|
||||
import { doExtrasFetch, getApiUrl, modules } from '../../extensions.js';
|
||||
import { saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { SileroTtsProvider };
|
||||
|
||||
/**
 * TTS provider that talks to a Silero server, reached through
 * `settings.provider_endpoint` (an Extras API TTS module or a standalone
 * Silero TTS server).
 */
class SileroTtsProvider {
    //########//
    // Config //
    //########//

    settings;
    ready = false;
    voices = [];
    separator = ' ';

    defaultSettings = {
        provider_endpoint: 'http://localhost:8001/tts',
        voiceMap: {},
    };

    /**
     * Markup for the provider-specific part of the settings panel.
     * @returns {string} HTML fragment
     */
    get settingsHtml() {
        const html = `
        <label for="silero_tts_endpoint">Provider Endpoint:</label>
        <input id="silero_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
        <span>Use <a target="_blank" href="https://github.com/ChuQuadrant/ChuQuadrant-extras">ChuQuadrant Extras API</a> or <a target="_blank" href="https://github.com/ouoertheo/silero-api-server">Silero TTS Server</a>.</span>
        `;
        return html;
    }

    /**
     * Reads the endpoint back from the UI, persists it and opens a session
     * against the (possibly new) endpoint.
     */
    onSettingsChange() {
        // Used when provider settings are updated from UI
        this.settings.provider_endpoint = $('#silero_tts_endpoint').val();
        saveTtsProviderSettings();
        this.refreshSession();
    }

    /**
     * Applies stored settings, wires up the UI and probes the endpoint.
     * @param {object} settings Previously saved provider settings (may be empty)
     * @throws {string} When `settings` contains a key not present in `defaultSettings`
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Only accept keys defined in defaultSettings.
        // Note: `settings` aliases `defaultSettings`, so later edits are visible through both.
        this.settings = this.defaultSettings;

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        // Poll every 2 s until the Extras API reports a TTS module, then switch
        // the endpoint to it. The timer is only cleared once that happens.
        const apiCheckInterval = setInterval(() => {
            // Use Extras API if TTS support is enabled
            if (modules.includes('tts') || modules.includes('silero-tts')) {
                const baseUrl = new URL(getApiUrl());
                baseUrl.pathname = '/api/tts';
                this.settings.provider_endpoint = baseUrl.toString();
                $('#silero_tts_endpoint').val(this.settings.provider_endpoint);
                clearInterval(apiCheckInterval);
            }
        }, 2000);

        $('#silero_tts_endpoint').val(this.settings.provider_endpoint);
        $('#silero_tts_endpoint').on('input', () => { this.onSettingsChange(); });
        // Not awaited: initSession() handles its own failures.
        this.refreshSession();

        await this.checkReady();

        console.debug('SileroTTS: Settings loaded');
    }

    // Perform a simple readiness check by trying to fetch voiceIds
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    async onRefreshClick() {
        return;
    }

    async refreshSession() {
        await this.initSession();
    }

    //#################//
    // TTS Interfaces //
    //#################//

    /**
     * Looks up a voice by display name, fetching the voice list on first use.
     * @param {string} voiceName Name of the voice as reported by the server
     * @returns {Promise<object>} The matching voice object
     * @throws {string} When no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(sileroVoice => sileroVoice.name === voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generates speech for the given text.
     * @param {string} text Text to speak
     * @param {string} voiceId Speaker id passed to the server
     * @returns {Promise<Response>} The raw fetch response
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetches the list of speakers from `<endpoint>/speakers`.
     * @returns {Promise<any>} Parsed JSON body of the response
     * @throws {Error} On a non-2xx response; the message carries status and body text
     */
    async fetchTtsVoiceObjects() {
        const response = await doExtrasFetch(`${this.settings.provider_endpoint}/speakers`);
        if (!response.ok) {
            // Read the body as text: an error body is not guaranteed to be JSON, and
            // interpolating a parsed object would only print "[object Object]".
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        const responseJson = await response.json();
        return responseJson;
    }

    /**
     * Requests audio for `inputText` from `<endpoint>/generate`.
     * @param {string} inputText Text to speak
     * @param {string} voiceId Speaker id passed to the server
     * @returns {Promise<Response>} The successful fetch response
     * @throws {Error} On a non-2xx response (a toast is shown first)
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const response = await doExtrasFetch(
            `${this.settings.provider_endpoint}/generate`,
            {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Cache-Control': 'no-cache', // Added this line to disable caching of file so new files are always played - Rolyat 7/7/23
                },
                body: JSON.stringify({
                    'text': inputText,
                    'speaker': voiceId,
                    'session': 'ChuQuadrant',
                }),
            },
        );
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    /**
     * Asks the server to start a session via `<endpoint>/session`.
     * Best-effort: a 404 is tolerated and any other failure is only logged.
     */
    async initSession() {
        console.info('Silero TTS: requesting new session');
        try {
            const response = await doExtrasFetch(
                `${this.settings.provider_endpoint}/session`,
                {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                        'Cache-Control': 'no-cache',
                    },
                    body: JSON.stringify({
                        'path': 'ChuQuadrant',
                    }),
                },
            );

            if (!response.ok && response.status !== 404) {
                throw new Error(`HTTP ${response.status}: ${await response.text()}`);
            }
        } catch (error) {
            console.info('Silero TTS: endpoint not available', error);
        }
    }

    // Interface not used by Silero TTS
    async fetchTtsFromHistory(history_item_id) {
        return Promise.resolve(history_item_id);
    }
}
|
199
public/scripts/extensions/tts/speecht5.js
Normal file
199
public/scripts/extensions/tts/speecht5.js
Normal file
@@ -0,0 +1,199 @@
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
import { getBase64Async } from '../../utils.js';
|
||||
import { getRequestHeaders } from '../../../script.js';
|
||||
|
||||
export { SpeechT5TtsProvider };
|
||||
|
||||
/**
 * TTS provider that synthesizes speech through the server-side
 * `/api/speech/synthesize` endpoint using user-uploaded speaker files.
 */
class SpeechT5TtsProvider {
    //########//
    // Config //
    //########//

    settings;
    ready = false;
    voices = [];
    separator = ' .. ';
    audioElement = document.createElement('audio');

    defaultSettings = {
        speakers: [],
        speaker: '',
        voiceMap: {},
    };

    /**
     * Markup for the provider-specific part of the settings panel.
     * @returns {string} HTML fragment
     */
    get settingsHtml() {
        const html = `
        <label for="speecht5_tts_speaker">Speaker:</label>
        <div class="flex-container">
            <select id="speecht5_tts_speaker" class="text_pole flex1">
            </select>
            <div id="speecht5_tts_speaker_upload_button" class="menu_button" title="Upload speaker">
                <i class="fa-solid fa-upload"></i>
            </div>
            <div id="speecht5_tts_delete_speaker_button" class="menu_button" title="Delete speaker">
                <i class="fa-solid fa-trash"></i>
            </div>
        </div>
        <input type="file" id="speecht5_tts_speaker_upload" class="displayNone">
        <div><i>Loading model for the first time may take a while!</i></div>
        `;
        return html;
    }

    /** Stores the speaker currently selected in the UI and persists the settings. */
    onSettingsChange() {
        // Used when provider settings are updated from UI
        this.settings.speaker = $('#speecht5_tts_speaker').val();
        saveTtsProviderSettings();
    }

    /**
     * Plays a short English preview with the given voice.
     * @param {string} voiceId `voice_id` of an uploaded speaker
     */
    async previewTtsVoice(voiceId) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        const text = getPreviewString('en-US');
        const response = await this.fetchTtsGeneration(text, voiceId);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        this.audioElement.src = url;
        this.audioElement.play();
        // Release the blob URL once playback has finished.
        this.audioElement.onended = () => URL.revokeObjectURL(url);
    }

    /**
     * Applies stored settings and wires up the speaker select, upload and
     * delete controls.
     * @param {object} settings Previously saved provider settings (may be empty)
     * @throws {string} When `settings` contains a key not present in `defaultSettings`
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Only accept keys defined in defaultSettings.
        // Note: `settings` aliases `defaultSettings`, so later edits are visible through both.
        this.settings = this.defaultSettings;

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        for (const speaker of this.settings.speakers) {
            $('#speecht5_tts_speaker').append($('<option>', {
                value: speaker.voice_id,
                text: speaker.name,
            }));
        }

        $('#speecht5_tts_speaker').val(this.settings.speaker);
        $('#speecht5_tts_speaker').on('change', this.onSettingsChange.bind(this));
        $('#speecht5_tts_speaker_upload_button').on('click', () => {
            $('#speecht5_tts_speaker_upload').trigger('click');
        });
        $('#speecht5_tts_speaker_upload').on('change', async (event) => {
            const input = event.target;
            const file = input.files?.[0];

            // Nothing selected (e.g. the selection was cleared): nothing to import.
            if (!file) {
                return;
            }

            try {
                if (file.size != 2048) {
                    toastr.error('Invalid speaker file size, expected 2048 bytes');
                    return;
                }

                const data = await getBase64Async(file);
                const speaker = {
                    voice_id: file.name,
                    name: file.name,
                    data: data,
                    lang: 'en-US',
                    preview_url: false,
                };
                this.settings.speakers.push(speaker);
                $('#speecht5_tts_speaker').append($('<option>', {
                    value: speaker.voice_id,
                    text: speaker.name,
                }));
                // Option values are voice ids, so select by id.
                $('#speecht5_tts_speaker').val(speaker.voice_id);
                this.onSettingsChange();
            } finally {
                // Clear the input so picking the same file again raises another change event.
                input.value = '';
            }
        });
        $('#speecht5_tts_delete_speaker_button').on('click', () => {
            const confirmDelete = confirm('Are you sure you want to delete this speaker?');

            if (!confirmDelete) {
                return;
            }

            const speaker = this.settings.speakers.find(s => s.voice_id === this.settings.speaker);
            if (!speaker) {
                toastr.error('Speaker not found');
                return;
            }

            const index = this.settings.speakers.indexOf(speaker);
            this.settings.speakers.splice(index, 1);
            // Match on the element's value instead of building a selector from the
            // id, which would break for file names containing quotes or brackets.
            $('#speecht5_tts_speaker option')
                .filter((_, option) => option.value === speaker.voice_id)
                .remove();

            if (this.settings.speakers.length === 0) {
                console.log('No speakers left');
                // Still persist the deletion and drop the stale selection.
                this.settings.speaker = '';
                saveTtsProviderSettings();
                return;
            }

            $('#speecht5_tts_speaker').val(this.settings.speakers[0].voice_id);
            this.onSettingsChange();
        });

        await this.checkReady();

        console.debug('SpeechT5: Settings loaded');
    }

    async checkReady() {
        return Promise.resolve();
    }

    /**
     * @param {string} voiceName `voice_id` of the speaker to look up
     * @returns {Promise<object|undefined>} The speaker, or undefined when unknown
     */
    async getVoice(voiceName) {
        return this.settings.speakers.find(s => s.voice_id === voiceName);
    }

    /**
     * Generates speech for the given text.
     * @param {string} text Text to speak
     * @param {string} voiceId `voice_id` of an uploaded speaker
     * @returns {Promise<Response>} The raw fetch response
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    /** @returns {Promise<object[]>} The uploaded speakers */
    async fetchTtsVoiceObjects() {
        return this.settings.speakers;
    }

    /**
     * Posts the text and the speaker's stored data to `/api/speech/synthesize`.
     * @param {string} inputText Text to speak
     * @param {string} voiceId `voice_id` of an uploaded speaker
     * @returns {Promise<Response>} The successful fetch response
     * @throws {Error} When the speaker is unknown or the request fails (a toast is shown first)
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const speaker = await this.getVoice(voiceId);

        if (!speaker) {
            toastr.error(`Speaker not found: ${voiceId}`, 'TTS Generation Failed');
            throw new Error(`Speaker not found: ${voiceId}`);
        }

        const response = await fetch(
            '/api/speech/synthesize',
            {
                method: 'POST',
                headers: getRequestHeaders(),
                body: JSON.stringify({
                    'text': inputText,
                    'speaker': speaker.data,
                    'model': 'Xenova/speecht5_tts',
                }),
            },
        );

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }

    async fetchTtsFromHistory(history_item_id) {
        return Promise.resolve(history_item_id);
    }
}
|
127
public/scripts/extensions/tts/style.css
Normal file
127
public/scripts/extensions/tts/style.css
Normal file
@@ -0,0 +1,127 @@
|
||||
.voice_preview {
|
||||
margin: 0.25rem 0.5rem;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.voice_preview .voice_name {
|
||||
text-align: left;
|
||||
flex: 1;
|
||||
}
|
||||
|
||||
.voice_preview .voice_lang {
|
||||
width: 4rem;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
.voice_preview .fa-play {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.tts-button {
|
||||
margin: 0;
|
||||
outline: none;
|
||||
border: none;
|
||||
cursor: pointer;
|
||||
transition: 0.3s;
|
||||
opacity: 0.7;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
|
||||
}
|
||||
|
||||
.tts-button:hover {
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
.tts_block {
|
||||
display: flex;
|
||||
align-items: baseline;
|
||||
column-gap: 5px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.tts_custom_voices {
|
||||
display: flex;
|
||||
align-items: baseline;
|
||||
gap: 5px;
|
||||
}
|
||||
|
||||
.novel_tts_hints {
|
||||
font-size: calc(0.9 * var(--mainFontSize));
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 5px;
|
||||
margin-bottom: 5px;
|
||||
}
|
||||
|
||||
.at-settings-row {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.at-settings-option {
|
||||
flex: 1;
|
||||
margin: 0 10px;
|
||||
}
|
||||
|
||||
.at-endpoint-option {
|
||||
flex: 1;
|
||||
margin: 0 10px;
|
||||
margin-right: 25px;
|
||||
width: 38%;
|
||||
}
|
||||
|
||||
.at-website-row {
|
||||
display: flex;
|
||||
justify-content: start;
|
||||
align-items: center;
|
||||
margin-top: 10px;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
|
||||
.at-website-option {
|
||||
flex: 1;
|
||||
margin-right: 10px;
|
||||
margin-left: 10px;
|
||||
}
|
||||
|
||||
.at-settings-separator {
|
||||
margin-top: 10px;
|
||||
margin-bottom: 10px;
|
||||
padding: 18px;
|
||||
font-weight: bold;
|
||||
border-top: 1px solid #e1e1e1; /* Grey line */
|
||||
border-bottom: 1px solid #e1e1e1; /* Grey line */
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.at-status-message {
|
||||
flex: 1;
|
||||
margin: 0 10px;
|
||||
}
|
||||
|
||||
.at-model-endpoint-row {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
/* Model and endpoint pickers share their row evenly. */
.at-model-option, .endpoint-option {
    flex: 1;
    margin: 0 10px;
}

/* The width of .at-endpoint-option (38%) is already set by its rule earlier in this file. */
|
||||
|
||||
#at-status_info {
|
||||
color: lightgreen;
|
||||
}
|
246
public/scripts/extensions/tts/system.js
Normal file
246
public/scripts/extensions/tts/system.js
Normal file
@@ -0,0 +1,246 @@
|
||||
import { isMobile } from '../../RossAscends-mods.js';
|
||||
import { getPreviewString } from './index.js';
|
||||
import { saveTtsProviderSettings } from './index.js';
|
||||
export { SystemTtsProvider };
|
||||
|
||||
/**
|
||||
* Chunkify
|
||||
* Google Chrome Speech Synthesis Chunking Pattern
|
||||
* Fixes inconsistencies with speaking long texts in speechUtterance objects
|
||||
* Licensed under the MIT License
|
||||
*
|
||||
* Peter Woolley and Brett Zamir
|
||||
* Modified by Haaris for bug fixes
|
||||
*/
|
||||
|
||||
/**
 * Speaks an utterance piece by piece through `speechSynthesis`.
 *
 * For a voice whose `voiceURI` is 'native' the utterance is spoken as-is.
 * Otherwise the text is cut into chunks of at most `chunkLength` characters
 * (preferring a break after `.`, `!`, `?` or `,`, else at a space) and each
 * chunk is spoken as its own utterance, the next one starting when the
 * previous one fires `end`.
 *
 * Setting `speechUtteranceChunker.cancel = true` stops the chain after the
 * chunk that is currently being spoken.
 *
 * @param {SpeechSynthesisUtterance} utt Utterance holding the full text and voice parameters
 * @param {object} [settings] Options; `offset` is updated in place as chunks are consumed
 * @param {number} [settings.chunkLength=160] Maximum characters per chunk
 * @param {number} [settings.offset] Index in `utt.text` to start from
 * @param {function(SpeechSynthesisUtterance): void} [settings.modifier] Hook applied to each utterance before it is spoken
 * @param {function(): void} [callback] Invoked once no further chunk can be produced
 */
const speechUtteranceChunker = function (utt, settings, callback) {
    settings = settings || {};
    let newUtt;
    const txt = settings.offset !== undefined ? utt.text.substring(settings.offset) : utt.text;

    if (utt.voice && utt.voice.voiceURI === 'native') { // Not part of the spec
        newUtt = utt;
        newUtt.text = txt;
        newUtt.addEventListener('end', function () {
            if (speechUtteranceChunker.cancel) {
                speechUtteranceChunker.cancel = false;
            }
            if (callback !== undefined) {
                callback();
            }
        });
    } else {
        const chunkLength = settings.chunkLength || 160;
        const halfLength = Math.floor(chunkLength / 2);
        // Alternatives, in order: a chunk ending in punctuation, the whole
        // remainder if it fits, or a chunk ending at a space.
        const pattRegex = new RegExp(`^[\\s\\S]{${halfLength},${chunkLength}}[.!?,]{1}|^[\\s\\S]{1,${chunkLength}}$|^[\\s\\S]{1,${chunkLength}} `);
        const chunkArr = txt.match(pattRegex);

        if (chunkArr == null || chunkArr[0] === undefined || chunkArr[0].length <= 2) {
            //call once all text has been spoken...
            if (callback !== undefined) {
                callback();
            }
            return;
        }

        const chunk = chunkArr[0];
        newUtt = new SpeechSynthesisUtterance(chunk);

        // Carry over any custom own properties set on the original utterance.
        for (const key in utt) {
            if (Object.hasOwn(utt, key) && key !== 'text') {
                newUtt[key] = utt[key];
            }
        }

        // The standard fields are not own properties of the utterance, so the
        // loop above skips them; copy them explicitly (including volume).
        newUtt.lang = utt.lang;
        newUtt.voice = utt.voice;
        newUtt.rate = utt.rate;
        newUtt.pitch = utt.pitch;
        newUtt.volume = utt.volume;

        newUtt.addEventListener('end', function () {
            if (speechUtteranceChunker.cancel) {
                speechUtteranceChunker.cancel = false;
                return;
            }
            settings.offset = settings.offset || 0;
            settings.offset += chunk.length;
            speechUtteranceChunker(utt, settings, callback);
        });
    }

    if (settings.modifier) {
        settings.modifier(newUtt);
    }
    console.log(newUtt); //IMPORTANT!! Do not remove: Logging the object out fixes some onend firing issues.
    //placing the speak invocation inside a callback fixes ordering and onend issues.
    setTimeout(function () {
        speechSynthesis.speak(newUtt);
    }, 0);
};
|
||||
|
||||
/**
 * TTS provider built on the browser's Web Speech API (`speechSynthesis`),
 * using the voices installed in the operating system.
 */
class SystemTtsProvider {
    //########//
    // Config //
    //########//

    settings;
    ready = false;
    voices = [];
    separator = ' ... ';

    defaultSettings = {
        voiceMap: {},
        rate: 1,
        pitch: 1,
    };

    /**
     * Markup for the provider-specific part of the settings panel, or a plain
     * notice when the browser has no speech synthesis support.
     * @returns {string} HTML fragment
     */
    get settingsHtml() {
        if (!('speechSynthesis' in window)) {
            return 'Your browser or operating system doesn\'t support speech synthesis';
        }

        return `<p>Uses the voices provided by your operating system</p>
        <label for="system_tts_rate">Rate: <span id="system_tts_rate_output"></span></label>
        <input id="system_tts_rate" type="range" value="${this.defaultSettings.rate}" min="0.1" max="2" step="0.01" />
        <label for="system_tts_pitch">Pitch: <span id="system_tts_pitch_output"></span></label>
        <input id="system_tts_pitch" type="range" value="${this.defaultSettings.pitch}" min="0" max="2" step="0.01" />`;
    }

    /** Reads rate and pitch from the sliders, updates their labels and persists the settings. */
    onSettingsChange() {
        this.settings.rate = Number($('#system_tts_rate').val());
        this.settings.pitch = Number($('#system_tts_pitch').val());
        $('#system_tts_pitch_output').text(this.settings.pitch);
        $('#system_tts_rate_output').text(this.settings.rate);
        saveTtsProviderSettings();
    }

    /**
     * Applies stored settings and wires up the rate/pitch sliders.
     * @param {object} settings Previously saved provider settings (may be empty)
     * @throws {string} When `settings` contains a key not present in `defaultSettings`
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // iOS only allows speech synthesis triggered by user interaction, so
        // speak a silent utterance on the first click. `once` removes the
        // listener afterwards instead of leaving a no-op handler attached.
        if (isMobile()) {
            document.addEventListener('click', () => {
                const utterance = new SpeechSynthesisUtterance(' . ');
                utterance.volume = 0;
                speechSynthesis.speak(utterance);
            }, { once: true });
        }

        // Only accept keys defined in defaultSettings.
        // Note: `settings` aliases `defaultSettings`, so later edits are visible through both.
        this.settings = this.defaultSettings;

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        $('#system_tts_rate').val(this.settings.rate || this.defaultSettings.rate);
        // `??` rather than `||`: a pitch of 0 is a valid slider position.
        $('#system_tts_pitch').val(this.settings.pitch ?? this.defaultSettings.pitch);

        // Trigger updates
        $('#system_tts_rate').on('input', () => { this.onSettingsChange(); });
        $('#system_tts_pitch').on('input', () => { this.onSettingsChange(); });

        $('#system_tts_pitch_output').text(this.settings.pitch);
        $('#system_tts_rate_output').text(this.settings.rate);
        console.debug('SystemTTS: Settings loaded');
    }

    // Perform a simple readiness check by trying to fetch voiceIds
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    async onRefreshClick() {
        return;
    }

    //#################//
    // TTS Interfaces //
    //#################//

    /**
     * Lists the system voices, sorted by language and then by name.
     * @returns {Array|Promise<Array<{name: string, voice_id: string, preview_url: boolean, lang: string}>>}
     *   An empty array when speech synthesis is unavailable, otherwise a promise of the voice list
     */
    fetchTtsVoiceObjects() {
        if (!('speechSynthesis' in window)) {
            return [];
        }

        return new Promise((resolve) => {
            setTimeout(() => {
                const voices = speechSynthesis
                    .getVoices()
                    .sort((a, b) => a.lang.localeCompare(b.lang) || a.name.localeCompare(b.name))
                    .map(x => ({ name: x.name, voice_id: x.voiceURI, preview_url: false, lang: x.lang }));

                resolve(voices);
            }, 1);
        });
    }

    /**
     * Speaks a preview sentence in the voice's own language, cancelling
     * anything currently being spoken.
     * @param {string} voiceId `voiceURI` of the voice to preview
     * @throws {string} When speech synthesis is unsupported or the voice is unknown
     */
    previewTtsVoice(voiceId) {
        if (!('speechSynthesis' in window)) {
            throw 'Speech synthesis API is not supported';
        }

        const voice = speechSynthesis.getVoices().find(x => x.voiceURI === voiceId);

        if (!voice) {
            throw `TTS Voice id ${voiceId} not found`;
        }

        speechSynthesis.cancel();
        const text = getPreviewString(voice.lang);
        const utterance = new SpeechSynthesisUtterance(text);
        utterance.voice = voice;
        utterance.rate = this.settings.rate || 1;
        // `??` keeps an explicit pitch of 0 instead of silently replacing it with 1.
        utterance.pitch = this.settings.pitch ?? 1;
        speechSynthesis.speak(utterance);
    }

    /**
     * Resolves a voice by display name.
     * @param {string} voiceName Name of the system voice
     * @returns {Promise<{voice_id: string|null, name?: string}>} `voice_id` is null when speech synthesis is unavailable
     * @throws {string} When no voice with that name exists
     */
    async getVoice(voiceName) {
        if (!('speechSynthesis' in window)) {
            return { voice_id: null };
        }

        const voices = speechSynthesis.getVoices();
        const match = voices.find(x => x.name == voiceName);

        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }

        return { voice_id: match.voiceURI, name: match.name };
    }

    /**
     * Speaks the text directly through the browser. Because the audio is
     * played by the speech engine itself, the returned promise resolves with
     * the response for a silent sound file once speaking has finished.
     * @param {string} text Text to speak
     * @param {string} voiceId `voiceURI` of the voice to use
     * @returns {Promise<Response>} Response for `/sounds/silence.mp3`
     * @throws {string} When speech synthesis is unsupported
     */
    async generateTts(text, voiceId) {
        if (!('speechSynthesis' in window)) {
            throw 'Speech synthesis API is not supported';
        }

        const silence = await fetch('/sounds/silence.mp3');

        return new Promise((resolve, reject) => {
            const voices = speechSynthesis.getVoices();
            const voice = voices.find(x => x.voiceURI === voiceId);
            const utterance = new SpeechSynthesisUtterance(text);
            utterance.voice = voice;
            utterance.rate = this.settings.rate || 1;
            // `??` keeps an explicit pitch of 0 instead of silently replacing it with 1.
            utterance.pitch = this.settings.pitch ?? 1;
            utterance.onend = () => resolve(silence);
            utterance.onerror = (event) => reject(new Error(`Speech synthesis failed: ${event?.error ?? 'unknown error'}`));
            speechUtteranceChunker(utterance, {
                chunkLength: 200,
            }, function () {
                //some code to execute when done
                resolve(silence);
                console.log('System TTS done');
            });
        });
    }
}
|
404
public/scripts/extensions/tts/vits.js
Normal file
404
public/scripts/extensions/tts/vits.js
Normal file
@@ -0,0 +1,404 @@
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { VITSTtsProvider };
|
||||
|
||||
class VITSTtsProvider {
|
||||
//########//
|
||||
// Config //
|
||||
//########//
|
||||
|
||||
settings;
|
||||
ready = false;
|
||||
voices = [];
|
||||
separator = '. ';
|
||||
audioElement = document.createElement('audio');
|
||||
|
||||
/**
|
||||
* Perform any text processing before passing to TTS engine.
|
||||
* @param {string} text Input text
|
||||
* @returns {string} Processed text
|
||||
*/
|
||||
processText(text) {
|
||||
return text;
|
||||
}
|
||||
|
||||
audioFormats = ['wav', 'ogg', 'silk', 'mp3', 'flac'];
|
||||
|
||||
languageLabels = {
|
||||
'Auto': 'auto',
|
||||
'Chinese': 'zh',
|
||||
'English': 'en',
|
||||
'Japanese': 'ja',
|
||||
'Korean': 'ko',
|
||||
};
|
||||
|
||||
langKey2LangCode = {
|
||||
'zh': 'zh-CN',
|
||||
'en': 'en-US',
|
||||
'ja': 'ja-JP',
|
||||
'ko': 'ko-KR',
|
||||
};
|
||||
|
||||
modelTypes = {
|
||||
VITS: 'VITS',
|
||||
W2V2_VITS: 'W2V2-VITS',
|
||||
BERT_VITS2: 'BERT-VITS2',
|
||||
};
|
||||
|
||||
defaultSettings = {
|
||||
provider_endpoint: 'http://localhost:23456',
|
||||
format: 'wav',
|
||||
lang: 'auto',
|
||||
length: 1.0,
|
||||
noise: 0.33,
|
||||
noisew: 0.4,
|
||||
segment_size: 50,
|
||||
streaming: false,
|
||||
dim_emotion: 0,
|
||||
sdp_ratio: 0.2,
|
||||
emotion: 0,
|
||||
text_prompt: '',
|
||||
style_text: '',
|
||||
style_weight: 1,
|
||||
};
|
||||
|
||||
get settingsHtml() {
|
||||
let html = `
|
||||
<label for="vits_lang">Text Language</label>
|
||||
<select id="vits_lang">`;
|
||||
|
||||
for (let language in this.languageLabels) {
|
||||
if (this.languageLabels[language] == this.settings?.lang) {
|
||||
html += `<option value="${this.languageLabels[language]}" selected="selected">${language}</option>`;
|
||||
continue;
|
||||
}
|
||||
html += `<option value="${this.languageLabels[language]}">${language}</option>`;
|
||||
}
|
||||
|
||||
html += `
|
||||
</select>
|
||||
<label>VITS / W2V2-VITS / Bert-VITS2 Settings:</label><br/>
|
||||
<label for="vits_endpoint">Provider Endpoint:</label>
|
||||
<input id="vits_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
|
||||
<span>Use <a target="_blank" href="https://github.com/Artrajz/vits-simple-api">vits-simple-api</a>.</span><br/>
|
||||
|
||||
<label for="vits_format">Audio format:</label>
|
||||
<select id="vits_format">`;
|
||||
|
||||
for (let format of this.audioFormats) {
|
||||
if (format == this.settings?.format) {
|
||||
html += `<option value="${format}" selected="selected">${format}</option>`;
|
||||
continue;
|
||||
}
|
||||
html += `<option value="${format}">${format}</option>`;
|
||||
}
|
||||
|
||||
html += `
|
||||
</select>
|
||||
<label for="vits_length">Audio length: <span id="vits_length_output">${this.defaultSettings.length}</span></label>
|
||||
<input id="vits_length" type="range" value="${this.defaultSettings.length}" min="0.0" max="5" step="0.01" />
|
||||
|
||||
<label for="vits_noise">Noise: <span id="vits_noise_output">${this.defaultSettings.noise}</span></label>
|
||||
<input id="vits_noise" type="range" value="${this.defaultSettings.noise}" min="0.1" max="2" step="0.01" />
|
||||
|
||||
<label for="vits_noisew">SDP noise: <span id="vits_noisew_output">${this.defaultSettings.noisew}</span></label>
|
||||
<input id="vits_noisew" type="range" value="${this.defaultSettings.noisew}" min="0.1" max="2" step="0.01" />
|
||||
|
||||
<label for="vits_segment_size">Segment Size: <span id="vits_segment_size_output">${this.defaultSettings.segment_size}</span></label>
|
||||
<input id="vits_segment_size" type="range" value="${this.defaultSettings.segment_size}" min="0" max="1000" step="1" />
|
||||
|
||||
<label for="vits_streaming" class="checkbox_label">
|
||||
<input id="vits_streaming" type="checkbox" />
|
||||
<span>Streaming</span>
|
||||
</label>
|
||||
|
||||
<label>W2V2-VITS Settings:</label><br/>
|
||||
<label for="vits_dim_emotion">Dimensional emotion:</label>
|
||||
<input id="vits_dim_emotion" type="number" class="text_pole" min="0" max="5457" step="1" value="${this.defaultSettings.dim_emotion}"/>
|
||||
|
||||
<label>BERT-VITS2 Settings:</label><br/>
|
||||
<label for="vits_sdp_ratio">sdp_ratio: <span id="vits_sdp_ratio_output">${this.defaultSettings.sdp_ratio}</span></label>
|
||||
<input id="vits_sdp_ratio" type="range" value="${this.defaultSettings.sdp_ratio}" min="0.0" max="1" step="0.01" />
|
||||
|
||||
<label for="vits_emotion">emotion: <span id="vits_emotion_output">${this.defaultSettings.emotion}</span></label>
|
||||
<input id="vits_emotion" type="range" value="${this.defaultSettings.emotion}" min="0" max="9" step="1" />
|
||||
|
||||
<label for="vits_text_prompt">Text Prompt:</label>
|
||||
<input id="vits_text_prompt" type="text" class="text_pole" maxlength="512" value="${this.defaultSettings.text_prompt}"/>
|
||||
|
||||
<label for="vits_style_text">Style text:</label>
|
||||
<input id="vits_style_text" type="text" class="text_pole" maxlength="512" value="${this.defaultSettings.style_text}"/>
|
||||
|
||||
<label for="vits_style_weight">Style weight <span id="vits_style_weight_output">${this.defaultSettings.style_weight}</span></label>
|
||||
<input id="vits_style_weight" type="range" value="${this.defaultSettings.style_weight}" min="0" max="1" step="0.01" />
|
||||
`;
|
||||
|
||||
return html;
|
||||
}
|
||||
|
||||
onSettingsChange() {
|
||||
// Used when provider settings are updated from UI
|
||||
this.settings.provider_endpoint = $('#vits_endpoint').val();
|
||||
this.settings.lang = $('#vits_lang').val();
|
||||
this.settings.format = $('#vits_format').val();
|
||||
this.settings.dim_emotion = $('#vits_dim_emotion').val();
|
||||
this.settings.text_prompt = $('#vits_text_prompt').val();
|
||||
this.settings.style_text = $('#vits_style_text').val();
|
||||
|
||||
// Update the default TTS settings based on input fields
|
||||
this.settings.length = $('#vits_length').val();
|
||||
this.settings.noise = $('#vits_noise').val();
|
||||
this.settings.noisew = $('#vits_noisew').val();
|
||||
this.settings.segment_size = $('#vits_segment_size').val();
|
||||
this.settings.streaming = $('#vits_streaming').is(':checked');
|
||||
this.settings.sdp_ratio = $('#vits_sdp_ratio').val();
|
||||
this.settings.emotion = $('#vits_emotion').val();
|
||||
this.settings.style_weight = $('#vits_style_weight').val();
|
||||
|
||||
// Update the UI to reflect changes
|
||||
$('#vits_length_output').text(this.settings.length);
|
||||
$('#vits_noise_output').text(this.settings.noise);
|
||||
$('#vits_noisew_output').text(this.settings.noisew);
|
||||
$('#vits_segment_size_output').text(this.settings.segment_size);
|
||||
$('#vits_sdp_ratio_output').text(this.settings.sdp_ratio);
|
||||
$('#vits_emotion_output').text(this.settings.emotion);
|
||||
$('#vits_style_weight_output').text(this.settings.style_weight);
|
||||
|
||||
saveTtsProviderSettings();
|
||||
this.changeTTSSettings();
|
||||
}
|
||||
|
||||
async loadSettings(settings) {
|
||||
// Pupulate Provider UI given input settings
|
||||
if (Object.keys(settings).length == 0) {
|
||||
console.info('Using default TTS Provider settings');
|
||||
}
|
||||
|
||||
// Only accept keys defined in defaultSettings
|
||||
this.settings = this.defaultSettings;
|
||||
|
||||
for (const key in settings) {
|
||||
if (key in this.settings) {
|
||||
this.settings[key] = settings[key];
|
||||
} else {
|
||||
console.debug(`Ignoring non-user-configurable setting: ${key}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Set initial values from the settings
|
||||
$('#vits_endpoint').val(this.settings.provider_endpoint);
|
||||
$('#vits_lang').val(this.settings.lang);
|
||||
$('#vits_format').val(this.settings.format);
|
||||
$('#vits_length').val(this.settings.length);
|
||||
$('#vits_noise').val(this.settings.noise);
|
||||
$('#vits_noisew').val(this.settings.noisew);
|
||||
$('#vits_segment_size').val(this.settings.segment_size);
|
||||
$('#vits_streaming').prop('checked', this.settings.streaming);
|
||||
$('#vits_dim_emotion').val(this.settings.dim_emotion);
|
||||
$('#vits_sdp_ratio').val(this.settings.sdp_ratio);
|
||||
$('#vits_emotion').val(this.settings.emotion);
|
||||
$('#vits_text_prompt').val(this.settings.text_prompt);
|
||||
$('#vits_style_text').val(this.settings.style_text);
|
||||
$('#vits_style_weight').val(this.settings.style_weight);
|
||||
|
||||
// Update the UI to reflect changes
|
||||
$('#vits_length_output').text(this.settings.length);
|
||||
$('#vits_noise_output').text(this.settings.noise);
|
||||
$('#vits_noisew_output').text(this.settings.noisew);
|
||||
$('#vits_segment_size_output').text(this.settings.segment_size);
|
||||
$('#vits_sdp_ratio_output').text(this.settings.sdp_ratio);
|
||||
$('#vits_emotion_output').text(this.settings.emotion);
|
||||
$('#vits_style_weight_output').text(this.settings.style_weight);
|
||||
|
||||
// Register input/change event listeners to update settings on user interaction
|
||||
$('#vits_endpoint').on('input', () => { this.onSettingsChange(); });
|
||||
$('#vits_lang').on('change', () => { this.onSettingsChange(); });
|
||||
$('#vits_format').on('change', () => { this.onSettingsChange(); });
|
||||
$('#vits_length').on('change', () => { this.onSettingsChange(); });
|
||||
$('#vits_noise').on('change', () => { this.onSettingsChange(); });
|
||||
$('#vits_noisew').on('change', () => { this.onSettingsChange(); });
|
||||
$('#vits_segment_size').on('change', () => { this.onSettingsChange(); });
|
||||
$('#vits_streaming').on('change', () => { this.onSettingsChange(); });
|
||||
$('#vits_dim_emotion').on('change', () => { this.onSettingsChange(); });
|
||||
$('#vits_sdp_ratio').on('change', () => { this.onSettingsChange(); });
|
||||
$('#vits_emotion').on('change', () => { this.onSettingsChange(); });
|
||||
$('#vits_text_prompt').on('change', () => { this.onSettingsChange(); });
|
||||
$('#vits_style_text').on('change', () => { this.onSettingsChange(); });
|
||||
$('#vits_style_weight').on('change', () => { this.onSettingsChange(); });
|
||||
|
||||
await this.checkReady();
|
||||
|
||||
console.info('VITS: Settings loaded');
|
||||
}
|
||||
|
||||
// Perform a simple readiness check by trying to fetch voiceIds
|
||||
async checkReady() {
|
||||
await Promise.allSettled([this.fetchTtsVoiceObjects(), this.changeTTSSettings()]);
|
||||
}
|
||||
|
||||
async onRefreshClick() {
|
||||
return;
|
||||
}
|
||||
|
||||
//#################//
|
||||
// TTS Interfaces //
|
||||
//#################//
|
||||
|
||||
async getVoice(voiceName) {
|
||||
if (this.voices.length == 0) {
|
||||
this.voices = await this.fetchTtsVoiceObjects();
|
||||
}
|
||||
const match = this.voices.filter(
|
||||
v => v.name == voiceName,
|
||||
)[0];
|
||||
if (!match) {
|
||||
throw `TTS Voice name ${voiceName} not found`;
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
async getVoiceById(voiceId) {
|
||||
if (this.voices.length == 0) {
|
||||
this.voices = await this.fetchTtsVoiceObjects();
|
||||
}
|
||||
const match = this.voices.filter(
|
||||
v => v.voice_id == voiceId,
|
||||
)[0];
|
||||
if (!match) {
|
||||
throw `TTS Voice id ${voiceId} not found`;
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
async generateTts(text, voiceId) {
|
||||
const response = await this.fetchTtsGeneration(text, voiceId);
|
||||
return response;
|
||||
}
|
||||
|
||||
//###########//
|
||||
// API CALLS //
|
||||
//###########//
|
||||
async fetchTtsVoiceObjects() {
|
||||
const response = await fetch(`${this.settings.provider_endpoint}/voice/speakers`);
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}: ${await response.json()}`);
|
||||
}
|
||||
const jsonData = await response.json();
|
||||
const voices = [];
|
||||
|
||||
const addVoices = (modelType) => {
|
||||
jsonData[modelType].forEach(voice => {
|
||||
voices.push({
|
||||
name: `[${modelType}] ${voice.name} (${voice.lang})`,
|
||||
voice_id: `${modelType}&${voice.id}`,
|
||||
preview_url: false,
|
||||
lang: voice.lang,
|
||||
});
|
||||
});
|
||||
};
|
||||
for (const key in this.modelTypes) {
|
||||
addVoices(this.modelTypes[key]);
|
||||
}
|
||||
|
||||
this.voices = voices; // Assign to the class property
|
||||
return voices; // Also return this list
|
||||
}
|
||||
|
||||
// Each time a parameter is changed, we change the configuration.
// The body is empty in this provider: fetchTtsGeneration() sends the current
// settings along with every request. The method still exists because
// onSettingsChange() and checkReady() call it.
async changeTTSSettings() {
}
|
||||
|
||||
/**
|
||||
* Fetch TTS generation from the API.
|
||||
* @param {string} inputText Text to generate TTS for
|
||||
* @param {string} voiceId Voice ID to use (model_type&speaker_id))
|
||||
* @returns {Promise<Response|string>} Fetch response
|
||||
*/
|
||||
async fetchTtsGeneration(inputText, voiceId, lang = null, forceNoStreaming = false) {
|
||||
console.info(`Generating new TTS for voice_id ${voiceId}`);
|
||||
|
||||
const streaming = !forceNoStreaming && this.settings.streaming;
|
||||
const [model_type, speaker_id] = voiceId.split('&');
|
||||
const params = new URLSearchParams();
|
||||
params.append('text', inputText);
|
||||
params.append('id', speaker_id);
|
||||
if (streaming) {
|
||||
params.append('streaming', streaming);
|
||||
// Streaming response only supports MP3
|
||||
}
|
||||
else {
|
||||
params.append('format', this.settings.format);
|
||||
}
|
||||
params.append('lang', lang ?? this.settings.lang);
|
||||
params.append('length', this.settings.length);
|
||||
params.append('noise', this.settings.noise);
|
||||
params.append('noisew', this.settings.noisew);
|
||||
params.append('segment_size', this.settings.segment_size);
|
||||
|
||||
if (model_type == this.modelTypes.W2V2_VITS) {
|
||||
params.append('emotion', this.settings.dim_emotion);
|
||||
}
|
||||
else if (model_type == this.modelTypes.BERT_VITS2) {
|
||||
params.append('sdp_ratio', this.settings.sdp_ratio);
|
||||
params.append('emotion', this.settings.emotion);
|
||||
if (this.settings.text_prompt) {
|
||||
params.append('text_prompt', this.settings.text_prompt);
|
||||
}
|
||||
if (this.settings.style_text) {
|
||||
params.append('style_text', this.settings.style_text);
|
||||
params.append('style_weight', this.settings.style_weight);
|
||||
}
|
||||
}
|
||||
|
||||
const url = `${this.settings.provider_endpoint}/voice/${model_type.toLowerCase()}`;
|
||||
|
||||
if (streaming) {
|
||||
return url + `?${params.toString()}`;
|
||||
}
|
||||
|
||||
const response = await fetch(
|
||||
url,
|
||||
{
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
},
|
||||
body: params,
|
||||
},
|
||||
);
|
||||
if (!response.ok) {
|
||||
toastr.error(response.statusText, 'TTS Generation Failed');
|
||||
throw new Error(`HTTP ${response.status}: ${await response.text()}`);
|
||||
}
|
||||
return response;
|
||||
}
|
||||
|
||||
/**
|
||||
* Preview TTS for a given voice ID.
|
||||
* @param {string} id Voice ID
|
||||
*/
|
||||
async previewTtsVoice(id) {
|
||||
this.audioElement.pause();
|
||||
this.audioElement.currentTime = 0;
|
||||
const voice = await this.getVoiceById(id);
|
||||
const lang = voice.lang.includes(this.settings.lang) ? this.settings.lang : voice.lang[0];
|
||||
|
||||
let lang_code = this.langKey2LangCode[lang];
|
||||
const text = getPreviewString(lang_code);
|
||||
const response = await this.fetchTtsGeneration(text, id, lang, true);
|
||||
if (typeof response != 'string') {
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}: ${await response.text()}`);
|
||||
}
|
||||
const audio = await response.blob();
|
||||
const url = URL.createObjectURL(audio);
|
||||
this.audioElement.src = url;
|
||||
this.audioElement.play();
|
||||
}
|
||||
}
|
||||
|
||||
// Interface not used
|
||||
async fetchTtsFromHistory(history_item_id) {
|
||||
return Promise.resolve(history_item_id);
|
||||
}
|
||||
}
|
327
public/scripts/extensions/tts/xtts.js
Normal file
327
public/scripts/extensions/tts/xtts.js
Normal file
@@ -0,0 +1,327 @@
|
||||
import { doExtrasFetch, getApiUrl, modules } from '../../extensions.js';
|
||||
import { saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { XTTSTtsProvider };
|
||||
|
||||
/**
 * TTS provider that talks to an XTTSv2 API server reachable at
 * settings.provider_endpoint (or to the Extras API once it reports TTS support).
 */
class XTTSTtsProvider {
    //########//
    // Config //
    //########//

    settings;
    ready = false;
    voices = [];
    separator = '. ';

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        // Replace fancy ellipsis with "..."
        text = text.replace(/…/g, '...');
        // Remove quotes
        text = text.replace(/["“”‘’]/g, '');
        // Replace multiple "." with single "."
        text = text.replace(/\.+/g, '.');
        return text;
    }

    // Display name -> language code offered in the language drop-down
    languageLabels = {
        'Arabic': 'ar',
        'Brazilian Portuguese': 'pt',
        'Chinese': 'zh-cn',
        'Czech': 'cs',
        'Dutch': 'nl',
        'English': 'en',
        'French': 'fr',
        'German': 'de',
        'Italian': 'it',
        'Polish': 'pl',
        'Russian': 'ru',
        'Spanish': 'es',
        'Turkish': 'tr',
        'Japanese': 'ja',
        'Korean': 'ko',
        'Hungarian': 'hu',
        'Hindi': 'hi',
    };

    defaultSettings = {
        provider_endpoint: 'http://localhost:8020',
        language: 'en',
        temperature: 0.75,
        length_penalty: 1.0,
        repetition_penalty: 5.0,
        top_k: 50,
        top_p: 0.85,
        speed: 1,
        enable_text_splitting: true,
        stream_chunk_size: 100,
        voiceMap: {},
        streaming: false,
    };

    /**
     * Markup for the provider's settings panel. Inputs are rendered with the
     * default values; loadSettings() later overwrites them with the saved ones.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        const languageOptions = Object.entries(this.languageLabels)
            .map(([label, code]) => (code == this.settings?.language
                ? `<option value="${code}" selected="selected">${label}</option>`
                : `<option value="${code}">${label}</option>`))
            .join('');

        const html = `
        <label for="xtts_api_language">Language</label>
        <select id="xtts_api_language">${languageOptions}
        </select>
        <label>XTTS Settings:</label><br/>
        <label for="xtts_tts_endpoint">Provider Endpoint:</label>
        <input id="xtts_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
        <span>Use <a target="_blank" href="https://github.com/daswer123/xtts-api-server">XTTSv2 TTS Server</a>.</span>
        <label for="xtts_tts_streaming" class="checkbox_label">
            <input id="xtts_tts_streaming" type="checkbox" />
            <span>Streaming <small>(RVC not supported)</small></span>
        </label>
        <label for="xtts_speed">Speed: <span id="xtts_tts_speed_output">${this.defaultSettings.speed}</span></label>
        <input id="xtts_speed" type="range" value="${this.defaultSettings.speed}" min="0.5" max="2" step="0.01" />

        <label for="xtts_temperature">Temperature: <span id="xtts_tts_temperature_output">${this.defaultSettings.temperature}</span></label>
        <input id="xtts_temperature" type="range" value="${this.defaultSettings.temperature}" min="0.01" max="1" step="0.01" />

        <label for="xtts_length_penalty">Length Penalty: <span id="xtts_length_penalty_output">${this.defaultSettings.length_penalty}</span></label>
        <input id="xtts_length_penalty" type="range" value="${this.defaultSettings.length_penalty}" min="0.5" max="2" step="0.1" />

        <label for="xtts_repetition_penalty">Repetition Penalty: <span id="xtts_repetition_penalty_output">${this.defaultSettings.repetition_penalty}</span></label>
        <input id="xtts_repetition_penalty" type="range" value="${this.defaultSettings.repetition_penalty}" min="1" max="10" step="0.1" />

        <label for="xtts_top_k">Top K: <span id="xtts_top_k_output">${this.defaultSettings.top_k}</span></label>
        <input id="xtts_top_k" type="range" value="${this.defaultSettings.top_k}" min="0" max="100" step="1" />

        <label for="xtts_top_p">Top P: <span id="xtts_top_p_output">${this.defaultSettings.top_p}</span></label>
        <input id="xtts_top_p" type="range" value="${this.defaultSettings.top_p}" min="0" max="1" step="0.01" />

        <label for="xtts_stream_chunk_size">Stream Chunk Size: <span id="xtts_stream_chunk_size_output">${this.defaultSettings.stream_chunk_size}</span></label>
        <input id="xtts_stream_chunk_size" type="range" value="${this.defaultSettings.stream_chunk_size}" min="100" max="400" step="1" />

        <label for="xtts_enable_text_splitting" class="checkbox_label">
            <input id="xtts_enable_text_splitting" type="checkbox" ${this.defaultSettings.enable_text_splitting ? 'checked' : ''} />
            Enable Text Splitting
        </label>
        `;

        return html;
    }

    /**
     * Read every control of the settings panel back into this.settings,
     * refresh the value read-outs, persist the settings and push them to the server.
     */
    onSettingsChange() {
        // Used when provider settings are updated from UI
        this.settings.provider_endpoint = $('#xtts_tts_endpoint').val();
        this.settings.language = $('#xtts_api_language').val();

        // Slider values arrive as strings; store numbers so they keep the same type as the defaults
        this.settings.speed = Number($('#xtts_speed').val());
        this.settings.temperature = Number($('#xtts_temperature').val());
        this.settings.length_penalty = Number($('#xtts_length_penalty').val());
        this.settings.repetition_penalty = Number($('#xtts_repetition_penalty').val());
        this.settings.top_k = Number($('#xtts_top_k').val());
        this.settings.top_p = Number($('#xtts_top_p').val());
        this.settings.stream_chunk_size = Number($('#xtts_stream_chunk_size').val());
        this.settings.enable_text_splitting = $('#xtts_enable_text_splitting').is(':checked');
        this.settings.streaming = $('#xtts_tts_streaming').is(':checked');

        // Update the UI to reflect changes
        $('#xtts_tts_speed_output').text(this.settings.speed);
        $('#xtts_tts_temperature_output').text(this.settings.temperature);
        $('#xtts_length_penalty_output').text(this.settings.length_penalty);
        $('#xtts_repetition_penalty_output').text(this.settings.repetition_penalty);
        $('#xtts_top_k_output').text(this.settings.top_k);
        $('#xtts_top_p_output').text(this.settings.top_p);
        $('#xtts_stream_chunk_size_output').text(this.settings.stream_chunk_size);

        saveTtsProviderSettings();
        // This handler is synchronous, so report a failed push instead of leaving the promise unhandled
        this.changeTTSSettings().catch((error) => {
            console.error('XTTS: Failed to apply TTS settings', error);
        });
    }

    /**
     * Load saved provider settings, mirror them into the settings UI and attach
     * the listeners that push later UI edits back through onSettingsChange().
     * @param {object} settings Previously saved settings; every key must exist in defaultSettings
     * @returns {Promise<void>}
     * @throws {Error} If `settings` contains a key that is not part of defaultSettings
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        const saved = settings ?? {};
        if (Object.keys(saved).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Work on a copy so that loaded/edited values never overwrite defaultSettings itself
        this.settings = { ...this.defaultSettings };

        // Only accept keys defined in defaultSettings
        for (const key in saved) {
            if (key in this.settings) {
                this.settings[key] = saved[key];
            } else {
                throw new Error(`Invalid setting passed to TTS Provider: ${key}`);
            }
        }

        // Poll every two seconds until the Extras API reports TTS support, then
        // point the endpoint at its /api/tts route and stop polling
        const apiCheckInterval = setInterval(() => {
            // Use Extras API if TTS support is enabled
            if (modules.includes('tts') || modules.includes('xtts-tts')) {
                const baseUrl = new URL(getApiUrl());
                baseUrl.pathname = '/api/tts';
                this.settings.provider_endpoint = baseUrl.toString();
                $('#xtts_tts_endpoint').val(this.settings.provider_endpoint);
                clearInterval(apiCheckInterval);
            }
        }, 2000);

        // Set initial values from the settings
        $('#xtts_tts_endpoint').val(this.settings.provider_endpoint);
        $('#xtts_api_language').val(this.settings.language);
        $('#xtts_speed').val(this.settings.speed);
        $('#xtts_temperature').val(this.settings.temperature);
        $('#xtts_length_penalty').val(this.settings.length_penalty);
        $('#xtts_repetition_penalty').val(this.settings.repetition_penalty);
        $('#xtts_top_k').val(this.settings.top_k);
        $('#xtts_top_p').val(this.settings.top_p);
        $('#xtts_enable_text_splitting').prop('checked', this.settings.enable_text_splitting);
        $('#xtts_stream_chunk_size').val(this.settings.stream_chunk_size);
        $('#xtts_tts_streaming').prop('checked', this.settings.streaming);

        // Update the value read-outs next to the sliders
        $('#xtts_tts_speed_output').text(this.settings.speed);
        $('#xtts_tts_temperature_output').text(this.settings.temperature);
        $('#xtts_length_penalty_output').text(this.settings.length_penalty);
        $('#xtts_repetition_penalty_output').text(this.settings.repetition_penalty);
        $('#xtts_top_k_output').text(this.settings.top_k);
        $('#xtts_top_p_output').text(this.settings.top_p);
        $('#xtts_stream_chunk_size_output').text(this.settings.stream_chunk_size);

        // Register input/change event listeners to update settings on user interaction
        $('#xtts_tts_endpoint').on('input', () => { this.onSettingsChange(); });
        $('#xtts_api_language').on('change', () => { this.onSettingsChange(); });
        $('#xtts_speed').on('input', () => { this.onSettingsChange(); });
        $('#xtts_temperature').on('input', () => { this.onSettingsChange(); });
        $('#xtts_length_penalty').on('input', () => { this.onSettingsChange(); });
        $('#xtts_repetition_penalty').on('input', () => { this.onSettingsChange(); });
        $('#xtts_top_k').on('input', () => { this.onSettingsChange(); });
        $('#xtts_top_p').on('input', () => { this.onSettingsChange(); });
        $('#xtts_enable_text_splitting').on('change', () => { this.onSettingsChange(); });
        $('#xtts_stream_chunk_size').on('input', () => { this.onSettingsChange(); });
        $('#xtts_tts_streaming').on('change', () => { this.onSettingsChange(); });

        await this.checkReady();

        console.debug('XTTS: Settings loaded');
    }

    // Perform a simple readiness check by trying to fetch voiceIds
    async checkReady() {
        await Promise.allSettled([this.fetchTtsVoiceObjects(), this.changeTTSSettings()]);
    }

    /**
     * Refresh hook; this provider performs no work here.
     * @returns {Promise<void>}
     */
    async onRefreshClick() {
        return;
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Look up a voice object by its name.
     * @param {string} voiceName Voice name as reported by the server
     * @returns {Promise<object>} The matching voice object
     * @throws {Error} If no voice with that name exists
     */
    async getVoice(voiceName) {
        // Fetch the voice list lazily the first time it is needed
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(XTTSVoice => XTTSVoice.name == voiceName);
        if (!match) {
            // Throw a real Error (not a bare string) so callers get a stack trace
            throw new Error(`TTS Voice name ${voiceName} not found`);
        }
        return match;
    }

    /**
     * Generate speech for the given text; thin wrapper around fetchTtsGeneration().
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice identifier
     * @returns {Promise<Response|string>} Whatever fetchTtsGeneration() resolves to
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the list of speakers from the provider endpoint.
     * @returns {Promise<any>} Parsed JSON body of the /speakers response
     * @throws {Error} If the endpoint answers with a non-OK status
     */
    async fetchTtsVoiceObjects() {
        const response = await doExtrasFetch(`${this.settings.provider_endpoint}/speakers`);
        if (!response.ok) {
            // Read the body as text: a parsed JSON object would be rendered as "[object Object]",
            // and json() itself rejects when the error body is not JSON
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        const responseJson = await response.json();
        return responseJson;
    }

    /**
     * Push the current generation parameters to the server.
     * Each time a parameter is changed, we change the configuration.
     * @returns {Promise<Response|undefined>} The server response, or undefined when no endpoint is configured
     */
    async changeTTSSettings() {
        if (!this.settings.provider_endpoint) {
            return;
        }

        const response = await doExtrasFetch(
            `${this.settings.provider_endpoint}/set_tts_settings`,
            {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Cache-Control': 'no-cache',
                },
                body: JSON.stringify({
                    'temperature': this.settings.temperature,
                    'speed': this.settings.speed,
                    'length_penalty': this.settings.length_penalty,
                    'repetition_penalty': this.settings.repetition_penalty,
                    'top_p': this.settings.top_p,
                    'top_k': this.settings.top_k,
                    'enable_text_splitting': this.settings.enable_text_splitting,
                    'stream_chunk_size': this.settings.stream_chunk_size,
                }),
            },
        );
        return response;
    }

    /**
     * Request speech audio for a piece of text.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice identifier, sent to the server as `speaker_wav`
     * @returns {Promise<Response|string>} The streaming URL when streaming is enabled, otherwise the fetch response
     * @throws {Error} If the non-streaming request answers with a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);

        // In streaming mode only the URL is handed back; no request is made here
        if (this.settings.streaming) {
            const params = new URLSearchParams();
            params.append('text', inputText);
            params.append('speaker_wav', voiceId);
            params.append('language', this.settings.language);
            return `${this.settings.provider_endpoint}/tts_stream/?${params.toString()}`;
        }

        const response = await doExtrasFetch(
            `${this.settings.provider_endpoint}/tts_to_audio/`,
            {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Cache-Control': 'no-cache', // Added this line to disable caching of file so new files are always played - Rolyat 7/7/23
                },
                body: JSON.stringify({
                    'text': inputText,
                    'speaker_wav': voiceId,
                    'language': this.settings.language,
                }),
            },
        );
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    // Interface not used by XTTS TTS
    async fetchTtsFromHistory(history_item_id) {
        return Promise.resolve(history_item_id);
    }

}
|
Reference in New Issue
Block a user