summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Solomon Peachy <pizza@shaftnet.org> 2024-04-16 18:35:09 -0400
committer: Solomon Peachy <pizza@shaftnet.org> 2024-04-16 18:37:43 -0400
commit: c8dd31aab79a5a470c95b6253f147e919f8422bc (patch)
tree: 0f554f1f86743d8eacb976e12682816eca3c89e5
parent: 9af812c320ca7273cf4a8748d5d945a435e23d90 (diff)
download: rockbox-c8dd31aab7.tar.gz
          rockbox-c8dd31aab7.zip
voice: Fix the 'gtts' voice generation backend.
 * Language and dialect need to be specified separately
 * Convert the mp3 files generated by gtts into wav, so that they can
   then be encoded with rbspeex (uses ffmpeg currently)

Change-Id: I6d7b9494e70a61537519221522202ea28469cc70
-rw-r--r--  tools/builds.pm   4
-rwxr-xr-x  tools/voice.pl   55
2 files changed, 33 insertions, 26 deletions
diff --git a/tools/builds.pm b/tools/builds.pm
index e11b527a1f..0c83548cf2 100644
--- a/tools/builds.pm
+++ b/tools/builds.pm
@@ -536,7 +536,7 @@ sub allbuilds {
'engines' => {
'festival' => '--language english',
'espeak' => '-ven-gb -k 5',
- 'gtts' => '-l en-gb',
+ 'gtts' => '-l en -t co.uk',
},
'enabled' => 1,
},
@@ -571,7 +571,7 @@ sub allbuilds {
'engines' => {
'festival' => '--language english',
'espeak' => '-ven-us -k 5',
- 'gtts' => '-l en-us',
+ 'gtts' => '-l en -t us',
},
'enabled' => 1,
},
diff --git a/tools/voice.pl b/tools/voice.pl
index 86a018e096..0718e517ff 100755
--- a/tools/voice.pl
+++ b/tools/voice.pl
@@ -81,24 +81,24 @@ my %festival_lang_map = (
);
my %gtts_lang_map = (
- 'english' => 'en-gb', # Always first, it's the golden master
- 'czech' => 'cs', # not supported
- 'dansk' => 'da',
- 'deutsch' => 'de',
- 'english-us' => 'en-us',
- 'espanol' => 'es-es',
- 'francais' => 'fr-fr',
- 'greek' => 'el',
- 'magyar' => 'hu',
- 'italiano' => 'it',
- 'nederlands' => 'nl',
- 'norsk' => 'no',
- 'polski' => 'pl',
- 'russian' => 'ru',
- 'slovak' => 'sk',
- 'srpski' => 'sr',
- 'svenska' => 'sv',
- 'turkce' => 'tr',
+ 'english' => '-l en -t co.uk', # Always first, it's the golden master
+ 'czech' => '-l cs', # not supported
+ 'dansk' => '-l da',
+ 'deutsch' => '-l de',
+ 'english-us' => '-l en -t us',
+ 'espanol' => '-l es',
+ 'francais' => '-l fr',
+ 'greek' => '-l el',
+ 'magyar' => '-l hu',
+ 'italiano' => '-l it',
+ 'nederlands' => '-l nl',
+ 'norsk' => '-l no',
+ 'polski' => '-l pl',
+ 'russian' => '-l ru',
+ 'slovak' => '-l sk',
+ 'srpski' => '-l sr',
+ 'svenska' => '-l sv',
+ 'turkce' => '-l tr',
);
my %espeak_lang_map = (
@@ -167,7 +167,7 @@ sub init_tts {
} elsif ($tts_engine eq 'gtts') {
$ret{"format"} = 'mp3';
if (defined($gtts_lang_map{$language}) && $tts_engine_opts !~ /-l/) {
- $ret{"ttsoptions"} = "-l $gtts_lang_map{$language} ";
+ $ret{"ttsoptions"} = " $gtts_lang_map{$language} ";
}
} elsif ($tts_engine eq 'espeak' || $tts_engine eq 'espeak-ng') {
if (defined($espeak_lang_map{$language}) && $tts_engine_opts !~ /-v/) {
@@ -403,7 +403,8 @@ sub generateclips {
$voice = $1;
if ($id !~ /^NOT_USED_.*$/ && $voice ne "") {
my $wav = $id . '.wav';
- my $enc = $id . '.mp3';
+ my $enc = $id . '.enc';
+ my $format = $tts_object->{'format'};
# Print some progress information
if (++$i % 10 == 0 and !$verbose) {
@@ -415,7 +416,7 @@ sub generateclips {
# If we have a pool of snippets, see if the string exists there first
if (defined($ENV{'POOL'})) {
- $pool_file = sprintf("%s/%s-%s.mp3", $ENV{'POOL'},
+ $pool_file = sprintf("%s/%s-%s.enc", $ENV{'POOL'},
md5_hex(Encode::encode_utf8("$voice $tts_engine $tts_engine_opts $encoder_opts")),
$language);
if (-f $pool_file) {
@@ -431,12 +432,18 @@ sub generateclips {
copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
} else {
voicestring($voice, $wav, $tts_engine_opts, $tts_object);
- if ($tts_object->{'format'} eq "wav") {
+ if ($format eq "wav") {
wavtrim($wav, 500, $tts_object);
# 500 seems to be a reasonable default for now
}
}
- if ($tts_object->{'format'} eq "wav" || $id eq "VOICE_PAUSE") {
+ # Convert from mp3 to wav so we can use rbspeex
+ if ($format eq "mp3") {
+ system("ffmpeg -loglevel 0 -i $wav $id$wav");
+ rename("$id$wav","$wav");
+ $format = "wav";
+ }
+ if ($format eq "wav" || $id eq "VOICE_PAUSE") {
encodewav($wav, $enc, $encoder, $encoder_opts, $tts_object);
} else {
copy($wav, $enc);
@@ -483,7 +490,7 @@ sub createvoice {
}
sub deleteencs() {
- for (glob('*.mp3')) {
+ for (glob('*.enc')) {
unlink($_);
}
for (glob('*.wav')) {