summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorDominik Riebeling <Dominik.Riebeling@gmail.com>2011-03-02 18:29:38 +0000
committerDominik Riebeling <Dominik.Riebeling@gmail.com>2011-03-02 18:29:38 +0000
commit7ad78222c45e2056edd29c16034bb6109ebef45b (patch)
treea06ccfd5f12038c0d61acfb192cd97c9c1319f2d /tools
parent1f77d091a5ce98c10e263dfdb18f2939aeb21a55 (diff)
downloadrockbox-7ad78222c45e2056edd29c16034bb6109ebef45b.tar.gz
rockbox-7ad78222c45e2056edd29c16034bb6109ebef45b.zip
FS#11913: Move TTS correction expressions into a separate file.
voice.pl now reads the TTS correction expressions from the file tools/voice-corrections.txt, which contains the regular expressions used to adjust the voice strings. This makes it easier to adjust the corrections and allows integrating them into tools like Rockbox Utility. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@29500 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'tools')
-rw-r--r--tools/voice-corrections.txt92
-rwxr-xr-xtools/voice.pl138
2 files changed, 130 insertions, 100 deletions
diff --git a/tools/voice-corrections.txt b/tools/voice-corrections.txt
new file mode 100644
index 0000000000..26d2c031cf
--- /dev/null
+++ b/tools/voice-corrections.txt
@@ -0,0 +1,92 @@
+ __________ __ ___.
+ Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ \/ \/ \/ \/ \/
+ $Id$
+
+
+ Voice string corrections for voice.pl to support TTS engines.
+ The items on each line are separated by a separator character, which is
+ defined by the first character of that line. If the first character is
+ whitespace, the line is treated as a comment.
+
+ Format: /language/engine/vendor/string/replacement/modifiers
+
+ Where / is the separator. language, engine, vendor and string are Perl regexes;
+ replacement and modifiers are applied as in s/string/replacement/modifiers.
+ Empty lines and lines starting with whitespace are ignored; for all other
+ lines the first character becomes the separator.
+
+ General for all engines and languages
+
+/.*/.*/.*/USB/U S B/g
+/.*/.*/.*/ID3/I D 3/g
+ English
+/english/(sapi|festival)/.*/plugin(s?)/plug-in$1/ig
+/english/festival/.*/\ba\b/ay/ig
+/english/festival/.*/$/./
+
+ German (deutsch)
+
+/deutsch/.*/.*/alkaline/alkalein/ig
+/deutsch/.*/.*/byte(s?)/beit$1/ig
+/deutsch/.*/.*/clip(s?)/klipp$1/ig
+/deutsch/.*/.*/\bcover/kawwer/ig
+/deutsch/.*/.*/cuesheet/kjuschiet/ig
+/deutsch/.*/.*/dither/didder/ig
+/deutsch/.*/.*/equalizer/iquileiser/ig
+/deutsch/.*/.*/\bflash\b/fläsh/ig
+/deutsch/.*/.*/\bfirmware(s?)\b/firmwer$1/ig
+/deutsch/.*/.*/\bI D 3 tag\b/I D 3 täg/ig
+/deutsch/.*/.*/\bloudness\b/laudness/ig
+/deutsch/.*/.*/\bunicode\b/unikod/ig
+/deutsch/sapi/AT&T Labs/alphabet/alfabet/ig;
+/deutsch/sapi/AT&T Labs/ampere/amper/ig;
+/deutsch/sapi/AT&T Labs/\bdezibel\b/de-zibell/ig;
+/deutsch/sapi/AT&T Labs/diddering/didde-ring/ig;
+/deutsch/sapi/AT&T Labs/energie\b/ener-gie/ig;
+/deutsch/sapi/AT&T Labs/\Blauf\b/-lauf/ig;
+/deutsch/sapi/AT&T Labs/\bnumerisch\b/numehrisch/ig;
+
+ Swedish (svenska)
+ for all swedish engines (e.g. for english words)
+
+/svenska/.*/.*/kilobyte/kilobajt/ig
+/svenska/.*/.*/megabyte/megabajt/ig
+/svenska/.*/.*/gigabyte/gigabajt/ig
+/svenska/.*/.*/\bloudness\b/laudness/ig
+/svenska/espeak/.*/ampere/ampär/ig
+/svenska/espeak/.*/bokmärken/bok-märken/ig
+/svenska/espeak/.*/generella/schenerella/ig
+/svenska/espeak/.*/dithering/diddering/ig
+/svenska/espeak/.*/\bunicode\b/jynikod/ig
+/svenska/espeak/.*/uttoning/utoning/ig
+/svenska/espeak/.*/procent/pro-cent/ig
+/svenska/espeak/.*/spellistor/spelistor/ig
+/svenska/espeak/.*/cuesheet/qjyschiit/ig
+
+ Italian (italiano)
+ for all italian engines (e.g. for english words)
+
+/italiano/.*/.*/Replaygain/Ripleyghein/ig
+/italiano/.*/.*/Crossfade/Crossfeid/ig
+/italiano/.*/.*/beep/Bip/ig
+/italiano/.*/.*/cuesheet/chiushit/ig
+/italiano/.*/.*/fade/feid/ig
+/italiano/.*/.*/Crossfeed/crossfid/ig
+/italiano/.*/.*/Cache/chash/ig
+/italiano/.*/.*/\bfirmware(s?)\b/firmuer$1/ig
+/italiano/.*/.*/\bFile(s?)\b/fail$1/ig
+/italiano/.*/.*/\bloudness\b/laudness/ig
+/italiano/.*/.*/\bunicode\b/unikod/ig
+/italiano/.*/.*/Playlist/pleylist/ig
+/italiano/.*/.*/WavPack/wave pak/ig
+/italiano/.*/.*/BITRATE/bit reit/ig
+/italiano/.*/.*/Codepage/cod page/ig
+/italiano/.*/.*/PCM Wave/pcm Ue'iv/ig
+/italiano/sapi/Loquendo/Inizializza/inizializa/ig
+/italiano/sapi/ScanSoft, Inc/V/v/ig
+/italiano/sapi/ScanSoft, Inc/X/x/ig
+/italiano/sapi/ScanSoft, Inc/stop/stohp/ig
diff --git a/tools/voice.pl b/tools/voice.pl
index 32db75c5e1..ee68c30eb4 100755
--- a/tools/voice.pl
+++ b/tools/voice.pl
@@ -128,106 +128,12 @@ sub correct_string {
our $verbose;
my ($string, $language, $tts_object) = @_;
my $orig = $string;
- switch($language) {
- # General for all engines and languages
- $string =~ s/USB/U S B/g;
- $string =~ s/ID3/I D 3/g;
-
- case "english" {
- switch($$tts_object{"name"}) {
- case ["sapi","festival"] {
- $string =~ s/plugin(s?)/plug-in$1/ig; next
- }
- case "festival" {
- $string =~ s/\ba\b/ay/ig;
- $string =~ s/$/./;
- }
- }
- }
- case "deutsch" {
- # for all german engines (e.g. for english words)
- $string =~ s/alkaline/alkalein/ig;
- $string =~ s/byte(s?)/beit$1/ig;
- $string =~ s/clip(s?)/klipp$1/ig;
- $string =~ s/\bcover/kawwer/ig;
- $string =~ s/cuesheet/kjuschiet/ig;
- $string =~ s/dither/didder/ig;
- $string =~ s/equalizer/iquileiser/ig;
- $string =~ s/\bflash\b/fläsh/ig;
- $string =~ s/\bfirmware(s?)\b/firmwer$1/ig;
- $string =~ s/\bI D 3 tag\b/I D 3 täg/ig; # can't just use "tag" here
- $string =~ s/\bloudness\b/laudness/ig;
- $string =~ s/\bunicode\b/unikod/ig;
- switch($$tts_object{"name"}) {
- case "sapi" { # just for SAPI
- switch($$tts_object{"vendor"}) {
- case "AT&T Labs" {
- $string =~ s/alphabet/alfabet/ig;
- $string =~ s/ampere/amper/ig;
- $string =~ s/\bdezibel\b/de-zibell/ig;
- $string =~ s/diddering/didde-ring/ig;
- $string =~ s/energie\b/ener-gie/ig;
- $string =~ s/\Blauf\b/-lauf/ig;
- $string =~ s/\bnumerisch\b/numehrisch/ig;
- }
- }
- }
- }
- }
- case "svenska" {
- # for all swedish engines (e.g. for english words)
- $string =~ s/kilobyte/kilobajt/ig;
- $string =~ s/megabyte/megabajt/ig;
- $string =~ s/gigabyte/gigabajt/ig;
- $string =~ s/\bloudness\b/laudness/ig;
-
- switch($$tts_object{"name"}) {
- case "espeak" { # just for eSpeak
- $string =~ s/ampere/ampär/ig;
- $string =~ s/bokmärken/bok-märken/ig;
- $string =~ s/generella/schenerella/ig;
- $string =~ s/dithering/diddering/ig;
- $string =~ s/\bunicode\b/jynikod/ig;
- $string =~ s/uttoning/utoning/ig;
- $string =~ s/procent/pro-cent/ig;
- $string =~ s/spellistor/spelistor/ig;
- $string =~ s/cuesheet/qjyschiit/ig;
- }
- }
- }
- case "italiano" {
- # for all italian engines (e.g. for english words)
- $string =~ s/Replaygain/Ripleyghein/ig;
- $string =~ s/Crossfade/Crossfeid/ig;
- $string =~ s/beep/Bip/ig;
- $string =~ s/cuesheet/chiushit/ig;
- $string =~ s/fade/feid/ig;
- $string =~ s/Crossfeed/crossfid/ig;
- $string =~ s/Cache/chash/ig;
- $string =~ s/\bfirmware(s?)\b/firmuer$1/ig;
- $string =~ s/\bFile(s?)\b/fail$1/ig;
- $string =~ s/\bloudness\b/laudness/ig;
- $string =~ s/\bunicode\b/unikod/ig;
- $string =~ s/Playlist/pleylist/ig;
- $string =~ s/WavPack/wave pak/ig;
- $string =~ s/BITRATE/bit reit/ig;
- $string =~ s/Codepage/cod page/ig;
- $string =~ s/PCM Wave/pcm Ue'iv/ig;
- switch($$tts_object{"name"}) {
- case "sapi" { # just for SAPI
- switch($$tts_object{"vendor"}) {
- case "Loquendo" {
- $string =~ s/Inizializza/inizializa/ig;
- }
- case "ScanSoft, Inc" {
- $string =~ s/V/v/ig;
- $string =~ s/X/x/ig;
- $string =~ s/stop/stohp/ig;
- }
- }
- }
- }
- }
+ my $corrections = $tts_object->{"corrections"};
+
+ foreach (@$corrections) {
+ my $r = "s" . $_->{separator} . $_->{search} . $_->{separator}
+ . $_->{replace} . $_->{separator} . $_->{modifier};
+ eval ('$string =~' . "$r;");
}
if ($orig ne $string) {
printf("%s -> %s\n", $orig, $string) if $verbose;
@@ -331,6 +237,7 @@ sub generateclips {
my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
my $english = dirname($0) . '/../apps/lang/english.lang';
my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang';
+ my $correctionsfile = dirname($0) . '/voice-corrections.txt';
my $id = '';
my $voice = '';
my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null";
@@ -340,6 +247,37 @@ sub generateclips {
local $| = 1; # make progress indicator work reliably
my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language);
+ # add string corrections to tts_object.
+ my @corrects = ();
+ open(VOICEREGEXP, "<$correctionsfile") or die "Can't open corrections file!\n";
+ while(<VOICEREGEXP>) {
+ # get first character of line
+ my $line = $_;
+ my $separator = substr($_, 0, 1);
+ if($separator =~ m/\s+/) {
+ next;
+ }
+ chomp($line);
+ $line =~ s/^.//g; # remove separator at beginning
+ my ($lang, $engine, $vendor, $search, $replace, $modifier) = split(/$separator/, $line);
+
+ # does language match?
+ if($language !~ m/$lang/) {
+ next;
+ }
+ if($$tts_object{"name"} !~ m/$engine/) {
+ next;
+ }
+ my $v = $$tts_object{"vendor"} || ""; # vendor might be empty in $tts_object
+ if($v !~ m/$vendor/) {
+ next;
+ }
+ push @corrects, {separator => $separator, search => $search, replace => $replace, modifier => $modifier};
+
+ }
+ close(VOICEREGEXP);
+ $tts_object->{corrections} = [@corrects];
+
print("Generating voice clips");
print("\n") if $verbose;
for (`$cmd`) {