#! /usr/bin/perl -w # Wordnet dictionary database converter # # Converts the Wordnet prolog data to rockbox dictionary format. # # Written by Miika Pekkarinen # # $Id$ use strict; # Lookup tables my %words; my %descriptions; sub getcatname { my ($id) = @_; return 'N' if $id == 1; return 'V' if $id == 2; return 'A' if $id == 3; return 'A' if $id == 4; return '?'; } open IN_WORD, "wn_s.pl" or die "Open fail(#1): $!"; open IN_DESC, "wn_g.pl" or die "Open fail(#2): $!"; open OUTPUT, "> dict.preparsed" or die "Open fail(#3): $!"; print "Reading word file...\n"; # Read everything into memory while () { chomp ; # s(100001740,1,'entity',n,1,11). => 100001740,1,'entity',n,1,11 s/(^s\()(.*)(\)\.$)/$2/; my ($seqid, $n1, $word, $n2, $n3, $n4) = split /,/, $_, 6; # 'entity' => entity $word =~ s/(^\')(.*)(\'$)/$2/; $word =~ s/\'\'/\'/s; my $category = substr $seqid, 0, 1; $words{lc $word}{$seqid} = $category; } close IN_WORD; print "Reading description file...\n"; while () { chomp ; # g(100002056,'(a separate and self-contained entity)'). # => 100002056,'(a separate and self-contained entity)' s/(^g\()(.*)(\)\.$)/$2/; my ($seqid, $desc) = split /,/, $_, 2; $desc =~ s/(^\'\()(.*)(\)\'$)/$2/; $desc =~ s/\'\'/\'/s; $descriptions{$seqid} = $desc; } close IN_DESC; print "Sorting and writing output...\n"; # Now sort and find correct descriptions foreach my $word (sort keys %words) { my %categories; # Find all definitions of the word foreach my $id (keys %{$words{$word}}) { my $catid = $words{$word}{$id}; my $description = $descriptions{$id}; if (!defined($description) or $description eq '') { print "Error: Failed to link word: $word / ", $words{$word}, "\n"; exit 1; } push @{$categories{$catid}}, $description; } my $finaldesc; # 1 = noun # 2 = verb # 3 = adjective # 4 = adverb for my $catid (1 .. 4) { my $n = 1; my $catdesc; next unless $categories{$catid}; foreach my $desc ( @{$categories{$catid}} ) { $catdesc .= " " if $catdesc; $catdesc .= "$n. $desc"; $n++; } next unless $catdesc; $finaldesc .= "\t" if $finaldesc; $finaldesc .= getcatname($catid) . ": $catdesc" } die "Internal error" unless $finaldesc; print OUTPUT "$word\t$finaldesc\n"; } close OUTPUT; print "Done, output was successfully written!\n";