summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
author Tomas Salfischberger <tomas@rockbox.org> 2005-05-02 15:05:07 +0000
committer Tomas Salfischberger <tomas@rockbox.org> 2005-05-02 15:05:07 +0000
commit 52abc68b11694d2360e119543b876cf3c5768fbe (patch)
tree 937b8bdc68faccf815efdccf1192c2dd92738932 /tools
parent a810a67db7c923b01c4135761ef21ab866db256d (diff)
download rockbox-52abc68b11694d2360e119543b876cf3c5768fbe.tar.gz
rockbox-52abc68b11694d2360e119543b876cf3c5768fbe.zip
Dictionary conversion tools.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6395 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'tools')
-rw-r--r-- tools/FILES 1
-rw-r--r-- tools/Makefile 5
-rw-r--r-- tools/rdf2binary.c 89
-rw-r--r-- tools/wn2rdf.pl 122
4 files changed, 216 insertions, 1 deletions
diff --git a/tools/FILES b/tools/FILES
index 2e2d232160..14cdeddd41 100644
--- a/tools/FILES
+++ b/tools/FILES
@@ -10,6 +10,7 @@ rockbox-style.el
sample.emacs
buildzip.pl
romsizetest.pl
+wn2rdf.pl
make.inc
makesrc.inc
fwpatcher/*.[ch]
diff --git a/tools/Makefile b/tools/Makefile
index b98c269642..d8b1545015 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -9,7 +9,7 @@
CFLAGS := -O -ansi -g
LDFLAGS := -g
-TARGETS := scramble descramble sh2d bmp2rb convbdf generate_rocklatin mkboot
+TARGETS := scramble descramble sh2d bmp2rb rdf2binary convbdf generate_rocklatin mkboot
all: $(TARGETS)
@echo "tools done"
@@ -26,6 +26,9 @@ sh2d: sh2d.c
bmp2rb: bmp2rb.c
$(CC) -DAPPLICATION_NAME=\"$@\" -g $+ -o $@
+rdf2binary: rdf2binary.c
+ $(CC) -g $+ -o $@
+
mkboot: mkboot.c
$(CC) -g $+ -o $@
diff --git a/tools/rdf2binary.c b/tools/rdf2binary.c
new file mode 100644
index 0000000000..3597efa727
--- /dev/null
+++ b/tools/rdf2binary.c
@@ -0,0 +1,89 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2005 Miika Pekkarinen
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+/*
+This tool converts the rdf file to the binary data used in the dict plugin.
+*/
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+/* Maximum word length (buffer size, including the terminating NUL); has to
+ * be the same in dict.c. */
+#define WORDLEN 32
+
+struct word { /* one dict.index record, written by main() as raw bytes */
+ char word[WORDLEN]; /* index key, cut to at most WORDLEN - 1 characters */
+ long offset; /* byte position of the word's description inside dict.desc */
+}; /* NOTE(review): sizeof(long) and padding vary by platform -- confirm dict.c reads the same layout */
+
+int main()
+{
+ FILE *in;
+ int idx_out, desc_out;
+ struct word w;
+ char buf[10000];
+ long cur_offset = 0;
+
+ in = fopen("dict.preparsed", "r");
+ idx_out = open("dict.index", O_WRONLY | O_CREAT);
+ desc_out = open("dict.desc", O_WRONLY | O_CREAT);
+
+ if (in == NULL || idx_out < 0 || desc_out < 0) {
+ fprintf(stderr, "Error: Some files couldn't be opened\n");
+ return 1;
+ }
+
+ while (fgets(buf, sizeof buf, in) != NULL) {
+ /* It is safe to use strtok here */
+ const char *word = strtok(buf, "\t");
+ const char *desc = strtok(NULL, "\t");
+
+ if (word == NULL || desc == NULL) {
+ fprintf(stderr, "Parse error!\n");
+ fprintf(stderr, "word: %s\ndesc: %s\n", word, desc);
+
+ return 2;
+ }
+
+ /* We will null-terminate the words */
+ strncpy(w.word, word, WORDLEN - 1);
+ w.offset = cur_offset;
+ write(idx_out, &w, sizeof(struct word));
+
+ while (1) {
+ int len = strlen(desc);
+ cur_offset += len;
+ write(desc_out, desc, len);
+
+ desc = strtok(NULL, "\t");
+ if (desc == NULL)
+ break ;
+
+ cur_offset++;
+ write(desc_out, "\n", 1);
+
+ }
+ }
+
+ return 0;
+}
+
diff --git a/tools/wn2rdf.pl b/tools/wn2rdf.pl
new file mode 100644
index 0000000000..2fff87d66b
--- /dev/null
+++ b/tools/wn2rdf.pl
@@ -0,0 +1,122 @@
+#! /usr/bin/perl -w
+
+# Wordnet dictionary database converter
+#
+# Converts the Wordnet prolog data to rockbox dictionary format.
+#
+# Written by Miika Pekkarinen <slasher@ihme.org>
+#
+# $Id$
+
+use strict;
+
+# Lookup tables
+my %words; # lower-cased word => { record id => first digit of that id }
+my %descriptions; # record id => description text
+
+# Translate a numeric category id into the one-letter tag written to the
+# output: 1 gives 'N', 2 gives 'V', 3 and 4 both give 'A', and any other
+# id gives '?'.
+# NOTE(review): ids 3 and 4 share the tag 'A' -- confirm this is intended.
+sub getcatname {
+    my ($id) = @_;
+
+    if ($id == 1) {
+        return 'N';
+    }
+    elsif ($id == 2) {
+        return 'V';
+    }
+    elsif ($id == 3 || $id == 4) {
+        return 'A';
+    }
+
+    return '?';
+}
+
+# Main conversion: load the word file (wn_s.pl) and the description file
+# (wn_g.pl), join them on the record id and write one line per word to
+# dict.preparsed in the form "word<TAB>CAT: 1. desc 2. desc<TAB>CAT: ...".
+open my $in_word, '<', 'wn_s.pl' or die "Open fail(#1): $!";
+open my $in_desc, '<', 'wn_g.pl' or die "Open fail(#2): $!";
+open my $output, '>', 'dict.preparsed' or die "Open fail(#3): $!";
+
+print "Reading word file...\n";
+
+# Read everything into memory
+while (<$in_word>) {
+    chomp;
+
+    # A blank line would otherwise register an empty word with no id
+    next if $_ eq '';
+
+    # s(100001740,1,'entity',n,1,11). => 100001740,1,'entity',n,1,11
+    s/(^s\()(.*)(\)\.$)/$2/;
+
+    # Only the id (field 1) and the word (field 3) are needed
+    my ($seqid, undef, $word) = split /,/, $_, 6;
+
+    # 'entity' => entity
+    $word =~ s/(^\')(.*)(\'$)/$2/;
+    # A quote inside the quoted word is written doubled; undo every one
+    $word =~ s/\'\'/\'/g;
+
+    # The first digit of the id is the category (see the list further down)
+    my $category = substr $seqid, 0, 1;
+
+    $words{lc $word}{$seqid} = $category;
+}
+
+close $in_word;
+
+print "Reading description file...\n";
+while (<$in_desc>) {
+    chomp;
+    next if $_ eq '';
+
+    # g(100002056,'(a separate and self-contained entity)').
+    # => 100002056,'(a separate and self-contained entity)'
+    s/(^g\()(.*)(\)\.$)/$2/;
+
+    my ($seqid, $desc) = split /,/, $_, 2;
+
+    # '(text)' => text
+    $desc =~ s/(^\'\()(.*)(\)\'$)/$2/;
+    # Undo every doubled quote, not just the first one
+    $desc =~ s/\'\'/\'/g;
+
+    $descriptions{$seqid} = $desc;
+}
+
+close $in_desc;
+
+print "Sorting and writing output...\n";
+
+# Now sort and find correct descriptions
+foreach my $word (sort keys %words) {
+    my %categories;
+
+    # Find all definitions of the word; sorting the ids keeps the numbering
+    # of the definitions identical from one run to the next
+    foreach my $id (sort keys %{$words{$word}}) {
+        my $catid = $words{$word}{$id};
+        my $description = $descriptions{$id};
+
+        if (!defined($description) or $description eq '') {
+            print STDERR "Error: Failed to link word: $word / $id\n";
+            exit 1;
+        }
+
+        push @{$categories{$catid}}, $description;
+    }
+
+    my @parts;
+
+    # 1 = noun
+    # 2 = verb
+    # 3 = adjective
+    # 4 = adverb
+    for my $catid (1 .. 4) {
+        my $descs = $categories{$catid} or next;
+
+        # Number the definitions of this category: "1. foo 2. bar"
+        my $n = 0;
+        my $catdesc = join ' ', map { ++$n . ". $_" } @$descs;
+
+        push @parts, getcatname($catid) . ": $catdesc";
+    }
+
+    die "Internal error" unless @parts;
+
+    # Categories are TAB-separated; rdf2binary turns the TABs into newlines
+    print $output "$word\t", join("\t", @parts), "\n";
+}
+
+# Buffered write errors only show up at close time
+close $output or die "Close fail: $!";
+
+print "Done, output was successfully written!\n";
+
+