summaryrefslogtreecommitdiffstats
path: root/tools/rdf2binary.c
blob: 3b3afd59db50a40478b98d5b180721dbaf754e33 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2005 Miika Pekkarinen
 *
 * All files in this archive are subject to the GNU General Public License.
 * See the file COPYING in the source tree root for full license agreement.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

/*
This tool converts the rdf file to the binary data used in the dict plugin.
*/

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Maximum word length in bytes (main() copies at most WORDLEN - 1 characters
   so the last byte is left for the terminator); has to be the same in dict.c */
#define WORDLEN 32

/* Struct packing: keep the compiler from padding struct word, because the
   struct is written to the index file as raw bytes. */
#ifdef __GNUC__
#define STRUCT_PACKED __attribute__((packed))
#else
#define STRUCT_PACKED
#pragma pack (push, 2)
#endif


/*
 * One record of the index file, written verbatim with fwrite().
 *
 * word   - the dictionary word, zero padded to WORDLEN bytes.
 * offset - position of the word's description in the description file,
 *          already byte-swapped by reverse().  reverse() only handles four
 *          bytes, so the field is exactly 32 bits wide; a plain "long" would
 *          be 8 bytes on LP64 hosts and would change the record size.
 */
struct word
{
    char word[WORDLEN];
    uint32_t offset;
} STRUCT_PACKED;

/* Undo the pack(push) above so later declarations use default packing. */
#ifndef __GNUC__
#pragma pack (pop)
#endif

/*
 * Convert offsets here, not on device.
 *
 * Swaps the byte order of the low 32 bits of N (byte 0 <-> byte 3,
 * byte 1 <-> byte 2); any higher bits of N are ignored.
 *
 * Returns the swapped 32-bit value, sign-extended to long: results with
 * bit 31 set come back negative, whatever the width of long.
 *
 * All shifting is done on unsigned long. Shifting a promoted unsigned char
 * left by 24 would overflow a signed int for bytes >= 0x80.
 */
long reverse (long N) {
    const unsigned long in = (unsigned long)N & 0xFFFFFFFFUL;
    const unsigned long swapped = ((in & 0x000000FFUL) << 24)
                                | ((in & 0x0000FF00UL) << 8)
                                | ((in & 0x00FF0000UL) >> 8)
                                | ((in & 0xFF000000UL) >> 24);

    /* Rebuild the negative value arithmetically instead of relying on an
       implementation-defined unsigned -> signed conversion. */
    if (swapped & 0x80000000UL)
        return (long)(swapped - 0x80000000UL) - 0x7FFFFFFFL - 1L;

    return (long)swapped;
}


/*
 * Reads "dict.preparsed" line by line.  Each line is split on tab
 * characters: the first field is the word, every following field is a part
 * of its description.
 *
 * For each line one struct word record (word plus the byte-swapped offset
 * of its description) is appended to "dict.index", and the description
 * fields are appended to "dict.desc", joined by '\n'.  The newline that
 * fgets() keeps at the end of the line stays part of the last field.
 *
 * NOTE: lines longer than the read buffer are split by fgets() and each
 * piece is then parsed as if it were a separate line.
 *
 * Returns 0 on success, 1 if a file could not be opened, 2 on a line with
 * fewer than two fields, 3 on a read/write/close failure.
 */
int main(void)
{
    FILE *in, *idx_out, *desc_out;
    struct word w;
    char buf[10000];
    long cur_offset = 0;
    int close_failed = 0;
    int rc = 0;

    in = fopen("dict.preparsed", "r");
    idx_out = fopen("dict.index", "wb");
    desc_out = fopen("dict.desc", "wb");

    /* fopen() reports failure with NULL; all three streams are needed. */
    if (in == NULL || idx_out == NULL || desc_out == NULL)
    {
        fprintf(stderr, "Error: Some files couldn't be opened\n");
        rc = 1;
        goto cleanup;
    }

    while (fgets(buf, sizeof buf, in) != NULL)
    {
        /* It is safe to use strtok here */
        const char *word = strtok(buf, "\t");
        const char *desc = strtok(NULL, "\t");
        size_t len;

        if (word == NULL || desc == NULL)
        {
            fprintf(stderr, "Parse error!\n");
            /* Never hand a NULL pointer to %s. */
            fprintf(stderr, "word: %s\ndesc: %s\n",
                    word != NULL ? word : "(null)",
                    desc != NULL ? desc : "(null)");

            rc = 2;
            goto cleanup;
        }

        /* Zero the whole record first: strncpy() only touches the first
           WORDLEN - 1 bytes, so this is what guarantees the terminating
           NUL and keeps uninitialised stack bytes out of the file. */
        memset(&w, 0, sizeof w);
        strncpy(w.word, word, WORDLEN - 1);
        w.offset = reverse(cur_offset);
        if (fwrite(&w, sizeof w, 1, idx_out) != 1)
            goto write_error;

        while (1)
        {
            len = strlen(desc);
            cur_offset += (long)len;
            if (fwrite(desc, 1, len, desc_out) != len)
                goto write_error;

            desc = strtok(NULL, "\t");
            if (desc == NULL)
                break ;

            /* Further fields of the same line are separated by a newline. */
            cur_offset++;
            if (fwrite("\n", 1, 1, desc_out) != 1)
                goto write_error;
        }
    }

    /* fgets() returns NULL for both end-of-file and a read error. */
    if (ferror(in))
    {
        fprintf(stderr, "Error: Read failed\n");
        rc = 3;
    }
    goto cleanup;

write_error:
    fprintf(stderr, "Error: Write failed\n");
    rc = 3;

cleanup:
    if (in != NULL)
        fclose(in);
    /* fclose() flushes buffered output, so a failure means lost data. */
    if (idx_out != NULL && fclose(idx_out) != 0)
        close_failed = 1;
    if (desc_out != NULL && fclose(desc_out) != 0)
        close_failed = 1;

    if (close_failed && rc == 0)
    {
        fprintf(stderr, "Error: Write failed\n");
        rc = 3;
    }

    return rc;
}