diff options
author | Roman Artiukhin <bahusdrive@gmail.com> | 2024-12-11 14:31:38 +0200 |
---|---|---|
committer | Solomon Peachy <pizza@shaftnet.org> | 2024-12-15 09:26:26 -0500 |
commit | 004304dc65ca9dd0d4b56a5ffef67323205600ec (patch) | |
tree | b6409b327dc5aa149845628092b2fdd64fca5b13 | |
parent | f8fa1e7d5ad937e24c872d941042a4e0639e0f03 (diff) | |
download | rockbox-004304dc65.tar.gz rockbox-004304dc65.zip |
unicode: add iso_decode_ex with utf8 buffer size check
Make use of it in id3tags, playlist and cuesheet
Change-Id: Ibc8abc0faf16688bc9b826b7a712d1dfe9bf75b2
-rw-r--r-- | apps/cuesheet.c | 6 | ||||
-rw-r--r-- | apps/playlist.c | 9 | ||||
-rw-r--r-- | firmware/common/unicode.c | 42 | ||||
-rw-r--r-- | firmware/include/rbunicode.h | 2 | ||||
-rw-r--r-- | lib/rbcodec/metadata/id3tags.c | 32 |
5 files changed, 55 insertions, 36 deletions
diff --git a/apps/cuesheet.c b/apps/cuesheet.c index 227ef5902c..69b558fa71 100644 --- a/apps/cuesheet.c +++ b/apps/cuesheet.c @@ -314,7 +314,6 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue) break; size_t count = MAX_NAME*3 + 1; - size_t count8859 = MAX_NAME; switch (option) { @@ -339,7 +338,6 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue) dest = cue->file; count = MAX_PATH; - count8859 = MAX_PATH/3; break; case eCS_TRACK: /*Fall-Through*/ @@ -357,8 +355,8 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue) { if (char_enc == CHAR_ENC_ISO_8859_1) { - dest = iso_decode(string, dest, -1, - MIN(strlen(string), count8859)); + dest = iso_decode_ex(string, dest, -1, + strlen(string), count - 1); *dest = '\0'; } else diff --git a/apps/playlist.c b/apps/playlist.c index fa4ec6bd68..465ebb88a0 100644 --- a/apps/playlist.c +++ b/apps/playlist.c @@ -350,14 +350,7 @@ static int convert_m3u_name(char* buf, int buf_len, int buf_max, char* temp) buf_len = i; dest = temp; - /* Convert char by char, so as to not overflow temp (iso_decode should - * preferably handle this). No more than 4 bytes should be generated for - * each input char. - */ - for (i = 0; i < buf_len && dest < (temp + buf_max - 4); i++) - { - dest = iso_decode(&buf[i], dest, -1, 1); - } + dest = iso_decode_ex(buf, dest, -1, buf_len, buf_max - 1); *dest = 0; strcpy(buf, temp); diff --git a/firmware/common/unicode.c b/firmware/common/unicode.c index 1e719c56eb..d51ced2ba8 100644 --- a/firmware/common/unicode.c +++ b/firmware/common/unicode.c @@ -245,8 +245,8 @@ static int alloc_and_load_cp_table(int cp, void *buf) return -1; } -/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */ -unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8) +/* returns number of additional bytes required in encoded string (bytes_count - 1) */ +static int utf8_ucs_get_extra_bytes_count(unsigned long ucs) { int tail = 0; @@ -254,17 +254,41 @@ unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8) while (ucs >> (5*tail + 6)) tail++; + return tail; +} + +static unsigned char * utf8encode_internal(unsigned long ucs, unsigned char *utf8, int tail) +{ *utf8++ = (ucs >> (6*tail)) | utf8comp[tail]; while (tail--) *utf8++ = ((ucs >> (6*tail)) & (MASK ^ 0xFF)) | COMP; - return utf8; } +static unsigned char* utf8encode_ex(unsigned long ucs, unsigned char *utf8, int* utf8_size) +{ + const int tail = utf8_ucs_get_extra_bytes_count(ucs); + *utf8_size -= tail + 1; + return *utf8_size < 0 ? utf8 : utf8encode_internal(ucs, utf8, tail); +} + +/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */ +unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8) +{ + return utf8encode_internal(ucs, utf8, utf8_ucs_get_extra_bytes_count(ucs)); +} + +unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, int cp, int count) +{ + return iso_decode_ex(iso, utf8, cp, count, -1); +} + /* Recode an iso encoded string to UTF-8 */ -unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, - int cp, int count) +unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int cp, int count, int utf8_size) { + if (utf8_size == -1) + utf8_size = INT_MAX; + uint16_t *table = NULL; cp_lock_enter(); @@ -322,11 +346,14 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, cp_lock_leave(); - while (count--) { + while (count-- && utf8_size > 0) { unsigned short ucs, tmp; if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */ + { *utf8++ = *iso++; + --utf8_size; + } else { /* tid tells us which table to use and how */ @@ -375,7 +402,8 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, if (ucs == 0) /* unknown char, use replacement char */ ucs = 0xfffd; - utf8 = utf8encode(ucs, utf8); + + utf8 = utf8encode_ex(ucs, utf8, &utf8_size); } } diff --git a/firmware/include/rbunicode.h b/firmware/include/rbunicode.h index 6dae7f169f..02183ed560 100644 --- a/firmware/include/rbunicode.h +++ b/firmware/include/rbunicode.h @@ -57,6 +57,8 @@ enum codepages { /* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */ unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8); unsigned char* iso_decode(const unsigned char *latin1, unsigned char *utf8, int cp, int count); +unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int cp, int count, int utf8_size); + unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, int count); unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, int count); bool utf16_has_bom(const unsigned char *utf16, bool *le); diff --git a/lib/rbcodec/metadata/id3tags.c b/lib/rbcodec/metadata/id3tags.c index 26614608e9..b138d752d3 100644 --- a/lib/rbcodec/metadata/id3tags.c +++ b/lib/rbcodec/metadata/id3tags.c @@ -568,22 +568,16 @@ static bool parse_as_utf8(char* string, int *len) /* Must be called after parse_as_utf8. Checks to see if the passed in string is a 16-bit wide Unicode v2 string. If it is, we convert it to a UTF-8 string. If it's not unicode, - we convert from the default codepage */ -static void unicode_munge(char* string, char* utf8buf, int *len) { + we convert from the default codepage + NOTE: real UTF-8 buffer size is expected to be utf8buf_size + 1 (additional byte for string terminator) */ +static void unicode_munge(unsigned char* string, unsigned char* utf8buf, int *len, int utf8buf_size) { + unsigned char *str = string; + unsigned char* utf8 = utf8buf; + int i = 0; - unsigned char *str = (unsigned char *)string; int templen = 0; - unsigned char* utf8 = (unsigned char *)utf8buf; switch (str[0]) { - case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */ - str++; - (*len)--; - utf8 = iso_decode(str, utf8, -1, *len); - *utf8 = 0; - *len = (intptr_t)utf8 - (intptr_t)utf8buf; - break; - case 0x01: /* Unicode with or without BOM */ case 0x02: (*len)--; @@ -618,10 +612,15 @@ static void unicode_munge(char* string, char* utf8buf, int *len) { *len = templen - 1; break; /* case 0x03: UTF-8 encoded string handled by parse_as_utf8 */ + + case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */ + str++; + (*len)--; + //fallthrough default: /* Plain old string */ - utf8 = iso_decode(str, utf8, -1, *len); + utf8 = iso_decode_ex(str, utf8, -1, *len, utf8buf_size); *utf8 = 0; - *len = (intptr_t)utf8 - (intptr_t)utf8buf; + *len = utf8 - utf8buf; break; } } @@ -1103,10 +1102,9 @@ retry_with_limit: { //limit stack allocation to avoid stack overflow utf8_size = ID3V2_BUF_SIZE; - bytesread = ID3V2_BUF_SIZE/3; } - char utf8buf[utf8_size + 1]; - unicode_munge( tag, utf8buf, &bytesread); + unsigned char utf8buf[utf8_size + 1]; + unicode_munge( (unsigned char *)tag, utf8buf, &bytesread, utf8_size); if(bytesread >= buffersize - bufferpos) bytesread = buffersize - bufferpos - 1; |