diff options
author | Roman Artiukhin <bahusdrive@gmail.com> | 2024-12-14 12:54:00 +0200 |
---|---|---|
committer | Solomon Peachy <pizza@shaftnet.org> | 2024-12-15 15:44:57 -0500 |
commit | 1f548f74e698528109fb4cf542a65b4baf21c8cf (patch) | |
tree | fe895b204f821d18d2578dc7d027de24426f66b4 | |
parent | e334a1f95e8149adefbcf1aeccd0be2649106c6a (diff) | |
download | rockbox-1f548f74e6.tar.gz rockbox-1f548f74e6.zip |
unicode: add utf16decode with utf8 buffer size check
Make use of it in id3tags and cuesheet
Change-Id: I153c23f1f7312e9d5e1de9f03725f2d2ab0abc93
-rw-r--r-- | apps/cuesheet.c | 4 | ||||
-rw-r--r-- | firmware/common/unicode.c | 49 | ||||
-rw-r--r-- | firmware/include/rbunicode.h | 1 | ||||
-rw-r--r-- | lib/rbcodec/metadata/id3tags.c | 22 |
4 files changed, 34 insertions, 42 deletions
diff --git a/apps/cuesheet.c b/apps/cuesheet.c index 69b558fa71..7d5e608ada 100644 --- a/apps/cuesheet.c +++ b/apps/cuesheet.c @@ -265,7 +265,7 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue) { if (char_enc == CHAR_ENC_UTF_16_LE) { - s = utf16LEdecode(line, utf16_buf, line_len); + s = utf16decode(line, utf16_buf, line_len>>1, sizeof(utf16_buf) - 1, true); /* terminate the string at the newline */ *s = '\0'; strcpy(line, utf16_buf); @@ -275,7 +275,7 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue) } else if (char_enc == CHAR_ENC_UTF_16_BE) { - s = utf16BEdecode(line, utf16_buf, line_len); + s = utf16decode(line, utf16_buf, line_len>>1, sizeof(utf16_buf) - 1, false); *s = '\0'; strcpy(line, utf16_buf); } diff --git a/firmware/common/unicode.c b/firmware/common/unicode.c index d51ced2ba8..b740967227 100644 --- a/firmware/common/unicode.c +++ b/firmware/common/unicode.c @@ -265,7 +265,7 @@ static unsigned char * utf8encode_internal(unsigned long ucs, unsigned char *utf return utf8; } -static unsigned char* utf8encode_ex(unsigned long ucs, unsigned char *utf8, int* utf8_size) +FORCE_INLINE static unsigned char* utf8encode_ex(unsigned long ucs, unsigned char *utf8, int* utf8_size) { const int tail = utf8_ucs_get_extra_bytes_count(ucs); *utf8_size -= tail + 1; @@ -420,49 +420,46 @@ unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int return utf8; } -/* Recode a UTF-16 string with little-endian byte ordering to UTF-8 */ -unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, - int count) +unsigned char* utf16decode(const unsigned char *utf16, unsigned char *utf8, + int count, int utf8_size, bool le) { + if (utf8_size == -1) + utf8_size = INT_MAX; + + // little-endian flag is used as significant byte index + if (le) + le = 1; + unsigned long ucs; - while (count > 0) { + while (count > 0 && utf8_size > 0) { /* Check for a surrogate pair */ - if (utf16[1] >= 0xD8 && utf16[1] < 0xE0) { - ucs = 0x10000 + ((utf16[0] << 10) | ((utf16[1] - 0xD8) << 18) - | utf16[2] | ((utf16[3] - 0xDC) << 8)); + if (*(utf16 + le) >= 0xD8 && *(utf16 + le) < 0xE0) { + ucs = 0x10000 + ((utf16[1 - le] << 10) | ((utf16[le] - 0xD8) << 18) + | utf16[2 + (1 - le)] | ((utf16[2 + le] - 0xDC) << 8)); utf16 += 4; count -= 2; } else { - ucs = getle16(utf16); + ucs = utf16[le] << 8 | utf16[1 - le]; utf16 += 2; count -= 1; } - utf8 = utf8encode(ucs, utf8); + utf8 = utf8encode_ex(ucs, utf8, &utf8_size); } return utf8; } /* Recode a UTF-16 string with big-endian byte ordering to UTF-8 */ -unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, +unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, int count) { - unsigned long ucs; + return utf16decode(utf16, utf8, count, -1, true); +} - while (count > 0) { - if (*utf16 >= 0xD8 && *utf16 < 0xE0) { /* Check for a surrogate pair */ - ucs = 0x10000 + (((utf16[0] - 0xD8) << 18) | (utf16[1] << 10) - | ((utf16[2] - 0xDC) << 8) | utf16[3]); - utf16 += 4; - count -= 2; - } else { - ucs = getbe16(utf16); - utf16 += 2; - count -= 1; - } - utf8 = utf8encode(ucs, utf8); - } - return utf8; +unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, + int count) +{ + return utf16decode(utf16, utf8, count, -1, false); } bool utf16_has_bom(const unsigned char *utf16, bool *le) diff --git a/firmware/include/rbunicode.h b/firmware/include/rbunicode.h index 02183ed560..e4cd6be2fe 100644 --- a/firmware/include/rbunicode.h +++ b/firmware/include/rbunicode.h @@ -61,6 +61,7 @@ unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, int count); unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, int count); +unsigned char* utf16decode(const unsigned char *utf16, unsigned char *utf8, int count, int utf8_size, bool le); bool utf16_has_bom(const unsigned char *utf16, bool *le); unsigned long utf8length(const unsigned char *utf8); const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs); diff --git a/lib/rbcodec/metadata/id3tags.c b/lib/rbcodec/metadata/id3tags.c index d3743b5d3b..b3e76fac14 100644 --- a/lib/rbcodec/metadata/id3tags.c +++ b/lib/rbcodec/metadata/id3tags.c @@ -574,17 +574,13 @@ static void unicode_munge(unsigned char* string, unsigned char* utf8buf, int *le unsigned char *str = string; unsigned char* utf8 = utf8buf; - int i = 0; - int templen = 0; - switch (str[0]) { case 0x01: /* Unicode with or without BOM */ case 0x02: (*len)--; str++; bool le; - - + int i = 0; /* Handle frames with more than one string (needed for TXXX frames).*/ do { @@ -593,24 +589,22 @@ static void unicode_munge(unsigned char* string, unsigned char* utf8buf, int *le str += BOM_UTF_16_SIZE; *len -= BOM_UTF_16_SIZE; } + string = str; while ((i < *len) && (str[0] || str[1])) { - if(le) - utf8 = utf16LEdecode(str, utf8, 1); - else - utf8 = utf16BEdecode(str, utf8, 1); - str+=2; i += 2; } + utf8 = utf16decode(string, utf8, (str-string)>>1 /*(str-string)/2*/, utf8buf_size, le); *utf8++ = 0; /* Terminate the string */ - templen += (strlen(&utf8buf[templen]) + 1); + utf8buf_size -= utf8 - utf8buf; str += 2; - i+=2; - } while(i < *len); - *len = templen - 1; + i += 2; + } while(i < *len && utf8buf_size > 0); + *len = utf8 - utf8buf - 1; break; + /* case 0x03: UTF-8 encoded string handled by parse_as_utf8 */ case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */ |