summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Roman Artiukhin <bahusdrive@gmail.com> 2024-12-14 12:54:00 +0200
committer Solomon Peachy <pizza@shaftnet.org> 2024-12-15 15:44:57 -0500
commit 1f548f74e698528109fb4cf542a65b4baf21c8cf (patch)
tree fe895b204f821d18d2578dc7d027de24426f66b4
parent e334a1f95e8149adefbcf1aeccd0be2649106c6a (diff)
download rockbox-1f548f74e6.tar.gz
rockbox-1f548f74e6.zip
unicode: add utf16decode with utf8 buffer size check

Make use of it in id3tags and cuesheet

Change-Id: I153c23f1f7312e9d5e1de9f03725f2d2ab0abc93
-rw-r--r-- apps/cuesheet.c 4
-rw-r--r-- firmware/common/unicode.c 49
-rw-r--r-- firmware/include/rbunicode.h 1
-rw-r--r-- lib/rbcodec/metadata/id3tags.c 22
4 files changed, 34 insertions, 42 deletions
diff --git a/apps/cuesheet.c b/apps/cuesheet.c
index 69b558fa71..7d5e608ada 100644
--- a/apps/cuesheet.c
+++ b/apps/cuesheet.c
@@ -265,7 +265,7 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue)
{
if (char_enc == CHAR_ENC_UTF_16_LE)
{
- s = utf16LEdecode(line, utf16_buf, line_len);
+ s = utf16decode(line, utf16_buf, line_len>>1, sizeof(utf16_buf) - 1, true);
/* terminate the string at the newline */
*s = '\0';
strcpy(line, utf16_buf);
@@ -275,7 +275,7 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue)
}
else if (char_enc == CHAR_ENC_UTF_16_BE)
{
- s = utf16BEdecode(line, utf16_buf, line_len);
+ s = utf16decode(line, utf16_buf, line_len>>1, sizeof(utf16_buf) - 1, false);
*s = '\0';
strcpy(line, utf16_buf);
}
diff --git a/firmware/common/unicode.c b/firmware/common/unicode.c
index d51ced2ba8..b740967227 100644
--- a/firmware/common/unicode.c
+++ b/firmware/common/unicode.c
@@ -265,7 +265,7 @@ static unsigned char * utf8encode_internal(unsigned long ucs, unsigned char *utf
return utf8;
}
-static unsigned char* utf8encode_ex(unsigned long ucs, unsigned char *utf8, int* utf8_size)
+FORCE_INLINE static unsigned char* utf8encode_ex(unsigned long ucs, unsigned char *utf8, int* utf8_size)
{
const int tail = utf8_ucs_get_extra_bytes_count(ucs);
*utf8_size -= tail + 1;
@@ -420,49 +420,46 @@ unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int
return utf8;
}
-/* Recode a UTF-16 string with little-endian byte ordering to UTF-8 */
-unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8,
- int count)
+unsigned char* utf16decode(const unsigned char *utf16, unsigned char *utf8,
+ int count, int utf8_size, bool le)
{
+ if (utf8_size == -1)
+ utf8_size = INT_MAX;
+
+ // little-endian flag is used as significant byte index
+ if (le)
+ le = 1;
+
unsigned long ucs;
- while (count > 0) {
+ while (count > 0 && utf8_size > 0) {
/* Check for a surrogate pair */
- if (utf16[1] >= 0xD8 && utf16[1] < 0xE0) {
- ucs = 0x10000 + ((utf16[0] << 10) | ((utf16[1] - 0xD8) << 18)
- | utf16[2] | ((utf16[3] - 0xDC) << 8));
+ if (*(utf16 + le) >= 0xD8 && *(utf16 + le) < 0xE0) {
+ ucs = 0x10000 + ((utf16[1 - le] << 10) | ((utf16[le] - 0xD8) << 18)
+ | utf16[2 + (1 - le)] | ((utf16[2 + le] - 0xDC) << 8));
utf16 += 4;
count -= 2;
} else {
- ucs = getle16(utf16);
+ ucs = utf16[le] << 8 | utf16[1 - le];
utf16 += 2;
count -= 1;
}
- utf8 = utf8encode(ucs, utf8);
+ utf8 = utf8encode_ex(ucs, utf8, &utf8_size);
}
return utf8;
}
/* Recode a UTF-16 string with big-endian byte ordering to UTF-8 */
-unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8,
+unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8,
int count)
{
- unsigned long ucs;
+ return utf16decode(utf16, utf8, count, -1, true);
+}
- while (count > 0) {
- if (*utf16 >= 0xD8 && *utf16 < 0xE0) { /* Check for a surrogate pair */
- ucs = 0x10000 + (((utf16[0] - 0xD8) << 18) | (utf16[1] << 10)
- | ((utf16[2] - 0xDC) << 8) | utf16[3]);
- utf16 += 4;
- count -= 2;
- } else {
- ucs = getbe16(utf16);
- utf16 += 2;
- count -= 1;
- }
- utf8 = utf8encode(ucs, utf8);
- }
- return utf8;
+unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8,
+ int count)
+{
+ return utf16decode(utf16, utf8, count, -1, false);
}
bool utf16_has_bom(const unsigned char *utf16, bool *le)
diff --git a/firmware/include/rbunicode.h b/firmware/include/rbunicode.h
index 02183ed560..e4cd6be2fe 100644
--- a/firmware/include/rbunicode.h
+++ b/firmware/include/rbunicode.h
@@ -61,6 +61,7 @@ unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int
unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
+unsigned char* utf16decode(const unsigned char *utf16, unsigned char *utf8, int count, int utf8_size, bool le);
bool utf16_has_bom(const unsigned char *utf16, bool *le);
unsigned long utf8length(const unsigned char *utf8);
const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs);
diff --git a/lib/rbcodec/metadata/id3tags.c b/lib/rbcodec/metadata/id3tags.c
index d3743b5d3b..b3e76fac14 100644
--- a/lib/rbcodec/metadata/id3tags.c
+++ b/lib/rbcodec/metadata/id3tags.c
@@ -574,17 +574,13 @@ static void unicode_munge(unsigned char* string, unsigned char* utf8buf, int *le
unsigned char *str = string;
unsigned char* utf8 = utf8buf;
- int i = 0;
- int templen = 0;
-
switch (str[0]) {
case 0x01: /* Unicode with or without BOM */
case 0x02:
(*len)--;
str++;
bool le;
-
-
+ int i = 0;
/* Handle frames with more than one string
(needed for TXXX frames).*/
do {
@@ -593,24 +589,22 @@ static void unicode_munge(unsigned char* string, unsigned char* utf8buf, int *le
str += BOM_UTF_16_SIZE;
*len -= BOM_UTF_16_SIZE;
}
+ string = str;
while ((i < *len) && (str[0] || str[1])) {
- if(le)
- utf8 = utf16LEdecode(str, utf8, 1);
- else
- utf8 = utf16BEdecode(str, utf8, 1);
-
str+=2;
i += 2;
}
+ utf8 = utf16decode(string, utf8, (str-string)>>1 /*(str-string)/2*/, utf8buf_size, le);
*utf8++ = 0; /* Terminate the string */
- templen += (strlen(&utf8buf[templen]) + 1);
+ utf8buf_size -= utf8 - utf8buf;
str += 2;
- i+=2;
- } while(i < *len);
- *len = templen - 1;
+ i += 2;
+ } while(i < *len && utf8buf_size > 0);
+ *len = utf8 - utf8buf - 1;
break;
+
/* case 0x03: UTF-8 encoded string handled by parse_as_utf8 */
case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */