summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Roman Artiukhin <bahusdrive@gmail.com> 2024-12-11 14:31:38 +0200
committer: Solomon Peachy <pizza@shaftnet.org> 2024-12-15 09:26:26 -0500
commit: 004304dc65ca9dd0d4b56a5ffef67323205600ec (patch)
tree: b6409b327dc5aa149845628092b2fdd64fca5b13
parent: f8fa1e7d5ad937e24c872d941042a4e0639e0f03 (diff)
download: rockbox-004304dc65.tar.gz
          rockbox-004304dc65.zip
unicode: add iso_decode_ex with utf8 buffer size check

Make use of it in id3tags, playlist and cuesheet.

Change-Id: Ibc8abc0faf16688bc9b826b7a712d1dfe9bf75b2
-rw-r--r-- apps/cuesheet.c 6
-rw-r--r-- apps/playlist.c 9
-rw-r--r-- firmware/common/unicode.c 42
-rw-r--r-- firmware/include/rbunicode.h 2
-rw-r--r-- lib/rbcodec/metadata/id3tags.c 32
5 files changed, 55 insertions, 36 deletions
diff --git a/apps/cuesheet.c b/apps/cuesheet.c
index 227ef5902c..69b558fa71 100644
--- a/apps/cuesheet.c
+++ b/apps/cuesheet.c
@@ -314,7 +314,6 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue)
break;
size_t count = MAX_NAME*3 + 1;
- size_t count8859 = MAX_NAME;
switch (option)
{
@@ -339,7 +338,6 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue)
dest = cue->file;
count = MAX_PATH;
- count8859 = MAX_PATH/3;
break;
case eCS_TRACK:
/*Fall-Through*/
@@ -357,8 +355,8 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue)
{
if (char_enc == CHAR_ENC_ISO_8859_1)
{
- dest = iso_decode(string, dest, -1,
- MIN(strlen(string), count8859));
+ dest = iso_decode_ex(string, dest, -1,
+ strlen(string), count - 1);
*dest = '\0';
}
else
diff --git a/apps/playlist.c b/apps/playlist.c
index fa4ec6bd68..465ebb88a0 100644
--- a/apps/playlist.c
+++ b/apps/playlist.c
@@ -350,14 +350,7 @@ static int convert_m3u_name(char* buf, int buf_len, int buf_max, char* temp)
buf_len = i;
dest = temp;
- /* Convert char by char, so as to not overflow temp (iso_decode should
- * preferably handle this). No more than 4 bytes should be generated for
- * each input char.
- */
- for (i = 0; i < buf_len && dest < (temp + buf_max - 4); i++)
- {
- dest = iso_decode(&buf[i], dest, -1, 1);
- }
+ dest = iso_decode_ex(buf, dest, -1, buf_len, buf_max - 1);
*dest = 0;
strcpy(buf, temp);
diff --git a/firmware/common/unicode.c b/firmware/common/unicode.c
index 1e719c56eb..d51ced2ba8 100644
--- a/firmware/common/unicode.c
+++ b/firmware/common/unicode.c
@@ -245,8 +245,8 @@ static int alloc_and_load_cp_table(int cp, void *buf)
return -1;
}
-/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */
-unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8)
+/* returns number of additional bytes required in encoded string (bytes_count - 1) */
+static int utf8_ucs_get_extra_bytes_count(unsigned long ucs)
{
int tail = 0;
@@ -254,17 +254,41 @@ unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8)
while (ucs >> (5*tail + 6))
tail++;
+ return tail;
+}
+
+static unsigned char * utf8encode_internal(unsigned long ucs, unsigned char *utf8, int tail)
+{ /* Store ucs at utf8 as one leading byte followed by `tail` further bytes
+   * (tail + 1 bytes in total) and return the pointer just past the last byte
+   * written. This helper takes no buffer size, so it performs no bounds
+   * check; the caller has to guarantee that tail + 1 bytes are available. */
*utf8++ = (ucs >> (6*tail)) | utf8comp[tail]; /* first byte: highest bits of ucs combined with the utf8comp[tail] prefix */
while (tail--)
*utf8++ = ((ucs >> (6*tail)) & (MASK ^ 0xFF)) | COMP; /* following byte: bits of ucs at offset 6*tail, masked and tagged with COMP */
-
return utf8;
}
+static unsigned char* utf8encode_ex(unsigned long ucs, unsigned char *utf8, int* utf8_size)
+{ /* Size-checked front end to utf8encode_internal.
+   * *utf8_size is the remaining output budget in bytes. It is always reduced
+   * by the encoded length of ucs (tail + 1), even when nothing gets written.
+   * If the budget becomes negative the character did not fit: no byte is
+   * stored and utf8 is returned unchanged. Otherwise the character is encoded
+   * and the pointer just past it is returned.
+   * A negative budget also stops the decode loop in iso_decode_ex, which only
+   * continues while utf8_size > 0. */
+ const int tail = utf8_ucs_get_extra_bytes_count(ucs);
+ *utf8_size -= tail + 1;
+ return *utf8_size < 0 ? utf8 : utf8encode_internal(ucs, utf8, tail);
+}
+
+/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char.
+ * The number of bytes written is derived from ucs itself through
+ * utf8_ucs_get_extra_bytes_count(); no buffer size is passed in or checked,
+ * so the caller must make sure enough room is available at utf8. */
+unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8)
+{
+ return utf8encode_internal(ucs, utf8, utf8_ucs_get_extra_bytes_count(ucs));
+}
+
+unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, int cp, int count)
+{ /* Variant of iso_decode_ex without an output size limit: it forwards all
+   * arguments and passes utf8_size = -1, which iso_decode_ex replaces with
+   * INT_MAX. The result is whatever iso_decode_ex returns. */
+ return iso_decode_ex(iso, utf8, cp, count, -1);
+}
+
/* Recode an iso encoded string to UTF-8 */
-unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
- int cp, int count)
+unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int cp, int count, int utf8_size)
{
+ if (utf8_size == -1)
+ utf8_size = INT_MAX;
+
uint16_t *table = NULL;
cp_lock_enter();
@@ -322,11 +346,14 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
cp_lock_leave();
- while (count--) {
+ while (count-- && utf8_size > 0) {
unsigned short ucs, tmp;
if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */
+ {
*utf8++ = *iso++;
+ --utf8_size;
+ }
else {
/* tid tells us which table to use and how */
@@ -375,7 +402,8 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
if (ucs == 0) /* unknown char, use replacement char */
ucs = 0xfffd;
- utf8 = utf8encode(ucs, utf8);
+
+ utf8 = utf8encode_ex(ucs, utf8, &utf8_size);
}
}
diff --git a/firmware/include/rbunicode.h b/firmware/include/rbunicode.h
index 6dae7f169f..02183ed560 100644
--- a/firmware/include/rbunicode.h
+++ b/firmware/include/rbunicode.h
@@ -57,6 +57,8 @@ enum codepages {
/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */
unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8);
unsigned char* iso_decode(const unsigned char *latin1, unsigned char *utf8, int cp, int count);
+unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int cp, int count, int utf8_size);
+
unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
bool utf16_has_bom(const unsigned char *utf16, bool *le);
diff --git a/lib/rbcodec/metadata/id3tags.c b/lib/rbcodec/metadata/id3tags.c
index 26614608e9..b138d752d3 100644
--- a/lib/rbcodec/metadata/id3tags.c
+++ b/lib/rbcodec/metadata/id3tags.c
@@ -568,22 +568,16 @@ static bool parse_as_utf8(char* string, int *len)
/* Must be called after parse_as_utf8. Checks to see if the passed in string is a 16-bit wide Unicode v2
string. If it is, we convert it to a UTF-8 string. If it's not unicode,
- we convert from the default codepage */
-static void unicode_munge(char* string, char* utf8buf, int *len) {
+ we convert from the default codepage
+ NOTE: real UTF-8 buffer size is expected to be utf8buf_size + 1 (additional byte for string terminator) */
+static void unicode_munge(unsigned char* string, unsigned char* utf8buf, int *len, int utf8buf_size) {
+ unsigned char *str = string;
+ unsigned char* utf8 = utf8buf;
+
int i = 0;
- unsigned char *str = (unsigned char *)string;
int templen = 0;
- unsigned char* utf8 = (unsigned char *)utf8buf;
switch (str[0]) {
- case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */
- str++;
- (*len)--;
- utf8 = iso_decode(str, utf8, -1, *len);
- *utf8 = 0;
- *len = (intptr_t)utf8 - (intptr_t)utf8buf;
- break;
-
case 0x01: /* Unicode with or without BOM */
case 0x02:
(*len)--;
@@ -618,10 +612,15 @@ static void unicode_munge(char* string, char* utf8buf, int *len) {
*len = templen - 1;
break;
/* case 0x03: UTF-8 encoded string handled by parse_as_utf8 */
+
+ case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */
+ str++;
+ (*len)--;
+ //fallthrough
default: /* Plain old string */
- utf8 = iso_decode(str, utf8, -1, *len);
+ utf8 = iso_decode_ex(str, utf8, -1, *len, utf8buf_size);
*utf8 = 0;
- *len = (intptr_t)utf8 - (intptr_t)utf8buf;
+ *len = utf8 - utf8buf;
break;
}
}
@@ -1103,10 +1102,9 @@ retry_with_limit:
{
//limit stack allocation to avoid stack overflow
utf8_size = ID3V2_BUF_SIZE;
- bytesread = ID3V2_BUF_SIZE/3;
}
- char utf8buf[utf8_size + 1];
- unicode_munge( tag, utf8buf, &bytesread);
+ unsigned char utf8buf[utf8_size + 1];
+ unicode_munge( (unsigned char *)tag, utf8buf, &bytesread, utf8_size);
if(bytesread >= buffersize - bufferpos)
bytesread = buffersize - bufferpos - 1;