diff options
author | Roman Artiukhin <bahusdrive@gmail.com> | 2024-12-12 13:33:53 +0200 |
---|---|---|
committer | Solomon Peachy <pizza@shaftnet.org> | 2024-12-15 15:44:57 -0500 |
commit | de8d1437dc29bbfefc488ea29e7870b31204c898 (patch) | |
tree | 5174e858c11e01ade524dfffead8feff145d79f9 | |
parent | 1f548f74e698528109fb4cf542a65b4baf21c8cf (diff) | |
download | rockbox-de8d1437dc.tar.gz rockbox-de8d1437dc.zip |
metadata: asf: Use system utf16decode conversion
Change-Id: I606bf5365c84cbee4badd1ac1cbaace1207834f4
-rw-r--r-- | lib/rbcodec/metadata/asf.c | 92 |
1 files changed, 43 insertions, 49 deletions
diff --git a/lib/rbcodec/metadata/asf.c b/lib/rbcodec/metadata/asf.c
index 833dd62be6..04022732aa 100644
--- a/lib/rbcodec/metadata/asf.c
+++ b/lib/rbcodec/metadata/asf.c
@@ -33,7 +33,7 @@
 #include "metadata_common.h"
 #include "metadata_parsers.h"
 #include <codecs/libasf/asf.h>
-
+#include "rbunicode.h"
 /* TODO: Just read the GUIDs into a 16-byte array, and use memcmp to compare */
 struct guid_s {
     uint32_t v1;
@@ -154,75 +154,69 @@ static int asf_intdecode(int fd, int type, int length)
     return 0;
 }
 
+static int is_valid_utf16(const unsigned char *data, size_t length)
+{
+    if (length < 2) return 0; // Not enough data for even one UTF-16 character
+
+    // Get the last two bytes as a UTF-16 character (little-endian)
+    uint16_t last = data[length - 2] | (data[length - 1] << 8);
+
+    // Check if the last character is a high surrogate
+    if (last >= 0xD800 && last <= 0xDBFF) {
+        return 0; // Invalid if it's the last character
+    }
+
+    // Check if the last character is a low surrogate
+    if (last >= 0xDC00 && last <= 0xDFFF) {
+        if (length < 4) return 0; // Invalid if there's no preceding character
+        uint16_t second_last = data[length - 4] | (data[length - 3] << 8);
+
+        // Invalid if not preceded by a high surrogate
+        return second_last >= 0xD800 && second_last <= 0xDBFF;
+    }
+
+    // If it's not a surrogate, it's valid
+    return 1;
+}
+
 /* Decode a LE utf16 string from a disk buffer into a fixed-sized
    utf8 buffer.
 */
-
 static void asf_utf16LEdecode(int fd,
                               uint16_t utf16bytes,
                               unsigned char **utf8,
                               int* utf8bytes
                              )
 {
-    unsigned long ucs;
+    const int reserve_bytes = 6;
     int n;
-    unsigned char utf16buf[256];
-    unsigned char* utf16 = utf16buf;
-    unsigned char* newutf8;
-
-    n = read(fd, utf16buf, MIN(sizeof(utf16buf), utf16bytes));
-    utf16bytes -= n;
-
-    while (n > 0) {
-        /* Check for a surrogate pair */
-        if (utf16[1] >= 0xD8 && utf16[1] < 0xE0) {
-            if (n < 4) {
-                /* Run out of utf16 bytes, read some more */
-                utf16buf[0] = utf16[0];
-                utf16buf[1] = utf16[1];
-
-                n = read(fd, utf16buf + 2, MIN(sizeof(utf16buf)-2, utf16bytes));
-                utf16 = utf16buf;
-                utf16bytes -= n;
-                n += 2;
-            }
-
-            if (n < 4) {
-                /* Truncated utf16 string, abort */
-                break;
-            }
-            ucs = 0x10000 + ((utf16[0] << 10) | ((utf16[1] - 0xD8) << 18)
-                             | utf16[2] | ((utf16[3] - 0xDC) << 8));
-            utf16 += 4;
-            n -= 4;
-        } else {
-            ucs = (utf16[0] | (utf16[1] << 8));
-            utf16 += 2;
-            n -= 2;
-        }
+    unsigned char utf16buf[258];
+    unsigned char* newutf8 = *utf8;
+    const int utf8bytes_initial = *utf8bytes;
 
-        if (*utf8bytes > 6) {
-            newutf8 = utf8encode(ucs, *utf8);
-            *utf8bytes -= (newutf8 - *utf8);
-            *utf8 += (newutf8 - *utf8);
+    while ((n = read(fd, utf16buf, MIN(sizeof(utf16buf) - 2, utf16bytes))) >= 2)
+    {
+        // If the UTF-16 string ends with an incomplete surrogate pair, try to complete it.
+        if (!is_valid_utf16(utf16buf, n))
+        {
+            n += read(fd, utf16buf + n, 2);
         }
+        newutf8 = utf16decode(utf16buf, newutf8, n>>1, *utf8bytes - reserve_bytes, true);
+        *utf8bytes = utf8bytes_initial - (newutf8 - *utf8);
+        utf16bytes -= n;
 
-        /* We have run out of utf16 bytes, read more if available */
-        if ((n == 0) && (utf16bytes > 0)) {
-            n = read(fd, utf16buf, MIN(sizeof(utf16buf), utf16bytes));
-            utf16 = utf16buf;
-            utf16bytes -= n;
-        }
+        if (*utf8bytes <= reserve_bytes)
+            break;
     }
 
-    *utf8[0] = 0;
+    *newutf8 = 0;
     --*utf8bytes;
+    *utf8 = newutf8;
 
     if (utf16bytes > 0) {
         /* Skip any remaining bytes */
         lseek(fd, utf16bytes, SEEK_CUR);
     }
-    return;
 }
 
 static int asf_parse_header(int fd, struct mp3entry* id3,