Mercurial > libjeffpc
changeset 456:d62c2de0c990
sexpr: use the unicode API instead of open-coding a UTF-8 parser
Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
author | Josef 'Jeff' Sipek <jeffpc@josefsipek.net> |
---|---|
date | Mon, 02 Apr 2018 14:08:15 -0400 |
parents | 13a1d76bf8c0 |
children | ad64d5f1b038 |
files | sexpr.l |
diffstat | 1 files changed, 7 insertions(+), 40 deletions(-) [+] |
line wrap: on
line diff
--- a/sexpr.l	Mon Apr 02 13:47:36 2018 -0400
+++ b/sexpr.l	Mon Apr 02 14:08:15 2018 -0400
@@ -28,6 +28,7 @@
 
 %{
 #include <jeffpc/buffer.h>
+#include <jeffpc/unicode.h>
 
 #include "sexpr_impl.h"
 #include "sexpr.tab.h"
@@ -62,48 +63,14 @@
 
 static int getutf8(const char *str, size_t len, uint64_t *out)
 {
-	const uint8_t *tmp = (const uint8_t *) str;
-	uint64_t c;
-
-	/* process the first byte */
-	c = *tmp;
+	uint32_t tmp;
+	int ret;
 
-	switch (len) {
-	case 1:
-		ASSERT3U(c, >=, 0x00);
-		ASSERT3U(c, <=, 0x7f);
-		c &= 0x7f;
-		break;
-	case 2:
-		ASSERT3U(c, >=, 0xc0);
-		ASSERT3U(c, <=, 0xdf);
-		c &= 0x1f;
-		break;
-	case 3:
-		ASSERT3U(c, >=, 0xe0);
-		ASSERT3U(c, <=, 0xef);
-		c &= 0x0f;
-		break;
-	case 4:
-		ASSERT3U(c, >=, 0xf0);
-		ASSERT3U(c, <=, 0xf7);
-		c &= 0x07;
-		break;
-	default:
-		return ERROR;
-	}
+	ret = utf8_to_utf32(str, len, &tmp);
+	if (ret != len)
+		return ERROR;
 
-	/* process remaining bytes */
-	for (len--, tmp++; len; len--, tmp++) {
-		uint64_t cur = *tmp;
-
-		ASSERT3U(cur, >=, 0x80);
-		ASSERT3U(cur, <=, 0xbf);
-
-		c = (c << 6) | (*tmp & 0x3f);
-	}
-
-	*out = c;
+	*out = tmp;
 
 	return CHAR;
 }
 