changeset 456:d62c2de0c990

sexpr: use the unicode API instead of open-coding a UTF-8 parser

Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
author Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
date Mon, 02 Apr 2018 14:08:15 -0400
parents 13a1d76bf8c0
children ad64d5f1b038
files sexpr.l
diffstat 1 files changed, 7 insertions(+), 40 deletions(-) [+]
line wrap: on
line diff
--- a/sexpr.l	Mon Apr 02 13:47:36 2018 -0400
+++ b/sexpr.l	Mon Apr 02 14:08:15 2018 -0400
@@ -28,6 +28,7 @@
 
 %{
 #include <jeffpc/buffer.h>
+#include <jeffpc/unicode.h>
 
 #include "sexpr_impl.h"
 #include "sexpr.tab.h"
@@ -62,48 +63,14 @@
 
 static int getutf8(const char *str, size_t len, uint64_t *out)
 {
-	const uint8_t *tmp = (const uint8_t *) str;
-	uint64_t c;
-
-	/* process the first byte */
-	c = *tmp;
+	uint32_t tmp;
+	int ret;
 
-	switch (len) {
-		case 1:
-			ASSERT3U(c, >=, 0x00);
-			ASSERT3U(c, <=, 0x7f);
-			c &= 0x7f;
-			break;
-		case 2:
-			ASSERT3U(c, >=, 0xc0);
-			ASSERT3U(c, <=, 0xdf);
-			c &= 0x1f;
-			break;
-		case 3:
-			ASSERT3U(c, >=, 0xe0);
-			ASSERT3U(c, <=, 0xef);
-			c &= 0x0f;
-			break;
-		case 4:
-			ASSERT3U(c, >=, 0xf0);
-			ASSERT3U(c, <=, 0xf7);
-			c &= 0x07;
-			break;
-		default:
-			return ERROR;
-	}
+	ret = utf8_to_utf32(str, len, &tmp);
+	if (ret != len)
+		return ERROR;
 
-	/* process remaining bytes */
-	for (len--, tmp++; len; len--, tmp++) {
-		uint64_t cur = *tmp;
-
-		ASSERT3U(cur, >=, 0x80);
-		ASSERT3U(cur, <=, 0xbf);
-
-		c = (c << 6) | (*tmp & 0x3f);
-	}
-
-	*out = c;
+	*out = tmp;
 
 	return CHAR;
 }