From f1bea50e1ded9a3fbd0ffcd92975ecef26eab14c Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Wed, 25 Nov 2020 10:39:08 +1030
Subject: [PATCH] common/utils: simple utf8 helpers.

Signed-off-by: Rusty Russell
---
 common/utils.c | 37 +++++++++++++++++++++++++++++++++++++
 common/utils.h |  6 ++++++
 2 files changed, 43 insertions(+)

diff --git a/common/utils.c b/common/utils.c
index 4c99c7b1e..96e0b1188 100644
--- a/common/utils.c
+++ b/common/utils.c
@@ -3,6 +3,8 @@
 #include
 #include
 #include
+#include /* NOTE(review): header name missing here; presumably <ccan/utf8/utf8.h> for utf8_decode - confirm */
+#include /* NOTE(review): header name missing here; presumably <errno.h> for errno - confirm */
 #include
 
 const tal_t *wally_tal_ctx;
@@ -173,3 +175,38 @@ void *tal_dup_talarr_(const tal_t *ctx, const tal_t *src TAKES, const char *labe
 	}
 	return tal_dup_(ctx, src, 1, tal_bytelen(src), 0, label);
 }
+
+/* Return true iff the buflen bytes at vbuf are complete, valid UTF-8; an empty buffer returns true. */
+bool utf8_check(const void *vbuf, size_t buflen)
+{
+	const u8 *buf = vbuf;
+	struct utf8_state utf8_state = UTF8_STATE_INIT;
+	bool need_more = false; /* true while the decoder is part-way through a sequence */
+
+	for (size_t i = 0; i < buflen; i++) {
+		if (!utf8_decode(&utf8_state, buf[i])) { /* false: decoder wants another byte */
+			need_more = true;
+			continue;
+		}
+		need_more = false;
+		if (errno != 0) /* NOTE(review): relies on utf8_decode setting errno (0 on success) whenever it returns true - confirm */
+			return false;
+	}
+	return !need_more; /* input that ends inside an unfinished sequence is rejected */
+}
+
+char *utf8_str(const tal_t *ctx, const u8 *buf TAKES, size_t buflen)
+{
+	char *ret; /* result: the buflen bytes of buf followed by a nul terminator */
+
+	if (!utf8_check(buf, buflen)) { /* not valid UTF-8: the caller gets NULL */
+		if (taken(buf)) /* buf was handed over to us, so release it on this failure path */
+			tal_free(buf);
+		return NULL;
+	}
+
+	/* Duplicate buflen bytes plus one extra element, which becomes the nul terminator. */
+	ret = tal_dup_arr(ctx, char, (const char *)buf, buflen, 1);
+	ret[buflen] = '\0';
+	return ret;
+}
diff --git a/common/utils.h b/common/utils.h
index fa63c6c88..851825e20 100644
--- a/common/utils.h
+++ b/common/utils.h
@@ -72,6 +72,12 @@ void tal_arr_remove_(void *p, size_t elemsize, size_t n);
 void *tal_dup_talarr_(const tal_t *ctx, const tal_t *src TAKES,
 		      const char *label);
 
+/* Return true iff the buflen bytes at buf are complete, valid UTF-8 (an empty buffer counts as valid). */
+bool utf8_check(const void *buf, size_t buflen);
+
+/* If buf is valid UTF-8, return a nul-terminated copy (or same if TAKES); else NULL (a taken buf is freed). */
+char *utf8_str(const tal_t *ctx, const u8 *buf TAKES, size_t buflen);
+
 /* Use the POSIX C locale. */
 void setup_locale(void);
 