From e68a86532a2830cdc98a3656805f8029358efa0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Ko=C5=82odziej?= Date: Tue, 10 Dec 2024 22:47:20 +0100 Subject: [PATCH 1/2] tests: length function with multibyte characters Depending on the encoding, some characters occupy more than one byte. Add a failing test to verify whether the current implementation of the scalar function `length` takes that into account. --- testing/scalar-functions.test | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/testing/scalar-functions.test b/testing/scalar-functions.test index 64a9b9e5f..da7fc39ba 100755 --- a/testing/scalar-functions.test +++ b/testing/scalar-functions.test @@ -343,6 +343,10 @@ do_execsql_test length-text { SELECT length('limbo'); } {5} +do_execsql_test lenght-text-utf8-chars { + SELECT length('ąłóżźć'); +} {6} + do_execsql_test length-integer { SELECT length(12345); } {5} From 660d3e8d070d1ba099dfafe8cf6e87f055ad980f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Ko=C5=82odziej?= Date: Tue, 10 Dec 2024 22:48:50 +0100 Subject: [PATCH 2/2] fix: count characters in string in length function The `length` function should count characters, not bytes. https://www.sqlite.org/lang_corefunc.html#length --- core/vdbe/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index a42b449cd..191d66684 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2537,7 +2537,7 @@ fn exec_lower(reg: &OwnedValue) -> Option { fn exec_length(reg: &OwnedValue) -> OwnedValue { match reg { OwnedValue::Text(_) | OwnedValue::Integer(_) | OwnedValue::Float(_) => { - OwnedValue::Integer(reg.to_string().len() as i64) + OwnedValue::Integer(reg.to_string().chars().count() as i64) } OwnedValue::Blob(blob) => OwnedValue::Integer(blob.len() as i64), OwnedValue::Agg(aggctx) => exec_length(aggctx.final_value()),