From 1ccd61088ecf00dca573033626dac098c7463e67 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 24 Oct 2025 14:13:53 -0500 Subject: [PATCH] Always returns Floats for sum and avg on DBSP aggregations Trying to return integer sometimes to match SQLite led to more problems that I anticipated. The reason being, we can't *really* match SQLite's behavior unless we know the type of *every* element in the sum. This is not impossible, but it is very hard, for very little gain. Fixes #3831 --- core/incremental/aggregate_operator.rs | 21 ++- core/incremental/compiler.rs | 61 +++---- testing/materialized_views.test | 222 ++++++++++++------------- 3 files changed, 153 insertions(+), 151 deletions(-) diff --git a/core/incremental/aggregate_operator.rs b/core/incremental/aggregate_operator.rs index a24c4437c..a6d0fb5b8 100644 --- a/core/incremental/aggregate_operator.rs +++ b/core/incremental/aggregate_operator.rs @@ -1165,6 +1165,14 @@ impl AggregateState { } /// Convert aggregate state to output values + /// + /// Note: SQLite returns INTEGER for SUM when all inputs are integers, and REAL when any input is REAL. + /// However, in an incremental system like DBSP, we cannot track whether all current values are integers + /// after deletions. For example: + /// - Initial: SUM(10, 20, 30.5) = 60.5 (REAL) + /// - After DELETE 30.5: SUM(10, 20) = 30 (SQLite returns INTEGER, but we only know the sum is 30.0) + /// + /// Therefore, we always return REAL for SUM operations. pub fn to_values(&self, aggregates: &[AggregateFunction]) -> Vec { let mut result = Vec::new(); @@ -1180,21 +1188,12 @@ impl AggregateState { } AggregateFunction::Sum(col_idx) => { let sum = self.sums.get(col_idx).copied().unwrap_or(0.0); - // Return as integer if it's a whole number, otherwise as float - if sum.fract() == 0.0 { - result.push(Value::Integer(sum as i64)); - } else { - result.push(Value::Float(sum)); - } + result.push(Value::Float(sum)); } AggregateFunction::SumDistinct(col_idx) => { // Return the computed SUM(DISTINCT) let sum = self.distinct_sums.get(col_idx).copied().unwrap_or(0.0); - if sum.fract() == 0.0 { - result.push(Value::Integer(sum as i64)); - } else { - result.push(Value::Float(sum)); - } + result.push(Value::Float(sum)); } AggregateFunction::Avg(col_idx) => { if let Some((sum, count)) = self.avgs.get(col_idx) { diff --git a/core/incremental/compiler.rs b/core/incremental/compiler.rs index 316f31742..d977d65de 100644 --- a/core/incremental/compiler.rs +++ b/core/incremental/compiler.rs @@ -3381,11 +3381,12 @@ mod tests { assert_eq!(row.values.len(), 1); // The hex function converts the number to string first, then to hex - // 96 as string is "96", which in hex is "3936" (hex of ASCII '9' and '6') + // SUM now returns Float, so 96.0 as string is "96.0", which in hex is "39362E30" + // (hex of ASCII '9', '6', '.', '0') assert_eq!( row.values[0], - Value::Text("3936".to_string().into()), - "HEX(SUM(age + 2)) should return '3936' for sum of 96" + Value::Text("39362E30".to_string().into()), + "HEX(SUM(age + 2)) should return '39362E30' for sum of 96.0" ); // Test incremental update: add a new user @@ -3404,22 +3405,22 @@ mod tests { let result = test_execute(&mut circuit, input_data, pager.clone()).unwrap(); - // Expected: new SUM(age + 2) = 96 + (40+2) = 138 - // HEX(138) = hex of "138" = "313338" + // Expected: new SUM(age + 2) = 96.0 + (40+2) = 138.0 + // HEX(138.0) = hex of "138.0" = "3133382E30" assert_eq!(result.changes.len(), 2); - // First change: remove old aggregate (96) + // First change: remove old aggregate (96.0) let (row, weight) = &result.changes[0]; assert_eq!(*weight, -1); - assert_eq!(row.values[0], Value::Text("3936".to_string().into())); + assert_eq!(row.values[0], Value::Text("39362E30".to_string().into())); - // Second change: add new aggregate (138) + // Second change: add new aggregate (138.0) let (row, weight) = &result.changes[1]; assert_eq!(*weight, 1); assert_eq!( row.values[0], - Value::Text("313338".to_string().into()), - "HEX(SUM(age + 2)) should return '313338' for sum of 138" + Value::Text("3133382E30".to_string().into()), + "HEX(SUM(age + 2)) should return '3133382E30' for sum of 138.0" ); } @@ -3467,8 +3468,8 @@ mod tests { .unwrap(); // Expected results: - // Alice: SUM(25*2 + 35*2) = 50 + 70 = 120, HEX("120") = "313230" - // Bob: SUM(30*2) = 60, HEX("60") = "3630" + // Alice: SUM(25*2 + 35*2) = 50 + 70 = 120.0, HEX("120.0") = "3132302E30" + // Bob: SUM(30*2) = 60.0, HEX("60.0") = "36302E30" assert_eq!(result.changes.len(), 2); let results: HashMap = result @@ -3489,13 +3490,13 @@ mod tests { assert_eq!( results.get("Alice").unwrap(), - "313230", - "Alice's HEX(SUM(age * 2)) should be '313230' (120)" + "3132302E30", + "Alice's HEX(SUM(age * 2)) should be '3132302E30' (120.0)" ); assert_eq!( results.get("Bob").unwrap(), - "3630", - "Bob's HEX(SUM(age * 2)) should be '3630' (60)" + "36302E30", + "Bob's HEX(SUM(age * 2)) should be '36302E30' (60.0)" ); } @@ -4812,12 +4813,12 @@ mod tests { ); // Check the results - let mut results_map: HashMap = HashMap::new(); + let mut results_map: HashMap = HashMap::new(); for (row, weight) in result.changes { assert_eq!(weight, 1); assert_eq!(row.values.len(), 2); // name and total_quantity - if let (Value::Text(name), Value::Integer(total)) = (&row.values[0], &row.values[1]) { + if let (Value::Text(name), Value::Float(total)) = (&row.values[0], &row.values[1]) { results_map.insert(name.to_string(), *total); } else { panic!("Unexpected value types in result"); @@ -4826,12 +4827,12 @@ mod tests { assert_eq!( results_map.get("Alice"), - Some(&10), + Some(&10.0), "Alice should have total quantity 10" ); assert_eq!( results_map.get("Bob"), - Some(&7), + Some(&7.0), "Bob should have total quantity 7" ); } @@ -4928,24 +4929,24 @@ mod tests { ); // Check the results - let mut results_map: HashMap = HashMap::new(); + let mut results_map: HashMap = HashMap::new(); for (row, weight) in result.changes { assert_eq!(weight, 1); assert_eq!(row.values.len(), 2); // name and total - if let (Value::Text(name), Value::Integer(total)) = (&row.values[0], &row.values[1]) { + if let (Value::Text(name), Value::Float(total)) = (&row.values[0], &row.values[1]) { results_map.insert(name.to_string(), *total); } } assert_eq!( results_map.get("Alice"), - Some(&8), + Some(&8.0), "Alice should have total 8" ); assert_eq!( results_map.get("Charlie"), - Some(&7), + Some(&7.0), "Charlie should have total 7" ); assert_eq!(results_map.get("Bob"), None, "Bob should be filtered out"); @@ -5084,7 +5085,7 @@ mod tests { // Row should have name, product_name, and sum columns assert_eq!(row.values.len(), 3); - if let (Value::Text(name), Value::Text(product), Value::Integer(total)) = + if let (Value::Text(name), Value::Text(product), Value::Float(total)) = (&row.values[0], &row.values[1], &row.values[2]) { let key = format!("{}-{}", name.as_ref(), product.as_ref()); @@ -5092,12 +5093,14 @@ mod tests { match key.as_str() { "Alice-Widget" => { - assert_eq!(*total, 9, "Alice should have ordered 9 Widgets total") + assert_eq!(*total, 9.0, "Alice should have ordered 9 Widgets total") } - "Alice-Gadget" => assert_eq!(*total, 3, "Alice should have ordered 3 Gadgets"), - "Bob-Widget" => assert_eq!(*total, 7, "Bob should have ordered 7 Widgets"), + "Alice-Gadget" => { + assert_eq!(*total, 3.0, "Alice should have ordered 3 Gadgets") + } + "Bob-Widget" => assert_eq!(*total, 7.0, "Bob should have ordered 7 Widgets"), "Bob-Doohickey" => { - assert_eq!(*total, 2, "Bob should have ordered 2 Doohickeys") + assert_eq!(*total, 2.0, "Bob should have ordered 2 Doohickeys") } _ => panic!("Unexpected result: {key}"), } diff --git a/testing/materialized_views.test b/testing/materialized_views.test index e2bfd11bd..2827f30d4 100755 --- a/testing/materialized_views.test +++ b/testing/materialized_views.test @@ -66,9 +66,9 @@ do_execsql_test_on_specific_db {:memory:} matview-aggregation-population { GROUP BY day; SELECT * FROM daily_totals ORDER BY day; -} {1|7|2 -2|2|2 -3|4|2} +} {1|7.0|2 +2|2.0|2 +3|4.0|2} do_execsql_test_on_specific_db {:memory:} matview-filter-with-groupby { CREATE TABLE t(a INTEGER, b INTEGER); @@ -81,9 +81,9 @@ do_execsql_test_on_specific_db {:memory:} matview-filter-with-groupby { GROUP BY b; SELECT * FROM v ORDER BY yourb; -} {3|3|1 -6|6|1 -7|7|1} +} {3|3.0|1 +6|6.0|1 +7|7.0|1} do_execsql_test_on_specific_db {:memory:} matview-insert-maintenance { CREATE TABLE t(a INTEGER, b INTEGER); @@ -101,12 +101,12 @@ do_execsql_test_on_specific_db {:memory:} matview-insert-maintenance { INSERT INTO t VALUES (1,1), (2,2); SELECT * FROM v ORDER BY b; -} {3|3|1 -6|6|1 -3|7|2 -6|11|2 -3|7|2 -6|11|2} +} {3|3.0|1 +6|6.0|1 +3|7.0|2 +6|11.0|2 +3|7.0|2 +6|11.0|2} do_execsql_test_on_specific_db {:memory:} matview-delete-maintenance { CREATE TABLE items(id INTEGER, category TEXT, amount INTEGER); @@ -129,11 +129,11 @@ do_execsql_test_on_specific_db {:memory:} matview-delete-maintenance { DELETE FROM items WHERE category = 'B'; SELECT * FROM category_sums ORDER BY category; -} {A|90|3 -B|60|2 -A|60|2 -B|60|2 -A|60|2} +} {A|90.0|3 +B|60.0|2 +A|60.0|2 +B|60.0|2 +A|60.0|2} do_execsql_test_on_specific_db {:memory:} matview-update-maintenance { CREATE TABLE records(id INTEGER, value INTEGER, status INTEGER); @@ -155,12 +155,12 @@ do_execsql_test_on_specific_db {:memory:} matview-update-maintenance { UPDATE records SET status = 2 WHERE id = 3; SELECT * FROM status_totals ORDER BY status; -} {1|400|2 -2|600|2 -1|450|2 -2|600|2 -1|150|1 -2|900|3} +} {1|400.0|2 +2|600.0|2 +1|450.0|2 +2|600.0|2 +1|150.0|1 +2|900.0|3} do_execsql_test_on_specific_db {:memory:} matview-integer-primary-key-basic { CREATE TABLE t(a INTEGER PRIMARY KEY, b INTEGER); @@ -243,12 +243,12 @@ do_execsql_test_on_specific_db {:memory:} matview-integer-primary-key-with-aggre DELETE FROM t WHERE a = 3; SELECT * FROM v ORDER BY b; -} {10|500|1 -20|700|2 -10|600|2 -20|700|2 -10|600|2 -20|400|1} +} {10|500.0|1 +20|700.0|2 +10|600.0|2 +20|700.0|2 +10|600.0|2 +20|400.0|1} do_execsql_test_on_specific_db {:memory:} matview-complex-filter-aggregation { CREATE TABLE transactions( @@ -282,17 +282,17 @@ do_execsql_test_on_specific_db {:memory:} matview-complex-filter-aggregation { DELETE FROM transactions WHERE id = 3; SELECT * FROM account_deposits ORDER BY account; -} {100|70|2 -200|100|1 -300|60|1 -100|95|3 -200|100|1 -300|60|1 -100|125|3 -200|100|1 -300|60|1 -100|125|3 -300|60|1} +} {100|70.0|2 +200|100.0|1 +300|60.0|1 +100|95.0|3 +200|100.0|1 +300|60.0|1 +100|125.0|3 +200|100.0|1 +300|60.0|1 +100|125.0|3 +300|60.0|1} do_execsql_test_on_specific_db {:memory:} matview-sum-count-only { CREATE TABLE data(id INTEGER, value INTEGER, category INTEGER); @@ -317,12 +317,12 @@ do_execsql_test_on_specific_db {:memory:} matview-sum-count-only { UPDATE data SET value = 35 WHERE id = 3; SELECT * FROM category_stats ORDER BY category; -} {1|80|3 -2|70|2 -1|85|4 -2|70|2 -1|85|4 -2|75|2} +} {1|80.0|3 +2|70.0|2 +1|85.0|4 +2|70.0|2 +1|85.0|4 +2|75.0|2} do_execsql_test_on_specific_db {:memory:} matview-empty-table-population { CREATE TABLE t(a INTEGER, b INTEGER); @@ -337,8 +337,8 @@ do_execsql_test_on_specific_db {:memory:} matview-empty-table-population { INSERT INTO t VALUES (1, 3), (2, 7), (3, 9); SELECT * FROM v ORDER BY b; } {0 -7|2|1 -9|3|1} +7|2.0|1 +9|3.0|1} do_execsql_test_on_specific_db {:memory:} matview-all-rows-filtered { CREATE TABLE t(a INTEGER, b INTEGER); @@ -386,17 +386,17 @@ do_execsql_test_on_specific_db {:memory:} matview-mixed-operations-sequence { INSERT INTO orders VALUES (4, 300, 150); SELECT * FROM customer_totals ORDER BY customer_id; -} {100|50|1 -200|75|1 -100|75|2 -200|75|1 -100|75|2 -200|100|1 -100|25|1 -200|100|1 -100|25|1 -200|100|1 -300|150|1} +} {100|50.0|1 +200|75.0|1 +100|75.0|2 +200|75.0|1 +100|75.0|2 +200|100.0|1 +100|25.0|1 +200|100.0|1 +100|25.0|1 +200|100.0|1 +300|150.0|1} do_execsql_test_on_specific_db {:memory:} matview-projections { CREATE TABLE t(a,b); @@ -502,13 +502,13 @@ do_execsql_test_on_specific_db {:memory:} matview-rollback-aggregation { ROLLBACK; SELECT * FROM product_totals ORDER BY product_id; -} {1|300|2 -2|400|2 -1|350|3 -2|400|2 -3|300|1 -1|300|2 -2|400|2} +} {1|300.0|2 +2|400.0|2 +1|350.0|3 +2|400.0|2 +3|300.0|1 +1|300.0|2 +2|400.0|2} do_execsql_test_on_specific_db {:memory:} matview-rollback-mixed-operations { CREATE TABLE orders(id INTEGER PRIMARY KEY, customer INTEGER, amount INTEGER); @@ -529,12 +529,12 @@ do_execsql_test_on_specific_db {:memory:} matview-rollback-mixed-operations { ROLLBACK; SELECT * FROM customer_totals ORDER BY customer; -} {100|75|2 -200|75|1 -100|150|2 -200|150|1 -100|75|2 -200|75|1} +} {100|75.0|2 +200|75.0|1 +100|150.0|2 +200|150.0|1 +100|75.0|2 +200|75.0|1} do_execsql_test_on_specific_db {:memory:} matview-rollback-filtered-aggregation { CREATE TABLE transactions(id INTEGER, account INTEGER, amount INTEGER, type TEXT); @@ -560,11 +560,11 @@ do_execsql_test_on_specific_db {:memory:} matview-rollback-filtered-aggregation ROLLBACK; SELECT * FROM deposits ORDER BY account; -} {100|50|1 -200|100|1 -100|135|2 -100|50|1 -200|100|1} +} {100|50.0|1 +200|100.0|1 +100|135.0|2 +100|50.0|1 +200|100.0|1} do_execsql_test_on_specific_db {:memory:} matview-rollback-empty-view { CREATE TABLE t(a INTEGER, b INTEGER); @@ -619,8 +619,8 @@ do_execsql_test_on_specific_db {:memory:} matview-join-with-aggregation { GROUP BY u.name; SELECT * FROM user_totals ORDER BY name; -} {Alice|250 -Bob|250} +} {Alice|250.0 +Bob|250.0} do_execsql_test_on_specific_db {:memory:} matview-three-way-join { CREATE TABLE customers(id INTEGER PRIMARY KEY, name TEXT, city TEXT); @@ -661,9 +661,9 @@ do_execsql_test_on_specific_db {:memory:} matview-three-way-join-with-aggregatio GROUP BY c.name, p.name; SELECT * FROM sales_totals ORDER BY customer_name, product_name; -} {Alice|Gadget|3|60 -Alice|Widget|9|90 -Bob|Widget|2|20} +} {Alice|Gadget|3.0|60.0 +Alice|Widget|9.0|90.0 +Bob|Widget|2.0|20.0} do_execsql_test_on_specific_db {:memory:} matview-join-incremental-insert { CREATE TABLE users(id INTEGER PRIMARY KEY, name TEXT); @@ -864,9 +864,9 @@ do_execsql_test_on_specific_db {:memory:} matview-aggregation-before-join { GROUP BY c.id, c.name, c.tier; SELECT * FROM customer_order_summary ORDER BY total_quantity DESC; -} {Bob|Silver|2|7 -Alice|Gold|3|6 -Charlie|Bronze|1|1} +} {Bob|Silver|2|7.0 +Alice|Gold|3|6.0 +Charlie|Bronze|1|1.0} # Test 4: Join with aggregation AFTER the join do_execsql_test_on_specific_db {:memory:} matview-aggregation-after-join { @@ -894,8 +894,8 @@ do_execsql_test_on_specific_db {:memory:} matview-aggregation-after-join { GROUP BY st.region; SELECT * FROM regional_sales ORDER BY total_revenue DESC; -} {North|38|3150 -South|18|1500} +} {North|38.0|3150.0 +South|18.0|1500.0} # Test 5: Modifying both tables in same transaction do_execsql_test_on_specific_db {:memory:} matview-join-both-tables-modified { @@ -1223,8 +1223,8 @@ do_execsql_test_on_specific_db {:memory:} matview-union-with-aggregation { FROM q2_sales; SELECT * FROM half_year_summary ORDER BY quarter; -} {Q1|68|16450 -Q2|105|21750} +} {Q1|68.0|16450.0 +Q2|105.0|21750.0} do_execsql_test_on_specific_db {:memory:} matview-union-with-join { CREATE TABLE customers(id INTEGER PRIMARY KEY, name TEXT, type TEXT); @@ -1654,8 +1654,8 @@ do_execsql_test_on_specific_db {:memory:} matview-groupby-scalar-function { GROUP BY substr(orderdate, 1, 4); SELECT * FROM yearly_totals ORDER BY 1; -} {2020|250 -2021|200} +} {2020|250.0 +2021|200.0} do_execsql_test_on_specific_db {:memory:} matview-groupby-alias { CREATE TABLE orders(id INTEGER, orderdate TEXT, amount INTEGER); @@ -1669,8 +1669,8 @@ do_execsql_test_on_specific_db {:memory:} matview-groupby-alias { GROUP BY year; SELECT * FROM yearly_totals ORDER BY year; -} {2020|250 -2021|200} +} {2020|250.0 +2021|200.0} do_execsql_test_on_specific_db {:memory:} matview-groupby-position { CREATE TABLE orders(id INTEGER, orderdate TEXT, amount INTEGER, nation TEXT); @@ -1684,8 +1684,8 @@ do_execsql_test_on_specific_db {:memory:} matview-groupby-position { GROUP BY 1, 2; SELECT * FROM national_yearly ORDER BY nation, year; -} {UK|2021|200 -USA|2020|250} +} {UK|2021|200.0 +USA|2020|250.0} do_execsql_test_on_specific_db {:memory:} matview-groupby-scalar-incremental { CREATE TABLE orders(id INTEGER, orderdate TEXT, amount INTEGER); @@ -1701,10 +1701,10 @@ do_execsql_test_on_specific_db {:memory:} matview-groupby-scalar-incremental { SELECT * FROM yearly_totals; INSERT INTO orders VALUES (3, '2021-03-20', 200); SELECT * FROM yearly_totals ORDER BY year; -} {2020|100 -2020|250 -2020|250 -2021|200} +} {2020|100.0 +2020|250.0 +2020|250.0 +2021|200.0} do_execsql_test_on_specific_db {:memory:} matview-groupby-join-position { CREATE TABLE t(a INTEGER); @@ -2301,12 +2301,12 @@ do_execsql_test_on_specific_db {:memory:} matview-sum-distinct { -- Add a new distinct value INSERT INTO sales VALUES ('South', 500); SELECT * FROM sales_summary ORDER BY region; -} {North|300 -South|700 -North|300 -South|700 -North|300 -South|1200} +} {North|300.0 +South|700.0 +North|300.0 +South|700.0 +North|300.0 +South|1200.0} do_execsql_test_on_specific_db {:memory:} matview-avg-distinct { CREATE TABLE grades(student TEXT, score INTEGER); @@ -2434,10 +2434,10 @@ do_execsql_test_on_specific_db {:memory:} matview-multiple-distinct-aggregates-w ('B', 7, 80, 800); -- New values SELECT * FROM multi_distinct ORDER BY grp; -} {A|3|100|250.0 -B|3|180|600.0 -A|3|100|250.0 -B|4|260|650.0} +} {A|3|100.0|250.0 +B|3|180.0|600.0 +A|3|100.0|250.0 +B|4|260.0|650.0} do_execsql_test_on_specific_db {:memory:} matview-multiple-distinct-aggregates-no-groupby { CREATE TABLE data2(x INTEGER, y INTEGER, z INTEGER); @@ -2467,8 +2467,8 @@ do_execsql_test_on_specific_db {:memory:} matview-multiple-distinct-aggregates-n (7, 80, 800); -- New values SELECT * FROM multi_distinct_global; -} {6|280|400.0 -7|360|450.0} +} {6|280.0|400.0 +7|360.0|450.0} do_execsql_test_on_specific_db {:memory:} matview-count-distinct-global-aggregate { CREATE TABLE all_data(val INTEGER);