Merge 'Always returns Floats for sum and avg on DBSP aggregations' from Glauber Costa

Trying to return integer sometimes to match SQLite led to more problems
that I anticipated. The reason being, we can't *really* match SQLite's
behavior unless we know the type of *every* element in the sum. This is
not impossible, but it is very hard, for very little gain.
Fixes #3831

Closes #3832
This commit is contained in:
Pekka Enberg
2025-10-26 12:28:18 +02:00
committed by GitHub
3 changed files with 153 additions and 151 deletions

View File

@@ -1165,6 +1165,14 @@ impl AggregateState {
}
/// Convert aggregate state to output values
///
/// Note: SQLite returns INTEGER for SUM when all inputs are integers, and REAL when any input is REAL.
/// However, in an incremental system like DBSP, we cannot track whether all current values are integers
/// after deletions. For example:
/// - Initial: SUM(10, 20, 30.5) = 60.5 (REAL)
/// - After DELETE 30.5: SUM(10, 20) = 30 (SQLite returns INTEGER, but we only know the sum is 30.0)
///
/// Therefore, we always return REAL for SUM operations.
pub fn to_values(&self, aggregates: &[AggregateFunction]) -> Vec<Value> {
let mut result = Vec::new();
@@ -1180,21 +1188,12 @@ impl AggregateState {
}
AggregateFunction::Sum(col_idx) => {
let sum = self.sums.get(col_idx).copied().unwrap_or(0.0);
// Return as integer if it's a whole number, otherwise as float
if sum.fract() == 0.0 {
result.push(Value::Integer(sum as i64));
} else {
result.push(Value::Float(sum));
}
result.push(Value::Float(sum));
}
AggregateFunction::SumDistinct(col_idx) => {
// Return the computed SUM(DISTINCT)
let sum = self.distinct_sums.get(col_idx).copied().unwrap_or(0.0);
if sum.fract() == 0.0 {
result.push(Value::Integer(sum as i64));
} else {
result.push(Value::Float(sum));
}
result.push(Value::Float(sum));
}
AggregateFunction::Avg(col_idx) => {
if let Some((sum, count)) = self.avgs.get(col_idx) {

View File

@@ -3381,11 +3381,12 @@ mod tests {
assert_eq!(row.values.len(), 1);
// The hex function converts the number to string first, then to hex
// 96 as string is "96", which in hex is "3936" (hex of ASCII '9' and '6')
// SUM now returns Float, so 96.0 as string is "96.0", which in hex is "39362E30"
// (hex of ASCII '9', '6', '.', '0')
assert_eq!(
row.values[0],
Value::Text("3936".to_string().into()),
"HEX(SUM(age + 2)) should return '3936' for sum of 96"
Value::Text("39362E30".to_string().into()),
"HEX(SUM(age + 2)) should return '39362E30' for sum of 96.0"
);
// Test incremental update: add a new user
@@ -3404,22 +3405,22 @@ mod tests {
let result = test_execute(&mut circuit, input_data, pager.clone()).unwrap();
// Expected: new SUM(age + 2) = 96 + (40+2) = 138
// HEX(138) = hex of "138" = "313338"
// Expected: new SUM(age + 2) = 96.0 + (40+2) = 138.0
// HEX(138.0) = hex of "138.0" = "3133382E30"
assert_eq!(result.changes.len(), 2);
// First change: remove old aggregate (96)
// First change: remove old aggregate (96.0)
let (row, weight) = &result.changes[0];
assert_eq!(*weight, -1);
assert_eq!(row.values[0], Value::Text("3936".to_string().into()));
assert_eq!(row.values[0], Value::Text("39362E30".to_string().into()));
// Second change: add new aggregate (138)
// Second change: add new aggregate (138.0)
let (row, weight) = &result.changes[1];
assert_eq!(*weight, 1);
assert_eq!(
row.values[0],
Value::Text("313338".to_string().into()),
"HEX(SUM(age + 2)) should return '313338' for sum of 138"
Value::Text("3133382E30".to_string().into()),
"HEX(SUM(age + 2)) should return '3133382E30' for sum of 138.0"
);
}
@@ -3467,8 +3468,8 @@ mod tests {
.unwrap();
// Expected results:
// Alice: SUM(25*2 + 35*2) = 50 + 70 = 120, HEX("120") = "313230"
// Bob: SUM(30*2) = 60, HEX("60") = "3630"
// Alice: SUM(25*2 + 35*2) = 50 + 70 = 120.0, HEX("120.0") = "3132302E30"
// Bob: SUM(30*2) = 60.0, HEX("60.0") = "36302E30"
assert_eq!(result.changes.len(), 2);
let results: HashMap<String, String> = result
@@ -3489,13 +3490,13 @@ mod tests {
assert_eq!(
results.get("Alice").unwrap(),
"313230",
"Alice's HEX(SUM(age * 2)) should be '313230' (120)"
"3132302E30",
"Alice's HEX(SUM(age * 2)) should be '3132302E30' (120.0)"
);
assert_eq!(
results.get("Bob").unwrap(),
"3630",
"Bob's HEX(SUM(age * 2)) should be '3630' (60)"
"36302E30",
"Bob's HEX(SUM(age * 2)) should be '36302E30' (60.0)"
);
}
@@ -4812,12 +4813,12 @@ mod tests {
);
// Check the results
let mut results_map: HashMap<String, i64> = HashMap::new();
let mut results_map: HashMap<String, f64> = HashMap::new();
for (row, weight) in result.changes {
assert_eq!(weight, 1);
assert_eq!(row.values.len(), 2); // name and total_quantity
if let (Value::Text(name), Value::Integer(total)) = (&row.values[0], &row.values[1]) {
if let (Value::Text(name), Value::Float(total)) = (&row.values[0], &row.values[1]) {
results_map.insert(name.to_string(), *total);
} else {
panic!("Unexpected value types in result");
@@ -4826,12 +4827,12 @@ mod tests {
assert_eq!(
results_map.get("Alice"),
Some(&10),
Some(&10.0),
"Alice should have total quantity 10"
);
assert_eq!(
results_map.get("Bob"),
Some(&7),
Some(&7.0),
"Bob should have total quantity 7"
);
}
@@ -4928,24 +4929,24 @@ mod tests {
);
// Check the results
let mut results_map: HashMap<String, i64> = HashMap::new();
let mut results_map: HashMap<String, f64> = HashMap::new();
for (row, weight) in result.changes {
assert_eq!(weight, 1);
assert_eq!(row.values.len(), 2); // name and total
if let (Value::Text(name), Value::Integer(total)) = (&row.values[0], &row.values[1]) {
if let (Value::Text(name), Value::Float(total)) = (&row.values[0], &row.values[1]) {
results_map.insert(name.to_string(), *total);
}
}
assert_eq!(
results_map.get("Alice"),
Some(&8),
Some(&8.0),
"Alice should have total 8"
);
assert_eq!(
results_map.get("Charlie"),
Some(&7),
Some(&7.0),
"Charlie should have total 7"
);
assert_eq!(results_map.get("Bob"), None, "Bob should be filtered out");
@@ -5084,7 +5085,7 @@ mod tests {
// Row should have name, product_name, and sum columns
assert_eq!(row.values.len(), 3);
if let (Value::Text(name), Value::Text(product), Value::Integer(total)) =
if let (Value::Text(name), Value::Text(product), Value::Float(total)) =
(&row.values[0], &row.values[1], &row.values[2])
{
let key = format!("{}-{}", name.as_ref(), product.as_ref());
@@ -5092,12 +5093,14 @@ mod tests {
match key.as_str() {
"Alice-Widget" => {
assert_eq!(*total, 9, "Alice should have ordered 9 Widgets total")
assert_eq!(*total, 9.0, "Alice should have ordered 9 Widgets total")
}
"Alice-Gadget" => assert_eq!(*total, 3, "Alice should have ordered 3 Gadgets"),
"Bob-Widget" => assert_eq!(*total, 7, "Bob should have ordered 7 Widgets"),
"Alice-Gadget" => {
assert_eq!(*total, 3.0, "Alice should have ordered 3 Gadgets")
}
"Bob-Widget" => assert_eq!(*total, 7.0, "Bob should have ordered 7 Widgets"),
"Bob-Doohickey" => {
assert_eq!(*total, 2, "Bob should have ordered 2 Doohickeys")
assert_eq!(*total, 2.0, "Bob should have ordered 2 Doohickeys")
}
_ => panic!("Unexpected result: {key}"),
}

View File

@@ -66,9 +66,9 @@ do_execsql_test_on_specific_db {:memory:} matview-aggregation-population {
GROUP BY day;
SELECT * FROM daily_totals ORDER BY day;
} {1|7|2
2|2|2
3|4|2}
} {1|7.0|2
2|2.0|2
3|4.0|2}
do_execsql_test_on_specific_db {:memory:} matview-filter-with-groupby {
CREATE TABLE t(a INTEGER, b INTEGER);
@@ -81,9 +81,9 @@ do_execsql_test_on_specific_db {:memory:} matview-filter-with-groupby {
GROUP BY b;
SELECT * FROM v ORDER BY yourb;
} {3|3|1
6|6|1
7|7|1}
} {3|3.0|1
6|6.0|1
7|7.0|1}
do_execsql_test_on_specific_db {:memory:} matview-insert-maintenance {
CREATE TABLE t(a INTEGER, b INTEGER);
@@ -101,12 +101,12 @@ do_execsql_test_on_specific_db {:memory:} matview-insert-maintenance {
INSERT INTO t VALUES (1,1), (2,2);
SELECT * FROM v ORDER BY b;
} {3|3|1
6|6|1
3|7|2
6|11|2
3|7|2
6|11|2}
} {3|3.0|1
6|6.0|1
3|7.0|2
6|11.0|2
3|7.0|2
6|11.0|2}
do_execsql_test_on_specific_db {:memory:} matview-delete-maintenance {
CREATE TABLE items(id INTEGER, category TEXT, amount INTEGER);
@@ -129,11 +129,11 @@ do_execsql_test_on_specific_db {:memory:} matview-delete-maintenance {
DELETE FROM items WHERE category = 'B';
SELECT * FROM category_sums ORDER BY category;
} {A|90|3
B|60|2
A|60|2
B|60|2
A|60|2}
} {A|90.0|3
B|60.0|2
A|60.0|2
B|60.0|2
A|60.0|2}
do_execsql_test_on_specific_db {:memory:} matview-update-maintenance {
CREATE TABLE records(id INTEGER, value INTEGER, status INTEGER);
@@ -155,12 +155,12 @@ do_execsql_test_on_specific_db {:memory:} matview-update-maintenance {
UPDATE records SET status = 2 WHERE id = 3;
SELECT * FROM status_totals ORDER BY status;
} {1|400|2
2|600|2
1|450|2
2|600|2
1|150|1
2|900|3}
} {1|400.0|2
2|600.0|2
1|450.0|2
2|600.0|2
1|150.0|1
2|900.0|3}
do_execsql_test_on_specific_db {:memory:} matview-integer-primary-key-basic {
CREATE TABLE t(a INTEGER PRIMARY KEY, b INTEGER);
@@ -243,12 +243,12 @@ do_execsql_test_on_specific_db {:memory:} matview-integer-primary-key-with-aggre
DELETE FROM t WHERE a = 3;
SELECT * FROM v ORDER BY b;
} {10|500|1
20|700|2
10|600|2
20|700|2
10|600|2
20|400|1}
} {10|500.0|1
20|700.0|2
10|600.0|2
20|700.0|2
10|600.0|2
20|400.0|1}
do_execsql_test_on_specific_db {:memory:} matview-complex-filter-aggregation {
CREATE TABLE transactions(
@@ -282,17 +282,17 @@ do_execsql_test_on_specific_db {:memory:} matview-complex-filter-aggregation {
DELETE FROM transactions WHERE id = 3;
SELECT * FROM account_deposits ORDER BY account;
} {100|70|2
200|100|1
300|60|1
100|95|3
200|100|1
300|60|1
100|125|3
200|100|1
300|60|1
100|125|3
300|60|1}
} {100|70.0|2
200|100.0|1
300|60.0|1
100|95.0|3
200|100.0|1
300|60.0|1
100|125.0|3
200|100.0|1
300|60.0|1
100|125.0|3
300|60.0|1}
do_execsql_test_on_specific_db {:memory:} matview-sum-count-only {
CREATE TABLE data(id INTEGER, value INTEGER, category INTEGER);
@@ -317,12 +317,12 @@ do_execsql_test_on_specific_db {:memory:} matview-sum-count-only {
UPDATE data SET value = 35 WHERE id = 3;
SELECT * FROM category_stats ORDER BY category;
} {1|80|3
2|70|2
1|85|4
2|70|2
1|85|4
2|75|2}
} {1|80.0|3
2|70.0|2
1|85.0|4
2|70.0|2
1|85.0|4
2|75.0|2}
do_execsql_test_on_specific_db {:memory:} matview-empty-table-population {
CREATE TABLE t(a INTEGER, b INTEGER);
@@ -337,8 +337,8 @@ do_execsql_test_on_specific_db {:memory:} matview-empty-table-population {
INSERT INTO t VALUES (1, 3), (2, 7), (3, 9);
SELECT * FROM v ORDER BY b;
} {0
7|2|1
9|3|1}
7|2.0|1
9|3.0|1}
do_execsql_test_on_specific_db {:memory:} matview-all-rows-filtered {
CREATE TABLE t(a INTEGER, b INTEGER);
@@ -386,17 +386,17 @@ do_execsql_test_on_specific_db {:memory:} matview-mixed-operations-sequence {
INSERT INTO orders VALUES (4, 300, 150);
SELECT * FROM customer_totals ORDER BY customer_id;
} {100|50|1
200|75|1
100|75|2
200|75|1
100|75|2
200|100|1
100|25|1
200|100|1
100|25|1
200|100|1
300|150|1}
} {100|50.0|1
200|75.0|1
100|75.0|2
200|75.0|1
100|75.0|2
200|100.0|1
100|25.0|1
200|100.0|1
100|25.0|1
200|100.0|1
300|150.0|1}
do_execsql_test_on_specific_db {:memory:} matview-projections {
CREATE TABLE t(a,b);
@@ -502,13 +502,13 @@ do_execsql_test_on_specific_db {:memory:} matview-rollback-aggregation {
ROLLBACK;
SELECT * FROM product_totals ORDER BY product_id;
} {1|300|2
2|400|2
1|350|3
2|400|2
3|300|1
1|300|2
2|400|2}
} {1|300.0|2
2|400.0|2
1|350.0|3
2|400.0|2
3|300.0|1
1|300.0|2
2|400.0|2}
do_execsql_test_on_specific_db {:memory:} matview-rollback-mixed-operations {
CREATE TABLE orders(id INTEGER PRIMARY KEY, customer INTEGER, amount INTEGER);
@@ -529,12 +529,12 @@ do_execsql_test_on_specific_db {:memory:} matview-rollback-mixed-operations {
ROLLBACK;
SELECT * FROM customer_totals ORDER BY customer;
} {100|75|2
200|75|1
100|150|2
200|150|1
100|75|2
200|75|1}
} {100|75.0|2
200|75.0|1
100|150.0|2
200|150.0|1
100|75.0|2
200|75.0|1}
do_execsql_test_on_specific_db {:memory:} matview-rollback-filtered-aggregation {
CREATE TABLE transactions(id INTEGER, account INTEGER, amount INTEGER, type TEXT);
@@ -560,11 +560,11 @@ do_execsql_test_on_specific_db {:memory:} matview-rollback-filtered-aggregation
ROLLBACK;
SELECT * FROM deposits ORDER BY account;
} {100|50|1
200|100|1
100|135|2
100|50|1
200|100|1}
} {100|50.0|1
200|100.0|1
100|135.0|2
100|50.0|1
200|100.0|1}
do_execsql_test_on_specific_db {:memory:} matview-rollback-empty-view {
CREATE TABLE t(a INTEGER, b INTEGER);
@@ -619,8 +619,8 @@ do_execsql_test_on_specific_db {:memory:} matview-join-with-aggregation {
GROUP BY u.name;
SELECT * FROM user_totals ORDER BY name;
} {Alice|250
Bob|250}
} {Alice|250.0
Bob|250.0}
do_execsql_test_on_specific_db {:memory:} matview-three-way-join {
CREATE TABLE customers(id INTEGER PRIMARY KEY, name TEXT, city TEXT);
@@ -661,9 +661,9 @@ do_execsql_test_on_specific_db {:memory:} matview-three-way-join-with-aggregatio
GROUP BY c.name, p.name;
SELECT * FROM sales_totals ORDER BY customer_name, product_name;
} {Alice|Gadget|3|60
Alice|Widget|9|90
Bob|Widget|2|20}
} {Alice|Gadget|3.0|60.0
Alice|Widget|9.0|90.0
Bob|Widget|2.0|20.0}
do_execsql_test_on_specific_db {:memory:} matview-join-incremental-insert {
CREATE TABLE users(id INTEGER PRIMARY KEY, name TEXT);
@@ -864,9 +864,9 @@ do_execsql_test_on_specific_db {:memory:} matview-aggregation-before-join {
GROUP BY c.id, c.name, c.tier;
SELECT * FROM customer_order_summary ORDER BY total_quantity DESC;
} {Bob|Silver|2|7
Alice|Gold|3|6
Charlie|Bronze|1|1}
} {Bob|Silver|2|7.0
Alice|Gold|3|6.0
Charlie|Bronze|1|1.0}
# Test 4: Join with aggregation AFTER the join
do_execsql_test_on_specific_db {:memory:} matview-aggregation-after-join {
@@ -894,8 +894,8 @@ do_execsql_test_on_specific_db {:memory:} matview-aggregation-after-join {
GROUP BY st.region;
SELECT * FROM regional_sales ORDER BY total_revenue DESC;
} {North|38|3150
South|18|1500}
} {North|38.0|3150.0
South|18.0|1500.0}
# Test 5: Modifying both tables in same transaction
do_execsql_test_on_specific_db {:memory:} matview-join-both-tables-modified {
@@ -1223,8 +1223,8 @@ do_execsql_test_on_specific_db {:memory:} matview-union-with-aggregation {
FROM q2_sales;
SELECT * FROM half_year_summary ORDER BY quarter;
} {Q1|68|16450
Q2|105|21750}
} {Q1|68.0|16450.0
Q2|105.0|21750.0}
do_execsql_test_on_specific_db {:memory:} matview-union-with-join {
CREATE TABLE customers(id INTEGER PRIMARY KEY, name TEXT, type TEXT);
@@ -1654,8 +1654,8 @@ do_execsql_test_on_specific_db {:memory:} matview-groupby-scalar-function {
GROUP BY substr(orderdate, 1, 4);
SELECT * FROM yearly_totals ORDER BY 1;
} {2020|250
2021|200}
} {2020|250.0
2021|200.0}
do_execsql_test_on_specific_db {:memory:} matview-groupby-alias {
CREATE TABLE orders(id INTEGER, orderdate TEXT, amount INTEGER);
@@ -1669,8 +1669,8 @@ do_execsql_test_on_specific_db {:memory:} matview-groupby-alias {
GROUP BY year;
SELECT * FROM yearly_totals ORDER BY year;
} {2020|250
2021|200}
} {2020|250.0
2021|200.0}
do_execsql_test_on_specific_db {:memory:} matview-groupby-position {
CREATE TABLE orders(id INTEGER, orderdate TEXT, amount INTEGER, nation TEXT);
@@ -1684,8 +1684,8 @@ do_execsql_test_on_specific_db {:memory:} matview-groupby-position {
GROUP BY 1, 2;
SELECT * FROM national_yearly ORDER BY nation, year;
} {UK|2021|200
USA|2020|250}
} {UK|2021|200.0
USA|2020|250.0}
do_execsql_test_on_specific_db {:memory:} matview-groupby-scalar-incremental {
CREATE TABLE orders(id INTEGER, orderdate TEXT, amount INTEGER);
@@ -1701,10 +1701,10 @@ do_execsql_test_on_specific_db {:memory:} matview-groupby-scalar-incremental {
SELECT * FROM yearly_totals;
INSERT INTO orders VALUES (3, '2021-03-20', 200);
SELECT * FROM yearly_totals ORDER BY year;
} {2020|100
2020|250
2020|250
2021|200}
} {2020|100.0
2020|250.0
2020|250.0
2021|200.0}
do_execsql_test_on_specific_db {:memory:} matview-groupby-join-position {
CREATE TABLE t(a INTEGER);
@@ -2301,12 +2301,12 @@ do_execsql_test_on_specific_db {:memory:} matview-sum-distinct {
-- Add a new distinct value
INSERT INTO sales VALUES ('South', 500);
SELECT * FROM sales_summary ORDER BY region;
} {North|300
South|700
North|300
South|700
North|300
South|1200}
} {North|300.0
South|700.0
North|300.0
South|700.0
North|300.0
South|1200.0}
do_execsql_test_on_specific_db {:memory:} matview-avg-distinct {
CREATE TABLE grades(student TEXT, score INTEGER);
@@ -2434,10 +2434,10 @@ do_execsql_test_on_specific_db {:memory:} matview-multiple-distinct-aggregates-w
('B', 7, 80, 800); -- New values
SELECT * FROM multi_distinct ORDER BY grp;
} {A|3|100|250.0
B|3|180|600.0
A|3|100|250.0
B|4|260|650.0}
} {A|3|100.0|250.0
B|3|180.0|600.0
A|3|100.0|250.0
B|4|260.0|650.0}
do_execsql_test_on_specific_db {:memory:} matview-multiple-distinct-aggregates-no-groupby {
CREATE TABLE data2(x INTEGER, y INTEGER, z INTEGER);
@@ -2467,8 +2467,8 @@ do_execsql_test_on_specific_db {:memory:} matview-multiple-distinct-aggregates-n
(7, 80, 800); -- New values
SELECT * FROM multi_distinct_global;
} {6|280|400.0
7|360|450.0}
} {6|280.0|400.0
7|360.0|450.0}
do_execsql_test_on_specific_db {:memory:} matview-count-distinct-global-aggregate {
CREATE TABLE all_data(val INTEGER);