Commit Graph

1088 Commits

Author SHA1 Message Date
Pekka Enberg
2bf5eb84cf Merge 'Prevent misuse of subqueries that return multiple columns' from Jussi Saurio
Closes #3892
Closes #3888
Stuff like:
```sql
turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select case (select y, z from t2) when 1 then 'one' else 'other' end from t1;
  × Parse error: base expression in CASE must return 1 value

turso>     create table t(x, y);
    insert into t values (1, 2);
    select (select x, y from t) as result;
  × Parse error: result column must return 1 value, got 2

turso>     create table t1(x,y);
    create table t2(y);
    insert into t1 values (1,1);
    insert into t2 values (1);
    select * from t2 where y = (select x,y from t1);
  × Parse error: all arguments to binary operator = must return the same number of
  │ values. Got: (1) = (2)

turso>     create table orders(customer_id, amount);
    create table thresholds(min_amount, max_amount);
    insert into orders values (100, 50), (100, 150);
    insert into thresholds values (100, 200);
    select customer_id, sum(amount) as total 
    from orders 
    group by customer_id 
    having total > (select min_amount, max_amount from thresholds);
  × Parse error: all arguments to binary operator > must return the same number of
  │ values. Got: (1) > (2)

turso>     create table items(id);
    create table config(max_results, other_col);
    insert into items values (1), (2), (3);
    insert into config values (2, 3);
    select * from items limit (select max_results, other_col from config);
  × Parse error: limit expression must return 1 value, got 2

turso>     create table items(id);
    create table config(skip_count, other_col);
    insert into items values (1), (2), (3);
    insert into config values (1, 2);
    select * from items limit 1 offset (select skip_count, other_col from config);
  × Parse error: offset expression must return 1 value, got 2

turso>     create table items(id, name);
    create table sort_order(priority, other_col);
    insert into items values (1, 'a'), (2, 'b');
    insert into sort_order values (1, 2);
    select * from items order by (select priority, other_col from sort_order);
  × Parse error: order by expression must return 1 value, got 2

turso>     create table sales(product_id, amount);
    create table grouping(category, other_col);
    insert into sales values (1, 100), (2, 200);
    insert into grouping values (1, 2);
    select sum(amount) from sales group by (select category, other_col from grouping);
  × Parse error: group by expression must return 1 value, got 2

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select case when (select y, z from t2) then 'yes' else 'no' end from t1;
  × Parse error: when expression in CASE must return 1 value. Got: (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select case when x = 1 then (select y, z from t2) else 0 end from t1;
  × Parse error: then expression in CASE must return 1 value. Got: (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select case when x = 2 then 0 else (select y, z from t2) end from t1;
  × Parse error: else expression in CASE must return 1 value. Got: (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select max((select y, z from t2)) from t1;
  × Parse error: argument 0 to function call max must return 1 value. Got: (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select x + (select y, z from t2) from t1;
  × Parse error: all arguments to binary operator + must return the same number of
  │ values. Got: (1) + (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (5);
    insert into t2 values (1, 2);
    select * from t1 where x between (select y, z from t2) and 10;
  × Parse error: all arguments to binary operator <= must return the same number of
  │ values. Got: (2) <= (1)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select cast((select y, z from t2) as integer) from t1;
  × Parse error: argument to CAST must return 1 value. Got: (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values ('a', 'b');
    select (select y, z from t2) collate nocase from t1;
  × Parse error: argument to COLLATE must return 1 value. Got: (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select * from t1 where (select y, z from t2) is null;
  × Parse error: all arguments to binary operator IS must return the same number of
  │ values. Got: (2) IS (1)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select * from t1 where (select y, z from t2) not null;
  × Parse error: argument to NOT NULL must return 1 value. Got: (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values ('a', 'b');
    select * from t1 where (select y, z from t2) like 'a%';
  × Parse error: left operand of LIKE must return 1 value. Got: (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select -(select y, z from t2) from t1;
  × Parse error: argument to unary operator - must return 1 value. Got: (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select abs((select y, z from t2)) from t1;
  × Parse error: argument 0 to function call abs must return 1 value. Got: (2)
  ```

Closes #3906
2025-11-03 13:06:38 +02:00
Jussi Saurio
1c2a8e62ca Fix: return error on provided insert column count mismatch 2025-11-03 11:41:50 +02:00
Jussi Saurio
005d922ab4 Fix: prevent misuse of subqueries that return multiple columns 2025-11-03 11:04:09 +02:00
pedrocarlo
0eb0fc5c22 add tests 2025-11-02 11:27:05 -03:00
Pekka Enberg
c091f94de8 Merge 'Fix INSERT UNION ALL' from Duy Dang
Close #3849
Close #3855

Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com>

Closes #3877
2025-11-01 11:12:38 +02:00
Pekka Enberg
7283f35a29 Merge 'Fix LEFT JOIN subqueries reusing stale right-side values' from Duy Dang
Close #3867

Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com>

Closes #3874
2025-11-01 11:12:24 +02:00
Duy Dang
e0f6b7cffe Fix INSERT handling for compound VALUES sources 2025-11-01 02:27:42 +07:00
Duy Dang
4b18e3bab5 Fix VALUES UNION ALL register reuse during INSERTs 2025-11-01 02:01:30 +07:00
Duy Dang
3ee47a2c3c Fix LEFT JOIN subqueries reusing stale right-side values 2025-11-01 01:24:31 +07:00
RS2007
bdf720d205 adding regression test for duplicate cte 2025-10-31 23:15:11 +05:30
Duy Dang
733dc762ed Fix self-insert SUM when table uses INTEGER PRIMARY KEY 2025-10-31 03:34:10 +07:00
Jussi Saurio
7e65657ab0 Add 'make test-single'
e.g. `make test-single TEST=subquery.test`

Plus: chmod +x to all tcl tests in testing folder
2025-10-30 11:38:56 +02:00
Jussi Saurio
6cf2072b51 translate: disallow correlated subqueries in HAVING and ORDER BY
These are supported by SQLite, but we cannot handle them correctly yet.
2025-10-29 15:37:19 +02:00
Jussi Saurio
29fe3b585a Add more tests and disable correlated IN-subqueries in HAVING position
I discovered a flaw in our current translation that makes queries of type
HAVING foo IN (SELECT ...) not work properly - in these cases we need to
defer translation of the subquery until later.

I will fix this in a future PR because I suspect it's not trivial.
2025-10-29 09:57:55 +02:00
Jussi Saurio
5fa73679f3 Add TCL tests for subqueries in all positions of a SELECT 2025-10-28 13:11:12 +02:00
Jussi Saurio
3294b78051 Initialize LIMIT after after ORDER BY / GROUP BY initialization
Currently LIMIT 0 jumps to "after the main loop", and it is done
before ORDER BY and GROUP BY cursor have had a chance to be initialized,
which causes a panic.

Simplest fix for now is to delay the LIMIT initialization.
2025-10-28 13:08:05 +02:00
Jussi Saurio
82995b4264 Add subquery TCL tests 2025-10-27 16:10:49 +02:00
Pekka Enberg
7d035f27d8 Merge 'Strict numeric cast for op_must_be_int' from bit-aloo
closes: #3302

Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com>

Closes #3771
2025-10-26 16:42:35 +02:00
Glauber Costa
1ccd61088e Always returns Floats for sum and avg on DBSP aggregations
Trying to return integer sometimes to match SQLite led to more problems
that I anticipated. The reason being, we can't *really* match SQLite's
behavior unless we know the type of *every* element in the sum. This is
not impossible, but it is very hard, for very little gain.

Fixes #3831
2025-10-24 14:13:53 -05:00
Pekka Enberg
f85ba9198f Merge 'Add DISTINCT support to aggregate operator' from Glauber Costa
Implements COUNT/SUM/AVG(DISTINCT) and SELECT DISTINCT for materialized
views. To do this we have to keep a list of the actual distinct values
(similarly to how we do for min/max). We then update the operator (and
issue deltas) only when there is a state transition (for example, if we
already count the value x = 1, and we see an insert for x = 1, we do
nothing).
SELECT DISTINCT (with no aggregator) is similar. We already have to keep
a list of the values anyway to power the aggregates. So we just issue
new deltas based on the transition, without updating the aggregator.

Closes #3808
2025-10-24 18:47:11 +03:00
bit-aloo
b2769afffd add test 2025-10-24 16:08:15 +05:30
Jussi Saurio
18e6a23f23 Fix foreign key constraint enforcement on UNIQUE indexes
Closes #3648

Co-authored-by: Pavan-Nambi <pavannambi999@gmail.com>
2025-10-24 11:03:55 +03:00
Jussi Saurio
ae22468d8b Merge 'Order by heap sort' from Nikita Sivukhin
This PR implements simple heap-sort approach for query plans like
`SELECT ... FROM t WHERE ... ORDER BY ... LIMIT N` in order to maintain
small set of top N elements in the ephemeral B-tree and avoid sort and
materialization of whole dataset.
I removed all optimizations not related to this particular change in
order to make branch lightweight.

Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com>

Closes #3726
2025-10-23 15:00:42 +03:00
Glauber Costa
92751e621b Add DISTINCT support to aggregate operator
Implements COUNT/SUM/AVG(DISTINCT) and SELECT DISTINCT for materialized views.
To do this we have to keep a list of the actual distinct values
(similarly to how we do for min/max). We then update the operator (and
issue deltas) only when there is a state transition (for example, if we
already count the value x = 1, and we see an insert for x = 1, we do
nothing).

SELECT DISTINCT (with no aggregator) is similar. We already have to keep
a list of the values anyway to power the aggregates. So we just issue
new deltas based on the transition, without updating the aggregator.
2025-10-22 16:32:18 -05:00
Jussi Saurio
ffc601b4b0 Merge 'Return better syntax error messages' from Diego Reis
Current error messages are too "low level", e.g returning tokens in
messages. This PR improves this a bit.
Before:
```text
 turso> with t as (select * from pragma_schema_version); select c.schema_version from t as c;

  × unexpected token at SourceSpan { offset: SourceOffset(47), length: 1 }
   ╭────
 1 │ with t as (select * from pragma_schema_version); select c.schema_version from t as c;
   ·                                                ┬
   ·                                                ╰── here
   ╰────
  help: expected [TK_SELECT, TK_VALUES, TK_UPDATE, TK_DELETE, TK_INSERT, TK_REPLACE] but found TK_SEMI
```
Now:
```text
 turso> with t as (select * from pragma_schema_version); select c.schema_version from t as c;

  × unexpected token ';' at offset 47
   ╭────
 1 │ with t as (select * from pragma_schema_version);select c.schema_version from t as c;
   ·                                                ┬
   ·                                                ╰── here
   ╰────
  help: expected SELECT, VALUES, UPDATE, DELETE, INSERT, or REPLACE but found ';'
  ```
@TcMits WDYT?

Closes #3190
2025-10-22 10:57:54 +03:00
Nikita Sivukhin
bf77862fab Merge branch 'main' into order-by-heap-sort 2025-10-22 11:44:55 +04:00
PThorpe92
2f401c0bcc Add regression tcl test for #3796 default bool col constraints 2025-10-21 21:22:09 -04:00
Pekka Enberg
eb835c39d4 Merge 'core/vdbe: fix ALTER COLUMN to propagate constraints to other table references' from Preston Thorpe
with fix to ensure `PRIMARY KEY` isn't written twice

Closes #3798
2025-10-21 20:05:41 +03:00
Pekka Enberg
d2d995a9c0 Merge 'Make sure explicit column aliases have binding precedence in orderby' from Pavan Nambi
closes https://github.com/tursodatabase/turso/issues/3684

Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com>

Closes #3709
2025-10-21 19:04:42 +03:00
PThorpe92
c48d7a0963 Add tcl tests for alter column fixes 2025-10-21 10:47:08 -04:00
Pekka Enberg
6139dde081 Revert "Merge 'core/translate: fix ALTER COLUMN to propagate other constraint references' from Preston Thorpe"
This reverts commit 1151f49ff4, reversing
changes made to f4da2194f4.
2025-10-21 16:00:04 +03:00
Pekka Enberg
1151f49ff4 Merge 'core/translate: fix ALTER COLUMN to propagate other constraint references' from Preston Thorpe
closes #3666
and probably other issues i'll have to go digging through to see if
there is any others.
<img width="948" height="445" alt="image" src="https://github.com/user-
attachments/assets/2844e09b-109a-4a70-bd18-d8a814e49ea0" />
Any ALTER COLUMN stmt will now update the constraints on the table
(primary key, foreign key, unique)

Closes #3776
2025-10-21 11:53:42 +03:00
PThorpe92
fe3a4de0ab Add TCL tests for altering columns that have foreign keys 2025-10-18 13:38:11 -04:00
PThorpe92
b837232b13 Remove tests that alter testing.db from views.test 2025-10-18 12:05:33 -04:00
PThorpe92
79c5234122 Add TCL test for self ambiguous join 2025-10-16 16:43:08 -04:00
PThorpe92
959df4c4bc Add TCL test for rowid from clause subquery panic 2025-10-16 13:40:10 -04:00
Preston Thorpe
79e4176163 Merge 'cli: .tables and .indexes to show data from attached tables aswell' from Konstantinos Artopoulos
Closes #3545

Reviewed-by: Preston Thorpe <preston@turso.tech>

Closes #3627
2025-10-16 11:42:29 -04:00
PThorpe92
3112f55e05 Add TCL tests for INSERT OR IGNORE handling 2025-10-15 22:51:10 -04:00
Pavan-Nambi
2df3d3ee0c add more tests 2025-10-15 21:51:52 +05:30
Nikita Sivukhin
a2dbaafe69 add explicit test for multiple rows heap-sort bug 2025-10-15 17:27:22 +04:00
Jussi Saurio
d7a719418e Fix: outer CTEs should be available in subqueries 2025-10-15 15:15:55 +03:00
Jussi Saurio
4e6f373e3d Merge 'Fix: Evaluating expression in LIMIT and OFFSET clauses.' from
Closes #3687 .
Previously, the `try_fold_expr_to_i64` function casted `NULL` as `0`
when evaluating expressions in `LIMIT` or `OFFSET` clauses. I removed
this function since evaluating the expression directly and relying on
the MustBeInt operation for casting seems to handle everything.

Closes #3695
2025-10-15 10:36:36 +03:00
Jussi Saurio
25cf56b8e8 Fix expected error message 2025-10-15 09:41:44 +03:00
Pekka Enberg
ab2911b370 Merge 'Fix change counter incrementation' from Jussi Saurio
We merged two concurrent fixes to `nchange` handling last night and
AFAICT the fix in #3692 was incorrect because it doesn't count UPDATEs
in cases where the original row was DELETEd as part of the UPDATE
statement.
The correct fix was in 87434b8
EDIT: okay, it's not strictly _incorrect_ in #3692 I guess, I just think
it's more intuitive to increment the change for UPDATE in the `insert`
opcode because that's what performs the actual update.

Closes #3735
2025-10-15 09:28:17 +03:00
Jussi Saurio
b1cb897216 Merge 'Fix another "should have been rewritten" translation panic' from Jussi Saurio
Closes #2158

Closes #3702
2025-10-15 09:25:01 +03:00
Jussi Saurio
2791f2f479 Fix change counter incrementation
We merged two concurrent fixes to `nchange` handling last night and
AFAICT the fix in #3692 was incorrect because it doesn't count UPDATEs
in cases where the original row was DELETEd as part of the UPDATE
statement.

The correct fix was in 87434b8
2025-10-15 08:51:27 +03:00
Preston Thorpe
74bbb0d5a3 Merge 'Allow using indexes to iterate rows in UPDATE statements' from Jussi Saurio
Closes #2600
## Problem
Every btree has a key it is sorted by - this is the integer `rowid` for
tables and an arbitrary-sized, potentially multi-column key for indexes.
Executing an UPDATE in a loop is not safe if the update modifies any
part of the key of the btree that is used for iterating the rows in said
loop. For example:
- Using the table itself to iterate rows is not safe if the UPDATE
modifies the rowid (or rowid alias) of a row, because since it modifies
the iteration order itself, it may cause rows to be skipped:
```sql
CREATE TABLE t(x INTEGER PRIMARY KEY, y);
INSERT <something>
UPDATE t SET y = RANDOM() where x > 100; // safe to iterate 't', 'y' is not being modified
UPDATE t SET x = RANDOM() where x > 100; // not safe to iterate 't', 'x' is being modified
```
- Using an index to iterate rows is not safe if the UPDATE modifies any
of the columns in the index key
```sql
CREATE TABLE t(x, y, z);
CREATE INDEX txy ON t (x,y);
INSERT <something>
UPDATE t SET z = RANDOM() where x = 100 and y > 0; // safe to iterate txy, neither x or y is being modified
UPDATE t SET x = RANDOM() where x = 100 and y > 0; // not safe to iterate txy, 'x' is being modified
UPDATE t SET y = RANDOM() where x = 100 and y > 0; // not safe to iterate txy, 'y' is being modified
```
## Current solution in tursodb
Our current `main` code recognizes this issue and adopts this pseudocode
algorithm from SQLite:
- open a table or index for reading the rows of the source table,
- for each row that matches the condition in the UPDATE statement, write
the row into a temporary table
- then use that temporary table for iteration in the UPDATE loop.
This guarantees that the iteration order will not be affected by the
UPDATEs because the ephemeral table is not under modification.
## Problem with current solution
Our `main` code specialcases the ephemeral table solution to rowids /
rowid aliases only. Using indexes for UPDATE iteration was disabled in
an earlier PR (#2599) due to the safety issue mentioned above, which
means that many UPDATE statements become full table scans:
```sql
turso> create table t(x PRIMARY KEY);
turso> insert into t select value from generate_series(1,10000);
turso> explain update t set x = x + 100000 where x > 50 and x < 60;
addr  opcode             p1    p2    p3    p4             p5  comment
----  -----------------  ----  ----  ----  -------------  --  -------
0     Init               0     28    0                    0   Start at 28
1     OpenWrite          0     2     0                    0   root=2; iDb=0
2     OpenWrite          1     3     0                    0   root=3; iDb=0
-- scan entire 't' despite very narrow update range!
3     Rewind             0     27    0                    0   Rewind table t
...
```
## Solution
We move the ephemeral table logic to _after_ the optimizer has selected
the best access path for the table, and then, if the UPDATE modifies the
key of the chosen access path (table or index; whichever was selected by
the optimizer), we change the plan to include the ephemeral table
prepopulation. Hence, the same query from above becomes:
```sql
turso> explain update t set x = x + 100000 where x > 50 and x < 60;
addr  opcode             p1    p2    p3    p4             p5  comment
----  -----------------  ----  ----  ----  -------------  --  -------
0     Init               0     35    0                    0   Start at 35
1     OpenEphemeral      0     1     0                    0   cursor=0 is_table=true
2     OpenRead           1     3     0                    0   index=sqlite_autoindex_t_1, root=3, iDb=0
3     Integer            50    2     0                    0   r[2]=50
-- index seek on PRIMARY KEY index
4     SeekGT             1     10    2                    0   key=[2..2]
5       Integer          60    2     0                    0   r[2]=60
6       IdxGE            1     10    2                    0   key=[2..2]
7       IdxRowId         1     1     0                    0   r[1]=cursor 1 for index sqlite_autoindex_t_1.rowid
8       Insert           0     3     1     ephemeral_scratch  2   intkey=r[1] data=r[3]
9     Next               1     6     0                    0   
10    OpenWrite          2     2     0                    0   root=2; iDb=0
11    OpenWrite          3     3     0                    0   root=3; iDb=0
-- only scan rows that were inserted to ephemeral index
12    Rewind             0     34    0                    0   Rewind table ephemeral_scratch
13      RowId            0     5     0                    0   r[5]=ephemeral_scratch.rowid
```
Note that an ephemeral index does not have to be used if the index is
not affected:
```sql
turso> create table t(x PRIMARY KEY, data);
turso> explain update t set data = 'some_data' where x > 50 and x < 60;
addr  opcode             p1    p2    p3    p4             p5  comment
----  -----------------  ----  ----  ----  -------------  --  -------
0     Init               0     15    0                    0   Start at 15
1     OpenWrite          0     2     0                    0   root=2; iDb=0
2     OpenWrite          1     3     0                    0   root=3; iDb=0
3     Integer            50    1     0                    0   r[1]=50
-- direct index seek
4     SeekGT             1     14    1                    0   key=[1..1]
```

Reviewed-by: Preston Thorpe <preston@turso.tech>

Closes #3728
2025-10-14 16:11:25 -04:00
Pere Diaz Bou
1a464664a7 Merge 'increment Changes() only once conditionally ' from Pavan Nambi
closes #3688

Reviewed-by: Pere Diaz Bou <pere-altea@homail.com>

Closes #3692
2025-10-14 20:26:04 +02:00
Pere Diaz Bou
a2097188f0 Merge 'make comparison case sensitive' from Pavan Nambi
closes https://github.com/tursodatabase/turso/issues/3672

Reviewed-by: Pere Diaz Bou <pere-altea@homail.com>

Closes #3686
2025-10-14 20:20:02 +02:00
Jussi Saurio
b3b07252dc Add TCL smoke tests for UPDATEs affecting indexes 2025-10-14 16:25:05 +03:00