Merge 'Add CSV import support to shell' from Vrishabh

I was trying to get limbo to a position where we can run the
[clickbench](https://github.com/ClickHouse/ClickBench/blob/main/sqlite/benchmark.sh)
benchmark and found that the `.import` command was not supported in the CLI.
This PR adds support for the `.import` command, which takes the same
parameters as the sqlite CLI.
Do note that not all options of sqlite's `.import` are implemented yet in
this PR.

Reviewed-by: Preston Thorpe <preston@unlockedlabs.org>

Closes #598
This commit is contained in:
Pekka Enberg
2025-01-04 10:11:41 +02:00
7 changed files with 186 additions and 1 deletions

1
Cargo.lock generated
View File

@@ -1105,6 +1105,7 @@ dependencies = [
"anyhow",
"clap",
"cli-table",
"csv",
"ctrlc",
"dirs",
"env_logger 0.10.2",

View File

@@ -27,3 +27,4 @@ env_logger = "0.10.1"
limbo_core = { path = "../core" }
rustyline = "12.0.0"
ctrlc = "3.4.4"
csv = "1.3.1"

View File

@@ -1,4 +1,7 @@
use crate::opcodes_dictionary::OPCODE_DESCRIPTIONS;
use crate::{
import::{ImportFile, IMPORT_HELP},
opcodes_dictionary::OPCODE_DESCRIPTIONS,
};
use cli_table::{Cell, Table};
use limbo_core::{Database, LimboError, StepResult, Value};
@@ -78,6 +81,8 @@ pub enum Command {
Echo,
/// Display tables
Tables,
/// Import data from FILE into TABLE
Import,
}
impl Command {
@@ -91,6 +96,7 @@ impl Command {
| Self::Tables
| Self::SetOutput => 0,
Self::Open | Self::OutputMode | Self::Cwd | Self::Echo | Self::NullValue => 1,
Self::Import => 2,
} + 1) // argv0
}
@@ -108,6 +114,7 @@ impl Command {
Self::NullValue => ".nullvalue <string>",
Self::Echo => ".echo on|off",
Self::Tables => ".tables",
Self::Import => &IMPORT_HELP,
}
}
}
@@ -128,6 +135,7 @@ impl FromStr for Command {
".show" => Ok(Self::ShowInfo),
".nullvalue" => Ok(Self::NullValue),
".echo" => Ok(Self::Echo),
".import" => Ok(Self::Import),
_ => Err("Unknown command".to_string()),
}
}
@@ -476,6 +484,13 @@ impl Limbo {
Command::Help => {
let _ = self.writeln(HELP_MSG);
}
Command::Import => {
let mut import_file =
ImportFile::new(self.conn.clone(), self.io.clone(), &mut self.writer);
if let Err(e) = import_file.import(&args) {
let _ = self.writeln(e.to_string());
};
}
}
} else {
let _ = self.write_fmt(format_args!(
@@ -744,6 +759,7 @@ Special Commands:
.cd <directory> Change the current working directory.
.nullvalue <string> Set the value to be displayed for null values.
.echo on|off Toggle echo mode to repeat commands before execution.
.import --csv FILE TABLE Import csv data from FILE into TABLE
.help Display this help message.
Usage Examples:
@@ -775,6 +791,9 @@ Usage Examples:
9. Show the current values of settings:
.show
10. To import csv file 'sample.csv' into 'csv_table' table:
.import --csv sample.csv csv_table
Note:
- All SQL commands must end with a semicolon (;).
- Special commands do not require a semicolon."#;

134
cli/import.rs Normal file
View File

@@ -0,0 +1,134 @@
use anyhow::Error;
use clap::Parser;
use limbo_core::Connection;
use std::{
fs::File,
io::Write,
path::PathBuf,
rc::Rc,
sync::{Arc, LazyLock},
};
/// Help text for the `.import` command, rendered lazily on first access.
///
/// The text is obtained by asking clap to parse `.import --help`: clap reports
/// a help request through the `Err` side of `try_parse_from`, and the string
/// form of that error is what gets stored here.
pub static IMPORT_HELP: LazyLock<String> = LazyLock::new(|| {
    let help_argv = [".import", "--help"];
    ImportArgs::try_parse_from(help_argv)
        .map_err(|help| help.to_string())
        .unwrap_err()
});
// Arguments accepted by the `.import` shell command, parsed with clap's derive
// API under the program name `.import`.
// NOTE: the `///` comments on the fields below are not only documentation;
// clap uses them as the per-option help text, so editing them changes the
// output captured in `IMPORT_HELP`.
#[derive(Debug, Parser)]
#[command(name = ".import")]
pub struct ImportArgs {
// `--csv` flag. The declared default is "true", and `import_csv` in this file
// never reads the field, so CSV is effectively the only input format.
/// Use , and \n as column and row separators
#[arg(long, default_value = "true")]
csv: bool,
// Short-only flag (`-v`); when set, a summary line is printed after the import.
/// "Verbose" - increase auxiliary output
#[arg(short, default_value = "false")]
verbose: bool,
// `--skip N`: number of leading CSV records that are dropped before inserting.
/// Skip the first N rows of input
#[arg(long, default_value = "0")]
skip: u64,
// First positional argument: path of the CSV file to read.
file: PathBuf,
// Second positional argument: name of the table rows are inserted into.
table: String,
}
/// State needed to execute a `.import` command issued from the shell.
pub struct ImportFile<'a> {
/// Connection on which the generated `INSERT` statements are executed.
conn: Rc<Connection>,
/// I/O backend; `run_once` is called on it whenever a statement reports
/// pending I/O.
io: Arc<dyn limbo_core::IO>,
/// Destination for error messages and the verbose summary line.
writer: &'a mut dyn Write,
}
impl<'a> ImportFile<'a> {
pub fn new(
conn: Rc<Connection>,
io: Arc<dyn limbo_core::IO>,
writer: &'a mut dyn Write,
) -> Self {
Self { conn, io, writer }
}
pub fn import(&mut self, args: &[&str]) -> Result<(), Error> {
let import_args = ImportArgs::try_parse_from(args.iter());
match import_args {
Ok(args) => {
self.import_csv(args);
Ok(())
}
Err(err) => Err(anyhow::anyhow!(err.to_string())),
}
}
pub fn import_csv(&mut self, args: ImportArgs) {
let file = match File::open(args.file) {
Ok(file) => file,
Err(e) => {
let _ = self.writer.write_all(format!("{:?}\n", e).as_bytes());
return;
}
};
let mut rdr = csv::ReaderBuilder::new()
.has_headers(false)
.from_reader(file);
let mut success_rows = 0u64;
let mut failed_rows = 0u64;
for result in rdr.records().skip(args.skip as usize) {
let record = result.unwrap();
if !record.is_empty() {
let mut values_string = String::new();
for r in record.iter() {
values_string.push('\'');
values_string.push_str(r);
values_string.push_str("',");
}
// remove the last comma after last element
values_string.pop();
let insert_string =
format!("INSERT INTO {} VALUES ({});", args.table, values_string);
match self.conn.query(insert_string) {
Ok(rows) => {
if let Some(mut rows) = rows {
while let Ok(x) = rows.next_row() {
match x {
limbo_core::StepResult::IO => {
self.io.run_once().unwrap();
}
limbo_core::StepResult::Done => break,
limbo_core::StepResult::Interrupt => break,
limbo_core::StepResult::Busy => {
let _ =
self.writer.write_all("database is busy\n".as_bytes());
break;
}
limbo_core::StepResult::Row(_) => todo!(),
}
}
}
success_rows += 1;
}
Err(_err) => {
failed_rows += 1;
}
}
}
}
if args.verbose {
let _ = self.writer.write_all(
format!(
"Added {} rows with {} errors using {} lines of input\n",
success_rows,
failed_rows,
success_rows + failed_rows
)
.as_bytes(),
);
}
}
}

View File

@@ -1,5 +1,6 @@
#![allow(clippy::arc_with_non_send_sync)]
mod app;
mod import;
mod opcodes_dictionary;
use rustyline::{error::ReadlineError, DefaultEditor};

View File

@@ -200,6 +200,33 @@ do_execshell_test(
do_execshell_test(pipe, "test-verify-null-value", "select NULL;", "LIMBO")
# test import csv
# Relative path of the CSV fixture passed to `.import` by the tests below.
csv_file = "./test_files/test.csv"
# Send `.open :memory:` to the shell before the import tests create their tables.
write_to_pipe(".open :memory:")
def test_import_csv(test_name: str, options: str, import_output: str, table_output: str):
    """Run one `.import` scenario against a freshly created table.

    Creates `csv_table_<test_name>` with columns (c1 INT, c2 REAL, c3 String),
    issues `.import <options> <csv_file> <table>` with `import_output` as the
    expected value, then issues `select *` on the table with `table_output` as
    the expected value. Both checks are delegated to `do_execshell_test`.
    """
    table = f'csv_table_{test_name}'
    case_id = f"test-import-csv-{test_name}"

    write_to_pipe(f"CREATE TABLE {table} (c1 INT, c2 REAL, c3 String);")

    import_cmd = f".import {options} {csv_file} {table}"
    do_execshell_test(pipe, case_id, import_cmd, import_output)

    select_cmd = f"select * from {table};"
    do_execshell_test(pipe, f"{case_id}-output", select_cmd, table_output)
# Plain `--csv` import: empty expected import output, both fixture rows in the table.
test_import_csv('no_options', '--csv', '', '1|2.0|String1\n3|4.0|String2')
# `-v` additionally expects the summary line from the import command.
test_import_csv('verbose', '--csv -v',
'Added 2 rows with 0 errors using 2 lines of input'
,'1|2.0|String1\n3|4.0|String2')
# `--skip 1` expects only the second fixture row in the table.
test_import_csv('skip', '--csv --skip 1', '' ,'3|4.0|String2')
# Verify the output file exists and contains expected content
filepath = os.path.join(cwd, dir, outfile)

View File

@@ -0,0 +1,2 @@
1,2.0,"String1"
3,4.0,"String2"
1 1 2.0 String1
2 3 4.0 String2