From 1f21cf6a71eaccf5347982bebc87306546cf3aa5 Mon Sep 17 00:00:00 2001
From: psvri
Date: Wed, 1 Jan 2025 15:53:53 +0530
Subject: [PATCH 1/2] Feat: Import csv support

---
 Cargo.lock                  |   1 +
 cli/Cargo.toml              |   1 +
 cli/app.rs                  |  17 +++-
 cli/import.rs               | 168 ++++++++++++++++++++++++++++++++++++
 cli/main.rs                 |   1 +
 testing/shelltests.py       |  27 ++++++
 testing/test_files/test.csv |   2 +
 7 files changed, 216 insertions(+), 1 deletion(-)
 create mode 100644 cli/import.rs
 create mode 100644 testing/test_files/test.csv

diff --git a/Cargo.lock b/Cargo.lock
index e3370cd59..fa685398a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1105,6 +1105,7 @@ dependencies = [
  "anyhow",
  "clap",
  "cli-table",
+ "csv",
  "ctrlc",
  "dirs",
  "env_logger 0.10.2",
diff --git a/cli/Cargo.toml b/cli/Cargo.toml
index 3e4512eda..076f9e775 100644
--- a/cli/Cargo.toml
+++ b/cli/Cargo.toml
@@ -27,3 +27,4 @@ env_logger = "0.10.1"
 limbo_core = { path = "../core" }
 rustyline = "12.0.0"
 ctrlc = "3.4.4"
+csv = "1.3.1"
diff --git a/cli/app.rs b/cli/app.rs
index 56f5007ef..1cea07594 100644
--- a/cli/app.rs
+++ b/cli/app.rs
@@ -1,4 +1,7 @@
-use crate::opcodes_dictionary::OPCODE_DESCRIPTIONS;
+use crate::{
+    import::{ImportFile, IMPORT_HELP},
+    opcodes_dictionary::OPCODE_DESCRIPTIONS,
+};
 use cli_table::{Cell, Table};
 use limbo_core::{Database, LimboError, StepResult, Value};
 
@@ -78,6 +81,8 @@ pub enum Command {
     Echo,
     /// Display tables
     Tables,
+    /// Import data from FILE into TABLE
+    Import,
 }
 
 impl Command {
@@ -91,6 +96,7 @@ impl Command {
             | Self::Tables
             | Self::SetOutput => 0,
             Self::Open | Self::OutputMode | Self::Cwd | Self::Echo | Self::NullValue => 1,
+            Self::Import => 2,
         } + 1) // argv0
     }
 
@@ -108,6 +114,7 @@ impl Command {
             Self::NullValue => ".nullvalue ",
             Self::Echo => ".echo on|off",
             Self::Tables => ".tables",
+            Self::Import => &IMPORT_HELP,
         }
     }
 }
@@ -128,6 +135,7 @@ impl FromStr for Command {
             ".show" => Ok(Self::ShowInfo),
             ".nullvalue" => Ok(Self::NullValue),
             ".echo" => Ok(Self::Echo),
+            ".import" => Ok(Self::Import),
             _ => Err("Unknown command".to_string()),
         }
     }
@@ -476,6 +484,13 @@ impl Limbo {
                 Command::Help => {
                     let _ = self.writeln(HELP_MSG);
                 }
+                Command::Import => {
+                    let mut import_file =
+                        ImportFile::new(self.conn.clone(), self.io.clone(), &mut self.writer);
+                    if let Err(e) = import_file.import(&args) {
+                        let _ = self.writeln(e.to_string());
+                    };
+                }
             }
         } else {
             let _ = self.write_fmt(format_args!(
diff --git a/cli/import.rs b/cli/import.rs
new file mode 100644
index 000000000..f996723f9
--- /dev/null
+++ b/cli/import.rs
@@ -0,0 +1,168 @@
+use anyhow::Error;
+use clap::Parser;
+use limbo_core::Connection;
+use std::{
+    fs::File,
+    io::Write,
+    path::PathBuf,
+    rc::Rc,
+    sync::{Arc, LazyLock},
+};
+
+/// Usage text for `.import`, captured from the error clap returns for `--help`.
+pub static IMPORT_HELP: LazyLock<String> = LazyLock::new(|| {
+    let empty: [&'static str; 2] = [".import", "--help"];
+    let opts = ImportArgs::try_parse_from(empty);
+    opts.map_err(|e| e.to_string()).unwrap_err()
+});
+
+// NOTE: clap turns `///` comments on this struct and its fields into help text, so
+// anything that must stay out of IMPORT_HELP is written as a plain `//` comment.
+#[derive(Debug, Parser)]
+#[command(name = ".import")]
+pub struct ImportArgs {
+    /// Use , and \n as column and row separators
+    #[arg(long, default_value = "true")]
+    csv: bool,
+    /// "Verbose" - increase auxiliary output
+    #[arg(short, default_value = "false")]
+    verbose: bool,
+    /// Skip the first N rows of input
+    #[arg(long, default_value = "0")]
+    skip: u64,
+    // Path of the CSV file to read.
+    file: PathBuf,
+    // Table that receives the rows.
+    table: String,
+}
+
+/// Runs `.import` against `conn`, writing diagnostics to `writer`.
+pub struct ImportFile<'a> {
+    conn: Rc<Connection>,
+    io: Arc<dyn limbo_core::IO>,
+    writer: &'a mut dyn Write,
+}
+
+impl<'a> ImportFile<'a> {
+    pub fn new(
+        conn: Rc<Connection>,
+        io: Arc<dyn limbo_core::IO>,
+        writer: &'a mut dyn Write,
+    ) -> Self {
+        Self { conn, io, writer }
+    }
+
+    /// Parses argv-style `args` (first element is the command name) and runs the import.
+    ///
+    /// # Errors
+    /// Returns clap's rendered message when `args` does not parse.
+    pub fn import(&mut self, args: &[&str]) -> Result<(), Error> {
+        let import_args = ImportArgs::try_parse_from(args.iter())
+            .map_err(|err| anyhow::anyhow!(err.to_string()))?;
+        self.import_csv(import_args);
+        Ok(())
+    }
+
+    /// Inserts each record of `args.file` into `args.table` with one INSERT per record.
+    ///
+    /// An unreadable file aborts the import with a message; malformed records and failed
+    /// statements are counted as errors instead of panicking.
+    pub fn import_csv(&mut self, args: ImportArgs) {
+        let file = match File::open(args.file) {
+            Ok(file) => file,
+            Err(e) => {
+                let _ = self.writer.write_all(format!("{:?}\n", e).as_bytes());
+                return;
+            }
+        };
+
+        let mut rdr = csv::ReaderBuilder::new()
+            .has_headers(false)
+            .from_reader(file);
+
+        let mut success_rows = 0u64;
+        let mut failed_rows = 0u64;
+        // Saturate instead of truncating where usize is narrower than u64.
+        let skip = usize::try_from(args.skip).unwrap_or(usize::MAX);
+
+        for result in rdr.records().skip(skip) {
+            let record = match result {
+                Ok(record) => record,
+                Err(e) => {
+                    // Malformed input is a data problem, not a bug: report it and go on.
+                    let _ = self.writer.write_all(format!("{}\n", e).as_bytes());
+                    failed_rows += 1;
+                    continue;
+                }
+            };
+
+            if record.is_empty() {
+                continue;
+            }
+
+            // Quote each field as a SQL string literal, doubling embedded quotes so a
+            // value such as O'Brien cannot break out of the literal.
+            let values_string = record
+                .iter()
+                .map(|field| format!("'{}'", field.replace('\'', "''")))
+                .collect::<Vec<_>>()
+                .join(",");
+
+            let insert_string = format!("INSERT INTO {} VALUES ({});", args.table, values_string);
+
+            if self.run_insert(insert_string) {
+                success_rows += 1;
+            } else {
+                failed_rows += 1;
+            }
+        }
+
+        if args.verbose {
+            let _ = self.writer.write_all(
+                format!(
+                    "Added {} rows with {} errors using {} lines of input\n",
+                    success_rows,
+                    failed_rows,
+                    success_rows + failed_rows
+                )
+                .as_bytes(),
+            );
+        }
+    }
+
+    /// Steps one INSERT to completion and returns whether the row was stored.
+    fn run_insert(&mut self, insert_string: String) -> bool {
+        let mut rows = match self.conn.query(insert_string) {
+            Ok(Some(rows)) => rows,
+            // No statement to step: treat it as completed.
+            Ok(None) => return true,
+            Err(e) => {
+                let _ = self.writer.write_all(format!("{}\n", e).as_bytes());
+                return false;
+            }
+        };
+
+        loop {
+            match rows.next_row() {
+                Ok(limbo_core::StepResult::IO) => {
+                    if let Err(e) = self.io.run_once() {
+                        let _ = self.writer.write_all(format!("{}\n", e).as_bytes());
+                        return false;
+                    }
+                }
+                Ok(limbo_core::StepResult::Done) => return true,
+                Ok(limbo_core::StepResult::Interrupt) => return false,
+                Ok(limbo_core::StepResult::Busy) => {
+                    let _ = self.writer.write_all("database is busy\n".as_bytes());
+                    return false;
+                }
+                // An INSERT is not expected to yield rows; skip any that show up.
+                Ok(limbo_core::StepResult::Row(_)) => {}
+                Err(e) => {
+                    let _ = self.writer.write_all(format!("{}\n", e).as_bytes());
+                    return false;
+                }
+            }
+        }
+    }
+}
diff --git a/cli/main.rs b/cli/main.rs
index 8af57e2ca..df4aa9787 100644
--- a/cli/main.rs
+++ b/cli/main.rs
@@ -1,5 +1,6 @@
 #![allow(clippy::arc_with_non_send_sync)]
 mod app;
+mod import;
 mod opcodes_dictionary;
 
 use rustyline::{error::ReadlineError, DefaultEditor};
diff --git a/testing/shelltests.py b/testing/shelltests.py
index 6b1dab121..0eaa1a62c 100755
--- a/testing/shelltests.py
+++ b/testing/shelltests.py
@@ -200,6 +200,33 @@ do_execshell_test(
 
 do_execshell_test(pipe, "test-verify-null-value", "select NULL;", "LIMBO")
 
+# test import csv
+csv_file = "./test_files/test.csv"
+write_to_pipe(".open :memory:")
+
+
+def test_import_csv(test_name: str, options: str, import_output: str, table_output: str):
+    csv_table_name = f'csv_table_{test_name}'
+    write_to_pipe(f"CREATE TABLE {csv_table_name} (c1 INT, c2 REAL, c3 String);")
+    do_execshell_test(
+        pipe,
+        f"test-import-csv-{test_name}",
+        f".import {options} {csv_file} {csv_table_name}",
+        import_output,
+    )
+    do_execshell_test(
+        pipe,
+        f"test-import-csv-{test_name}-output",
+        f"select * from {csv_table_name};",
+        table_output,
+    )
+
+test_import_csv('no_options', '--csv', '', '1|2.0|String1\n3|4.0|String2')
+test_import_csv('verbose', '--csv -v',
+    'Added 2 rows with 0 errors using 2 lines of input'
+    ,'1|2.0|String1\n3|4.0|String2')
+test_import_csv('skip', '--csv --skip 1', '' ,'3|4.0|String2')
+
 # Verify the output file exists and contains expected content
 filepath = os.path.join(cwd, dir, outfile)
 
diff --git a/testing/test_files/test.csv b/testing/test_files/test.csv
new file mode 100644
index 000000000..34a2e6cd7
--- /dev/null
+++ b/testing/test_files/test.csv
@@ -0,0 +1,2 @@
+1,2.0,"String1"
+3,4.0,"String2"
\ No newline at end of file

From 18a1055088f0fe3537d714c336dfb24f18e5ed96 Mon Sep 17 00:00:00 2001
From: psvri
Date: Fri, 3 Jan 2025 19:40:35 +0530
Subject: [PATCH 2/2] Edit help message

---
 cli/app.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/cli/app.rs b/cli/app.rs
index 1cea07594..6e3ad4357 100644
--- a/cli/app.rs
+++ b/cli/app.rs
@@ -759,6 +759,7 @@ Special Commands:
 .cd Change the current working directory.
 .nullvalue Set the value to be displayed for null values.
 .echo on|off Toggle echo mode to repeat commands before execution.
+.import --csv FILE TABLE Import csv data from FILE into TABLE
 .help Display this help message.
 
 Usage Examples:
@@ -790,6 +791,9 @@ Usage Examples:
 9. Show the current values of settings:
    .show
 
+10. To import csv file 'sample.csv' into 'csv_table' table:
+    .import --csv sample.csv csv_table
+
 Note:
 - All SQL commands must end with a semicolon (;).
 - Special commands do not require a semicolon."#;