Working local vector search

This commit is contained in:
SilasMarvin
2024-07-03 20:46:43 -07:00
parent b035b8f769
commit c7182fffe7
3 changed files with 34 additions and 34 deletions

View File

@@ -1,7 +1,7 @@
use anyhow::Context; use anyhow::Context;
use indexmap::IndexSet; use indexmap::IndexSet;
use lsp_types::TextDocumentPositionParams; use lsp_types::TextDocumentPositionParams;
use parking_lot::Mutex; use parking_lot::{Mutex, RwLock};
use ropey::Rope; use ropey::Rope;
use serde_json::Value; use serde_json::Value;
use std::{collections::HashMap, io::Read}; use std::{collections::HashMap, io::Read};
@@ -49,7 +49,7 @@ impl File {
pub(crate) struct FileStore { pub(crate) struct FileStore {
params: AdditionalFileStoreParams, params: AdditionalFileStoreParams,
file_map: Mutex<HashMap<String, File>>, file_map: RwLock<HashMap<String, File>>,
accessed_files: Mutex<IndexSet<String>>, accessed_files: Mutex<IndexSet<String>>,
crawl: Option<Mutex<Crawl>>, crawl: Option<Mutex<Crawl>>,
} }
@@ -65,7 +65,7 @@ impl FileStore {
.map(|x| Mutex::new(Crawl::new(x, config.clone()))); .map(|x| Mutex::new(Crawl::new(x, config.clone())));
let s = Self { let s = Self {
params: AdditionalFileStoreParams::default(), params: AdditionalFileStoreParams::default(),
file_map: Mutex::new(HashMap::new()), file_map: RwLock::new(HashMap::new()),
accessed_files: Mutex::new(IndexSet::new()), accessed_files: Mutex::new(IndexSet::new()),
crawl, crawl,
}; };
@@ -86,7 +86,7 @@ impl FileStore {
.map(|x| Mutex::new(Crawl::new(x, config.clone()))); .map(|x| Mutex::new(Crawl::new(x, config.clone())));
let s = Self { let s = Self {
params, params,
file_map: Mutex::new(HashMap::new()), file_map: RwLock::new(HashMap::new()),
accessed_files: Mutex::new(IndexSet::new()), accessed_files: Mutex::new(IndexSet::new()),
crawl, crawl,
}; };
@@ -111,7 +111,7 @@ impl FileStore {
None None
}; };
self.file_map self.file_map
.lock() .write()
.insert(uri.to_string(), File::new(Rope::from_str(&contents), tree)); .insert(uri.to_string(), File::new(Rope::from_str(&contents), tree));
self.accessed_files.lock().insert(uri.to_string()); self.accessed_files.lock().insert(uri.to_string());
} }
@@ -130,7 +130,7 @@ impl FileStore {
} }
// This means it has been opened before // This means it has been opened before
let insert_uri = format!("file:///{path}"); let insert_uri = format!("file:///{path}");
if self.file_map.lock().contains_key(&insert_uri) { if self.file_map.read().contains_key(&insert_uri) {
return Ok(true); return Ok(true);
} }
// Open the file and see if it is small enough to read // Open the file and see if it is small enough to read
@@ -163,7 +163,7 @@ impl FileStore {
let current_document_uri = position.text_document.uri.to_string(); let current_document_uri = position.text_document.uri.to_string();
let mut rope = self let mut rope = self
.file_map .file_map
.lock() .read()
.get(&current_document_uri) .get(&current_document_uri)
.context("Error file not found")? .context("Error file not found")?
.rope .rope
@@ -181,7 +181,7 @@ impl FileStore {
if needed == 0 || !pull_from_multiple_files { if needed == 0 || !pull_from_multiple_files {
break; break;
} }
let file_map = self.file_map.lock(); let file_map = self.file_map.read();
let r = &file_map.get(file).context("Error file not found")?.rope; let r = &file_map.get(file).context("Error file not found")?.rope;
let slice_max = needed.min(r.len_chars() + 1); let slice_max = needed.min(r.len_chars() + 1);
let rope_str_slice = r let rope_str_slice = r
@@ -202,7 +202,7 @@ impl FileStore {
) -> anyhow::Result<String> { ) -> anyhow::Result<String> {
let rope = self let rope = self
.file_map .file_map
.lock() .read()
.get(position.text_document.uri.as_str()) .get(position.text_document.uri.as_str())
.context("Error file not found")? .context("Error file not found")?
.rope .rope
@@ -275,19 +275,16 @@ impl FileStore {
}) })
} }
pub(crate) fn file_map(&self) -> &Mutex<HashMap<String, File>> { pub fn file_map(&self) -> &RwLock<HashMap<String, File>> {
&self.file_map &self.file_map
} }
pub(crate) fn contains_file(&self, uri: &str) -> bool { pub fn contains_file(&self, uri: &str) -> bool {
self.file_map.lock().contains_key(uri) self.file_map.read().contains_key(uri)
} }
pub(crate) fn position_to_byte( pub fn position_to_byte(&self, position: &TextDocumentPositionParams) -> anyhow::Result<usize> {
&self, let file_map = self.file_map.read();
position: &TextDocumentPositionParams,
) -> anyhow::Result<usize> {
let file_map = self.file_map.lock();
let uri = position.text_document.uri.to_string(); let uri = position.text_document.uri.to_string();
let file = file_map let file = file_map
.get(&uri) .get(&uri)
@@ -307,7 +304,7 @@ impl MemoryBackend for FileStore {
fn get_filter_text(&self, position: &TextDocumentPositionParams) -> anyhow::Result<String> { fn get_filter_text(&self, position: &TextDocumentPositionParams) -> anyhow::Result<String> {
let rope = self let rope = self
.file_map .file_map
.lock() .read()
.get(position.text_document.uri.as_str()) .get(position.text_document.uri.as_str())
.context("Error file not found")? .context("Error file not found")?
.rope .rope
@@ -351,7 +348,7 @@ impl MemoryBackend for FileStore {
params: lsp_types::DidChangeTextDocumentParams, params: lsp_types::DidChangeTextDocumentParams,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let uri = params.text_document.uri.to_string(); let uri = params.text_document.uri.to_string();
let mut file_map = self.file_map.lock(); let mut file_map = self.file_map.write();
let file = file_map let file = file_map
.get_mut(&uri) .get_mut(&uri)
.with_context(|| format!("Trying to get file that does not exist {uri}"))?; .with_context(|| format!("Trying to get file that does not exist {uri}"))?;
@@ -450,7 +447,7 @@ impl MemoryBackend for FileStore {
#[instrument(skip(self))] #[instrument(skip(self))]
fn renamed_files(&self, params: lsp_types::RenameFilesParams) -> anyhow::Result<()> { fn renamed_files(&self, params: lsp_types::RenameFilesParams) -> anyhow::Result<()> {
for file_rename in params.files { for file_rename in params.files {
let mut file_map = self.file_map.lock(); let mut file_map = self.file_map.write();
if let Some(rope) = file_map.remove(&file_rename.old_uri) { if let Some(rope) = file_map.remove(&file_rename.old_uri) {
file_map.insert(file_rename.new_uri, rope); file_map.insert(file_rename.new_uri, rope);
} }
@@ -537,7 +534,7 @@ mod tests {
file_store.opened_text_document(params)?; file_store.opened_text_document(params)?;
let file = file_store let file = file_store
.file_map .file_map
.lock() .read()
.get("file:///filler/") .get("file:///filler/")
.unwrap() .unwrap()
.clone(); .clone();
@@ -563,7 +560,7 @@ mod tests {
let file = file_store let file = file_store
.file_map .file_map
.lock() .read()
.get("file:///filler2/") .get("file:///filler2/")
.unwrap() .unwrap()
.clone(); .clone();
@@ -604,7 +601,7 @@ mod tests {
file_store.changed_text_document(params)?; file_store.changed_text_document(params)?;
let file = file_store let file = file_store
.file_map .file_map
.lock() .read()
.get("file:///filler/") .get("file:///filler/")
.unwrap() .unwrap()
.clone(); .clone();
@@ -624,7 +621,7 @@ mod tests {
file_store.changed_text_document(params)?; file_store.changed_text_document(params)?;
let file = file_store let file = file_store
.file_map .file_map
.lock() .read()
.get("file:///filler/") .get("file:///filler/")
.unwrap() .unwrap()
.clone(); .clone();
@@ -881,7 +878,7 @@ fn main() {
}], }],
}; };
file_store.changed_text_document(params)?; file_store.changed_text_document(params)?;
let file = file_store.file_map.lock().get(uri).unwrap().clone(); let file = file_store.file_map.read().get(uri).unwrap().clone();
assert_eq!(file.tree.unwrap().root_node().to_sexp(), "(source_file (attribute_item (attribute (identifier) arguments: (token_tree (identifier)))) (struct_item name: (type_identifier) body: (field_declaration_list (field_declaration name: (field_identifier) type: (primitive_type)) (field_declaration name: (field_identifier) type: (primitive_type)))) (impl_item type: (type_identifier) body: (declaration_list (function_item name: (identifier) parameters: (parameters (self_parameter (self))) return_type: (primitive_type) body: (block (binary_expression left: (field_expression value: (self) field: (field_identifier)) right: (field_expression value: (self) field: (field_identifier))))))) (function_item name: (identifier) parameters: (parameters) body: (block (let_declaration pattern: (identifier) value: (struct_expression name: (type_identifier) body: (field_initializer_list (field_initializer field: (field_identifier) value: (integer_literal)) (field_initializer field: (field_identifier) value: (integer_literal))))) (expression_statement (macro_invocation macro: (identifier) (token_tree (string_literal (string_content)) (identifier) (identifier) (token_tree)))))))"); assert_eq!(file.tree.unwrap().root_node().to_sexp(), "(source_file (attribute_item (attribute (identifier) arguments: (token_tree (identifier)))) (struct_item name: (type_identifier) body: (field_declaration_list (field_declaration name: (field_identifier) type: (primitive_type)) (field_declaration name: (field_identifier) type: (primitive_type)))) (impl_item type: (type_identifier) body: (declaration_list (function_item name: (identifier) parameters: (parameters (self_parameter (self))) return_type: (primitive_type) body: (block (binary_expression left: (field_expression value: (self) field: (field_identifier)) right: (field_expression value: (self) field: (field_identifier))))))) (function_item name: (identifier) parameters: (parameters) body: (block (let_declaration pattern: (identifier) value: 
(struct_expression name: (type_identifier) body: (field_initializer_list (field_initializer field: (field_identifier) value: (integer_literal)) (field_initializer field: (field_identifier) value: (integer_literal))))) (expression_statement (macro_invocation macro: (identifier) (token_tree (string_literal (string_content)) (identifier) (identifier) (token_tree)))))))");
// Test delete // Test delete
@@ -906,7 +903,7 @@ fn main() {
}], }],
}; };
file_store.changed_text_document(params)?; file_store.changed_text_document(params)?;
let file = file_store.file_map.lock().get(uri).unwrap().clone(); let file = file_store.file_map.read().get(uri).unwrap().clone();
assert_eq!(file.tree.unwrap().root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block (let_declaration pattern: (identifier) value: (struct_expression name: (type_identifier) body: (field_initializer_list (field_initializer field: (field_identifier) value: (integer_literal)) (field_initializer field: (field_identifier) value: (integer_literal))))) (expression_statement (macro_invocation macro: (identifier) (token_tree (string_literal (string_content)) (identifier) (identifier) (token_tree)))))))"); assert_eq!(file.tree.unwrap().root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block (let_declaration pattern: (identifier) value: (struct_expression name: (type_identifier) body: (field_initializer_list (field_initializer field: (field_identifier) value: (integer_literal)) (field_initializer field: (field_identifier) value: (integer_literal))))) (expression_statement (macro_invocation macro: (identifier) (token_tree (string_literal (string_content)) (identifier) (identifier) (token_tree)))))))");
// Test replace // Test replace
@@ -922,7 +919,7 @@ fn main() {
}], }],
}; };
file_store.changed_text_document(params)?; file_store.changed_text_document(params)?;
let file = file_store.file_map.lock().get(uri).unwrap().clone(); let file = file_store.file_map.read().get(uri).unwrap().clone();
assert_eq!(file.tree.unwrap().root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))"); assert_eq!(file.tree.unwrap().root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))");
Ok(()) Ok(())

View File

@@ -50,7 +50,7 @@ async fn split_and_upsert_file(
let chunks = { let chunks = {
file_store file_store
.file_map() .file_map()
.lock() .read()
.get(uri) .get(uri)
.map(|f| splitter.split(f)) .map(|f| splitter.split(f))
}; };
@@ -184,7 +184,7 @@ impl PostgresML {
let chunks: Vec<Vec<Chunk>> = match file_uris let chunks: Vec<Vec<Chunk>> = match file_uris
.iter() .iter()
.map(|uri| { .map(|uri| {
let file_store = task_file_store.file_map().lock(); let file_store = task_file_store.file_map().read();
let file = file_store let file = file_store
.get(uri) .get(uri)
.with_context(|| format!("getting file for splitting: {uri}"))?; .with_context(|| format!("getting file for splitting: {uri}"))?;

View File

@@ -226,7 +226,8 @@ impl VS {
if acc.is_empty() { if acc.is_empty() {
acc.insert(score, chunk); acc.insert(score, chunk);
} else if acc.first_key_value().unwrap().0 < &score { } else if acc.first_key_value().unwrap().0 < &score {
if acc.len() == limit { // Keep limit + 1 candidates here: one chunk may be filtered out later, which would otherwise leave no results when the limit is 1
if acc.len() == limit + 1 {
acc.pop_first(); acc.pop_first();
} }
acc.insert(score, chunk); acc.insert(score, chunk);
@@ -395,7 +396,7 @@ impl MemoryBackend for VectorStore {
let uri = params.text_document.uri.to_string(); let uri = params.text_document.uri.to_string();
self.file_store.opened_text_document(params)?; self.file_store.opened_text_document(params)?;
let file_map = self.file_store.file_map().lock(); let file_map = self.file_store.file_map().read();
let file = file_map.get(&uri).context("file not found")?; let file = file_map.get(&uri).context("file not found")?;
let chunks = self.splitter.split(file); let chunks = self.splitter.split(file);
self.upsert_chunks(&uri, chunks); self.upsert_chunks(&uri, chunks);
@@ -411,9 +412,11 @@ impl MemoryBackend for VectorStore {
let uri = params.text_document.uri.to_string(); let uri = params.text_document.uri.to_string();
self.file_store.changed_text_document(params.clone())?; self.file_store.changed_text_document(params.clone())?;
let file_map = self.file_store.file_map().lock(); let chunks = {
let file = file_map.get(&uri).context("file not found")?; let file_map = self.file_store.file_map().read();
let chunks = self.splitter.split(file); let file = file_map.get(&uri).context("file not found")?;
self.splitter.split(file)
};
let chunks_size = chunks.len(); let chunks_size = chunks.len();
// This is not as efficient as it could be, but it is ok for now // This is not as efficient as it could be, but it is ok for now