mirror of
https://github.com/SilasMarvin/lsp-ai.git
synced 2025-12-18 15:04:29 +01:00
Working local vector search
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use indexmap::IndexSet;
|
use indexmap::IndexSet;
|
||||||
use lsp_types::TextDocumentPositionParams;
|
use lsp_types::TextDocumentPositionParams;
|
||||||
use parking_lot::Mutex;
|
use parking_lot::{Mutex, RwLock};
|
||||||
use ropey::Rope;
|
use ropey::Rope;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use std::{collections::HashMap, io::Read};
|
use std::{collections::HashMap, io::Read};
|
||||||
@@ -49,7 +49,7 @@ impl File {
|
|||||||
|
|
||||||
pub(crate) struct FileStore {
|
pub(crate) struct FileStore {
|
||||||
params: AdditionalFileStoreParams,
|
params: AdditionalFileStoreParams,
|
||||||
file_map: Mutex<HashMap<String, File>>,
|
file_map: RwLock<HashMap<String, File>>,
|
||||||
accessed_files: Mutex<IndexSet<String>>,
|
accessed_files: Mutex<IndexSet<String>>,
|
||||||
crawl: Option<Mutex<Crawl>>,
|
crawl: Option<Mutex<Crawl>>,
|
||||||
}
|
}
|
||||||
@@ -65,7 +65,7 @@ impl FileStore {
|
|||||||
.map(|x| Mutex::new(Crawl::new(x, config.clone())));
|
.map(|x| Mutex::new(Crawl::new(x, config.clone())));
|
||||||
let s = Self {
|
let s = Self {
|
||||||
params: AdditionalFileStoreParams::default(),
|
params: AdditionalFileStoreParams::default(),
|
||||||
file_map: Mutex::new(HashMap::new()),
|
file_map: RwLock::new(HashMap::new()),
|
||||||
accessed_files: Mutex::new(IndexSet::new()),
|
accessed_files: Mutex::new(IndexSet::new()),
|
||||||
crawl,
|
crawl,
|
||||||
};
|
};
|
||||||
@@ -86,7 +86,7 @@ impl FileStore {
|
|||||||
.map(|x| Mutex::new(Crawl::new(x, config.clone())));
|
.map(|x| Mutex::new(Crawl::new(x, config.clone())));
|
||||||
let s = Self {
|
let s = Self {
|
||||||
params,
|
params,
|
||||||
file_map: Mutex::new(HashMap::new()),
|
file_map: RwLock::new(HashMap::new()),
|
||||||
accessed_files: Mutex::new(IndexSet::new()),
|
accessed_files: Mutex::new(IndexSet::new()),
|
||||||
crawl,
|
crawl,
|
||||||
};
|
};
|
||||||
@@ -111,7 +111,7 @@ impl FileStore {
|
|||||||
None
|
None
|
||||||
};
|
};
|
||||||
self.file_map
|
self.file_map
|
||||||
.lock()
|
.write()
|
||||||
.insert(uri.to_string(), File::new(Rope::from_str(&contents), tree));
|
.insert(uri.to_string(), File::new(Rope::from_str(&contents), tree));
|
||||||
self.accessed_files.lock().insert(uri.to_string());
|
self.accessed_files.lock().insert(uri.to_string());
|
||||||
}
|
}
|
||||||
@@ -130,7 +130,7 @@ impl FileStore {
|
|||||||
}
|
}
|
||||||
// This means it has been opened before
|
// This means it has been opened before
|
||||||
let insert_uri = format!("file:///{path}");
|
let insert_uri = format!("file:///{path}");
|
||||||
if self.file_map.lock().contains_key(&insert_uri) {
|
if self.file_map.read().contains_key(&insert_uri) {
|
||||||
return Ok(true);
|
return Ok(true);
|
||||||
}
|
}
|
||||||
// Open the file and see if it is small enough to read
|
// Open the file and see if it is small enough to read
|
||||||
@@ -163,7 +163,7 @@ impl FileStore {
|
|||||||
let current_document_uri = position.text_document.uri.to_string();
|
let current_document_uri = position.text_document.uri.to_string();
|
||||||
let mut rope = self
|
let mut rope = self
|
||||||
.file_map
|
.file_map
|
||||||
.lock()
|
.read()
|
||||||
.get(&current_document_uri)
|
.get(&current_document_uri)
|
||||||
.context("Error file not found")?
|
.context("Error file not found")?
|
||||||
.rope
|
.rope
|
||||||
@@ -181,7 +181,7 @@ impl FileStore {
|
|||||||
if needed == 0 || !pull_from_multiple_files {
|
if needed == 0 || !pull_from_multiple_files {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
let file_map = self.file_map.lock();
|
let file_map = self.file_map.read();
|
||||||
let r = &file_map.get(file).context("Error file not found")?.rope;
|
let r = &file_map.get(file).context("Error file not found")?.rope;
|
||||||
let slice_max = needed.min(r.len_chars() + 1);
|
let slice_max = needed.min(r.len_chars() + 1);
|
||||||
let rope_str_slice = r
|
let rope_str_slice = r
|
||||||
@@ -202,7 +202,7 @@ impl FileStore {
|
|||||||
) -> anyhow::Result<String> {
|
) -> anyhow::Result<String> {
|
||||||
let rope = self
|
let rope = self
|
||||||
.file_map
|
.file_map
|
||||||
.lock()
|
.read()
|
||||||
.get(position.text_document.uri.as_str())
|
.get(position.text_document.uri.as_str())
|
||||||
.context("Error file not found")?
|
.context("Error file not found")?
|
||||||
.rope
|
.rope
|
||||||
@@ -275,19 +275,16 @@ impl FileStore {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn file_map(&self) -> &Mutex<HashMap<String, File>> {
|
pub fn file_map(&self) -> &RwLock<HashMap<String, File>> {
|
||||||
&self.file_map
|
&self.file_map
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn contains_file(&self, uri: &str) -> bool {
|
pub fn contains_file(&self, uri: &str) -> bool {
|
||||||
self.file_map.lock().contains_key(uri)
|
self.file_map.read().contains_key(uri)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn position_to_byte(
|
pub fn position_to_byte(&self, position: &TextDocumentPositionParams) -> anyhow::Result<usize> {
|
||||||
&self,
|
let file_map = self.file_map.read();
|
||||||
position: &TextDocumentPositionParams,
|
|
||||||
) -> anyhow::Result<usize> {
|
|
||||||
let file_map = self.file_map.lock();
|
|
||||||
let uri = position.text_document.uri.to_string();
|
let uri = position.text_document.uri.to_string();
|
||||||
let file = file_map
|
let file = file_map
|
||||||
.get(&uri)
|
.get(&uri)
|
||||||
@@ -307,7 +304,7 @@ impl MemoryBackend for FileStore {
|
|||||||
fn get_filter_text(&self, position: &TextDocumentPositionParams) -> anyhow::Result<String> {
|
fn get_filter_text(&self, position: &TextDocumentPositionParams) -> anyhow::Result<String> {
|
||||||
let rope = self
|
let rope = self
|
||||||
.file_map
|
.file_map
|
||||||
.lock()
|
.read()
|
||||||
.get(position.text_document.uri.as_str())
|
.get(position.text_document.uri.as_str())
|
||||||
.context("Error file not found")?
|
.context("Error file not found")?
|
||||||
.rope
|
.rope
|
||||||
@@ -351,7 +348,7 @@ impl MemoryBackend for FileStore {
|
|||||||
params: lsp_types::DidChangeTextDocumentParams,
|
params: lsp_types::DidChangeTextDocumentParams,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
let uri = params.text_document.uri.to_string();
|
let uri = params.text_document.uri.to_string();
|
||||||
let mut file_map = self.file_map.lock();
|
let mut file_map = self.file_map.write();
|
||||||
let file = file_map
|
let file = file_map
|
||||||
.get_mut(&uri)
|
.get_mut(&uri)
|
||||||
.with_context(|| format!("Trying to get file that does not exist {uri}"))?;
|
.with_context(|| format!("Trying to get file that does not exist {uri}"))?;
|
||||||
@@ -450,7 +447,7 @@ impl MemoryBackend for FileStore {
|
|||||||
#[instrument(skip(self))]
|
#[instrument(skip(self))]
|
||||||
fn renamed_files(&self, params: lsp_types::RenameFilesParams) -> anyhow::Result<()> {
|
fn renamed_files(&self, params: lsp_types::RenameFilesParams) -> anyhow::Result<()> {
|
||||||
for file_rename in params.files {
|
for file_rename in params.files {
|
||||||
let mut file_map = self.file_map.lock();
|
let mut file_map = self.file_map.write();
|
||||||
if let Some(rope) = file_map.remove(&file_rename.old_uri) {
|
if let Some(rope) = file_map.remove(&file_rename.old_uri) {
|
||||||
file_map.insert(file_rename.new_uri, rope);
|
file_map.insert(file_rename.new_uri, rope);
|
||||||
}
|
}
|
||||||
@@ -537,7 +534,7 @@ mod tests {
|
|||||||
file_store.opened_text_document(params)?;
|
file_store.opened_text_document(params)?;
|
||||||
let file = file_store
|
let file = file_store
|
||||||
.file_map
|
.file_map
|
||||||
.lock()
|
.read()
|
||||||
.get("file:///filler/")
|
.get("file:///filler/")
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.clone();
|
.clone();
|
||||||
@@ -563,7 +560,7 @@ mod tests {
|
|||||||
|
|
||||||
let file = file_store
|
let file = file_store
|
||||||
.file_map
|
.file_map
|
||||||
.lock()
|
.read()
|
||||||
.get("file:///filler2/")
|
.get("file:///filler2/")
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.clone();
|
.clone();
|
||||||
@@ -604,7 +601,7 @@ mod tests {
|
|||||||
file_store.changed_text_document(params)?;
|
file_store.changed_text_document(params)?;
|
||||||
let file = file_store
|
let file = file_store
|
||||||
.file_map
|
.file_map
|
||||||
.lock()
|
.read()
|
||||||
.get("file:///filler/")
|
.get("file:///filler/")
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.clone();
|
.clone();
|
||||||
@@ -624,7 +621,7 @@ mod tests {
|
|||||||
file_store.changed_text_document(params)?;
|
file_store.changed_text_document(params)?;
|
||||||
let file = file_store
|
let file = file_store
|
||||||
.file_map
|
.file_map
|
||||||
.lock()
|
.read()
|
||||||
.get("file:///filler/")
|
.get("file:///filler/")
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.clone();
|
.clone();
|
||||||
@@ -881,7 +878,7 @@ fn main() {
|
|||||||
}],
|
}],
|
||||||
};
|
};
|
||||||
file_store.changed_text_document(params)?;
|
file_store.changed_text_document(params)?;
|
||||||
let file = file_store.file_map.lock().get(uri).unwrap().clone();
|
let file = file_store.file_map.read().get(uri).unwrap().clone();
|
||||||
assert_eq!(file.tree.unwrap().root_node().to_sexp(), "(source_file (attribute_item (attribute (identifier) arguments: (token_tree (identifier)))) (struct_item name: (type_identifier) body: (field_declaration_list (field_declaration name: (field_identifier) type: (primitive_type)) (field_declaration name: (field_identifier) type: (primitive_type)))) (impl_item type: (type_identifier) body: (declaration_list (function_item name: (identifier) parameters: (parameters (self_parameter (self))) return_type: (primitive_type) body: (block (binary_expression left: (field_expression value: (self) field: (field_identifier)) right: (field_expression value: (self) field: (field_identifier))))))) (function_item name: (identifier) parameters: (parameters) body: (block (let_declaration pattern: (identifier) value: (struct_expression name: (type_identifier) body: (field_initializer_list (field_initializer field: (field_identifier) value: (integer_literal)) (field_initializer field: (field_identifier) value: (integer_literal))))) (expression_statement (macro_invocation macro: (identifier) (token_tree (string_literal (string_content)) (identifier) (identifier) (token_tree)))))))");
|
assert_eq!(file.tree.unwrap().root_node().to_sexp(), "(source_file (attribute_item (attribute (identifier) arguments: (token_tree (identifier)))) (struct_item name: (type_identifier) body: (field_declaration_list (field_declaration name: (field_identifier) type: (primitive_type)) (field_declaration name: (field_identifier) type: (primitive_type)))) (impl_item type: (type_identifier) body: (declaration_list (function_item name: (identifier) parameters: (parameters (self_parameter (self))) return_type: (primitive_type) body: (block (binary_expression left: (field_expression value: (self) field: (field_identifier)) right: (field_expression value: (self) field: (field_identifier))))))) (function_item name: (identifier) parameters: (parameters) body: (block (let_declaration pattern: (identifier) value: (struct_expression name: (type_identifier) body: (field_initializer_list (field_initializer field: (field_identifier) value: (integer_literal)) (field_initializer field: (field_identifier) value: (integer_literal))))) (expression_statement (macro_invocation macro: (identifier) (token_tree (string_literal (string_content)) (identifier) (identifier) (token_tree)))))))");
|
||||||
|
|
||||||
// Test delete
|
// Test delete
|
||||||
@@ -906,7 +903,7 @@ fn main() {
|
|||||||
}],
|
}],
|
||||||
};
|
};
|
||||||
file_store.changed_text_document(params)?;
|
file_store.changed_text_document(params)?;
|
||||||
let file = file_store.file_map.lock().get(uri).unwrap().clone();
|
let file = file_store.file_map.read().get(uri).unwrap().clone();
|
||||||
assert_eq!(file.tree.unwrap().root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block (let_declaration pattern: (identifier) value: (struct_expression name: (type_identifier) body: (field_initializer_list (field_initializer field: (field_identifier) value: (integer_literal)) (field_initializer field: (field_identifier) value: (integer_literal))))) (expression_statement (macro_invocation macro: (identifier) (token_tree (string_literal (string_content)) (identifier) (identifier) (token_tree)))))))");
|
assert_eq!(file.tree.unwrap().root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block (let_declaration pattern: (identifier) value: (struct_expression name: (type_identifier) body: (field_initializer_list (field_initializer field: (field_identifier) value: (integer_literal)) (field_initializer field: (field_identifier) value: (integer_literal))))) (expression_statement (macro_invocation macro: (identifier) (token_tree (string_literal (string_content)) (identifier) (identifier) (token_tree)))))))");
|
||||||
|
|
||||||
// Test replace
|
// Test replace
|
||||||
@@ -922,7 +919,7 @@ fn main() {
|
|||||||
}],
|
}],
|
||||||
};
|
};
|
||||||
file_store.changed_text_document(params)?;
|
file_store.changed_text_document(params)?;
|
||||||
let file = file_store.file_map.lock().get(uri).unwrap().clone();
|
let file = file_store.file_map.read().get(uri).unwrap().clone();
|
||||||
assert_eq!(file.tree.unwrap().root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))");
|
assert_eq!(file.tree.unwrap().root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))");
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -50,7 +50,7 @@ async fn split_and_upsert_file(
|
|||||||
let chunks = {
|
let chunks = {
|
||||||
file_store
|
file_store
|
||||||
.file_map()
|
.file_map()
|
||||||
.lock()
|
.read()
|
||||||
.get(uri)
|
.get(uri)
|
||||||
.map(|f| splitter.split(f))
|
.map(|f| splitter.split(f))
|
||||||
};
|
};
|
||||||
@@ -184,7 +184,7 @@ impl PostgresML {
|
|||||||
let chunks: Vec<Vec<Chunk>> = match file_uris
|
let chunks: Vec<Vec<Chunk>> = match file_uris
|
||||||
.iter()
|
.iter()
|
||||||
.map(|uri| {
|
.map(|uri| {
|
||||||
let file_store = task_file_store.file_map().lock();
|
let file_store = task_file_store.file_map().read();
|
||||||
let file = file_store
|
let file = file_store
|
||||||
.get(uri)
|
.get(uri)
|
||||||
.with_context(|| format!("getting file for splitting: {uri}"))?;
|
.with_context(|| format!("getting file for splitting: {uri}"))?;
|
||||||
|
|||||||
@@ -226,7 +226,8 @@ impl VS {
|
|||||||
if acc.is_empty() {
|
if acc.is_empty() {
|
||||||
acc.insert(score, chunk);
|
acc.insert(score, chunk);
|
||||||
} else if acc.first_key_value().unwrap().0 < &score {
|
} else if acc.first_key_value().unwrap().0 < &score {
|
||||||
if acc.len() == limit {
|
// We want to get limit + 1 here in case the limit is 1 and then we filter the chunk out later
|
||||||
|
if acc.len() == limit + 1 {
|
||||||
acc.pop_first();
|
acc.pop_first();
|
||||||
}
|
}
|
||||||
acc.insert(score, chunk);
|
acc.insert(score, chunk);
|
||||||
@@ -395,7 +396,7 @@ impl MemoryBackend for VectorStore {
|
|||||||
let uri = params.text_document.uri.to_string();
|
let uri = params.text_document.uri.to_string();
|
||||||
self.file_store.opened_text_document(params)?;
|
self.file_store.opened_text_document(params)?;
|
||||||
|
|
||||||
let file_map = self.file_store.file_map().lock();
|
let file_map = self.file_store.file_map().read();
|
||||||
let file = file_map.get(&uri).context("file not found")?;
|
let file = file_map.get(&uri).context("file not found")?;
|
||||||
let chunks = self.splitter.split(file);
|
let chunks = self.splitter.split(file);
|
||||||
self.upsert_chunks(&uri, chunks);
|
self.upsert_chunks(&uri, chunks);
|
||||||
@@ -411,9 +412,11 @@ impl MemoryBackend for VectorStore {
|
|||||||
let uri = params.text_document.uri.to_string();
|
let uri = params.text_document.uri.to_string();
|
||||||
self.file_store.changed_text_document(params.clone())?;
|
self.file_store.changed_text_document(params.clone())?;
|
||||||
|
|
||||||
let file_map = self.file_store.file_map().lock();
|
let chunks = {
|
||||||
let file = file_map.get(&uri).context("file not found")?;
|
let file_map = self.file_store.file_map().read();
|
||||||
let chunks = self.splitter.split(file);
|
let file = file_map.get(&uri).context("file not found")?;
|
||||||
|
self.splitter.split(file)
|
||||||
|
};
|
||||||
let chunks_size = chunks.len();
|
let chunks_size = chunks.len();
|
||||||
|
|
||||||
// This is not as efficient as it could be, but it is ok for now
|
// This is not as efficient as it could be, but it is ok for now
|
||||||
|
|||||||
Reference in New Issue
Block a user