diff --git a/Cargo.lock b/Cargo.lock
index 8ce268a..69ad672 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -98,7 +98,7 @@ dependencies = [
  "bitflags 2.4.2",
  "cexpr",
  "clang-sys",
- "itertools 0.12.1",
+ "itertools",
  "lazy_static",
  "lazycell",
  "log",
@@ -566,15 +566,6 @@ dependencies = [
  "either",
 ]
 
-[[package]]
-name = "itertools"
-version = "0.12.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
-dependencies = [
- "either",
-]
-
 [[package]]
 name = "itoa"
 version = "1.0.10"
@@ -628,9 +619,8 @@ checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"
 
 [[package]]
 name = "llama-cpp-2"
-version = "0.1.27"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "747243ba163eb361f5d6d483a177450240ce6ca70cefcb7c489f6333e9fe4300"
+version = "0.1.25"
+source = "git+https://github.com/SilasMarvin/llama-cpp-rs?branch=silas-8-metal-on-mac#8c61f584e7aa200581b711147e685821190aa025"
 dependencies = [
  "llama-cpp-sys-2",
  "thiserror",
@@ -639,9 +629,8 @@ dependencies = [
 
 [[package]]
 name = "llama-cpp-sys-2"
-version = "0.1.27"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3844a3f833eca795309fec9223a316ced1cebeff9c5cfce5ee760825040d281f"
+version = "0.1.25"
+source = "git+https://github.com/SilasMarvin/llama-cpp-rs?branch=silas-8-metal-on-mac#8c61f584e7aa200581b711147e685821190aa025"
 dependencies = [
  "bindgen",
  "cc",
@@ -685,8 +674,6 @@ dependencies = [
 [[package]]
 name = "lsp-server"
 version = "0.7.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "248f65b78f6db5d8e1b1604b4098a28b43d21a8eb1deeca22b1c421b276c7095"
 dependencies = [
  "crossbeam-channel",
  "log",
@@ -1011,7 +998,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9"
 dependencies = [
  "either",
- "itertools 0.11.0",
+ "itertools",
  "rayon",
 ]
 
@@ -1387,7 +1374,7 @@ dependencies = [
  "esaxx-rs",
  "getrandom",
  "indicatif",
- "itertools 0.11.0",
+ "itertools",
  "lazy_static",
  "log",
  "macro_rules_attribute",
diff --git a/Cargo.toml b/Cargo.toml
index 943172c..6bd0bba 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,7 +7,8 @@ edition = "2021"
 
 [dependencies]
 anyhow = "1.0.75"
-lsp-server = "0.7.4"
+# lsp-server = "0.7.4"
+lsp-server = { path = "../rust-analyzer/lib/lsp-server" }
 lsp-types = "0.94.1"
 ropey = "1.6.1"
 serde = "1.0.190"
@@ -18,7 +19,8 @@ tokenizers = "0.14.1"
 parking_lot = "0.12.1"
 once_cell = "1.19.0"
 directories = "5.0.1"
-llama-cpp-2 = "0.1.27"
+# llama-cpp-2 = "0.1.27"
+llama-cpp-2 = { path = "../llama-cpp-rs/llama-cpp-2" }
 
 [features]
 default = []
diff --git a/src/configuration.rs b/src/configuration.rs
index d8593e7..08a26f0 100644
--- a/src/configuration.rs
+++ b/src/configuration.rs
@@ -1,6 +1,6 @@
 use anyhow::{Context, Result};
 use serde::Deserialize;
-use serde_json::Value;
+use serde_json::{json, Value};
 use std::collections::HashMap;
 
 #[cfg(target_os = "macos")]
@@ -21,7 +21,6 @@ pub enum ValidTransformerBackend {
     PostgresML,
 }
 
-// TODO: Review this for real lol
 #[derive(Clone, Deserialize)]
 pub struct FIM {
     pub start: String,
@@ -49,6 +48,14 @@ struct ValidMemoryConfiguration {
     file_store: Option<Value>,
 }
 
+impl Default for ValidMemoryConfiguration {
+    fn default() -> Self {
+        Self {
+            file_store: Some(json!({})),
+        }
+    }
+}
+
 #[derive(Clone, Deserialize)]
 struct ChatMessages {
     role: String,
@@ -84,17 +91,52 @@ struct ModelGGUF {
     kwargs: Kwargs,
 }
 
+impl Default for ModelGGUF {
+    fn default() -> Self {
+        Self {
+            model: Model {
+                repository: "stabilityai/stable-code-3b".to_string(),
+                name: Some("stable-code-3b-Q5_K_M.gguf".to_string()),
+            },
+            fim: Some(FIM {
+                start: "<fim_prefix>".to_string(),
+                middle: "<fim_suffix>".to_string(),
+                end: "<fim_middle>".to_string(),
+            }),
+            max_new_tokens: MaxNewTokens::default(),
+            chat: None,
+            kwargs: Kwargs::default(),
+        }
+    }
+}
+
 #[derive(Clone, Deserialize)]
 struct ValidMacTransformerConfiguration {
     model_gguf: Option<ModelGGUF>,
 }
 
+impl Default for ValidMacTransformerConfiguration {
+    fn default() -> Self {
+        Self {
+            model_gguf: Some(ModelGGUF::default()),
+        }
+    }
+}
+
 #[derive(Clone, Deserialize)]
 struct ValidLinuxTransformerConfiguration {
     model_gguf: Option<ModelGGUF>,
 }
 
-#[derive(Clone, Deserialize)]
+impl Default for ValidLinuxTransformerConfiguration {
+    fn default() -> Self {
+        Self {
+            model_gguf: Some(ModelGGUF::default()),
+        }
+    }
+}
+
+#[derive(Clone, Deserialize, Default)]
 struct ValidConfiguration {
     memory: ValidMemoryConfiguration,
     #[cfg(target_os = "macos")]
@@ -115,10 +157,11 @@ impl Configuration {
         let configuration_args = args
             .as_object_mut()
             .context("Server configuration must be a JSON object")?
-            .remove("initializationOptions")
-            .unwrap_or_default();
-        let valid_args: ValidConfiguration = serde_json::from_value(configuration_args)?;
-        // TODO: Make sure they only specified one model or something ya know
+            .remove("initializationOptions");
+        let valid_args = match configuration_args {
+            Some(configuration_args) => serde_json::from_value(configuration_args)?,
+            None => ValidConfiguration::default(),
+        };
         Ok(Self {
             valid_config: valid_args,
         })
@@ -192,7 +235,6 @@ impl Configuration {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use serde_json::json;
 
     #[test]
     fn custom_mac_gguf_model() {
@@ -239,7 +281,6 @@ mod tests {
                     ]
                 },
                 "n_ctx": 2048,
-                "n_threads": 8,
                 "n_gpu_layers": 35,
             }
         },
diff --git a/src/transformer_backends/llama_cpp/mod.rs b/src/transformer_backends/llama_cpp/mod.rs
index 472986c..037f9af 100644
--- a/src/transformer_backends/llama_cpp/mod.rs
+++ b/src/transformer_backends/llama_cpp/mod.rs
@@ -47,12 +47,14 @@ impl TransformerBackend for LlamaCPP {
 
     fn do_generate(&self, prompt: &str) -> anyhow::Result<DoGenerateResponse> {
         let max_new_tokens = self.configuration.get_max_new_tokens().generation;
-        unimplemented!()
+        self.model
+            .complete(prompt, max_new_tokens)
+            .map(|generated_text| DoGenerateResponse { generated_text })
     }
 
     fn do_generate_stream(
         &self,
-        request: &GenerateStreamRequest,
+        _request: &GenerateStreamRequest,
     ) -> anyhow::Result<DoGenerateStreamResponse> {
         unimplemented!()
     }
diff --git a/src/transformer_backends/llama_cpp/model.rs b/src/transformer_backends/llama_cpp/model.rs
index a614e40..d98adb6 100644
--- a/src/transformer_backends/llama_cpp/model.rs
+++ b/src/transformer_backends/llama_cpp/model.rs
@@ -7,21 +7,20 @@ use llama_cpp_2::{
     model::{params::LlamaModelParams, AddBos, LlamaModel},
     token::data_array::LlamaTokenDataArray,
 };
+use once_cell::sync::Lazy;
 use std::{num::NonZeroU32, path::PathBuf, time::Duration};
 
 use crate::configuration::Kwargs;
 
+static BACKEND: Lazy<LlamaBackend> = Lazy::new(|| LlamaBackend::init().unwrap());
+
 pub struct Model {
-    backend: LlamaBackend,
     model: LlamaModel,
     n_ctx: NonZeroU32,
 }
 
 impl Model {
     pub fn new(model_path: PathBuf, kwargs: &Kwargs) -> anyhow::Result<Self> {
-        // Init the backend
-        let backend = LlamaBackend::init()?;
-
         // Get n_gpu_layers if set in kwargs
         // As a default we set it to 1000, which should put all layers on the GPU
         let n_gpu_layers = kwargs
@@ -43,7 +42,7 @@ impl Model {
 
         // Load the model
         eprintln!("SETTING MODEL AT PATH: {:?}", model_path);
-        let model = LlamaModel::load_from_file(&backend, model_path, &model_params)?;
+        let model = LlamaModel::load_from_file(&BACKEND, model_path, &model_params)?;
         eprintln!("\nMODEL SET\n");
 
         // Get n_ctx if set in kwargs
@@ -58,11 +57,7 @@ impl Model {
             .unwrap_or_else(|| Ok(NonZeroU32::new(2048)))?
             .context("n_ctx must not be zero")?;
 
-        Ok(Model {
-            backend,
-            model,
-            n_ctx,
-        })
+        Ok(Model { model, n_ctx })
     }
 
     pub fn complete(&self, prompt: &str, max_new_tokens: usize) -> anyhow::Result<String> {
@@ -71,7 +66,7 @@
 
         let mut ctx = self
             .model
-            .new_context(&self.backend, ctx_params)
+            .new_context(&BACKEND, ctx_params)
             .with_context(|| "unable to create the llama_context")?;
 
         let tokens_list = self
diff --git a/src/worker.rs b/src/worker.rs
index a29cd94..641fdd5 100644
--- a/src/worker.rs
+++ b/src/worker.rs
@@ -142,7 +142,7 @@ impl Worker {
             .memory_backend
             .lock()
             .build_prompt(&request.params.text_document_position)?;
-        eprintln!("\n\n****************{}***************\n\n", prompt);
+        eprintln!("\nPROMPT*************\n{}\n************\n", prompt);
         let response = self.transformer_backend.do_generate(&prompt)?;
         let result = GenerateResult {
             generated_text: response.generated_text,
diff --git a/test.json b/test.json
deleted file mode 100644
index 58ac9e8..0000000
--- a/test.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-    "macos": {
-        "model_gguf": {
-            "repository": "deepseek-coder-6.7b-base",
-            "name": "Q4_K_M.gguf",
-            "fim": false,
-            "n_ctx": 2048,
-            "n_threads": 8,
-            "n_gpu_layers": 35
-        }
-    },
-    "linux": {
-        "model_gptq": {
-            "repository": "theblokesomething",
-            "name": "some q5 or something"
-        }
-    }
-}