Mirror of https://github.com/SilasMarvin/lsp-ai.git
Using llamacpp with rust
Cargo.lock (generated): 27 lines changed
@@ -98,7 +98,7 @@ dependencies = [
  "bitflags 2.4.2",
  "cexpr",
  "clang-sys",
- "itertools 0.12.1",
+ "itertools",
  "lazy_static",
  "lazycell",
  "log",
@@ -566,15 +566,6 @@ dependencies = [
  "either",
 ]
 
-[[package]]
-name = "itertools"
-version = "0.12.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
-dependencies = [
- "either",
-]
-
 [[package]]
 name = "itoa"
 version = "1.0.10"
@@ -628,9 +619,8 @@ checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"
 
 [[package]]
 name = "llama-cpp-2"
-version = "0.1.27"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "747243ba163eb361f5d6d483a177450240ce6ca70cefcb7c489f6333e9fe4300"
+version = "0.1.25"
+source = "git+https://github.com/SilasMarvin/llama-cpp-rs?branch=silas-8-metal-on-mac#8c61f584e7aa200581b711147e685821190aa025"
 dependencies = [
  "llama-cpp-sys-2",
  "thiserror",
@@ -639,9 +629,8 @@ dependencies = [
 
 [[package]]
 name = "llama-cpp-sys-2"
-version = "0.1.27"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3844a3f833eca795309fec9223a316ced1cebeff9c5cfce5ee760825040d281f"
+version = "0.1.25"
+source = "git+https://github.com/SilasMarvin/llama-cpp-rs?branch=silas-8-metal-on-mac#8c61f584e7aa200581b711147e685821190aa025"
 dependencies = [
  "bindgen",
  "cc",
@@ -685,8 +674,6 @@ dependencies = [
 [[package]]
 name = "lsp-server"
 version = "0.7.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "248f65b78f6db5d8e1b1604b4098a28b43d21a8eb1deeca22b1c421b276c7095"
 dependencies = [
  "crossbeam-channel",
  "log",
@@ -1011,7 +998,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9"
 dependencies = [
  "either",
- "itertools 0.11.0",
+ "itertools",
  "rayon",
 ]
 
@@ -1387,7 +1374,7 @@ dependencies = [
  "esaxx-rs",
  "getrandom",
  "indicatif",
- "itertools 0.11.0",
+ "itertools",
  "lazy_static",
  "log",
  "macro_rules_attribute",

Cargo.toml
@@ -7,7 +7,8 @@ edition = "2021"
 
 [dependencies]
 anyhow = "1.0.75"
-lsp-server = "0.7.4"
+# lsp-server = "0.7.4"
+lsp-server = { path = "../rust-analyzer/lib/lsp-server" }
 lsp-types = "0.94.1"
 ropey = "1.6.1"
 serde = "1.0.190"
@@ -18,7 +19,8 @@ tokenizers = "0.14.1"
 parking_lot = "0.12.1"
 once_cell = "1.19.0"
 directories = "5.0.1"
-llama-cpp-2 = "0.1.27"
+# llama-cpp-2 = "0.1.27"
+llama-cpp-2 = { path = "../llama-cpp-rs/llama-cpp-2" }
 
 [features]
 default = []

@@ -1,6 +1,6 @@
 use anyhow::{Context, Result};
 use serde::Deserialize;
-use serde_json::Value;
+use serde_json::{json, Value};
 use std::collections::HashMap;
 
 #[cfg(target_os = "macos")]
@@ -21,7 +21,6 @@ pub enum ValidTransformerBackend {
     PostgresML,
 }
 
-// TODO: Review this for real lol
 #[derive(Clone, Deserialize)]
 pub struct FIM {
     pub start: String,
@@ -49,6 +48,14 @@ struct ValidMemoryConfiguration {
     file_store: Option<Value>,
 }
 
+impl Default for ValidMemoryConfiguration {
+    fn default() -> Self {
+        Self {
+            file_store: Some(json!({})),
+        }
+    }
+}
+
 #[derive(Clone, Deserialize)]
 struct ChatMessages {
     role: String,
@@ -84,17 +91,52 @@ struct ModelGGUF {
     kwargs: Kwargs,
 }
 
+impl Default for ModelGGUF {
+    fn default() -> Self {
+        Self {
+            model: Model {
+                repository: "stabilityai/stable-code-3b".to_string(),
+                name: Some("stable-code-3b-Q5_K_M.gguf".to_string()),
+            },
+            fim: Some(FIM {
+                start: "<fim_prefix>".to_string(),
+                middle: "<fim_suffix>".to_string(),
+                end: "<fim_middle>".to_string(),
+            }),
+            max_new_tokens: MaxNewTokens::default(),
+            chat: None,
+            kwargs: Kwargs::default(),
+        }
+    }
+}
+
 #[derive(Clone, Deserialize)]
 struct ValidMacTransformerConfiguration {
     model_gguf: Option<ModelGGUF>,
 }
 
+impl Default for ValidMacTransformerConfiguration {
+    fn default() -> Self {
+        Self {
+            model_gguf: Some(ModelGGUF::default()),
+        }
+    }
+}
+
 #[derive(Clone, Deserialize)]
 struct ValidLinuxTransformerConfiguration {
     model_gguf: Option<ModelGGUF>,
 }
 
-#[derive(Clone, Deserialize)]
+impl Default for ValidLinuxTransformerConfiguration {
+    fn default() -> Self {
+        Self {
+            model_gguf: Some(ModelGGUF::default()),
+        }
+    }
+}
+
+#[derive(Clone, Deserialize, Default)]
 struct ValidConfiguration {
     memory: ValidMemoryConfiguration,
     #[cfg(target_os = "macos")]
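
Note: the FIM defaults above presumably follow the stable-code prompt layout, where the assembled prompt is the prefix marker, the text before the cursor, the suffix marker, the text after the cursor, and finally the marker after which the model fills in the middle. That would also explain the field names: "middle" holds the suffix marker because it sits in the middle of the assembled prompt. A minimal illustrative sketch of that assembly (the real prompt building in lsp-ai lives elsewhere and is not part of this diff; Fim here is a stand-in for the FIM struct above):

// Hypothetical sketch: how FIM markers like the defaults above are typically
// combined into a fill-in-the-middle prompt. Not code from this repository.
struct Fim {
    start: String,
    middle: String,
    end: String,
}

fn build_fim_prompt(fim: &Fim, before_cursor: &str, after_cursor: &str) -> String {
    // <fim_prefix>{before}<fim_suffix>{after}<fim_middle>
    format!(
        "{}{}{}{}{}",
        fim.start, before_cursor, fim.middle, after_cursor, fim.end
    )
}

fn main() {
    let fim = Fim {
        start: "<fim_prefix>".to_string(),
        middle: "<fim_suffix>".to_string(),
        end: "<fim_middle>".to_string(),
    };
    println!(
        "{}",
        build_fim_prompt(&fim, "fn add(a: i32, b: i32) -> i32 {\n    ", "\n}")
    );
}
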
@@ -115,10 +157,11 @@ impl Configuration {
         let configuration_args = args
             .as_object_mut()
             .context("Server configuration must be a JSON object")?
-            .remove("initializationOptions")
-            .unwrap_or_default();
-        let valid_args: ValidConfiguration = serde_json::from_value(configuration_args)?;
-        // TODO: Make sure they only specified one model or something ya know
+            .remove("initializationOptions");
+        let valid_args = match configuration_args {
+            Some(configuration_args) => serde_json::from_value(configuration_args)?,
+            None => ValidConfiguration::default(),
+        };
         Ok(Self {
             valid_config: valid_args,
         })
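
The configuration constructor now distinguishes a missing initializationOptions from a present one: when the client sends nothing, the server falls back to the Default impls added above instead of trying to deserialize an empty value. A self-contained sketch of the same pattern (struct and field names here are illustrative, not taken from the repository):

// Illustrative only: mirrors the Option-plus-Default fallback used by the
// updated constructor above, with a made-up Options type.
use serde::Deserialize;
use serde_json::{json, Value};

#[derive(Debug, Default, Deserialize)]
struct Options {
    model: Option<String>,
}

fn parse_init_options(args: Option<Value>) -> anyhow::Result<Options> {
    Ok(match args {
        // The client sent options: they must deserialize cleanly.
        Some(args) => serde_json::from_value(args)?,
        // No initializationOptions at all: use the defaults.
        None => Options::default(),
    })
}

fn main() -> anyhow::Result<()> {
    assert!(parse_init_options(None)?.model.is_none());
    let opts = parse_init_options(Some(json!({ "model": "stable-code-3b" })))?;
    assert_eq!(opts.model.as_deref(), Some("stable-code-3b"));
    Ok(())
}
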
@@ -192,7 +235,6 @@ impl Configuration {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use serde_json::json;
 
     #[test]
     fn custom_mac_gguf_model() {
@@ -239,7 +281,6 @@ mod tests {
                 ]
             },
             "n_ctx": 2048,
-            "n_threads": 8,
             "n_gpu_layers": 35,
         }
     },

@@ -47,12 +47,14 @@ impl TransformerBackend for LlamaCPP {
 
     fn do_generate(&self, prompt: &str) -> anyhow::Result<DoGenerateResponse> {
         let max_new_tokens = self.configuration.get_max_new_tokens().generation;
-        unimplemented!()
+        self.model
+            .complete(prompt, max_new_tokens)
+            .map(|generated_text| DoGenerateResponse { generated_text })
     }
 
     fn do_generate_stream(
         &self,
-        request: &GenerateStreamRequest,
+        _request: &GenerateStreamRequest,
     ) -> anyhow::Result<DoGenerateStreamResponse> {
         unimplemented!()
     }

@@ -7,21 +7,20 @@ use llama_cpp_2::{
     model::{params::LlamaModelParams, AddBos, LlamaModel},
     token::data_array::LlamaTokenDataArray,
 };
+use once_cell::sync::Lazy;
 use std::{num::NonZeroU32, path::PathBuf, time::Duration};
 
 use crate::configuration::Kwargs;
 
+static BACKEND: Lazy<LlamaBackend> = Lazy::new(|| LlamaBackend::init().unwrap());
+
 pub struct Model {
-    backend: LlamaBackend,
     model: LlamaModel,
     n_ctx: NonZeroU32,
 }
 
 impl Model {
     pub fn new(model_path: PathBuf, kwargs: &Kwargs) -> anyhow::Result<Self> {
-        // Init the backend
-        let backend = LlamaBackend::init()?;
-
         // Get n_gpu_layers if set in kwargs
         // As a default we set it to 1000, which should put all layers on the GPU
         let n_gpu_layers = kwargs
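
The hunk above is cut off mid-expression at "let n_gpu_layers = kwargs", so the actual lookup is not visible here. Assuming Kwargs behaves like a map of JSON values (it is defined in the configuration module, not in this diff), the default-to-1000 logic the comments describe could look roughly like this hypothetical sketch:

// Hypothetical sketch only: the real Kwargs type and the rest of the expression
// are not shown in this diff. Kwargs is assumed here to be a JSON-value map.
use std::collections::HashMap;

use serde_json::Value;

type Kwargs = HashMap<String, Value>;

fn n_gpu_layers(kwargs: &Kwargs) -> u32 {
    kwargs
        .get("n_gpu_layers")
        .and_then(Value::as_u64)
        // 1000 is effectively "offload every layer" for any current model
        .unwrap_or(1000) as u32
}

fn main() {
    let mut kwargs = Kwargs::new();
    assert_eq!(n_gpu_layers(&kwargs), 1000);
    kwargs.insert("n_gpu_layers".to_string(), Value::from(35));
    assert_eq!(n_gpu_layers(&kwargs), 35);
}
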
@@ -43,7 +42,7 @@ impl Model {
 
         // Load the model
         eprintln!("SETTING MODEL AT PATH: {:?}", model_path);
-        let model = LlamaModel::load_from_file(&backend, model_path, &model_params)?;
+        let model = LlamaModel::load_from_file(&BACKEND, model_path, &model_params)?;
         eprintln!("\nMODEL SET\n");
 
         // Get n_ctx if set in kwargs
@@ -58,11 +57,7 @@ impl Model {
             .unwrap_or_else(|| Ok(NonZeroU32::new(2048)))?
             .context("n_ctx must not be zero")?;
 
-        Ok(Model {
-            backend,
-            model,
-            n_ctx,
-        })
+        Ok(Model { model, n_ctx })
     }
 
     pub fn complete(&self, prompt: &str, max_new_tokens: usize) -> anyhow::Result<String> {
@@ -71,7 +66,7 @@ impl Model {
 
         let mut ctx = self
            .model
-            .new_context(&self.backend, ctx_params)
+            .new_context(&BACKEND, ctx_params)
            .with_context(|| "unable to create the llama_context")?;
 
         let tokens_list = self

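Taken together, the hunks above in the llama.cpp model wrapper replace the per-Model LlamaBackend field with a process-wide static initialized through once_cell::sync::Lazy, so the backend is set up exactly once no matter how many models or contexts are created. A minimal sketch of that pattern with a stand-in type (deliberately not using the llama_cpp_2 API):

// Minimal sketch of the Lazy-static pattern adopted above. Backend is a stand-in
// type; in the real code it is llama_cpp_2's LlamaBackend.
use once_cell::sync::Lazy;

struct Backend {
    id: u32,
}

impl Backend {
    fn init() -> Self {
        // Runs at most once per process, on first use of BACKEND.
        println!("backend initialized");
        Backend { id: 42 }
    }
}

static BACKEND: Lazy<Backend> = Lazy::new(|| Backend::init());

fn main() {
    // Both reads hit the same instance; init() is not run a second time.
    println!("{}", BACKEND.id);
    println!("{}", BACKEND.id);
}

One trade-off visible in the diff: the static is built with LlamaBackend::init().unwrap(), so a backend initialization failure now panics instead of surfacing as the anyhow error that Model::new used to return.
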
@@ -142,7 +142,7 @@ impl Worker {
             .memory_backend
             .lock()
             .build_prompt(&request.params.text_document_position)?;
-        eprintln!("\n\n****************{}***************\n\n", prompt);
+        eprintln!("\nPROMPT*************\n{}\n************\n", prompt);
         let response = self.transformer_backend.do_generate(&prompt)?;
         let result = GenerateResult {
             generated_text: response.generated_text,

test.json: 18 lines changed (file deleted)
@@ -1,18 +0,0 @@
-{
-  "macos": {
-    "model_gguf": {
-      "repository": "deepseek-coder-6.7b-base",
-      "name": "Q4_K_M.gguf",
-      "fim": false,
-      "n_ctx": 2048,
-      "n_threads": 8,
-      "n_gpu_layers": 35
-    }
-  },
-  "linux": {
-    "model_gptq": {
-      "repository": "theblokesomething",
-      "name": "some q5 or something"
-    }
-  }
-}