mirror of https://github.com/SilasMarvin/lsp-ai.git
Added config option for llama_cpp models
@@ -81,13 +81,6 @@ pub struct FileStore {
     pub crawl: bool,
 }
 
-#[derive(Clone, Debug, Deserialize)]
-#[serde(deny_unknown_fields)]
-pub struct Model {
-    pub repository: String,
-    pub name: Option<String>,
-}
-
 const fn n_gpu_layers_default() -> u32 {
     1000
 }
@@ -106,6 +99,7 @@ pub struct MistralFIM {
     pub fim_endpoint: Option<String>,
     // The model name
     pub model: String,
+    // The maximum requests per second
     #[serde(default = "max_requests_per_second_default")]
     pub max_requests_per_second: f32,
 }
@@ -113,13 +107,17 @@ pub struct MistralFIM {
 #[derive(Clone, Debug, Deserialize)]
 #[serde(deny_unknown_fields)]
 pub struct LLaMACPP {
-    // The model to use
-    #[serde(flatten)]
-    pub model: Model,
+    // Which model to use
+    pub repository: Option<String>,
+    pub name: Option<String>,
+    pub file_path: Option<String>,
+    // The layers to put on the GPU
     #[serde(default = "n_gpu_layers_default")]
     pub n_gpu_layers: u32,
+    // The context size
     #[serde(default = "n_ctx_default")]
     pub n_ctx: u32,
+    // The maximum requests per second
     #[serde(default = "max_requests_per_second_default")]
     pub max_requests_per_second: f32,
 }
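
The old flattened `Model` struct already put `repository` and `name` at the top level of the `llama_cpp` config; this hunk inlines them as optional fields and adds `file_path`, so a local GGUF file can be used instead of a Hub download. A minimal sketch of how the three fields deserialize (the struct mirrors only the model-selection fields from the diff above; the repository, file name, and path values are made up for illustration):

use serde::Deserialize;

// Mirrors only the model-selection fields added in the diff above.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct ModelSource {
    repository: Option<String>,
    name: Option<String>,
    file_path: Option<String>,
}

fn main() -> Result<(), serde_json::Error> {
    // Hub download: `repository` plus `name` (values are hypothetical).
    let hub: ModelSource = serde_json::from_str(
        r#"{"repository": "stabilityai/stable-code-3b", "name": "model.gguf"}"#,
    )?;
    // Local file: `file_path` alone (path is hypothetical).
    let local: ModelSource =
        serde_json::from_str(r#"{"file_path": "/models/stable-code-3b.gguf"}"#)?;
    println!("{hub:?}");
    println!("{local:?}");
    Ok(())
}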
@@ -129,6 +127,7 @@ pub struct LLaMACPP {
 pub struct OpenAI {
     // The auth token env var name
     pub auth_token_env_var_name: Option<String>,
+    // The auth token
     pub auth_token: Option<String>,
     // The completions endpoint
     pub completions_endpoint: Option<String>,
@@ -9,7 +9,6 @@ use crate::{
     },
     utils::format_chat_messages,
 };
-use anyhow::Context;
 use hf_hub::api::sync::ApiBuilder;
 use serde::Deserialize;
 use serde_json::Value;
@@ -41,15 +40,22 @@ pub struct LLaMACPP {
 impl LLaMACPP {
     #[instrument]
     pub fn new(configuration: config::LLaMACPP) -> anyhow::Result<Self> {
+        let model_path = match (
+            &configuration.file_path,
+            &configuration.repository,
+            &configuration.name,
+        ) {
+            (Some(file_path), _, _) => std::path::PathBuf::from(file_path),
+            (_, Some(repository), Some(name)) => {
                 let api = ApiBuilder::new().with_progress(true).build()?;
-        let name = configuration
-            .model
-            .name
-            .as_ref()
-            .context("Please set `name` to use LLaMA.cpp")?;
-        error!("Loading in: {} - {}\nIf this model has not been loaded before it may take a few minutes to download it. Please hangtight.", configuration.model.repository, name);
-        let repo = api.model(configuration.model.repository.to_owned());
-        let model_path = repo.get(name)?;
+                error!("Loading in: {} - {}\nIf this model has not been loaded before it may take a few minutes to download it. Please hangtight.", repository, name);
+                let repo = api.model(repository.clone());
+                repo.get(&name)?
+            }
+            _ => anyhow::bail!(
+                "To use llama.cpp provide either `file_path` or `repository` and `name`"
+            ),
+        };
         let model = Model::new(model_path, &configuration)?;
         Ok(Self { model })
     }
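
The resolution order in the new `match` gives `file_path` priority over `repository`/`name`, and turns a missing configuration into an explicit error rather than the old `.context(...)` on a required `name`. A dependency-light restatement of that logic as a hypothetical helper (Hub download stubbed out; only `anyhow` assumed):

use std::path::PathBuf;

// Hypothetical helper restating the resolution order from the match above.
fn resolve_model_path(
    file_path: Option<&str>,
    repository: Option<&str>,
    name: Option<&str>,
) -> anyhow::Result<PathBuf> {
    match (file_path, repository, name) {
        // 1. An explicit local path wins, even when repository/name are also set.
        (Some(file_path), _, _) => Ok(PathBuf::from(file_path)),
        // 2. Both `repository` and `name` are required for a Hub download;
        //    the real code builds an hf_hub client here, stubbed in this sketch.
        (_, Some(repository), Some(name)) => {
            anyhow::bail!("stub: would download {name} from {repository}")
        }
        // 3. Anything else is a configuration error, matching the diff's message.
        _ => anyhow::bail!("To use llama.cpp provide either `file_path` or `repository` and `name`"),
    }
}

fn main() {
    assert!(resolve_model_path(Some("/models/m.gguf"), None, None).is_ok());
    assert!(resolve_model_path(None, Some("repo"), None).is_err());
}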