Merge pull request #10 from SilasMarvin/silas-llamacpp-custom-file

Added file_path config option for llama_cpp models
Silas Marvin
2024-06-09 08:06:47 -07:00
committed by GitHub
2 changed files with 25 additions and 20 deletions


@@ -81,13 +81,6 @@ pub struct FileStore
     pub crawl: bool,
 }

-#[derive(Clone, Debug, Deserialize)]
-#[serde(deny_unknown_fields)]
-pub struct Model {
-    pub repository: String,
-    pub name: Option<String>,
-}
-
 const fn n_gpu_layers_default() -> u32 {
     1000
 }
@@ -106,6 +99,7 @@ pub struct MistralFIM
     pub fim_endpoint: Option<String>,
     // The model name
     pub model: String,
+    // The maximum requests per second
     #[serde(default = "max_requests_per_second_default")]
     pub max_requests_per_second: f32,
 }
@@ -113,13 +107,17 @@ pub struct MistralFIM
 #[derive(Clone, Debug, Deserialize)]
 #[serde(deny_unknown_fields)]
 pub struct LLaMACPP {
-    // The model to use
-    #[serde(flatten)]
-    pub model: Model,
+    // Which model to use
+    pub repository: Option<String>,
+    pub name: Option<String>,
+    pub file_path: Option<String>,
+    // The layers to put on the GPU
     #[serde(default = "n_gpu_layers_default")]
     pub n_gpu_layers: u32,
+    // The context size
     #[serde(default = "n_ctx_default")]
     pub n_ctx: u32,
+    // The maximum requests per second
     #[serde(default = "max_requests_per_second_default")]
     pub max_requests_per_second: f32,
 }
@@ -129,6 +127,7 @@ pub struct LLaMACPP
 pub struct OpenAI {
     // The auth token env var name
     pub auth_token_env_var_name: Option<String>,
+    // The auth token
     pub auth_token: Option<String>,
     // The completions endpoint
     pub completions_endpoint: Option<String>,
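
The net effect of the config change above is that a llama.cpp model can now be described either by a local file_path or by the existing repository + name pair; the backend change below consumes these fields. As a rough illustration of how the reworked struct deserializes, here is a trimmed, standalone copy of the fields (not the crate's actual code; the model paths and repository names are made up, and it assumes serde with the derive feature plus serde_json):

// Standalone sketch: a trimmed-down copy of the LLaMACPP config struct above,
// showing the two ways of pointing at a model. Requires serde (derive) and serde_json.
use serde::Deserialize;

#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct LLaMACPP {
    repository: Option<String>,
    name: Option<String>,
    file_path: Option<String>,
    #[serde(default = "n_gpu_layers_default")]
    n_gpu_layers: u32,
}

const fn n_gpu_layers_default() -> u32 {
    1000
}

fn main() -> Result<(), serde_json::Error> {
    // New option: point directly at a GGUF file on disk (path is made up).
    let local: LLaMACPP = serde_json::from_str(
        r#"{ "file_path": "/models/example.Q4_K_M.gguf", "n_gpu_layers": 35 }"#,
    )?;
    // Existing option: a Hugging Face repository + file name pair (names are made up).
    let remote: LLaMACPP = serde_json::from_str(
        r#"{ "repository": "some-org/some-model-GGUF", "name": "some-model.Q4_K_M.gguf" }"#,
    )?;
    println!("{local:?}\n{remote:?}");
    Ok(())
}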


@@ -9,7 +9,6 @@ use crate::{
     },
     utils::format_chat_messages,
 };
-use anyhow::Context;
 use hf_hub::api::sync::ApiBuilder;
 use serde::Deserialize;
 use serde_json::Value;
@@ -41,15 +40,22 @@ pub struct LLaMACPP
 impl LLaMACPP {
     #[instrument]
     pub fn new(configuration: config::LLaMACPP) -> anyhow::Result<Self> {
-        let api = ApiBuilder::new().with_progress(true).build()?;
-        let name = configuration
-            .model
-            .name
-            .as_ref()
-            .context("Please set `name` to use LLaMA.cpp")?;
-        error!("Loading in: {} - {}\nIf this model has not been loaded before it may take a few minutes to download it. Please hangtight.", configuration.model.repository, name);
-        let repo = api.model(configuration.model.repository.to_owned());
-        let model_path = repo.get(name)?;
+        let model_path = match (
+            &configuration.file_path,
+            &configuration.repository,
+            &configuration.name,
+        ) {
+            (Some(file_path), _, _) => std::path::PathBuf::from(file_path),
+            (_, Some(repository), Some(name)) => {
+                let api = ApiBuilder::new().with_progress(true).build()?;
+                error!("Loading in: {} - {}\nIf this model has not been loaded before it may take a few minutes to download it. Please hangtight.", repository, name);
+                let repo = api.model(repository.clone());
+                repo.get(&name)?
+            }
+            _ => anyhow::bail!(
+                "To use llama.cpp provide either `file_path` or `repository` and `name`"
+            ),
+        };
         let model = Model::new(model_path, &configuration)?;
         Ok(Self { model })
     }
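
For reference, a standalone sketch of the precedence the new match in LLaMACPP::new implements: file_path short-circuits any download, repository + name fall back to fetching from the hub, and anything else is a configuration error. The hub download is replaced by a stand-in path here so the snippet runs on its own; the cache location shown is not the real one.

// Sketch only: mirrors the precedence above, with the hub download stubbed out.
use std::path::PathBuf;

fn resolve_model_path(
    file_path: Option<&str>,
    repository: Option<&str>,
    name: Option<&str>,
) -> Result<PathBuf, String> {
    match (file_path, repository, name) {
        // A local file always wins, even if repository/name are also set.
        (Some(file_path), _, _) => Ok(PathBuf::from(file_path)),
        // Otherwise both repository and name are required to download.
        (_, Some(repository), Some(name)) => Ok(PathBuf::from(format!(
            "stand-in-cache/{repository}/{name}" // stand-in for `repo.get(name)?`
        ))),
        _ => Err(
            "To use llama.cpp provide either `file_path` or `repository` and `name`".into(),
        ),
    }
}

fn main() {
    assert!(resolve_model_path(Some("/models/model.gguf"), None, None).is_ok());
    assert!(resolve_model_path(None, Some("some/repo"), Some("model.gguf")).is_ok());
    assert!(resolve_model_path(None, Some("some/repo"), None).is_err());
}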