diff --git a/Cargo.lock b/Cargo.lock
index 8ce268a..69ad672 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -98,7 +98,7 @@ dependencies = [
  "bitflags 2.4.2",
  "cexpr",
  "clang-sys",
- "itertools 0.12.1",
+ "itertools",
  "lazy_static",
  "lazycell",
  "log",
@@ -566,15 +566,6 @@ dependencies = [
  "either",
 ]
 
-[[package]]
-name = "itertools"
-version = "0.12.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
-dependencies = [
- "either",
-]
-
 [[package]]
 name = "itoa"
 version = "1.0.10"
@@ -628,9 +619,8 @@ checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"
 
 [[package]]
 name = "llama-cpp-2"
-version = "0.1.27"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "747243ba163eb361f5d6d483a177450240ce6ca70cefcb7c489f6333e9fe4300"
+version = "0.1.25"
+source = "git+https://github.com/SilasMarvin/llama-cpp-rs?branch=silas-8-metal-on-mac#8c61f584e7aa200581b711147e685821190aa025"
 dependencies = [
  "llama-cpp-sys-2",
  "thiserror",
@@ -639,9 +629,8 @@ dependencies = [
 
 [[package]]
 name = "llama-cpp-sys-2"
-version = "0.1.27"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3844a3f833eca795309fec9223a316ced1cebeff9c5cfce5ee760825040d281f"
+version = "0.1.25"
+source = "git+https://github.com/SilasMarvin/llama-cpp-rs?branch=silas-8-metal-on-mac#8c61f584e7aa200581b711147e685821190aa025"
 dependencies = [
  "bindgen",
  "cc",
@@ -685,8 +674,6 @@ dependencies = [
 [[package]]
 name = "lsp-server"
 version = "0.7.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "248f65b78f6db5d8e1b1604b4098a28b43d21a8eb1deeca22b1c421b276c7095"
 dependencies = [
  "crossbeam-channel",
  "log",
@@ -1011,7 +998,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9"
 dependencies = [
  "either",
- "itertools 0.11.0",
+ "itertools",
  "rayon",
 ]
 
@@ -1387,7 +1374,7 @@ dependencies = [
  "esaxx-rs",
  "getrandom",
  "indicatif",
- "itertools 0.11.0",
+ "itertools",
  "lazy_static",
  "log",
  "macro_rules_attribute",
diff --git a/Cargo.toml b/Cargo.toml
index 943172c..6bd0bba 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,7 +7,8 @@ edition = "2021"
 
 [dependencies]
 anyhow = "1.0.75"
-lsp-server = "0.7.4"
+# lsp-server = "0.7.4"
+lsp-server = { path = "../rust-analyzer/lib/lsp-server" }
 lsp-types = "0.94.1"
 ropey = "1.6.1"
 serde = "1.0.190"
@@ -18,7 +19,8 @@ tokenizers = "0.14.1"
 parking_lot = "0.12.1"
 once_cell = "1.19.0"
 directories = "5.0.1"
-llama-cpp-2 = "0.1.27"
+# llama-cpp-2 = "0.1.27"
+llama-cpp-2 = { path = "../llama-cpp-rs/llama-cpp-2" }
 
 [features]
 default = []
diff --git a/src/configuration.rs b/src/configuration.rs
index d8593e7..08a26f0 100644
--- a/src/configuration.rs
+++ b/src/configuration.rs
@@ -1,6 +1,6 @@
 use anyhow::{Context, Result};
 use serde::Deserialize;
-use serde_json::Value;
+use serde_json::{json, Value};
 use std::collections::HashMap;
 
 #[cfg(target_os = "macos")]
@@ -21,7 +21,6 @@ pub enum ValidTransformerBackend {
     PostgresML,
 }
 
-// TODO: Review this for real lol
 #[derive(Clone, Deserialize)]
 pub struct FIM {
     pub start: String,
@@ -49,6 +48,14 @@ struct ValidMemoryConfiguration {
     file_store: Option<Value>,
 }
 
+impl Default for ValidMemoryConfiguration {
+    fn default() -> Self {
+        Self {
+            file_store: Some(json!({})),
+        }
+    }
+}
+
 #[derive(Clone, Deserialize)]
 struct ChatMessages {
     role: String,
@@ -84,17 +91,52 @@ struct ModelGGUF {
     kwargs: Kwargs,
 }
 
+impl Default for ModelGGUF {
+    fn default() -> Self {
+        Self {
+            model: Model {
+                repository: "stabilityai/stable-code-3b".to_string(),
+                name: Some("stable-code-3b-Q5_K_M.gguf".to_string()),
+            },
+            fim: Some(FIM {
+                start: "<fim_prefix>".to_string(),
+                middle: "<fim_suffix>".to_string(),
+                end: "<fim_middle>".to_string(),
+            }),
+            max_new_tokens: MaxNewTokens::default(),
+            chat: None,
+            kwargs: Kwargs::default(),
+        }
+    }
+}
+
 #[derive(Clone, Deserialize)]
 struct ValidMacTransformerConfiguration {
     model_gguf: Option<ModelGGUF>,
 }
 
+impl Default for ValidMacTransformerConfiguration {
+    fn default() -> Self {
+        Self {
+            model_gguf: Some(ModelGGUF::default()),
+        }
+    }
+}
+
 #[derive(Clone, Deserialize)]
 struct ValidLinuxTransformerConfiguration {
     model_gguf: Option<ModelGGUF>,
 }
 
-#[derive(Clone, Deserialize)]
+impl Default for ValidLinuxTransformerConfiguration {
+    fn default() -> Self {
+        Self {
+            model_gguf: Some(ModelGGUF::default()),
+        }
+    }
+}
+
+#[derive(Clone, Deserialize, Default)]
 struct ValidConfiguration {
     memory: ValidMemoryConfiguration,
     #[cfg(target_os = "macos")]
@@ -115,10 +157,11 @@ impl Configuration {
         let configuration_args = args
             .as_object_mut()
             .context("Server configuration must be a JSON object")?
-            .remove("initializationOptions")
-            .unwrap_or_default();
-        let valid_args: ValidConfiguration = serde_json::from_value(configuration_args)?;
-        // TODO: Make sure they only specified one model or something ya know
+            .remove("initializationOptions");
+        let valid_args = match configuration_args {
+            Some(configuration_args) => serde_json::from_value(configuration_args)?,
+            None => ValidConfiguration::default(),
+        };
         Ok(Self {
             valid_config: valid_args,
         })
@@ -192,7 +235,6 @@ impl Configuration {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use serde_json::json;
 
     #[test]
     fn custom_mac_gguf_model() {
@@ -239,7 +281,6 @@ mod tests {
                     ]
                 },
                 "n_ctx": 2048,
-                "n_threads": 8,
                 "n_gpu_layers": 35,
             }
         },
diff --git a/src/transformer_backends/llama_cpp/mod.rs b/src/transformer_backends/llama_cpp/mod.rs
index 472986c..037f9af 100644
--- a/src/transformer_backends/llama_cpp/mod.rs
+++ b/src/transformer_backends/llama_cpp/mod.rs
@@ -47,12 +47,14 @@ impl TransformerBackend for LlamaCPP {
 
     fn do_generate(&self, prompt: &str) -> anyhow::Result<DoGenerateResponse> {
         let max_new_tokens = self.configuration.get_max_new_tokens().generation;
-        unimplemented!()
+        self.model
+            .complete(prompt, max_new_tokens)
+            .map(|generated_text| DoGenerateResponse { generated_text })
     }
 
     fn do_generate_stream(
         &self,
-        request: &GenerateStreamRequest,
+        _request: &GenerateStreamRequest,
     ) -> anyhow::Result<DoGenerateStreamResponse> {
         unimplemented!()
     }
diff --git a/src/transformer_backends/llama_cpp/model.rs b/src/transformer_backends/llama_cpp/model.rs
index a614e40..d98adb6 100644
--- a/src/transformer_backends/llama_cpp/model.rs
+++ b/src/transformer_backends/llama_cpp/model.rs
@@ -7,21 +7,20 @@ use llama_cpp_2::{
     model::{params::LlamaModelParams, AddBos, LlamaModel},
     token::data_array::LlamaTokenDataArray,
 };
+use once_cell::sync::Lazy;
 use std::{num::NonZeroU32, path::PathBuf, time::Duration};
 
 use crate::configuration::Kwargs;
 
+static BACKEND: Lazy<LlamaBackend> = Lazy::new(|| LlamaBackend::init().unwrap());
+
 pub struct Model {
-    backend: LlamaBackend,
     model: LlamaModel,
     n_ctx: NonZeroU32,
 }
 
 impl Model {
     pub fn new(model_path: PathBuf, kwargs: &Kwargs) -> anyhow::Result<Self> {
-        // Init the backend
-        let backend = LlamaBackend::init()?;
-
         // Get n_gpu_layers if set in kwargs
         // As a default we set it to 1000, which should put all layers on the GPU
         let n_gpu_layers = kwargs
@@ -43,7 +42,7 @@ impl Model {
 
         // Load the model
         eprintln!("SETTING MODEL AT PATH: {:?}", model_path);
-        let model = LlamaModel::load_from_file(&backend, model_path, &model_params)?;
+        let model = LlamaModel::load_from_file(&BACKEND, model_path, &model_params)?;
         eprintln!("\nMODEL SET\n");
 
         // Get n_ctx if set in kwargs
@@ -58,11 +57,7 @@ impl Model {
             .unwrap_or_else(|| Ok(NonZeroU32::new(2048)))?
             .context("n_ctx must not be zero")?;
 
-        Ok(Model {
-            backend,
-            model,
-            n_ctx,
-        })
+        Ok(Model { model, n_ctx })
     }
 
     pub fn complete(&self, prompt: &str, max_new_tokens: usize) -> anyhow::Result<String> {
@@ -71,7 +66,7 @@
 
         let mut ctx = self
             .model
-            .new_context(&self.backend, ctx_params)
+            .new_context(&BACKEND, ctx_params)
             .with_context(|| "unable to create the llama_context")?;
 
         let tokens_list = self
diff --git a/src/worker.rs b/src/worker.rs
index a29cd94..641fdd5 100644
--- a/src/worker.rs
+++ b/src/worker.rs
@@ -142,7 +142,7 @@ impl Worker {
             .memory_backend
             .lock()
             .build_prompt(&request.params.text_document_position)?;
-        eprintln!("\n\n****************{}***************\n\n", prompt);
+        eprintln!("\nPROMPT*************\n{}\n************\n", prompt);
         let response = self.transformer_backend.do_generate(&prompt)?;
         let result = GenerateResult {
             generated_text: response.generated_text,
diff --git a/test.json b/test.json
deleted file mode 100644
index 58ac9e8..0000000
--- a/test.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-    "macos": {
-        "model_gguf": {
-            "repository": "deepseek-coder-6.7b-base",
-            "name": "Q4_K_M.gguf",
-            "fim": false,
-            "n_ctx": 2048,
-            "n_threads": 8,
-            "n_gpu_layers": 35
-        }
-    },
-    "linux": {
-        "model_gptq": {
-            "repository": "theblokesomething",
-            "name": "some q5 or something"
-        }
-    }
-}