diff --git a/editors/vscode/package.json b/editors/vscode/package.json
index a75d4d9..0e75093 100644
--- a/editors/vscode/package.json
+++ b/editors/vscode/package.json
@@ -40,6 +40,13 @@
         "type": "object",
         "default": {},
         "description": "JSON configuration for LSP-AI generation"
+      },
+      "lsp-ai.inlineCompletionConfiguration": {
+        "type": "object",
+        "default": {
+          "maxCompletionsPerSecond": 1
+        },
+        "description": "JSON configuration for LSP-AI inline completions"
       }
     }
   }
diff --git a/editors/vscode/src/index.ts b/editors/vscode/src/index.ts
index 2f94da8..7e062c1 100644
--- a/editors/vscode/src/index.ts
+++ b/editors/vscode/src/index.ts
@@ -139,6 +139,9 @@ export function activate(context: vscode.ExtensionContext) {
     generationConfiguration = defaultGenerationConfiguration;
   }
 
+  // Set the inlineCompletionConfiguration
+  const inlineCompletionConfiguration = vscode.workspace.getConfiguration("lsp-ai").inlineCompletionConfiguration;
+
   const clientOptions: LanguageClientOptions = {
     documentSelector: [{ scheme: "file" }],
     initializationOptions: serverConfiguration
@@ -177,6 +180,8 @@ export function activate(context: vscode.ExtensionContext) {
   context.subscriptions.push(vscode.commands.registerTextEditorCommand(generateCommand, generateCommandHandler));
 
   // Register as an inline completion provider
+  let lastInlineCompletion = Date.now();
+  let inlineCompletionRequestCounter = 0;
   vscode.languages.registerInlineCompletionItemProvider({ pattern: '**' }, {
     provideInlineCompletionItems: async (document: vscode.TextDocument, position: vscode.Position) => {
@@ -188,8 +193,24 @@ export function activate(context: vscode.ExtensionContext) {
         model: generationConfiguration.model,
         parameters: generationConfiguration.parameters
       };
-      const result = await client.sendRequest("textDocument/generation", params);
-      return [new vscode.InlineCompletionItem(result["generatedText"])];
+
+      inlineCompletionRequestCounter += 1;
+      let localInlineCompletionRequestCounter = inlineCompletionRequestCounter;
+
+      if ((Date.now() - lastInlineCompletion) / 1000 < 1 / inlineCompletionConfiguration["maxCompletionsPerSecond"]) {
+        await new Promise(r => setTimeout(r, ((1 / inlineCompletionConfiguration["maxCompletionsPerSecond"]) - ((Date.now() - lastInlineCompletion) / 1000)) * 1000));
+        if (inlineCompletionRequestCounter == localInlineCompletionRequestCounter) {
+          lastInlineCompletion = Date.now();
+          const result = await client.sendRequest("textDocument/generation", params);
+          return [new vscode.InlineCompletionItem(result["generatedText"])];
+        } else {
+          return [];
+        }
+      } else {
+        lastInlineCompletion = Date.now();
+        const result = await client.sendRequest("textDocument/generation", params);
+        return [new vscode.InlineCompletionItem(result["generatedText"])];
+      }
     }
   }
 );
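Note on the client-side throttle above: it reads `maxCompletionsPerSecond` from the new `lsp-ai.inlineCompletionConfiguration` object declared in package.json and waits `1 / maxCompletionsPerSecond` seconds between `textDocument/generation` requests. A minimal sketch of a user override in VS Code's settings.json follows; the value shown is illustrative and not part of this diff, and fractional values are allowed because the setting is only used as a divisor:

```jsonc
// settings.json -- illustrative override, not part of this diff
{
  "lsp-ai.inlineCompletionConfiguration": {
    // 0.5 => at most one inline completion request every 2 seconds
    "maxCompletionsPerSecond": 0.5
  }
}
```

If several completion requests queue up during the wait, the `inlineCompletionRequestCounter` comparison lets only the most recent one reach the server; stale requests resolve to an empty item list.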
diff --git a/src/config.rs b/src/config.rs
index 3e8eda5..05db2bd 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -5,6 +5,10 @@ use std::collections::HashMap;
 
 pub type Kwargs = HashMap<String, Value>;
 
+const fn max_requests_per_second_default() -> f32 {
+    1.
+}
+
 #[derive(Debug, Clone, Deserialize)]
 pub enum ValidMemoryBackend {
     #[serde(rename = "file_store")]
@@ -103,7 +107,7 @@ pub struct MistralFIM {
     pub fim_endpoint: Option<String>,
     // The model name
     pub model: String,
-    #[serde(default = "api_max_requests_per_second_default")]
+    #[serde(default = "max_requests_per_second_default")]
     pub max_requests_per_second: f32,
 }
 
@@ -117,10 +121,8 @@ pub struct LLaMACPP {
     pub n_gpu_layers: u32,
     #[serde(default = "n_ctx_default")]
     pub n_ctx: u32,
-}
-
-const fn api_max_requests_per_second_default() -> f32 {
-    0.5
+    #[serde(default = "max_requests_per_second_default")]
+    pub max_requests_per_second: f32,
 }
 
 #[derive(Clone, Debug, Deserialize)]
@@ -134,7 +136,7 @@ pub struct OpenAI {
     // The chat endpoint
     pub chat_endpoint: Option<String>,
     // The maximum requests per second
-    #[serde(default = "api_max_requests_per_second_default")]
+    #[serde(default = "max_requests_per_second_default")]
     pub max_requests_per_second: f32,
     // The model name
     pub model: String,
@@ -151,7 +153,7 @@ pub struct Anthropic {
     // The chat endpoint
     pub chat_endpoint: Option<String>,
     // The maximum requests per second
-    #[serde(default = "api_max_requests_per_second_default")]
+    #[serde(default = "max_requests_per_second_default")]
     pub max_requests_per_second: f32,
     // The model name
     pub model: String,
@@ -233,7 +235,7 @@ impl Config {
                 )
             })? {
             #[cfg(feature = "llama_cpp")]
-            ValidModel::LLaMACPP(_) => Ok(1.),
+            ValidModel::LLaMACPP(llama_cpp) => Ok(llama_cpp.max_requests_per_second),
             ValidModel::OpenAI(open_ai) => Ok(open_ai.max_requests_per_second),
             ValidModel::Anthropic(anthropic) => Ok(anthropic.max_requests_per_second),
             ValidModel::MistralFIM(mistral_fim) => Ok(mistral_fim.max_requests_per_second),
diff --git a/src/transformer_worker.rs b/src/transformer_worker.rs
index a6b184f..4979aa8 100644
--- a/src/transformer_worker.rs
+++ b/src/transformer_worker.rs
@@ -124,6 +124,7 @@ fn do_run(
     let mut last_completion_request = None;
 
     let run_dispatch_request = |request| {
+        eprintln!("DISPATCHING REQUEST");
        let task_connection = connection.clone();
        let task_transformer_backends = transformer_backends.clone();
        let task_memory_backend_tx = memory_backend_tx.clone();
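Server side, `max_requests_per_second_default` replaces `api_max_requests_per_second_default` (raising the default from 0.5 to 1.0 requests per second), and `LLaMACPP` gains its own `max_requests_per_second` field in place of the hard-coded `Ok(1.)`. A sketch of how that field could be set through the server's initialization options, assuming the `memory` / `models` / `"type"` layout used in LSP-AI's configuration docs; apart from `n_gpu_layers`, `n_ctx`, and `max_requests_per_second` (visible in the structs above), the keys and values here are illustrative assumptions:

```jsonc
// initializationOptions sketch -- keys outside this diff are assumptions
{
  "memory": { "file_store": {} },
  "models": {
    "model1": {
      "type": "llama_cpp",
      "n_gpu_layers": 500,
      "n_ctx": 2048,
      // omit this field to fall back to max_requests_per_second_default(), i.e. 1.0
      "max_requests_per_second": 2
    }
  }
}
```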