Mirror of https://github.com/SilasMarvin/lsp-ai.git (synced 2025-12-18 23:14:28 +01:00)
Rate limit vscode inlineCompletions

The VS Code extension now throttles inline completion requests through a new lsp-ai.inlineCompletionConfiguration setting (maxCompletionsPerSecond, default 1). On the server side, the LLaMACPP config gains a max_requests_per_second field and all model backends now share a single max_requests_per_second_default of 1.
VS Code extension settings schema (package.json) — declare the new setting next to the existing generation setting:

@@ -40,6 +40,13 @@
         "type": "object",
         "default": {},
         "description": "JSON configuration for LSP-AI generation"
+      },
+      "lsp-ai.inlineCompletionConfiguration": {
+        "type": "object",
+        "default": {
+          "maxCompletionsPerSecond": 1
+        },
+        "description": "JSON configuration for LSP-AI generation"
       }
     }
   }
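For illustration only, the shape this setting is expected to take, written as a TypeScript type. Only maxCompletionsPerSecond is defined by the commit; the type name and the sample override in the comment are assumptions:

    // Hypothetical type describing lsp-ai.inlineCompletionConfiguration.
    interface InlineCompletionConfiguration {
      // Upper bound on inline completion requests sent per second (declared default: 1).
      maxCompletionsPerSecond: number;
    }

    // A user could raise the limit in settings.json, e.g.:
    // "lsp-ai.inlineCompletionConfiguration": { "maxCompletionsPerSecond": 2 }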
VS Code extension client (TypeScript) — read the new setting during activation:

@@ -139,6 +139,9 @@ export function activate(context: vscode.ExtensionContext) {
     generationConfiguration = defaultGenerationConfiguration;
   }
 
+  // Set the inlineCompletionConfiguration
+  const inlineCompletionConfiguration = vscode.workspace.getConfiguration("lsp-ai").inlineCompletionConfiguration;
+
   const clientOptions: LanguageClientOptions = {
     documentSelector: [{ scheme: "file" }],
     initializationOptions: serverConfiguration
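The rate-limit branch further down divides by maxCompletionsPerSecond, so a defensive read could fall back to the declared default. This is a hypothetical hardening sketch, not part of the commit:

    // Hypothetical (not in the commit): fall back to the declared default of 1 if the
    // user empties the setting, so the interval math never sees undefined.
    const maxCompletionsPerSecond: number =
      inlineCompletionConfiguration?.["maxCompletionsPerSecond"] ?? 1;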
Still in the extension client — track when the last inline completion ran and how many requests have been made:

@@ -177,6 +180,8 @@ export function activate(context: vscode.ExtensionContext) {
   context.subscriptions.push(vscode.commands.registerTextEditorCommand(generateCommand, generateCommandHandler));
 
   // Register as an inline completion provider
+  let lastInlineCompletion = Date.now();
+  let inlineCompletionRequestCounter = 0;
   vscode.languages.registerInlineCompletionItemProvider({ pattern: '**' },
     {
       provideInlineCompletionItems: async (document: vscode.TextDocument, position: vscode.Position) => {
The provider itself now waits out the remaining interval and drops any request that was superseded while waiting:

@@ -188,8 +193,24 @@ export function activate(context: vscode.ExtensionContext) {
           model: generationConfiguration.model,
           parameters: generationConfiguration.parameters
         };
+
+        inlineCompletionRequestCounter += 1;
+        let localInlineCompletionRequestCounter = inlineCompletionRequestCounter;
+
+        if ((Date.now() - lastInlineCompletion) / 1000 < 1 / inlineCompletionConfiguration["maxCompletionsPerSecond"]) {
+          await new Promise(r => setTimeout(r, ((1 / inlineCompletionConfiguration["maxCompletionsPerSecond"]) - ((Date.now() - lastInlineCompletion) / 1000)) * 1000));
+          if (inlineCompletionRequestCounter == localInlineCompletionRequestCounter) {
+            lastInlineCompletion = Date.now();
             const result = await client.sendRequest("textDocument/generation", params);
             return [new vscode.InlineCompletionItem(result["generatedText"])];
+          } else {
+            return [];
+          }
+        } else {
+          lastInlineCompletion = Date.now();
+          const result = await client.sendRequest("textDocument/generation", params);
+          return [new vscode.InlineCompletionItem(result["generatedText"])];
+        }
       }
     }
   );
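Taken on its own, this is a debounce-style limiter: the newest request sleeps until the minimum interval has passed, and any request that was superseded while sleeping returns no items. A minimal standalone sketch of the same pattern, with illustrative names that are not the extension's actual helpers:

    // Sketch of the rate-limiting pattern above, assuming a single-threaded event
    // loop as in a VS Code extension host. Names here are hypothetical.
    let lastRun = Date.now();
    let requestCounter = 0;

    async function rateLimited<T>(maxPerSecond: number, run: () => Promise<T>): Promise<T | undefined> {
      requestCounter += 1;
      const myTicket = requestCounter;

      const minIntervalMs = 1000 / maxPerSecond;
      const elapsedMs = Date.now() - lastRun;
      if (elapsedMs < minIntervalMs) {
        // Too soon: wait out the rest of the interval...
        await new Promise(resolve => setTimeout(resolve, minIntervalMs - elapsedMs));
        // ...and only proceed if no newer request arrived while we slept.
        if (myTicket !== requestCounter) {
          return undefined;
        }
      }
      lastRun = Date.now();
      return run();
    }

provideInlineCompletionItems could then wrap its textDocument/generation request in something like rateLimited(maxCompletionsPerSecond, () => client.sendRequest(...)).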
Server configuration (Rust) — replace the per-API default (0.5) with a shared max_requests_per_second_default of 1, give the LLaMACPP backend its own max_requests_per_second field, and read it instead of a hardcoded 1:

@@ -5,6 +5,10 @@ use std::collections::HashMap;
 
 pub type Kwargs = HashMap<String, Value>;
 
+const fn max_requests_per_second_default() -> f32 {
+    1.
+}
+
 #[derive(Debug, Clone, Deserialize)]
 pub enum ValidMemoryBackend {
     #[serde(rename = "file_store")]

@@ -103,7 +107,7 @@ pub struct MistralFIM {
     pub fim_endpoint: Option<String>,
     // The model name
     pub model: String,
-    #[serde(default = "api_max_requests_per_second_default")]
+    #[serde(default = "max_requests_per_second_default")]
     pub max_requests_per_second: f32,
 }
 

@@ -117,10 +121,8 @@ pub struct LLaMACPP {
     pub n_gpu_layers: u32,
     #[serde(default = "n_ctx_default")]
     pub n_ctx: u32,
-}
-
-const fn api_max_requests_per_second_default() -> f32 {
-    0.5
+    #[serde(default = "max_requests_per_second_default")]
+    pub max_requests_per_second: f32,
 }
 
 #[derive(Clone, Debug, Deserialize)]

@@ -134,7 +136,7 @@ pub struct OpenAI {
     // The chat endpoint
     pub chat_endpoint: Option<String>,
     // The maximum requests per second
-    #[serde(default = "api_max_requests_per_second_default")]
+    #[serde(default = "max_requests_per_second_default")]
     pub max_requests_per_second: f32,
     // The model name
     pub model: String,

@@ -151,7 +153,7 @@ pub struct Anthropic {
     // The chat endpoint
     pub chat_endpoint: Option<String>,
     // The maximum requests per second
-    #[serde(default = "api_max_requests_per_second_default")]
+    #[serde(default = "max_requests_per_second_default")]
     pub max_requests_per_second: f32,
     // The model name
     pub model: String,

@@ -233,7 +235,7 @@ impl Config {
         )
     })? {
         #[cfg(feature = "llama_cpp")]
-        ValidModel::LLaMACPP(_) => Ok(1.),
+        ValidModel::LLaMACPP(llama_cpp) => Ok(llama_cpp.max_requests_per_second),
         ValidModel::OpenAI(open_ai) => Ok(open_ai.max_requests_per_second),
         ValidModel::Anthropic(anthropic) => Ok(anthropic.max_requests_per_second),
         ValidModel::MistralFIM(mistral_fim) => Ok(mistral_fim.max_requests_per_second),
Server request dispatch (Rust) — log when a request is dispatched:

@@ -124,6 +124,7 @@ fn do_run(
     let mut last_completion_request = None;
 
     let run_dispatch_request = |request| {
+        eprintln!("DISPATCHING REQUEST");
         let task_connection = connection.clone();
         let task_transformer_backends = transformer_backends.clone();
         let task_memory_backend_tx = memory_backend_tx.clone();