mirror of
https://github.com/aljazceru/goose.git
synced 2026-01-06 16:04:28 +01:00
feat: goose bench framework for functional and regression testing
Co-authored-by: Zaki Ali <zaki@squareup.com>
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -36,3 +36,6 @@ debug_*.txt
|
||||
# Generated files
|
||||
.docusaurus
|
||||
.cache-loader
|
||||
|
||||
# Benchmark paths
|
||||
benchmark-*
|
||||
|
||||
21
Cargo.lock
generated
21
Cargo.lock
generated
@@ -2199,11 +2199,31 @@ dependencies = [
|
||||
"wiremock",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "goose-bench"
|
||||
version = "1.0.10"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"chrono",
|
||||
"ctor",
|
||||
"goose",
|
||||
"mcp-core",
|
||||
"paste",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "goose-cli"
|
||||
version = "1.0.12"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"bat",
|
||||
"chrono",
|
||||
"clap",
|
||||
@@ -2212,6 +2232,7 @@ dependencies = [
|
||||
"etcetera",
|
||||
"futures",
|
||||
"goose",
|
||||
"goose-bench",
|
||||
"goose-mcp",
|
||||
"mcp-client",
|
||||
"mcp-core",
|
||||
|
||||
26
crates/goose-bench/Cargo.toml
Normal file
26
crates/goose-bench/Cargo.toml
Normal file
@@ -0,0 +1,26 @@
|
||||
[package]
|
||||
name = "goose-bench"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
authors.workspace = true
|
||||
license.workspace = true
|
||||
repository.workspace = true
|
||||
description.workspace = true
|
||||
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
paste = "1.0"
|
||||
ctor = "0.2.7"
|
||||
goose = { path = "../goose" }
|
||||
mcp-core = { path = "../mcp-core" }
|
||||
async-trait = "0.1.86"
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
serde_json = "1.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["registry"] }
|
||||
tokio = { version = "1.0", features = ["full"] }
|
||||
|
||||
[target.'cfg(target_os = "windows")'.dependencies]
|
||||
winapi = { version = "0.3", features = ["wincred"] }
|
||||
15
crates/goose-bench/src/assets/kubernetes.patch
Normal file
15
crates/goose-bench/src/assets/kubernetes.patch
Normal file
@@ -0,0 +1,15 @@
|
||||
diff --git a/kubernetes_swagger.json b/kubernetes_swagger.json
|
||||
index 3e11d92..859a63e 100644
|
||||
--- a/kubernetes_swagger.json
|
||||
+++ b/kubernetes_swagger.json
|
||||
@@ -371,8 +371,8 @@
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
- "io.k8s.api.admissionregistration.v1.ServiceReference": {
|
||||
- "description": "ServiceReference holds a reference to Service.legacy.k8s.io",
|
||||
+ "io.k8s.api.admissionregistration.v1.FakeServiceReference": {
|
||||
+ "description": "FakeServiceReference simulates a reference to a fake service for testing purposes.",
|
||||
"properties": {
|
||||
"name": {
|
||||
"description": "`name` is the name of the service. Required",
|
||||
86037
crates/goose-bench/src/assets/kubernetes_swagger.json
Normal file
86037
crates/goose-bench/src/assets/kubernetes_swagger.json
Normal file
File diff suppressed because it is too large
Load Diff
458
crates/goose-bench/src/assets/vscode.patch
Normal file
458
crates/goose-bench/src/assets/vscode.patch
Normal file
@@ -0,0 +1,458 @@
|
||||
diff --git a/vscode_config_registry.ts b/vscode_config_registry.ts
|
||||
index d2ba316..1834518 100644
|
||||
--- a/vscode_config_registry.ts
|
||||
+++ b/vscode_config_registry.ts
|
||||
@@ -23,68 +23,68 @@ export const Extensions = {
|
||||
Configuration: 'base.contributions.configuration'
|
||||
};
|
||||
|
||||
-export interface IConfigurationDelta {
|
||||
- removedDefaults?: IConfigurationDefaults[];
|
||||
- removedConfigurations?: IConfigurationNode[];
|
||||
- addedDefaults?: IConfigurationDefaults[];
|
||||
- addedConfigurations?: IConfigurationNode[];
|
||||
+export interface PConfigurationDelta {
|
||||
+ removedDefaults?: PConfigurationDefaults[];
|
||||
+ removedConfigurations?: PConfigurationNode[];
|
||||
+ addedDefaults?: PConfigurationDefaults[];
|
||||
+ addedConfigurations?: PConfigurationNode[];
|
||||
}
|
||||
|
||||
-export interface IConfigurationRegistry {
|
||||
+export interface PConfigurationRegistry {
|
||||
|
||||
/**
|
||||
* Register a configuration to the registry.
|
||||
*/
|
||||
- registerConfiguration(configuration: IConfigurationNode): void;
|
||||
+ registerConfiguration(configuration: PConfigurationNode): void;
|
||||
|
||||
/**
|
||||
* Register multiple configurations to the registry.
|
||||
*/
|
||||
- registerConfigurations(configurations: IConfigurationNode[], validate?: boolean): void;
|
||||
+ registerConfigurations(configurations: PConfigurationNode[], validate?: boolean): void;
|
||||
|
||||
/**
|
||||
* Deregister multiple configurations from the registry.
|
||||
*/
|
||||
- deregisterConfigurations(configurations: IConfigurationNode[]): void;
|
||||
+ deregisterConfigurations(configurations: PConfigurationNode[]): void;
|
||||
|
||||
/**
|
||||
* update the configuration registry by
|
||||
* - registering the configurations to add
|
||||
* - dereigstering the configurations to remove
|
||||
*/
|
||||
- updateConfigurations(configurations: { add: IConfigurationNode[]; remove: IConfigurationNode[] }): void;
|
||||
+ updateConfigurations(configurations: { add: PConfigurationNode[]; remove: PConfigurationNode[] }): void;
|
||||
|
||||
/**
|
||||
* Register multiple default configurations to the registry.
|
||||
*/
|
||||
- registerDefaultConfigurations(defaultConfigurations: IConfigurationDefaults[]): void;
|
||||
+ registerDefaultConfigurations(defaultConfigurations: PConfigurationDefaults[]): void;
|
||||
|
||||
/**
|
||||
* Deregister multiple default configurations from the registry.
|
||||
*/
|
||||
- deregisterDefaultConfigurations(defaultConfigurations: IConfigurationDefaults[]): void;
|
||||
+ deregisterDefaultConfigurations(defaultConfigurations: PConfigurationDefaults[]): void;
|
||||
|
||||
/**
|
||||
* Bulk update of the configuration registry (default and configurations, remove and add)
|
||||
* @param delta
|
||||
*/
|
||||
- deltaConfiguration(delta: IConfigurationDelta): void;
|
||||
+ deltaConfiguration(delta: PConfigurationDelta): void;
|
||||
|
||||
/**
|
||||
* Return the registered default configurations
|
||||
*/
|
||||
- getRegisteredDefaultConfigurations(): IConfigurationDefaults[];
|
||||
+ getRegisteredDefaultConfigurations(): PConfigurationDefaults[];
|
||||
|
||||
/**
|
||||
* Return the registered configuration defaults overrides
|
||||
*/
|
||||
- getConfigurationDefaultsOverrides(): Map<string, IConfigurationDefaultOverrideValue>;
|
||||
+ getConfigurationDefaultsOverrides(): Map<string, PConfigurationDefaultOverrideValue>;
|
||||
|
||||
/**
|
||||
* Signal that the schema of a configuration setting has changes. It is currently only supported to change enumeration values.
|
||||
* Property or default value changes are not allowed.
|
||||
*/
|
||||
- notifyConfigurationSchemaUpdated(...configurations: IConfigurationNode[]): void;
|
||||
+ notifyConfigurationSchemaUpdated(...configurations: PConfigurationNode[]): void;
|
||||
|
||||
/**
|
||||
* Event that fires whenever a configuration has been
|
||||
@@ -101,12 +101,12 @@ export interface IConfigurationRegistry {
|
||||
/**
|
||||
* Returns all configuration nodes contributed to this registry.
|
||||
*/
|
||||
- getConfigurations(): IConfigurationNode[];
|
||||
+ getConfigurations(): PConfigurationNode[];
|
||||
|
||||
/**
|
||||
* Returns all configurations settings of all configuration nodes contributed to this registry.
|
||||
*/
|
||||
- getConfigurationProperties(): IStringDictionary<IRegisteredConfigurationPropertySchema>;
|
||||
+ getConfigurationProperties(): IStringDictionary<PRegisteredConfigurationPropertySchema>;
|
||||
|
||||
/**
|
||||
* Return all configurations by policy name
|
||||
@@ -116,7 +116,7 @@ export interface IConfigurationRegistry {
|
||||
/**
|
||||
* Returns all excluded configurations settings of all configuration nodes contributed to this registry.
|
||||
*/
|
||||
- getExcludedConfigurationProperties(): IStringDictionary<IRegisteredConfigurationPropertySchema>;
|
||||
+ getExcludedConfigurationProperties(): IStringDictionary<PRegisteredConfigurationPropertySchema>;
|
||||
|
||||
/**
|
||||
* Register the identifiers for editor configurations
|
||||
@@ -168,7 +168,7 @@ export interface IPolicy {
|
||||
readonly minimumVersion: `${number}.${number}`;
|
||||
}
|
||||
|
||||
-export interface IConfigurationPropertySchema extends IJSONSchema {
|
||||
+export interface PConfigurationPropertySchema extends IJSONSchema {
|
||||
|
||||
scope?: ConfigurationScope;
|
||||
|
||||
@@ -235,14 +235,14 @@ export interface IExtensionInfo {
|
||||
displayName?: string;
|
||||
}
|
||||
|
||||
-export interface IConfigurationNode {
|
||||
+export interface PConfigurationNode {
|
||||
id?: string;
|
||||
order?: number;
|
||||
type?: string | string[];
|
||||
title?: string;
|
||||
description?: string;
|
||||
- properties?: IStringDictionary<IConfigurationPropertySchema>;
|
||||
- allOf?: IConfigurationNode[];
|
||||
+ properties?: IStringDictionary<PConfigurationPropertySchema>;
|
||||
+ allOf?: PConfigurationNode[];
|
||||
scope?: ConfigurationScope;
|
||||
extensionInfo?: IExtensionInfo;
|
||||
restrictedProperties?: string[];
|
||||
@@ -250,49 +250,49 @@ export interface IConfigurationNode {
|
||||
|
||||
export type ConfigurationDefaultValueSource = IExtensionInfo | Map<string, IExtensionInfo>;
|
||||
|
||||
-export interface IConfigurationDefaults {
|
||||
+export interface PConfigurationDefaults {
|
||||
overrides: IStringDictionary<any>;
|
||||
source?: IExtensionInfo;
|
||||
}
|
||||
|
||||
-export type IRegisteredConfigurationPropertySchema = IConfigurationPropertySchema & {
|
||||
+export type PRegisteredConfigurationPropertySchema = PConfigurationPropertySchema & {
|
||||
defaultDefaultValue?: any;
|
||||
source?: IExtensionInfo; // Source of the Property
|
||||
defaultValueSource?: ConfigurationDefaultValueSource; // Source of the Default Value
|
||||
};
|
||||
|
||||
-export interface IConfigurationDefaultOverride {
|
||||
+export interface PConfigurationDefaultOverride {
|
||||
readonly value: any;
|
||||
readonly source?: IExtensionInfo; // Source of the default override
|
||||
}
|
||||
|
||||
-export interface IConfigurationDefaultOverrideValue {
|
||||
+export interface PConfigurationDefaultOverrideValue {
|
||||
readonly value: any;
|
||||
readonly source?: ConfigurationDefaultValueSource;
|
||||
}
|
||||
|
||||
-export const allSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
|
||||
-export const applicationSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
|
||||
-export const applicationMachineSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
|
||||
-export const machineSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
|
||||
-export const machineOverridableSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
|
||||
-export const windowSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
|
||||
-export const resourceSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
|
||||
+export const allSettings: { properties: IStringDictionary<PConfigurationPropertySchema>; patternProperties: IStringDictionary<PConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
|
||||
+export const applicationSettings: { properties: IStringDictionary<PConfigurationPropertySchema>; patternProperties: IStringDictionary<PConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
|
||||
+export const applicationMachineSettings: { properties: IStringDictionary<PConfigurationPropertySchema>; patternProperties: IStringDictionary<PConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
|
||||
+export const machineSettings: { properties: IStringDictionary<PConfigurationPropertySchema>; patternProperties: IStringDictionary<PConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
|
||||
+export const machineOverridableSettings: { properties: IStringDictionary<PConfigurationPropertySchema>; patternProperties: IStringDictionary<PConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
|
||||
+export const windowSettings: { properties: IStringDictionary<PConfigurationPropertySchema>; patternProperties: IStringDictionary<PConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
|
||||
+export const resourceSettings: { properties: IStringDictionary<PConfigurationPropertySchema>; patternProperties: IStringDictionary<PConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
|
||||
|
||||
export const resourceLanguageSettingsSchemaId = 'vscode://schemas/settings/resourceLanguage';
|
||||
export const configurationDefaultsSchemaId = 'vscode://schemas/settings/configurationDefaults';
|
||||
|
||||
const contributionRegistry = Registry.as<IJSONContributionRegistry>(JSONExtensions.JSONContribution);
|
||||
|
||||
-class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
+class ConfigurationRegistry implements PConfigurationRegistry {
|
||||
|
||||
- private readonly registeredConfigurationDefaults: IConfigurationDefaults[] = [];
|
||||
- private readonly configurationDefaultsOverrides: Map<string, { configurationDefaultOverrides: IConfigurationDefaultOverride[]; configurationDefaultOverrideValue?: IConfigurationDefaultOverrideValue }>;
|
||||
- private readonly defaultLanguageConfigurationOverridesNode: IConfigurationNode;
|
||||
- private readonly configurationContributors: IConfigurationNode[];
|
||||
- private readonly configurationProperties: IStringDictionary<IRegisteredConfigurationPropertySchema>;
|
||||
+ private readonly registeredConfigurationDefaults: PConfigurationDefaults[] = [];
|
||||
+ private readonly configurationDefaultsOverrides: Map<string, { configurationDefaultOverrides: PConfigurationDefaultOverride[]; configurationDefaultOverrideValue?: PConfigurationDefaultOverrideValue }>;
|
||||
+ private readonly defaultLanguageConfigurationOverridesNode: PConfigurationNode;
|
||||
+ private readonly configurationContributors: PConfigurationNode[];
|
||||
+ private readonly configurationProperties: IStringDictionary<PRegisteredConfigurationPropertySchema>;
|
||||
private readonly policyConfigurations: Map<PolicyName, string>;
|
||||
- private readonly excludedConfigurationProperties: IStringDictionary<IRegisteredConfigurationPropertySchema>;
|
||||
+ private readonly excludedConfigurationProperties: IStringDictionary<PRegisteredConfigurationPropertySchema>;
|
||||
private readonly resourceLanguageSettingsSchema: IJSONSchema;
|
||||
private readonly overrideIdentifiers = new Set<string>();
|
||||
|
||||
@@ -325,11 +325,11 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
this.registerOverridePropertyPatternKey();
|
||||
}
|
||||
|
||||
- public registerConfiguration(configuration: IConfigurationNode, validate: boolean = true): void {
|
||||
+ public registerConfiguration(configuration: PConfigurationNode, validate: boolean = true): void {
|
||||
this.registerConfigurations([configuration], validate);
|
||||
}
|
||||
|
||||
- public registerConfigurations(configurations: IConfigurationNode[], validate: boolean = true): void {
|
||||
+ public registerConfigurations(configurations: PConfigurationNode[], validate: boolean = true): void {
|
||||
const properties = new Set<string>();
|
||||
this.doRegisterConfigurations(configurations, validate, properties);
|
||||
|
||||
@@ -338,7 +338,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
this._onDidUpdateConfiguration.fire({ properties });
|
||||
}
|
||||
|
||||
- public deregisterConfigurations(configurations: IConfigurationNode[]): void {
|
||||
+ public deregisterConfigurations(configurations: PConfigurationNode[]): void {
|
||||
const properties = new Set<string>();
|
||||
this.doDeregisterConfigurations(configurations, properties);
|
||||
|
||||
@@ -347,7 +347,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
this._onDidUpdateConfiguration.fire({ properties });
|
||||
}
|
||||
|
||||
- public updateConfigurations({ add, remove }: { add: IConfigurationNode[]; remove: IConfigurationNode[] }): void {
|
||||
+ public updateConfigurations({ add, remove }: { add: PConfigurationNode[]; remove: PConfigurationNode[] }): void {
|
||||
const properties = new Set<string>();
|
||||
this.doDeregisterConfigurations(remove, properties);
|
||||
this.doRegisterConfigurations(add, false, properties);
|
||||
@@ -357,14 +357,14 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
this._onDidUpdateConfiguration.fire({ properties });
|
||||
}
|
||||
|
||||
- public registerDefaultConfigurations(configurationDefaults: IConfigurationDefaults[]): void {
|
||||
+ public registerDefaultConfigurations(configurationDefaults: PConfigurationDefaults[]): void {
|
||||
const properties = new Set<string>();
|
||||
this.doRegisterDefaultConfigurations(configurationDefaults, properties);
|
||||
this._onDidSchemaChange.fire();
|
||||
this._onDidUpdateConfiguration.fire({ properties, defaultsOverrides: true });
|
||||
}
|
||||
|
||||
- private doRegisterDefaultConfigurations(configurationDefaults: IConfigurationDefaults[], bucket: Set<string>) {
|
||||
+ private doRegisterDefaultConfigurations(configurationDefaults: PConfigurationDefaults[], bucket: Set<string>) {
|
||||
|
||||
this.registeredConfigurationDefaults.push(...configurationDefaults);
|
||||
|
||||
@@ -413,14 +413,14 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
this.doRegisterOverrideIdentifiers(overrideIdentifiers);
|
||||
}
|
||||
|
||||
- public deregisterDefaultConfigurations(defaultConfigurations: IConfigurationDefaults[]): void {
|
||||
+ public deregisterDefaultConfigurations(defaultConfigurations: PConfigurationDefaults[]): void {
|
||||
const properties = new Set<string>();
|
||||
this.doDeregisterDefaultConfigurations(defaultConfigurations, properties);
|
||||
this._onDidSchemaChange.fire();
|
||||
this._onDidUpdateConfiguration.fire({ properties, defaultsOverrides: true });
|
||||
}
|
||||
|
||||
- private doDeregisterDefaultConfigurations(defaultConfigurations: IConfigurationDefaults[], bucket: Set<string>): void {
|
||||
+ private doDeregisterDefaultConfigurations(defaultConfigurations: PConfigurationDefaults[], bucket: Set<string>): void {
|
||||
for (const defaultConfiguration of defaultConfigurations) {
|
||||
const index = this.registeredConfigurationDefaults.indexOf(defaultConfiguration);
|
||||
if (index !== -1) {
|
||||
@@ -447,7 +447,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
}
|
||||
|
||||
if (OVERRIDE_PROPERTY_REGEX.test(key)) {
|
||||
- let configurationDefaultOverrideValue: IConfigurationDefaultOverrideValue | undefined;
|
||||
+ let configurationDefaultOverrideValue: PConfigurationDefaultOverrideValue | undefined;
|
||||
for (const configurationDefaultOverride of configurationDefaultOverridesForKey.configurationDefaultOverrides) {
|
||||
configurationDefaultOverrideValue = this.mergeDefaultConfigurationsForOverrideIdentifier(key, configurationDefaultOverride.value, configurationDefaultOverride.source, configurationDefaultOverrideValue);
|
||||
}
|
||||
@@ -460,7 +460,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
delete this.defaultLanguageConfigurationOverridesNode.properties![key];
|
||||
}
|
||||
} else {
|
||||
- let configurationDefaultOverrideValue: IConfigurationDefaultOverrideValue | undefined;
|
||||
+ let configurationDefaultOverrideValue: PConfigurationDefaultOverrideValue | undefined;
|
||||
for (const configurationDefaultOverride of configurationDefaultOverridesForKey.configurationDefaultOverrides) {
|
||||
configurationDefaultOverrideValue = this.mergeDefaultConfigurationsForConfigurationProperty(key, configurationDefaultOverride.value, configurationDefaultOverride.source, configurationDefaultOverrideValue);
|
||||
}
|
||||
@@ -477,8 +477,8 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
this.updateOverridePropertyPatternKey();
|
||||
}
|
||||
|
||||
- private updateDefaultOverrideProperty(key: string, newDefaultOverride: IConfigurationDefaultOverrideValue, source: IExtensionInfo | undefined): void {
|
||||
- const property: IRegisteredConfigurationPropertySchema = {
|
||||
+ private updateDefaultOverrideProperty(key: string, newDefaultOverride: PConfigurationDefaultOverrideValue, source: IExtensionInfo | undefined): void {
|
||||
+ const property: PRegisteredConfigurationPropertySchema = {
|
||||
type: 'object',
|
||||
default: newDefaultOverride.value,
|
||||
description: nls.localize('defaultLanguageConfiguration.description', "Configure settings to be overridden for the {0} language.", getLanguageTagSettingPlainKey(key)),
|
||||
@@ -491,7 +491,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
this.defaultLanguageConfigurationOverridesNode.properties![key] = property;
|
||||
}
|
||||
|
||||
- private mergeDefaultConfigurationsForOverrideIdentifier(overrideIdentifier: string, configurationValueObject: IStringDictionary<any>, valueSource: IExtensionInfo | undefined, existingDefaultOverride: IConfigurationDefaultOverrideValue | undefined): IConfigurationDefaultOverrideValue | undefined {
|
||||
+ private mergeDefaultConfigurationsForOverrideIdentifier(overrideIdentifier: string, configurationValueObject: IStringDictionary<any>, valueSource: IExtensionInfo | undefined, existingDefaultOverride: PConfigurationDefaultOverrideValue | undefined): PConfigurationDefaultOverrideValue | undefined {
|
||||
const defaultValue = existingDefaultOverride?.value || {};
|
||||
const source = existingDefaultOverride?.source ?? new Map<string, IExtensionInfo>();
|
||||
|
||||
@@ -532,7 +532,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
return { value: defaultValue, source };
|
||||
}
|
||||
|
||||
- private mergeDefaultConfigurationsForConfigurationProperty(propertyKey: string, value: any, valuesSource: IExtensionInfo | undefined, existingDefaultOverride: IConfigurationDefaultOverrideValue | undefined): IConfigurationDefaultOverrideValue | undefined {
|
||||
+ private mergeDefaultConfigurationsForConfigurationProperty(propertyKey: string, value: any, valuesSource: IExtensionInfo | undefined, existingDefaultOverride: PConfigurationDefaultOverrideValue | undefined): PConfigurationDefaultOverrideValue | undefined {
|
||||
const property = this.configurationProperties[propertyKey];
|
||||
const existingDefaultValue = existingDefaultOverride?.value ?? property?.defaultDefaultValue;
|
||||
let source: ConfigurationDefaultValueSource | undefined = valuesSource;
|
||||
@@ -564,7 +564,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
return { value, source };
|
||||
}
|
||||
|
||||
- public deltaConfiguration(delta: IConfigurationDelta): void {
|
||||
+ public deltaConfiguration(delta: PConfigurationDelta): void {
|
||||
// defaults: remove
|
||||
let defaultsOverrides = false;
|
||||
const properties = new Set<string>();
|
||||
@@ -589,7 +589,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
this._onDidUpdateConfiguration.fire({ properties, defaultsOverrides });
|
||||
}
|
||||
|
||||
- public notifyConfigurationSchemaUpdated(...configurations: IConfigurationNode[]) {
|
||||
+ public notifyConfigurationSchemaUpdated(...configurations: PConfigurationNode[]) {
|
||||
this._onDidSchemaChange.fire();
|
||||
}
|
||||
|
||||
@@ -605,7 +605,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
this.updateOverridePropertyPatternKey();
|
||||
}
|
||||
|
||||
- private doRegisterConfigurations(configurations: IConfigurationNode[], validate: boolean, bucket: Set<string>): void {
|
||||
+ private doRegisterConfigurations(configurations: PConfigurationNode[], validate: boolean, bucket: Set<string>): void {
|
||||
|
||||
configurations.forEach(configuration => {
|
||||
|
||||
@@ -616,9 +616,9 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
});
|
||||
}
|
||||
|
||||
- private doDeregisterConfigurations(configurations: IConfigurationNode[], bucket: Set<string>): void {
|
||||
+ private doDeregisterConfigurations(configurations: PConfigurationNode[], bucket: Set<string>): void {
|
||||
|
||||
- const deregisterConfiguration = (configuration: IConfigurationNode) => {
|
||||
+ const deregisterConfiguration = (configuration: PConfigurationNode) => {
|
||||
if (configuration.properties) {
|
||||
for (const key in configuration.properties) {
|
||||
bucket.add(key);
|
||||
@@ -641,12 +641,12 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
}
|
||||
}
|
||||
|
||||
- private validateAndRegisterProperties(configuration: IConfigurationNode, validate: boolean = true, extensionInfo: IExtensionInfo | undefined, restrictedProperties: string[] | undefined, scope: ConfigurationScope = ConfigurationScope.WINDOW, bucket: Set<string>): void {
|
||||
+ private validateAndRegisterProperties(configuration: PConfigurationNode, validate: boolean = true, extensionInfo: IExtensionInfo | undefined, restrictedProperties: string[] | undefined, scope: ConfigurationScope = ConfigurationScope.WINDOW, bucket: Set<string>): void {
|
||||
scope = types.isUndefinedOrNull(configuration.scope) ? scope : configuration.scope;
|
||||
const properties = configuration.properties;
|
||||
if (properties) {
|
||||
for (const key in properties) {
|
||||
- const property: IRegisteredConfigurationPropertySchema = properties[key];
|
||||
+ const property: PRegisteredConfigurationPropertySchema = properties[key];
|
||||
if (validate && validateProperty(key, property)) {
|
||||
delete properties[key];
|
||||
continue;
|
||||
@@ -696,7 +696,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
}
|
||||
|
||||
// TODO: @sandy081 - Remove this method and include required info in getConfigurationProperties
|
||||
- getConfigurations(): IConfigurationNode[] {
|
||||
+ getConfigurations(): PConfigurationNode[] {
|
||||
return this.configurationContributors;
|
||||
}
|
||||
|
||||
@@ -712,12 +712,12 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
return this.excludedConfigurationProperties;
|
||||
}
|
||||
|
||||
- getRegisteredDefaultConfigurations(): IConfigurationDefaults[] {
|
||||
+ getRegisteredDefaultConfigurations(): PConfigurationDefaults[] {
|
||||
return [...this.registeredConfigurationDefaults];
|
||||
}
|
||||
|
||||
- getConfigurationDefaultsOverrides(): Map<string, IConfigurationDefaultOverrideValue> {
|
||||
- const configurationDefaultsOverrides = new Map<string, IConfigurationDefaultOverrideValue>();
|
||||
+ getConfigurationDefaultsOverrides(): Map<string, PConfigurationDefaultOverrideValue> {
|
||||
+ const configurationDefaultsOverrides = new Map<string, PConfigurationDefaultOverrideValue>();
|
||||
for (const [key, value] of this.configurationDefaultsOverrides) {
|
||||
if (value.configurationDefaultOverrideValue) {
|
||||
configurationDefaultsOverrides.set(key, value.configurationDefaultOverrideValue);
|
||||
@@ -726,8 +726,8 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
return configurationDefaultsOverrides;
|
||||
}
|
||||
|
||||
- private registerJSONConfiguration(configuration: IConfigurationNode) {
|
||||
- const register = (configuration: IConfigurationNode) => {
|
||||
+ private registerJSONConfiguration(configuration: PConfigurationNode) {
|
||||
+ const register = (configuration: PConfigurationNode) => {
|
||||
const properties = configuration.properties;
|
||||
if (properties) {
|
||||
for (const key in properties) {
|
||||
@@ -740,7 +740,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
register(configuration);
|
||||
}
|
||||
|
||||
- private updateSchema(key: string, property: IConfigurationPropertySchema): void {
|
||||
+ private updateSchema(key: string, property: PConfigurationPropertySchema): void {
|
||||
allSettings.properties[key] = property;
|
||||
switch (property.scope) {
|
||||
case ConfigurationScope.APPLICATION:
|
||||
@@ -768,7 +768,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
}
|
||||
}
|
||||
|
||||
- private removeFromSchema(key: string, property: IConfigurationPropertySchema): void {
|
||||
+ private removeFromSchema(key: string, property: PConfigurationPropertySchema): void {
|
||||
delete allSettings.properties[key];
|
||||
switch (property.scope) {
|
||||
case ConfigurationScope.APPLICATION:
|
||||
@@ -831,7 +831,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
this._onDidSchemaChange.fire();
|
||||
}
|
||||
|
||||
- private updatePropertyDefaultValue(key: string, property: IRegisteredConfigurationPropertySchema): void {
|
||||
+ private updatePropertyDefaultValue(key: string, property: PRegisteredConfigurationPropertySchema): void {
|
||||
const configurationdefaultOverride = this.configurationDefaultsOverrides.get(key)?.configurationDefaultOverrideValue;
|
||||
let defaultValue = undefined;
|
||||
let defaultSource = undefined;
|
||||
@@ -899,7 +899,7 @@ export function getDefaultValue(type: string | string[] | undefined) {
|
||||
const configurationRegistry = new ConfigurationRegistry();
|
||||
Registry.add(Extensions.Configuration, configurationRegistry);
|
||||
|
||||
-export function validateProperty(property: string, schema: IRegisteredConfigurationPropertySchema): string | null {
|
||||
+export function validateProperty(property: string, schema: PRegisteredConfigurationPropertySchema): string | null {
|
||||
if (!property.trim()) {
|
||||
return nls.localize('config.property.empty', "Cannot register an empty property");
|
||||
}
|
||||
@@ -926,8 +926,8 @@ export function getScopes(): [string, ConfigurationScope | undefined][] {
|
||||
return scopes;
|
||||
}
|
||||
|
||||
-export function getAllConfigurationProperties(configurationNode: IConfigurationNode[]): IStringDictionary<IRegisteredConfigurationPropertySchema> {
|
||||
- const result: IStringDictionary<IRegisteredConfigurationPropertySchema> = {};
|
||||
+export function getAllConfigurationProperties(configurationNode: PConfigurationNode[]): IStringDictionary<PRegisteredConfigurationPropertySchema> {
|
||||
+ const result: IStringDictionary<PRegisteredConfigurationPropertySchema> = {};
|
||||
for (const configuration of configurationNode) {
|
||||
const properties = configuration.properties;
|
||||
if (types.isObject(properties)) {
|
||||
960
crates/goose-bench/src/assets/vscode_config_registry.ts
Normal file
960
crates/goose-bench/src/assets/vscode_config_registry.ts
Normal file
@@ -0,0 +1,960 @@
|
||||
/*---------------------------------------------------------------------------------------------
|
||||
* Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
* Licensed under the MIT License. See License.txt in the project root for license information.
|
||||
*--------------------------------------------------------------------------------------------*/
|
||||
|
||||
import { distinct } from '../../../base/common/arrays.js';
|
||||
import { IStringDictionary } from '../../../base/common/collections.js';
|
||||
import { Emitter, Event } from '../../../base/common/event.js';
|
||||
import { IJSONSchema } from '../../../base/common/jsonSchema.js';
|
||||
import * as types from '../../../base/common/types.js';
|
||||
import * as nls from '../../../nls.js';
|
||||
import { getLanguageTagSettingPlainKey } from './configuration.js';
|
||||
import { Extensions as JSONExtensions, IJSONContributionRegistry } from '../../jsonschemas/common/jsonContributionRegistry.js';
|
||||
import { PolicyName } from '../../policy/common/policy.js';
|
||||
import { Registry } from '../../registry/common/platform.js';
|
||||
|
||||
/**
 * Presentation formats that a string setting can request through
 * `IConfigurationPropertySchema.editPresentation`.
 */
export enum EditPresentationTypes {
	/** Multi-line text presentation. */
	Multiline = 'multilineText',
	/** Single-line text presentation. */
	Singleline = 'singlelineText',
}
|
||||
|
||||
/**
 * Registry keys exposed by this module. `Configuration` is the key under
 * which the configuration registry instance is added to the platform registry.
 */
export const Extensions = {
	Configuration: 'base.contributions.configuration',
};
|
||||
|
||||
/**
 * A batch of changes for `IConfigurationRegistry.deltaConfiguration`.
 * Every member is optional; an absent member means "no change of that kind".
 */
export interface IConfigurationDelta {
	/** Default-value contributions to deregister. */
	removedDefaults?: IConfigurationDefaults[];
	/** Configuration nodes to deregister. */
	removedConfigurations?: IConfigurationNode[];
	/** Default-value contributions to register. */
	addedDefaults?: IConfigurationDefaults[];
	/** Configuration nodes to register. */
	addedConfigurations?: IConfigurationNode[];
}
|
||||
|
||||
/**
 * Contract of the registry that collects configuration nodes, their property
 * schemas and contributed default-value overrides.
 */
export interface IConfigurationRegistry {

	/**
	 * Registers a single configuration node.
	 */
	registerConfiguration(configuration: IConfigurationNode): void;

	/**
	 * Registers several configuration nodes at once.
	 *
	 * @param validate When set, property keys are validated before they are registered.
	 */
	registerConfigurations(configurations: IConfigurationNode[], validate?: boolean): void;

	/**
	 * Removes several configuration nodes from the registry.
	 */
	deregisterConfigurations(configurations: IConfigurationNode[]): void;

	/**
	 * Updates the registry in one step by
	 * - deregistering the configurations listed in `remove`
	 * - registering the configurations listed in `add`
	 */
	updateConfigurations(configurations: { add: IConfigurationNode[]; remove: IConfigurationNode[] }): void;

	/**
	 * Registers several default-value contributions.
	 */
	registerDefaultConfigurations(defaultConfigurations: IConfigurationDefaults[]): void;

	/**
	 * Removes several default-value contributions.
	 */
	deregisterDefaultConfigurations(defaultConfigurations: IConfigurationDefaults[]): void;

	/**
	 * Bulk update of the registry: defaults and configurations, removals and additions.
	 *
	 * @param delta The changes to apply.
	 */
	deltaConfiguration(delta: IConfigurationDelta): void;

	/**
	 * Returns the default-value contributions that are currently registered.
	 */
	getRegisteredDefaultConfigurations(): IConfigurationDefaults[];

	/**
	 * Returns the effective default override per key.
	 */
	getConfigurationDefaultsOverrides(): Map<string, IConfigurationDefaultOverrideValue>;

	/**
	 * Signals that the schema of a configuration setting has changed. Only changes
	 * to enumeration values are currently supported; property or default value
	 * changes are not allowed.
	 */
	notifyConfigurationSchemaUpdated(...configurations: IConfigurationNode[]): void;

	/**
	 * Event that fires whenever the configuration schema has changed.
	 */
	readonly onDidSchemaChange: Event<void>;

	/**
	 * Event that fires whenever configurations or defaults have been updated.
	 * `properties` holds the affected keys; `defaultsOverrides` is set when
	 * default-value contributions were part of the update.
	 */
	readonly onDidUpdateConfiguration: Event<{ properties: ReadonlySet<string>; defaultsOverrides?: boolean }>;

	/**
	 * Returns every configuration node contributed to this registry.
	 */
	getConfigurations(): IConfigurationNode[];

	/**
	 * Returns the settings of all configuration nodes contributed to this registry.
	 */
	getConfigurationProperties(): IStringDictionary<IRegisteredConfigurationPropertySchema>;

	/**
	 * Returns the configuration key registered for each policy name.
	 */
	getPolicyConfigurations(): Map<PolicyName, string>;

	/**
	 * Returns the settings that were excluded from the registry.
	 */
	getExcludedConfigurationProperties(): IStringDictionary<IRegisteredConfigurationPropertySchema>;

	/**
	 * Registers the identifiers for editor configurations.
	 */
	registerOverrideIdentifiers(identifiers: string[]): void;
}
|
||||
|
||||
/**
 * Where a setting may be configured. The numeric values are spelled out so
 * that they do not depend on declaration order.
 */
export const enum ConfigurationScope {
	/** Application specific; configurable only in default profile user settings. */
	APPLICATION = 1,
	/** Machine specific; configurable only in local and remote user settings. */
	MACHINE = 2,
	/** Application machine specific; configurable only in default profile user settings and remote user settings. */
	APPLICATION_MACHINE = 3,
	/** Window specific; configurable in the user or workspace settings. */
	WINDOW = 4,
	/** Resource specific; configurable in the user, workspace or folder settings. */
	RESOURCE = 5,
	/** Resource specific and additionally configurable in language specific settings. */
	LANGUAGE_OVERRIDABLE = 6,
	/** Machine specific and additionally configurable in workspace or folder settings. */
	MACHINE_OVERRIDABLE = 7,
}
|
||||
|
||||
/**
 * Describes the system-wide policy attached to a setting.
 */
export interface IPolicy {

	/** Name of the policy. */
	readonly name: PolicyName;

	/** Code version in which this policy was introduced, as `major.minor`. */
	readonly minimumVersion: `${number}.${number}`;
}
|
||||
|
||||
/**
 * JSON schema of a single setting, extended with configuration specific metadata.
 */
export interface IConfigurationPropertySchema extends IJSONSchema {

	/** Scope in which the setting can be configured. */
	scope?: ConfigurationScope;

	/**
	 * A restricted setting is read only from trusted sources. For example, when
	 * the workspace is not trusted its value is not read from the workspace
	 * settings file.
	 */
	restricted?: boolean;

	/**
	 * Set to `false` to exclude this property from the registry.
	 * Properties are included by default.
	 */
	included?: boolean;

	/**
	 * Tags associated with the property.
	 * - Tags can be used for filtering.
	 * - `experimental` marks the setting as experimental.
	 * - `onExP` marks that the default of the setting can be changed by running experiments.
	 */
	tags?: string[];

	/**
	 * When enabled, the setting is ignored during sync; the user can override this.
	 */
	ignoreSync?: boolean;

	/**
	 * When enabled, the setting is ignored during sync and the user cannot override this.
	 */
	disallowSyncIgnore?: boolean;

	/**
	 * Prevents extensions from contributing a configuration default value for this setting.
	 */
	disallowConfigurationDefault?: boolean;

	/**
	 * Labels for the enumeration items.
	 */
	enumItemLabels?: string[];

	/**
	 * Presentation format for string settings.
	 * When omitted the presentation format defaults to `singleline`.
	 */
	editPresentation?: EditPresentationTypes;

	/**
	 * Order number of the setting within the settings editor.
	 * When omitted the setting is placed at the end.
	 */
	order?: number;

	/**
	 * When present, the value of this setting can always be overwritten by
	 * a system-wide policy.
	 */
	policy?: IPolicy;
}
|
||||
|
||||
/**
 * Identifies the extension that contributed a configuration or a default value.
 */
export interface IExtensionInfo {
	/** Identifier of the extension. */
	id: string;
	/** Optional display name of the extension. */
	displayName?: string;
}
|
||||
|
||||
/**
 * A node of contributed configuration. A node may carry its own properties
 * and may nest further nodes under `allOf`.
 */
export interface IConfigurationNode {
	id?: string;
	order?: number;
	type?: string | string[];
	title?: string;
	description?: string;
	/** Settings declared directly on this node, keyed by setting name. */
	properties?: IStringDictionary<IConfigurationPropertySchema>;
	/** Nested nodes; they are registered together with this node. */
	allOf?: IConfigurationNode[];
	/** Scope applied to properties that do not declare a scope of their own. */
	scope?: ConfigurationScope;
	/** Extension that contributed the node; recorded as the source of its properties. */
	extensionInfo?: IExtensionInfo;
	/** Keys of properties that are treated as restricted unless they say otherwise. */
	restrictedProperties?: string[];
}
|
||||
|
||||
/**
 * Origin of a default value: either the single contributing extension, or a
 * map from a dotted key path to the extension that contributed that entry
 * (used when object defaults are merged from several contributions).
 */
export type ConfigurationDefaultValueSource =
	| IExtensionInfo
	| Map<string, IExtensionInfo>;
|
||||
|
||||
/**
 * A contribution of default values.
 */
export interface IConfigurationDefaults {
	/** Default values keyed by setting name or by override identifier key. */
	overrides: IStringDictionary<any>;
	/** Extension that contributed these defaults, when known. */
	source?: IExtensionInfo;
}
|
||||
|
||||
/**
 * A property schema as stored by the registry: the contributed schema plus
 * bookkeeping that is filled in during registration.
 */
export type IRegisteredConfigurationPropertySchema = IConfigurationPropertySchema & {
	/** The default declared by the schema itself, kept separately from any override. */
	defaultDefaultValue?: any;
	/** Source of the property. */
	source?: IExtensionInfo;
	/** Source of the default value. */
	defaultValueSource?: ConfigurationDefaultValueSource;
};
|
||||
|
||||
/**
 * A single contributed default override, exactly as it was registered.
 */
export interface IConfigurationDefaultOverride {
	/** The contributed default value. */
	readonly value: any;
	/** Source of the default override. */
	readonly source?: IExtensionInfo;
}
|
||||
|
||||
/**
 * The effective default override for a key after all contributions were merged.
 */
export interface IConfigurationDefaultOverrideValue {
	/** The merged default value. */
	readonly value: any;
	/** Where the value, or each entry of a merged object value, came from. */
	readonly source?: ConfigurationDefaultValueSource;
}
|
||||
|
||||
/** Shape shared by all of the settings schema buckets declared below. */
type SettingsSchemaBucket = {
	properties: IStringDictionary<IConfigurationPropertySchema>;
	patternProperties: IStringDictionary<IConfigurationPropertySchema>;
};

/** Every registered setting, regardless of scope. */
export const allSettings: SettingsSchemaBucket = { properties: {}, patternProperties: {} };

/** Settings with `ConfigurationScope.APPLICATION`. */
export const applicationSettings: SettingsSchemaBucket = { properties: {}, patternProperties: {} };

/** Settings with `ConfigurationScope.APPLICATION_MACHINE`. */
export const applicationMachineSettings: SettingsSchemaBucket = { properties: {}, patternProperties: {} };

/** Settings with `ConfigurationScope.MACHINE`. */
export const machineSettings: SettingsSchemaBucket = { properties: {}, patternProperties: {} };

/** Settings with `ConfigurationScope.MACHINE_OVERRIDABLE`. */
export const machineOverridableSettings: SettingsSchemaBucket = { properties: {}, patternProperties: {} };

/** Settings with `ConfigurationScope.WINDOW`. */
export const windowSettings: SettingsSchemaBucket = { properties: {}, patternProperties: {} };

/** Settings with `ConfigurationScope.RESOURCE` or `ConfigurationScope.LANGUAGE_OVERRIDABLE`. */
export const resourceSettings: SettingsSchemaBucket = { properties: {}, patternProperties: {} };
|
||||
|
||||
/** Schema id under which the resource language settings schema is registered. */
export const resourceLanguageSettingsSchemaId = 'vscode://schemas/settings/resourceLanguage';

/** Schema id for the configuration defaults schema. */
export const configurationDefaultsSchemaId = 'vscode://schemas/settings/configurationDefaults';
|
||||
|
||||
/** JSON contribution registry with which the configuration registry registers its schemas. */
const contributionRegistry = Registry.as<IJSONContributionRegistry>(
	JSONExtensions.JSONContribution
);
|
||||
|
||||
class ConfigurationRegistry implements IConfigurationRegistry {
|
||||
|
||||
/** Every default-value contribution currently registered, in registration order. */
private readonly registeredConfigurationDefaults: IConfigurationDefaults[] = [];

/** Per key: the raw contributed overrides plus the merged effective override, if any. */
private readonly configurationDefaultsOverrides: Map<string, {
	configurationDefaultOverrides: IConfigurationDefaultOverride[];
	configurationDefaultOverrideValue?: IConfigurationDefaultOverrideValue;
}>;

/** Synthetic node that holds the properties generated for override identifier defaults. */
private readonly defaultLanguageConfigurationOverridesNode: IConfigurationNode;

/** All registered configuration nodes, starting with the synthetic overrides node. */
private readonly configurationContributors: IConfigurationNode[];

/** Registered (included) properties by key. */
private readonly configurationProperties: IStringDictionary<IRegisteredConfigurationPropertySchema>;

/** Policy name to the key of the property that declares it. */
private readonly policyConfigurations: Map<PolicyName, string>;

/** Properties that were contributed with `included` set to a falsy value. */
private readonly excludedConfigurationProperties: IStringDictionary<IRegisteredConfigurationPropertySchema>;

/** Schema registered under `resourceLanguageSettingsSchemaId`. */
private readonly resourceLanguageSettingsSchema: IJSONSchema;

/** Override identifiers registered so far. */
private readonly overrideIdentifiers = new Set<string>();

private readonly _onDidSchemaChange = new Emitter<void>();
readonly onDidSchemaChange: Event<void> = this._onDidSchemaChange.event;

private readonly _onDidUpdateConfiguration = new Emitter<{ properties: ReadonlySet<string>; defaultsOverrides?: boolean }>();
readonly onDidUpdateConfiguration = this._onDidUpdateConfiguration.event;
|
||||
|
||||
/**
 * Creates an empty registry: initialises the bookkeeping collections, seeds
 * the contributors list with the synthetic default-overrides node, registers
 * the resource language settings schema and installs the override property
 * pattern key.
 */
constructor() {
	const overridesNode: IConfigurationNode = {
		id: 'defaultOverrides',
		title: nls.localize('defaultLanguageConfigurationOverrides.title', "Default Language Configuration Overrides"),
		properties: {}
	};
	const languageSettingsSchema: IJSONSchema = {
		properties: {},
		patternProperties: {},
		additionalProperties: true,
		allowTrailingCommas: true,
		allowComments: true
	};

	this.configurationDefaultsOverrides = new Map();
	this.defaultLanguageConfigurationOverridesNode = overridesNode;
	this.configurationContributors = [overridesNode];
	this.resourceLanguageSettingsSchema = languageSettingsSchema;
	this.configurationProperties = {};
	this.policyConfigurations = new Map<PolicyName, string>();
	this.excludedConfigurationProperties = {};

	contributionRegistry.registerSchema(resourceLanguageSettingsSchemaId, languageSettingsSchema);
	this.registerOverridePropertyPatternKey();
}
|
||||
|
||||
/**
 * Registers one configuration node; shorthand for `registerConfigurations`
 * with a single-element list.
 *
 * @param configuration The node to register.
 * @param validate Whether property keys are validated first (default `true`).
 */
public registerConfiguration(configuration: IConfigurationNode, validate: boolean = true): void {
	const single = [configuration];
	this.registerConfigurations(single, validate);
}
|
||||
|
||||
/**
 * Registers the given configuration nodes, re-registers the resource language
 * settings schema and fires both change events.
 *
 * @param configurations The nodes to register.
 * @param validate Whether property keys are validated first (default `true`).
 */
public registerConfigurations(configurations: IConfigurationNode[], validate: boolean = true): void {
	const affectedKeys = new Set<string>();
	this.doRegisterConfigurations(configurations, validate, affectedKeys);

	contributionRegistry.registerSchema(resourceLanguageSettingsSchemaId, this.resourceLanguageSettingsSchema);
	this._onDidSchemaChange.fire();
	this._onDidUpdateConfiguration.fire({ properties: affectedKeys });
}
|
||||
|
||||
/**
 * Removes the given configuration nodes, re-registers the resource language
 * settings schema and fires both change events.
 *
 * @param configurations The nodes to remove.
 */
public deregisterConfigurations(configurations: IConfigurationNode[]): void {
	const affectedKeys = new Set<string>();
	this.doDeregisterConfigurations(configurations, affectedKeys);

	contributionRegistry.registerSchema(resourceLanguageSettingsSchemaId, this.resourceLanguageSettingsSchema);
	this._onDidSchemaChange.fire();
	this._onDidUpdateConfiguration.fire({ properties: affectedKeys });
}
|
||||
|
||||
/**
 * Removes the nodes in `remove`, then registers the nodes in `add` without
 * validation. The events fire once for the whole update.
 */
public updateConfigurations({ add, remove }: { add: IConfigurationNode[]; remove: IConfigurationNode[] }): void {
	const affectedKeys = new Set<string>();

	// Removals run first so that a node present in both lists ends up registered.
	this.doDeregisterConfigurations(remove, affectedKeys);
	this.doRegisterConfigurations(add, false, affectedKeys);

	contributionRegistry.registerSchema(resourceLanguageSettingsSchemaId, this.resourceLanguageSettingsSchema);
	this._onDidSchemaChange.fire();
	this._onDidUpdateConfiguration.fire({ properties: affectedKeys });
}
|
||||
|
||||
/**
 * Registers default-value contributions and fires both change events; the
 * update event is flagged with `defaultsOverrides: true`.
 *
 * @param configurationDefaults The contributions to register.
 */
public registerDefaultConfigurations(configurationDefaults: IConfigurationDefaults[]): void {
	const affectedKeys = new Set<string>();
	this.doRegisterDefaultConfigurations(configurationDefaults, affectedKeys);

	this._onDidSchemaChange.fire();
	this._onDidUpdateConfiguration.fire({ properties: affectedKeys, defaultsOverrides: true });
}
|
||||
|
||||
/**
 * Records the given default-value contributions and recomputes the effective
 * default override for every key they touch.
 *
 * Keys matching `OVERRIDE_PROPERTY_REGEX` get a generated property via
 * `updateDefaultOverrideProperty`; other keys update the default and the
 * schema of the already registered property, if there is one.
 *
 * @param configurationDefaults The contributions to record.
 * @param bucket Receives every key that appears in the contributions.
 */
private doRegisterDefaultConfigurations(configurationDefaults: IConfigurationDefaults[], bucket: Set<string>) {
	this.registeredConfigurationDefaults.push(...configurationDefaults);

	const collectedIdentifiers: string[] = [];

	for (const { overrides, source } of configurationDefaults) {
		for (const key in overrides) {
			bucket.add(key);

			// Look up the bookkeeping entry for this key, creating it on first use.
			let entry = this.configurationDefaultsOverrides.get(key);
			if (!entry) {
				entry = { configurationDefaultOverrides: [] };
				this.configurationDefaultsOverrides.set(key, entry);
			}

			const value = overrides[key];
			entry.configurationDefaultOverrides.push({ value, source });

			const isOverrideIdentifierKey = OVERRIDE_PROPERTY_REGEX.test(key);
			const merged = isOverrideIdentifierKey
				? this.mergeDefaultConfigurationsForOverrideIdentifier(key, value, source, entry.configurationDefaultOverrideValue)
				: this.mergeDefaultConfigurationsForConfigurationProperty(key, value, source, entry.configurationDefaultOverrideValue);

			// The merge helpers return undefined when the stored source has an unexpected shape.
			if (!merged) {
				continue;
			}
			entry.configurationDefaultOverrideValue = merged;

			if (isOverrideIdentifierKey) {
				// Configuration defaults for override identifiers
				this.updateDefaultOverrideProperty(key, merged, source);
				collectedIdentifiers.push(...overrideIdentifiersFromKey(key));
			} else {
				// Configuration defaults for configuration properties
				const registered = this.configurationProperties[key];
				if (registered) {
					this.updatePropertyDefaultValue(key, registered);
					this.updateSchema(key, registered);
				}
			}
		}
	}

	this.doRegisterOverrideIdentifiers(collectedIdentifiers);
}
|
||||
|
||||
/**
 * Removes default-value contributions and fires both change events; the
 * update event is flagged with `defaultsOverrides: true`.
 *
 * @param defaultConfigurations The contributions to remove.
 */
public deregisterDefaultConfigurations(defaultConfigurations: IConfigurationDefaults[]): void {
	const affectedKeys = new Set<string>();
	this.doDeregisterDefaultConfigurations(defaultConfigurations, affectedKeys);

	this._onDidSchemaChange.fire();
	this._onDidUpdateConfiguration.fire({ properties: affectedKeys, defaultsOverrides: true });
}
|
||||
|
||||
/**
 * Removes the given default-value contributions and recomputes the effective
 * default override of every key for which a contribution was actually removed.
 *
 * @param defaultConfigurations The contributions to remove.
 * @param bucket Receives every key whose contribution was found and removed.
 */
private doDeregisterDefaultConfigurations(defaultConfigurations: IConfigurationDefaults[], bucket: Set<string>): void {
	// Drop the contributions themselves; they are matched by object identity.
	for (const contribution of defaultConfigurations) {
		const position = this.registeredConfigurationDefaults.indexOf(contribution);
		if (position >= 0) {
			this.registeredConfigurationDefaults.splice(position, 1);
		}
	}

	for (const { overrides, source } of defaultConfigurations) {
		for (const key in overrides) {
			const entry = this.configurationDefaultsOverrides.get(key);
			if (!entry) {
				continue;
			}

			// With a source the override is matched by extension id, otherwise by value identity.
			const remaining = entry.configurationDefaultOverrides;
			const position = remaining.findIndex(candidate => {
				if (source) {
					return candidate.source?.id === source.id;
				}
				return candidate.value === overrides[key];
			});
			if (position === -1) {
				continue;
			}

			remaining.splice(position, 1);
			if (remaining.length === 0) {
				this.configurationDefaultsOverrides.delete(key);
			}

			if (OVERRIDE_PROPERTY_REGEX.test(key)) {
				// Rebuild the merged override from whatever contributions are left.
				const recomputed = remaining.reduce<IConfigurationDefaultOverrideValue | undefined>(
					(merged, contribution) => this.mergeDefaultConfigurationsForOverrideIdentifier(key, contribution.value, contribution.source, merged),
					undefined
				);
				if (recomputed && !types.isEmptyObject(recomputed.value)) {
					entry.configurationDefaultOverrideValue = recomputed;
					this.updateDefaultOverrideProperty(key, recomputed, source);
				} else {
					// Nothing is left for this identifier: remove the generated property as well.
					this.configurationDefaultsOverrides.delete(key);
					delete this.configurationProperties[key];
					delete this.defaultLanguageConfigurationOverridesNode.properties![key];
				}
			} else {
				const recomputed = remaining.reduce<IConfigurationDefaultOverrideValue | undefined>(
					(merged, contribution) => this.mergeDefaultConfigurationsForConfigurationProperty(key, contribution.value, contribution.source, merged),
					undefined
				);
				entry.configurationDefaultOverrideValue = recomputed;

				const registered = this.configurationProperties[key];
				if (registered) {
					this.updatePropertyDefaultValue(key, registered);
					this.updateSchema(key, registered);
				}
			}

			bucket.add(key);
		}
	}

	this.updateOverridePropertyPatternKey();
}
|
||||
|
||||
/**
 * Creates (or replaces) the generated property for an override identifier key
 * and stores it both in the property map and on the synthetic overrides node.
 *
 * @param key The override identifier key.
 * @param newDefaultOverride The merged default override; its value becomes the property default.
 * @param source Recorded as both the source of the property and of its default value.
 */
private updateDefaultOverrideProperty(key: string, newDefaultOverride: IConfigurationDefaultOverrideValue, source: IExtensionInfo | undefined): void {
	const overrideValue = newDefaultOverride.value;
	const languageTag = getLanguageTagSettingPlainKey(key);

	const generated: IRegisteredConfigurationPropertySchema = {
		type: 'object',
		default: overrideValue,
		description: nls.localize('defaultLanguageConfiguration.description', "Configure settings to be overridden for the {0} language.", languageTag),
		$ref: resourceLanguageSettingsSchemaId,
		defaultDefaultValue: overrideValue,
		source,
		defaultValueSource: source
	};

	this.configurationProperties[key] = generated;
	this.defaultLanguageConfigurationOverridesNode.properties![key] = generated;
}
|
||||
|
||||
/**
 * Merges one contributed value object for an override identifier into the
 * existing default override.
 *
 * Object-valued entries are shallow-merged into the existing entry and their
 * source is tracked per `property.objectKey`; all other entries replace the
 * existing one. Note that the existing value object and source map are
 * updated in place.
 *
 * @param overrideIdentifier The override identifier key (not read by the merge itself).
 * @param configurationValueObject The contributed values, keyed by setting name.
 * @param valueSource The extension that contributed the values, if known.
 * @param existingDefaultOverride The result of earlier merges, if any.
 * @returns The merged override, or `undefined` when the existing source is not a `Map`.
 */
private mergeDefaultConfigurationsForOverrideIdentifier(overrideIdentifier: string, configurationValueObject: IStringDictionary<any>, valueSource: IExtensionInfo | undefined, existingDefaultOverride: IConfigurationDefaultOverrideValue | undefined): IConfigurationDefaultOverrideValue | undefined {
	const mergedValue = existingDefaultOverride?.value || {};
	const sources = existingDefaultOverride?.source ?? new Map<string, IExtensionInfo>();

	// This should not happen
	if (!(sources instanceof Map)) {
		console.error('objectConfigurationSources is not a Map');
		return undefined;
	}

	for (const propertyKey of Object.keys(configurationValueObject)) {
		const incoming = configurationValueObject[propertyKey];
		const current = mergedValue[propertyKey];
		const mergeAsObject = types.isObject(incoming) && (types.isUndefined(current) || types.isObject(current));

		if (!mergeAsObject) {
			// Primitive values are overridden
			mergedValue[propertyKey] = incoming;
			if (valueSource) {
				sources.set(propertyKey, valueSource);
			} else {
				sources.delete(propertyKey);
			}
			continue;
		}

		// Object values are merged, and the source of each of their keys is tracked
		mergedValue[propertyKey] = { ...(current ?? {}), ...incoming };
		if (valueSource) {
			for (const objectKey in incoming) {
				sources.set(`${propertyKey}.${objectKey}`, valueSource);
			}
		}
	}

	return { value: mergedValue, source: sources };
}
|
||||
|
||||
/**
 * Merges one contributed default for a regular configuration property into
 * the existing default override.
 *
 * An object value is shallow-merged over the existing default when the
 * registered property has type `'object'`, or, for a property that is not
 * registered, when the existing default is undefined or an object. Any other
 * value replaces the default as is.
 *
 * @param propertyKey The setting key.
 * @param value The contributed default value.
 * @param valuesSource The extension that contributed the value, if known.
 * @param existingDefaultOverride The result of earlier merges, if any.
 * @returns The merged override, or `undefined` when an object merge finds a source that is not a `Map`.
 */
private mergeDefaultConfigurationsForConfigurationProperty(propertyKey: string, value: any, valuesSource: IExtensionInfo | undefined, existingDefaultOverride: IConfigurationDefaultOverrideValue | undefined): IConfigurationDefaultOverrideValue | undefined {
	const registered = this.configurationProperties[propertyKey];
	const previousDefault = existingDefaultOverride?.value ?? registered?.defaultDefaultValue;

	const mergeAsObject = types.isObject(value) && (
		registered !== undefined
			? registered.type === 'object'
			: (types.isUndefined(previousDefault) || types.isObject(previousDefault))
	);

	if (!mergeAsObject) {
		return { value, source: valuesSource };
	}

	// Object defaults are merged, and the source of each of their keys is tracked
	const sources = existingDefaultOverride?.source ?? new Map<string, IExtensionInfo>();

	// This should not happen
	if (!(sources instanceof Map)) {
		console.error('defaultValueSource is not a Map');
		return undefined;
	}

	if (valuesSource) {
		for (const objectKey in value) {
			sources.set(`${propertyKey}.${objectKey}`, valuesSource);
		}
	}

	const mergedValue = { ...(types.isObject(previousDefault) ? previousDefault : {}), ...value };
	return { value: mergedValue, source: sources };
}
|
||||
|
||||
/**
 * Applies a batch of changes in a fixed order (defaults removed, defaults
 * added, configurations removed, configurations added) and fires both change
 * events once at the end. Added configurations are registered without validation.
 *
 * @param delta The changes to apply.
 */
public deltaConfiguration(delta: IConfigurationDelta): void {
	const { removedDefaults, addedDefaults, removedConfigurations, addedConfigurations } = delta;
	const affectedKeys = new Set<string>();

	// defaults: remove
	if (removedDefaults) {
		this.doDeregisterDefaultConfigurations(removedDefaults, affectedKeys);
	}
	// defaults: add
	if (addedDefaults) {
		this.doRegisterDefaultConfigurations(addedDefaults, affectedKeys);
	}
	// configurations: remove
	if (removedConfigurations) {
		this.doDeregisterConfigurations(removedConfigurations, affectedKeys);
	}
	// configurations: add
	if (addedConfigurations) {
		this.doRegisterConfigurations(addedConfigurations, false, affectedKeys);
	}

	// Flagged whenever the delta carried defaults, in either direction.
	const defaultsOverrides = Boolean(removedDefaults || addedDefaults);

	this._onDidSchemaChange.fire();
	this._onDidUpdateConfiguration.fire({ properties: affectedKeys, defaultsOverrides });
}
|
||||
|
||||
/**
 * Announces a schema change by firing `onDidSchemaChange`.
 * The passed configurations are not inspected.
 */
public notifyConfigurationSchemaUpdated(...configurations: IConfigurationNode[]) {
	const schemaChanged = this._onDidSchemaChange;
	schemaChanged.fire();
}
|
||||
|
||||
/**
 * Adds the given override identifiers and fires `onDidSchemaChange`.
 *
 * @param overrideIdentifiers The identifiers to add.
 */
public registerOverrideIdentifiers(overrideIdentifiers: string[]): void {
	this.doRegisterOverrideIdentifiers(overrideIdentifiers);

	// Always announced, whether or not any identifier was new.
	this._onDidSchemaChange.fire();
}
|
||||
|
||||
/**
 * Adds the identifiers to the set of known override identifiers and refreshes
 * the override property pattern key. Does not fire any event.
 */
private doRegisterOverrideIdentifiers(overrideIdentifiers: string[]) {
	overrideIdentifiers.forEach(identifier => {
		this.overrideIdentifiers.add(identifier);
	});
	this.updateOverridePropertyPatternKey();
}
|
||||
|
||||
/**
 * Registers each node in turn: its properties (and those of nested nodes) are
 * validated and registered, the node is appended to the contributors list,
 * and its properties are written into the settings schemas.
 *
 * @param configurations The nodes to register.
 * @param validate Whether property keys are validated first.
 * @param bucket Receives the key of every property that was registered.
 */
private doRegisterConfigurations(configurations: IConfigurationNode[], validate: boolean, bucket: Set<string>): void {
	for (const node of configurations) {
		// The scope argument is left undefined so that the callee applies its own default.
		this.validateAndRegisterProperties(node, validate, node.extensionInfo, node.restrictedProperties, undefined, bucket);

		this.configurationContributors.push(node);
		this.registerJSONConfiguration(node);
	}
}
|
||||
|
||||
/**
 * Removes the given nodes: every property of a node and of its nested nodes is
 * dropped from the property map, the policy map and the settings schemas, and
 * the node itself is dropped from the contributors list.
 *
 * @param configurations The nodes to remove.
 * @param bucket Receives the key of every property declared by the removed nodes.
 */
private doDeregisterConfigurations(configurations: IConfigurationNode[], bucket: Set<string>): void {
	const removeNode = (node: IConfigurationNode): void => {
		const declared = node.properties;
		if (declared) {
			for (const key in declared) {
				bucket.add(key);

				const policyName = this.configurationProperties[key]?.policy?.name;
				if (policyName) {
					this.policyConfigurations.delete(policyName);
				}

				delete this.configurationProperties[key];
				this.removeFromSchema(key, declared[key]);
			}
		}

		for (const child of node.allOf ?? []) {
			removeNode(child);
		}
	};

	configurations.forEach(configuration => {
		removeNode(configuration);

		// Only top-level nodes are tracked as contributors.
		const position = this.configurationContributors.indexOf(configuration);
		if (position >= 0) {
			this.configurationContributors.splice(position, 1);
		}
	});
}
|
||||
|
||||
/**
 * Validates and registers the properties of a configuration node and,
 * recursively, of the nodes nested under `allOf`.
 *
 * For every property this records its source and declared default, applies
 * any default override, resolves scope and `restricted`, and then files the
 * property either as excluded (own `included` member with a falsy value) or
 * as a regular configuration property. Properties that fail validation or are
 * excluded are deleted from the node's `properties` dictionary.
 *
 * @param configuration The node whose properties are processed.
 * @param validate Whether each key is checked with `validateProperty` first.
 * @param extensionInfo Recorded as the source of every property.
 * @param restrictedProperties Keys that are restricted unless the property sets `restricted` itself.
 * @param scope Scope inherited from the parent node; the node's own scope wins when set.
 * @param bucket Receives the key of every property that was registered.
 */
private validateAndRegisterProperties(configuration: IConfigurationNode, validate: boolean = true, extensionInfo: IExtensionInfo | undefined, restrictedProperties: string[] | undefined, scope: ConfigurationScope = ConfigurationScope.WINDOW, bucket: Set<string>): void {
	scope = types.isUndefinedOrNull(configuration.scope) ? scope : configuration.scope;
	const properties = configuration.properties;
	if (properties) {
		for (const key in properties) {
			const property: IRegisteredConfigurationPropertySchema = properties[key];
			if (validate && validateProperty(key, property)) {
				delete properties[key];
				continue;
			}

			property.source = extensionInfo;

			// update default value
			property.defaultDefaultValue = property.default;
			this.updatePropertyDefaultValue(key, property);

			// update scope
			if (OVERRIDE_PROPERTY_REGEX.test(key)) {
				property.scope = undefined; // No scope for overridable properties `[${identifier}]`
			} else {
				property.scope = types.isUndefinedOrNull(property.scope) ? scope : property.scope;
				property.restricted = types.isUndefinedOrNull(property.restricted) ? !!restrictedProperties?.includes(key) : property.restricted;
			}

			// Add to properties maps
			// Property is included by default if 'included' is unspecified.
			// The check goes through Object.prototype because the schema object is
			// contributed data: a schema with its own `hasOwnProperty` member must
			// not be able to break registration.
			const declaresIncluded = Object.prototype.hasOwnProperty.call(property, 'included');
			if (declaresIncluded && !property.included) {
				this.excludedConfigurationProperties[key] = property;
				delete properties[key];
				continue;
			}

			this.configurationProperties[key] = property;
			const policyName = property.policy?.name;
			if (policyName) {
				this.policyConfigurations.set(policyName, key);
			}

			if (!property.deprecationMessage && property.markdownDeprecationMessage) {
				// If not set, default deprecationMessage to the markdown source
				property.deprecationMessage = property.markdownDeprecationMessage;
			}

			bucket.add(key);
		}
	}

	const subNodes = configuration.allOf;
	if (subNodes) {
		for (const node of subNodes) {
			// Nested nodes inherit the scope resolved for this node.
			this.validateAndRegisterProperties(node, validate, extensionInfo, restrictedProperties, scope, bucket);
		}
	}
}
|
||||
|
||||
// TODO: @sandy081 - Remove this method and include required info in getConfigurationProperties
/**
 * Returns the registered configuration nodes.
 * The internal list itself is returned, not a copy.
 */
getConfigurations(): IConfigurationNode[] {
	const contributors = this.configurationContributors;
	return contributors;
}
|
||||
|
||||
/**
 * Returns the registered properties by key.
 * The internal dictionary itself is returned, not a copy.
 */
getConfigurationProperties(): IStringDictionary<IRegisteredConfigurationPropertySchema> {
	const registered = this.configurationProperties;
	return registered;
}
|
||||
|
||||
/**
 * Returns the map from policy name to the key of the property declaring it.
 * The internal map itself is returned, not a copy.
 */
getPolicyConfigurations(): Map<PolicyName, string> {
	const byPolicyName = this.policyConfigurations;
	return byPolicyName;
}
|
||||
|
||||
/**
 * Returns the properties that were excluded from the registry, by key.
 * The internal dictionary itself is returned, not a copy.
 */
getExcludedConfigurationProperties(): IStringDictionary<IRegisteredConfigurationPropertySchema> {
	const excluded = this.excludedConfigurationProperties;
	return excluded;
}
|
||||
|
||||
/**
 * Returns a shallow copy of the registered default-value contributions, so
 * callers cannot change the registry's own list.
 */
getRegisteredDefaultConfigurations(): IConfigurationDefaults[] {
	return this.registeredConfigurationDefaults.slice();
}
|
||||
|
||||
/**
 * Builds a fresh map from key to effective default override.
 * Keys without a merged override value are left out.
 */
getConfigurationDefaultsOverrides(): Map<string, IConfigurationDefaultOverrideValue> {
	const effective = new Map<string, IConfigurationDefaultOverrideValue>();
	this.configurationDefaultsOverrides.forEach((entry, key) => {
		const merged = entry.configurationDefaultOverrideValue;
		if (merged) {
			effective.set(key, merged);
		}
	});
	return effective;
}
|
||||
|
||||
/**
 * Adds every property of the given configuration node, and of all nodes nested
 * under `allOf`, to the settings schemas via `updateSchema`.
 * A node's own properties are handled before its `allOf` children.
 */
private registerJSONConfiguration(configuration: IConfigurationNode) {
	const visit = (node: IConfigurationNode): void => {
		const ownProperties = node.properties;
		if (ownProperties) {
			for (const propertyKey in ownProperties) {
				this.updateSchema(propertyKey, ownProperties[propertyKey]);
			}
		}
		const children = node.allOf;
		if (children) {
			for (const child of children) {
				visit(child);
			}
		}
	};
	visit(configuration);
}
|
||||
|
||||
/**
 * Publishes a property into the settings schemas.
 *
 * The property is always written to `allSettings`, and additionally to the schema
 * matching its scope. Language-overridable properties go into both `resourceSettings`
 * and the resource-language settings schema. A property with any other scope value
 * (including none) is only written to `allSettings`.
 */
private updateSchema(key: string, property: IConfigurationPropertySchema): void {
	allSettings.properties[key] = property;

	const scope = property.scope;
	if (scope === ConfigurationScope.APPLICATION) {
		applicationSettings.properties[key] = property;
	} else if (scope === ConfigurationScope.MACHINE) {
		machineSettings.properties[key] = property;
	} else if (scope === ConfigurationScope.APPLICATION_MACHINE) {
		applicationMachineSettings.properties[key] = property;
	} else if (scope === ConfigurationScope.MACHINE_OVERRIDABLE) {
		machineOverridableSettings.properties[key] = property;
	} else if (scope === ConfigurationScope.WINDOW) {
		windowSettings.properties[key] = property;
	} else if (scope === ConfigurationScope.RESOURCE) {
		resourceSettings.properties[key] = property;
	} else if (scope === ConfigurationScope.LANGUAGE_OVERRIDABLE) {
		resourceSettings.properties[key] = property;
		this.resourceLanguageSettingsSchema.properties![key] = property;
	}
}
|
||||
|
||||
/**
 * Removes a property from the settings schemas.
 *
 * The key is always deleted from `allSettings`, and additionally from the schema
 * matching the property's scope. Resource and language-overridable properties are
 * deleted from both `resourceSettings` and the resource-language settings schema.
 */
private removeFromSchema(key: string, property: IConfigurationPropertySchema): void {
	delete allSettings.properties[key];

	const scope = property.scope;
	if (scope === ConfigurationScope.APPLICATION) {
		delete applicationSettings.properties[key];
	} else if (scope === ConfigurationScope.MACHINE) {
		delete machineSettings.properties[key];
	} else if (scope === ConfigurationScope.APPLICATION_MACHINE) {
		delete applicationMachineSettings.properties[key];
	} else if (scope === ConfigurationScope.MACHINE_OVERRIDABLE) {
		delete machineOverridableSettings.properties[key];
	} else if (scope === ConfigurationScope.WINDOW) {
		delete windowSettings.properties[key];
	} else if (scope === ConfigurationScope.RESOURCE || scope === ConfigurationScope.LANGUAGE_OVERRIDABLE) {
		delete resourceSettings.properties[key];
		delete this.resourceLanguageSettingsSchema.properties![key];
	}
}
|
||||
|
||||
/**
 * Registers a `[identifier]` property in every settings schema for each known
 * override identifier. The schema object for an identifier gets its default
 * resolved through `updatePropertyDefaultValue` and is then shared by all schemas.
 */
private updateOverridePropertyPatternKey(): void {
	const settingsSchemas = [
		allSettings,
		applicationSettings,
		applicationMachineSettings,
		machineSettings,
		machineOverridableSettings,
		windowSettings,
		resourceSettings,
	];
	for (const overrideIdentifier of this.overrideIdentifiers.values()) {
		const propertyKey = `[${overrideIdentifier}]`;
		const languageOverridesSchema: IJSONSchema = {
			type: 'object',
			description: nls.localize('overrideSettings.defaultDescription', "Configure editor settings to be overridden for a language."),
			errorMessage: nls.localize('overrideSettings.errorMessage', "This setting does not support per-language configuration."),
			$ref: resourceLanguageSettingsSchemaId,
		};
		this.updatePropertyDefaultValue(propertyKey, languageOverridesSchema);
		for (const settingsSchema of settingsSchemas) {
			settingsSchema.properties[propertyKey] = languageOverridesSchema;
		}
	}
}
|
||||
|
||||
/**
 * Installs the language-override pattern (`OVERRIDE_PROPERTY_PATTERN`) as a
 * pattern property of every settings schema, all sharing one schema object,
 * and then fires the schema-change event.
 */
private registerOverridePropertyPatternKey(): void {
	const languageOverridesSchema: IJSONSchema = {
		type: 'object',
		description: nls.localize('overrideSettings.defaultDescription', "Configure editor settings to be overridden for a language."),
		errorMessage: nls.localize('overrideSettings.errorMessage', "This setting does not support per-language configuration."),
		$ref: resourceLanguageSettingsSchemaId,
	};
	const settingsSchemas = [
		allSettings,
		applicationSettings,
		applicationMachineSettings,
		machineSettings,
		machineOverridableSettings,
		windowSettings,
		resourceSettings,
	];
	for (const settingsSchema of settingsSchemas) {
		settingsSchema.patternProperties[OVERRIDE_PROPERTY_PATTERN] = languageOverridesSchema;
	}
	this._onDidSchemaChange.fire();
}
|
||||
|
||||
/**
 * Resolves the effective default of a property and stores it on the property.
 *
 * Precedence:
 * 1. the registered default override for `key`, unless the property sets
 *    `disallowConfigurationDefault` and the override carries a `source`;
 * 2. the property's `defaultDefaultValue`;
 * 3. the type-based fallback from `getDefaultValue`.
 *
 * Writes `property.default` and `property.defaultValueSource`. The source is only
 * kept when the override's value is the one that ends up being used.
 */
private updatePropertyDefaultValue(key: string, property: IRegisteredConfigurationPropertySchema): void {
	const defaultOverride = this.configurationDefaultsOverrides.get(key)?.configurationDefaultOverrideValue;

	let resolvedValue = undefined;
	let resolvedSource = undefined;

	if (defaultOverride) {
		// An override that names a source is ignored when the property disallows configuration defaults.
		const blocked = property.disallowConfigurationDefault && defaultOverride.source;
		if (!blocked) {
			resolvedValue = defaultOverride.value;
			resolvedSource = defaultOverride.source;
		}
	}

	if (types.isUndefined(resolvedValue)) {
		resolvedValue = property.defaultDefaultValue;
		resolvedSource = undefined;
	}

	if (types.isUndefined(resolvedValue)) {
		resolvedValue = getDefaultValue(property.type);
	}

	property.default = resolvedValue;
	property.defaultValueSource = resolvedSource;
}
|
||||
}
|
||||
|
||||
/** Regex source matching one bracketed override identifier; group 1 captures the text between the brackets. */
const OVERRIDE_IDENTIFIER_PATTERN = String.raw`\[([^\]]+)\]`;

/** Global matcher used to walk every bracketed identifier inside a key. */
const OVERRIDE_IDENTIFIER_REGEX = new RegExp(OVERRIDE_IDENTIFIER_PATTERN, 'g');

/** Regex source matching a whole key made only of one or more bracketed identifiers. */
export const OVERRIDE_PROPERTY_PATTERN = '^(' + OVERRIDE_IDENTIFIER_PATTERN + ')+$';

/** Compiled, non-global form of `OVERRIDE_PROPERTY_PATTERN`. */
export const OVERRIDE_PROPERTY_REGEX = new RegExp(OVERRIDE_PROPERTY_PATTERN);
|
||||
|
||||
/**
 * Extracts the override identifiers from a key such as `[a][b]`.
 *
 * Keys that do not consist solely of bracketed identifiers yield no identifiers.
 * Each identifier is trimmed, blank ones are dropped, and the collected list is
 * passed through `distinct` before being returned.
 */
export function overrideIdentifiersFromKey(key: string): string[] {
	const found: string[] = [];
	if (OVERRIDE_PROPERTY_REGEX.test(key)) {
		// The identifier regex is global: repeated exec() calls walk the key and
		// its position resets once exec() reports no further match.
		for (let match = OVERRIDE_IDENTIFIER_REGEX.exec(key); match !== null; match = OVERRIDE_IDENTIFIER_REGEX.exec(key)) {
			const name = match[1].trim();
			if (name) {
				found.push(name);
			}
		}
	}
	return distinct(found);
}
|
||||
|
||||
/**
 * Builds an override key by wrapping each identifier in brackets and concatenating them,
 * e.g. `['a', 'b']` becomes `[a][b]`. An empty list yields an empty string.
 */
export function keyFromOverrideIdentifiers(overrideIdentifiers: string[]): string {
	return overrideIdentifiers.map(identifier => `[${identifier}]`).join('');
}
|
||||
|
||||
/**
 * Returns the fallback default for a JSON schema type name.
 *
 * When a list of types is given only its first entry is considered.
 * `boolean` gives `false`, `integer`/`number` give `0`, `string` gives `''`,
 * `array` gives a new empty array, `object` gives a new empty object, and
 * anything else (including `undefined`) gives `null`.
 */
export function getDefaultValue(type: string | string[] | undefined) {
	const primaryType = Array.isArray(type) ? type[0] : type;
	if (primaryType === 'boolean') {
		return false;
	}
	if (primaryType === 'integer' || primaryType === 'number') {
		return 0;
	}
	if (primaryType === 'string') {
		return '';
	}
	if (primaryType === 'array') {
		return [];
	}
	if (primaryType === 'object') {
		return {};
	}
	return null;
}
|
||||
|
||||
const configurationRegistry = new ConfigurationRegistry();
|
||||
Registry.add(Extensions.Configuration, configurationRegistry);
|
||||
|
||||
/**
 * Checks whether a property may be registered.
 *
 * @param property The property key to register.
 * @param schema The schema the property is registered with.
 * @returns A localized error message when the key is blank, looks like a language
 * override key, is already registered, or uses a policy name that is already taken;
 * `null` when the property is acceptable.
 */
export function validateProperty(property: string, schema: IRegisteredConfigurationPropertySchema): string | null {
	if (!property.trim()) {
		return nls.localize('config.property.empty', "Cannot register an empty property");
	}

	if (OVERRIDE_PROPERTY_REGEX.test(property)) {
		return nls.localize('config.property.languageDefault', "Cannot register '{0}'. This matches property pattern '\\\\[.*\\\\]$' for describing language specific editor settings. Use 'configurationDefaults' contribution.", property);
	}

	if (configurationRegistry.getConfigurationProperties()[property] !== undefined) {
		return nls.localize('config.property.duplicate', "Cannot register '{0}'. This property is already registered.", property);
	}

	const policyName = schema.policy?.name;
	if (policyName) {
		const policyOwner = configurationRegistry.getPolicyConfigurations().get(policyName);
		if (policyOwner !== undefined) {
			return nls.localize('config.policy.duplicate', "Cannot register '{0}'. The associated policy {1} is already registered with {2}.", property, policyName, policyOwner);
		}
	}

	return null;
}
|
||||
|
||||
/**
 * Lists every registered configuration property together with its scope,
 * followed by the `launch` and `task` keys, which are reported as resource-scoped.
 */
export function getScopes(): [string, ConfigurationScope | undefined][] {
	const registered = configurationRegistry.getConfigurationProperties();
	const scopes = Object.keys(registered).map(
		(key): [string, ConfigurationScope | undefined] => [key, registered[key].scope]
	);
	scopes.push(
		['launch', ConfigurationScope.RESOURCE],
		['task', ConfigurationScope.RESOURCE]
	);
	return scopes;
}
|
||||
|
||||
/**
 * Flattens the properties of the given configuration nodes, including those of
 * nodes nested under `allOf`, into a single dictionary.
 * When a key occurs more than once, the occurrence visited last wins; a node's
 * own properties are visited before its `allOf` children.
 */
export function getAllConfigurationProperties(configurationNode: IConfigurationNode[]): IStringDictionary<IRegisteredConfigurationPropertySchema> {
	const collected: IStringDictionary<IRegisteredConfigurationPropertySchema> = {};
	for (const node of configurationNode) {
		const ownProperties = node.properties;
		if (types.isObject(ownProperties)) {
			for (const name in ownProperties) {
				collected[name] = ownProperties[name];
			}
		}
		const children = node.allOf;
		if (children) {
			const nested = getAllConfigurationProperties(children);
			for (const name of Object.keys(nested)) {
				collected[name] = nested[name];
			}
		}
	}
	return collected;
}
|
||||
|
||||
/** Scope names that `parseScope` recognises explicitly. */
const SCOPES_BY_NAME: ReadonlyMap<string, ConfigurationScope> = new Map<string, ConfigurationScope>([
	['application', ConfigurationScope.APPLICATION],
	['machine', ConfigurationScope.MACHINE],
	['resource', ConfigurationScope.RESOURCE],
	['machine-overridable', ConfigurationScope.MACHINE_OVERRIDABLE],
	['language-overridable', ConfigurationScope.LANGUAGE_OVERRIDABLE],
]);

/**
 * Converts a scope name into its `ConfigurationScope`.
 * Any name that is not explicitly recognised maps to the window scope.
 */
export function parseScope(scope: string): ConfigurationScope {
	return SCOPES_BY_NAME.get(scope) ?? ConfigurationScope.WINDOW;
}
|
||||
72
crates/goose-bench/src/error_capture.rs
Normal file
72
crates/goose-bench/src/error_capture.rs
Normal file
@@ -0,0 +1,72 @@
|
||||
use crate::eval_suites::BenchAgentError;
|
||||
use chrono::Utc;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
use tracing::{Event, Subscriber};
|
||||
use tracing_subscriber::layer::Context;
|
||||
use tracing_subscriber::Layer;
|
||||
|
||||
pub struct ErrorCaptureLayer {
|
||||
errors: Arc<Mutex<Vec<BenchAgentError>>>,
|
||||
}
|
||||
|
||||
impl ErrorCaptureLayer {
|
||||
pub fn new(errors: Arc<Mutex<Vec<BenchAgentError>>>) -> Self {
|
||||
Self { errors }
|
||||
}
|
||||
}
|
||||
|
||||
impl<S> Layer<S> for ErrorCaptureLayer
|
||||
where
|
||||
S: Subscriber,
|
||||
{
|
||||
fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
|
||||
// Only capture error and warning level events
|
||||
if *event.metadata().level() <= tracing::Level::WARN {
|
||||
let mut visitor = JsonVisitor::new();
|
||||
event.record(&mut visitor);
|
||||
|
||||
if let Some(message) = visitor.recorded_fields.get("message") {
|
||||
let error = BenchAgentError {
|
||||
message: message.to_string(),
|
||||
level: event.metadata().level().to_string(),
|
||||
timestamp: Utc::now(),
|
||||
};
|
||||
|
||||
let errors = self.errors.clone();
|
||||
tokio::spawn(async move {
|
||||
let mut errors = errors.lock().await;
|
||||
errors.push(error);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct JsonVisitor {
|
||||
recorded_fields: serde_json::Map<String, serde_json::Value>,
|
||||
}
|
||||
|
||||
impl JsonVisitor {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
recorded_fields: serde_json::Map::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl tracing::field::Visit for JsonVisitor {
|
||||
fn record_str(&mut self, field: &tracing::field::Field, value: &str) {
|
||||
self.recorded_fields.insert(
|
||||
field.name().to_string(),
|
||||
serde_json::Value::String(value.to_string()),
|
||||
);
|
||||
}
|
||||
|
||||
fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {
|
||||
self.recorded_fields.insert(
|
||||
field.name().to_string(),
|
||||
serde_json::Value::String(format!("{:?}", value)),
|
||||
);
|
||||
}
|
||||
}
|
||||
79
crates/goose-bench/src/eval_suites/core/create_file.rs
Normal file
79
crates/goose-bench/src/eval_suites/core/create_file.rs
Normal file
@@ -0,0 +1,79 @@
|
||||
// Create a new file called test.txt with the content 'Hello, World!'
|
||||
|
||||
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric};
|
||||
use crate::register_evaluation;
|
||||
use crate::work_dir::WorkDir;
|
||||
use async_trait::async_trait;
|
||||
use goose::message::MessageContent;
|
||||
use mcp_core::role::Role;
|
||||
use serde_json::{self, Value};
|
||||
|
||||
/// Evaluation that checks whether the agent creates a file with the developer
/// extension's text editor tool.
#[derive(Debug)]
pub struct DeveloperCreateFile {}

impl DeveloperCreateFile {
    /// Creates the evaluation; it holds no state.
    pub fn new() -> Self {
        Self {}
    }
}
|
||||
|
||||
#[async_trait]
|
||||
impl Evaluation for DeveloperCreateFile {
|
||||
async fn run(
|
||||
&self,
|
||||
mut agent: Box<dyn BenchAgent>,
|
||||
_work_dir: &mut WorkDir,
|
||||
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
||||
let mut metrics = Vec::new();
|
||||
|
||||
// Send the prompt to list files
|
||||
let messages = agent.prompt("Create a new file called test.txt in the current directory with the content 'Hello, World!'. Then read the contents of the new file to confirm.".to_string()).await?;
|
||||
// println!("asdhflkahjsdflkasdfl");
|
||||
|
||||
let valid_tool_call = messages.iter().any(|msg| {
|
||||
// Check if it's an assistant message
|
||||
msg.role == Role::Assistant &&
|
||||
// Check if any content item is a tool request for creating a file
|
||||
msg.content.iter().any(|content| {
|
||||
if let MessageContent::ToolRequest(tool_req) = content {
|
||||
if let Ok(tool_call) = tool_req.tool_call.as_ref() {
|
||||
// Check tool name is correct
|
||||
if tool_call.name != "developer__text_editor" {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Parse the arguments as JSON
|
||||
if let Ok(args) = serde_json::from_value::<Value>(tool_call.arguments.clone()) {
|
||||
// Check all required parameters match exactly
|
||||
args.get("command").and_then(Value::as_str) == Some("write") &&
|
||||
args.get("path").and_then(Value::as_str).is_some_and(|s| s.contains("test.txt")) &&
|
||||
args.get("file_text").and_then(Value::as_str) == Some("Hello, World!")
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
});
|
||||
|
||||
metrics.push((
|
||||
"Create files".to_string(),
|
||||
EvaluationMetric::Boolean(valid_tool_call),
|
||||
));
|
||||
Ok(metrics)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"developer_create_read_file"
|
||||
}
|
||||
|
||||
fn required_extensions(&self) -> Vec<String> {
|
||||
vec!["developer".to_string()]
|
||||
}
|
||||
}
|
||||
|
||||
register_evaluation!("developer", DeveloperCreateFile);
|
||||
44
crates/goose-bench/src/eval_suites/core/example.rs
Normal file
44
crates/goose-bench/src/eval_suites/core/example.rs
Normal file
@@ -0,0 +1,44 @@
|
||||
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric};
|
||||
use crate::register_evaluation;
|
||||
use crate::work_dir::WorkDir;
|
||||
use async_trait::async_trait;
|
||||
// use std::fs;
|
||||
|
||||
/// Minimal sample evaluation that shows how an evaluation is put together.
pub struct ExampleEval {}

impl ExampleEval {
    /// Creates the evaluation; it holds no state.
    pub fn new() -> Self {
        Self {}
    }
}
|
||||
|
||||
#[async_trait]
|
||||
impl Evaluation for ExampleEval {
|
||||
async fn run(
|
||||
&self,
|
||||
mut agent: Box<dyn BenchAgent>,
|
||||
_work_dir: &mut WorkDir,
|
||||
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
||||
println!("ExampleEval - run");
|
||||
// let f = work_dir.fs_get(String::from("./arbitrary_dir/arbitrary_file.txt"))?;
|
||||
// let _contents = fs::read_to_string(f)?;
|
||||
let mut metrics = Vec::new();
|
||||
let _ = agent.prompt("What can you do?".to_string()).await;
|
||||
metrics.push((
|
||||
"example_metric".to_string(),
|
||||
EvaluationMetric::Boolean(true),
|
||||
));
|
||||
metrics.push(("example_count".to_string(), EvaluationMetric::Integer(42)));
|
||||
Ok(metrics)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"example_eval"
|
||||
}
|
||||
|
||||
fn required_extensions(&self) -> Vec<String> {
|
||||
Vec::new() // Example eval doesn't require any extensions
|
||||
}
|
||||
}
|
||||
|
||||
register_evaluation!("core", ExampleEval);
|
||||
96
crates/goose-bench/src/eval_suites/core/image.rs
Normal file
96
crates/goose-bench/src/eval_suites/core/image.rs
Normal file
@@ -0,0 +1,96 @@
|
||||
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric};
|
||||
use crate::register_evaluation;
|
||||
use crate::work_dir::WorkDir;
|
||||
use async_trait::async_trait;
|
||||
use goose::message::MessageContent;
|
||||
use mcp_core::content::Content;
|
||||
use mcp_core::role::Role;
|
||||
use serde_json::{self, Value};
|
||||
|
||||
/// Evaluation that checks whether the agent captures a screenshot with the
/// developer extension and receives image content back.
#[derive(Debug)]
pub struct DeveloperImage {}

impl DeveloperImage {
    /// Creates the evaluation; it holds no state.
    pub fn new() -> Self {
        Self {}
    }
}
|
||||
|
||||
#[async_trait]
|
||||
impl Evaluation for DeveloperImage {
|
||||
async fn run(
|
||||
&self,
|
||||
mut agent: Box<dyn BenchAgent>,
|
||||
_work_dir: &mut WorkDir,
|
||||
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
||||
let mut metrics = Vec::new();
|
||||
|
||||
// Send the prompt to list files
|
||||
let messages = agent
|
||||
.prompt("Take a screenshot of the display 0 and describe what you see.".to_string())
|
||||
.await?;
|
||||
|
||||
// Check if the assistant makes appropriate tool calls and gets valid responses
|
||||
let mut valid_tool_call = false;
|
||||
let mut valid_response = false;
|
||||
|
||||
for msg in messages.iter() {
|
||||
// Check for valid tool request
|
||||
if msg.role == Role::Assistant {
|
||||
for content in msg.content.iter() {
|
||||
if let MessageContent::ToolRequest(tool_req) = content {
|
||||
if let Ok(tool_call) = tool_req.tool_call.as_ref() {
|
||||
if let Ok(args) =
|
||||
serde_json::from_value::<Value>(tool_call.arguments.clone())
|
||||
{
|
||||
if tool_call.name == "developer__screen_capture"
|
||||
&& (args.get("display").and_then(Value::as_i64) == Some(0))
|
||||
{
|
||||
valid_tool_call = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for valid tool response
|
||||
if msg.role == Role::User && valid_tool_call {
|
||||
for content in msg.content.iter() {
|
||||
if let MessageContent::ToolResponse(tool_resp) = content {
|
||||
if let Ok(result) = &tool_resp.tool_result {
|
||||
// Check each item in the result list
|
||||
for item in result {
|
||||
if let Content::Image(image) = item {
|
||||
// Image content already contains mime_type and data
|
||||
if image.mime_type.starts_with("image/")
|
||||
&& !image.data.is_empty()
|
||||
{
|
||||
valid_response = true;
|
||||
break; // Found a valid image, no need to check further
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Both the tool call and response must be valid
|
||||
metrics.push((
|
||||
"Take a screenshot and upload images".to_string(),
|
||||
EvaluationMetric::Boolean(valid_tool_call && valid_response),
|
||||
));
|
||||
Ok(metrics)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"developer_image"
|
||||
}
|
||||
|
||||
fn required_extensions(&self) -> Vec<String> {
|
||||
vec!["developer".to_string()]
|
||||
}
|
||||
}
|
||||
|
||||
register_evaluation!("developer_image", DeveloperImage);
|
||||
80
crates/goose-bench/src/eval_suites/core/list_files.rs
Normal file
80
crates/goose-bench/src/eval_suites/core/list_files.rs
Normal file
@@ -0,0 +1,80 @@
|
||||
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric};
|
||||
use crate::register_evaluation;
|
||||
use crate::work_dir::WorkDir;
|
||||
use async_trait::async_trait;
|
||||
use goose::message::MessageContent;
|
||||
use mcp_core::role::Role;
|
||||
use serde_json::{self, Value};
|
||||
|
||||
/// Evaluation that checks whether the agent lists files through the developer
/// extension's shell tool.
#[derive(Debug)]
pub struct DeveloperListFiles {}

impl DeveloperListFiles {
    /// Creates the evaluation; it holds no state.
    pub fn new() -> Self {
        Self {}
    }
}
|
||||
|
||||
#[async_trait]
|
||||
impl Evaluation for DeveloperListFiles {
|
||||
async fn run(
|
||||
&self,
|
||||
mut agent: Box<dyn BenchAgent>,
|
||||
_work_dir: &mut WorkDir,
|
||||
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
||||
let mut metrics = Vec::new();
|
||||
|
||||
// Send the prompt to list files
|
||||
let messages = agent
|
||||
.prompt("list the files in the current directory".to_string())
|
||||
.await?;
|
||||
// println!("asdhflkahjsdflkasdfl");
|
||||
|
||||
// Check if the assistant makes appropriate tool calls
|
||||
let valid_tool_call = messages.iter().any(|msg| {
|
||||
// Check if it's an assistant message
|
||||
msg.role == Role::Assistant &&
|
||||
// Check if any content item is a tool request for listing files
|
||||
msg.content.iter().any(|content| {
|
||||
if let MessageContent::ToolRequest(tool_req) = content {
|
||||
// Check if the tool call is for shell with ls or rg --files
|
||||
if let Ok(tool_call) = tool_req.tool_call.as_ref() {
|
||||
// Parse arguments as JSON Value first
|
||||
if let Ok(args) = serde_json::from_value::<Value>(tool_call.arguments.clone()) {
|
||||
tool_call.name == "developer__shell" &&
|
||||
args.get("command")
|
||||
.and_then(Value::as_str).is_some_and(|cmd| {
|
||||
cmd.contains("ls ") ||
|
||||
cmd.contains("ls\n") ||
|
||||
cmd.contains("ls$") ||
|
||||
cmd.contains("rg --files")
|
||||
})
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
});
|
||||
|
||||
metrics.push((
|
||||
"Using the shell command tool".to_string(),
|
||||
EvaluationMetric::Boolean(valid_tool_call),
|
||||
));
|
||||
Ok(metrics)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"developer_list_files"
|
||||
}
|
||||
|
||||
fn required_extensions(&self) -> Vec<String> {
|
||||
vec!["developer".to_string()]
|
||||
}
|
||||
}
|
||||
|
||||
register_evaluation!("developer", DeveloperListFiles);
|
||||
11
crates/goose-bench/src/eval_suites/core/mod.rs
Normal file
11
crates/goose-bench/src/eval_suites/core/mod.rs
Normal file
@@ -0,0 +1,11 @@
|
||||
mod example;
|
||||
// developer extension evals
|
||||
mod create_file;
|
||||
mod image;
|
||||
mod list_files;
|
||||
mod search_replace;
|
||||
// computer controller extension evals
|
||||
mod script;
|
||||
mod web_scrape;
|
||||
// memory extension evals
|
||||
mod save_fact;
|
||||
79
crates/goose-bench/src/eval_suites/core/save_fact.rs
Normal file
79
crates/goose-bench/src/eval_suites/core/save_fact.rs
Normal file
@@ -0,0 +1,79 @@
|
||||
// Ask the agent to save a fact and check that it calls the memory extension's remember_memory tool
|
||||
|
||||
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric};
|
||||
use crate::register_evaluation;
|
||||
use crate::work_dir::WorkDir;
|
||||
use async_trait::async_trait;
|
||||
use goose::message::MessageContent;
|
||||
use mcp_core::role::Role;
|
||||
use serde_json::{self, Value};
|
||||
|
||||
/// Evaluation that checks whether the agent stores a fact through the memory
/// extension.
#[derive(Debug)]
pub struct MemoryRememberMemory {}

impl MemoryRememberMemory {
    /// Creates the evaluation; it holds no state.
    pub fn new() -> Self {
        Self {}
    }
}
|
||||
|
||||
#[async_trait]
|
||||
impl Evaluation for MemoryRememberMemory {
|
||||
async fn run(
|
||||
&self,
|
||||
mut agent: Box<dyn BenchAgent>,
|
||||
_work_dir: &mut WorkDir,
|
||||
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
||||
let mut metrics = Vec::new();
|
||||
|
||||
// Send the prompt to list files
|
||||
let messages = agent.prompt("Save this fact: The capital of France is Paris.".to_string());
|
||||
let messages = messages.await?;
|
||||
|
||||
let valid_tool_call = messages.iter().any(|msg| {
|
||||
// Check if it's an assistant message
|
||||
msg.role == Role::Assistant &&
|
||||
// Check if any content item is a tool request for creating a file
|
||||
msg.content.iter().any(|content| {
|
||||
if let MessageContent::ToolRequest(tool_req) = content {
|
||||
if let Ok(tool_call) = tool_req.tool_call.as_ref() {
|
||||
// Check tool name is correct
|
||||
if tool_call.name != "memory__remember_memory" {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Parse the arguments as JSON
|
||||
if let Ok(args) = serde_json::from_value::<Value>(tool_call.arguments.clone()) {
|
||||
// Check all required parameters match exactly
|
||||
args.get("category").and_then(Value::as_str).is_some_and(|s| s.contains("fact")) &&
|
||||
args.get("data").and_then(Value::as_str) == Some("The capital of France is Paris.") &&
|
||||
args.get("is_global").and_then(Value::as_bool) == Some(true)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
});
|
||||
|
||||
metrics.push((
|
||||
"Saving facts".to_string(),
|
||||
EvaluationMetric::Boolean(valid_tool_call),
|
||||
));
|
||||
Ok(metrics)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"memory_remember_memory"
|
||||
}
|
||||
|
||||
fn required_extensions(&self) -> Vec<String> {
|
||||
vec!["memory".to_string()]
|
||||
}
|
||||
}
|
||||
|
||||
register_evaluation!("memory", MemoryRememberMemory);
|
||||
77
crates/goose-bench/src/eval_suites/core/script.rs
Normal file
77
crates/goose-bench/src/eval_suites/core/script.rs
Normal file
@@ -0,0 +1,77 @@
|
||||
// Ask the agent to make a beep sound and check that it runs a script through the computer controller extension
|
||||
|
||||
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric};
|
||||
use crate::register_evaluation;
|
||||
use crate::work_dir::WorkDir;
|
||||
use async_trait::async_trait;
|
||||
use goose::message::MessageContent;
|
||||
use mcp_core::role::Role;
|
||||
use serde_json::{self, Value};
|
||||
|
||||
/// Evaluation that checks whether the agent runs a script through the
/// computer controller extension.
#[derive(Debug)]
pub struct ComputerControllerScript {}

impl ComputerControllerScript {
    /// Creates the evaluation; it holds no state.
    pub fn new() -> Self {
        Self {}
    }
}
|
||||
|
||||
#[async_trait]
|
||||
impl Evaluation for ComputerControllerScript {
|
||||
async fn run(
|
||||
&self,
|
||||
mut agent: Box<dyn BenchAgent>,
|
||||
_work_dir: &mut WorkDir,
|
||||
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
||||
let mut metrics = Vec::new();
|
||||
|
||||
// Send the prompt to list files
|
||||
let messages = agent.prompt("Make a beep sound".to_string());
|
||||
let messages = messages.await?;
|
||||
|
||||
let valid_tool_call = messages.iter().any(|msg| {
|
||||
// Check if it's an assistant message
|
||||
msg.role == Role::Assistant &&
|
||||
// Check if any content item is a tool request for creating a file
|
||||
msg.content.iter().any(|content| {
|
||||
if let MessageContent::ToolRequest(tool_req) = content {
|
||||
if let Ok(tool_call) = tool_req.tool_call.as_ref() {
|
||||
// Check tool name is correct
|
||||
if tool_call.name != "computercontroller__computer_control" {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Parse the arguments as JSON
|
||||
if let Ok(args) = serde_json::from_value::<Value>(tool_call.arguments.clone()) {
|
||||
// Check all required parameters match exactly
|
||||
args.get("script").and_then(Value::as_str).is_some_and(|s| s.contains("beep"))
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
});
|
||||
|
||||
metrics.push((
|
||||
"Running os scripts".to_string(),
|
||||
EvaluationMetric::Boolean(valid_tool_call),
|
||||
));
|
||||
Ok(metrics)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"computercontroller_script"
|
||||
}
|
||||
|
||||
fn required_extensions(&self) -> Vec<String> {
|
||||
vec!["computercontroller".to_string()]
|
||||
}
|
||||
}
|
||||
|
||||
register_evaluation!("computercontroller", ComputerControllerScript);
|
||||
110
crates/goose-bench/src/eval_suites/core/search_replace.rs
Normal file
110
crates/goose-bench/src/eval_suites/core/search_replace.rs
Normal file
@@ -0,0 +1,110 @@
|
||||
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric};
|
||||
use crate::register_evaluation;
|
||||
use crate::work_dir::WorkDir;
|
||||
use async_trait::async_trait;
|
||||
use std::fs;
|
||||
|
||||
/// Evaluation that checks whether the agent can edit a definition inside a
/// large JSON file so that the result matches an expected patch.
#[derive(Debug)]
pub struct DeveloperSearchReplace {}

impl DeveloperSearchReplace {
    /// Creates the evaluation; it holds no state.
    pub fn new() -> Self {
        Self {}
    }
}
|
||||
|
||||
#[async_trait]
|
||||
impl Evaluation for DeveloperSearchReplace {
|
||||
async fn run(
|
||||
&self,
|
||||
mut agent: Box<dyn BenchAgent>,
|
||||
work_dir: &mut WorkDir,
|
||||
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
||||
let mut metrics = Vec::new();
|
||||
|
||||
// Try to find the assets directory
|
||||
let assets_dir_path = work_dir.path.join("assets");
|
||||
let _assets_exists = assets_dir_path.exists();
|
||||
|
||||
// Get the kubernetes_swagger.json file from the assets directory and copy it to the working directory for eval
|
||||
// so the agent can modify it
|
||||
let source_file = work_dir.path.join("assets").join("kubernetes_swagger.json");
|
||||
let target_file = std::env::current_dir()
|
||||
.unwrap_or_default()
|
||||
.join("kubernetes_swagger.json");
|
||||
|
||||
// Copy the file to the root of the working directory if it doesn't exist there yet
|
||||
if !target_file.exists() && source_file.exists() {
|
||||
println!("Copying file from {:?} to {:?}", source_file, target_file);
|
||||
fs::copy(&source_file, &target_file)?;
|
||||
println!("File copied successfully");
|
||||
} else {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Could not find kubernetes_swagger.json file"
|
||||
));
|
||||
}
|
||||
|
||||
// Send the prompt to modify the file
|
||||
let _messages = agent.prompt("Remove the io.k8s.api.admissionregistration.v1.ServiceReference definition block and replace with a new definition for io.k8s.api.admissionregistration.v1.FakeServiceReference. Update the fields in the definition as well to be consistent. Don't change the property names. Don't update any references to the old definition. Only modify the definition and it's description to 'FakeServiceReference simulates a reference to a fake service for testing purposes.'.The file to modify is kubernetes_swagger.json.".to_string()).await?;
|
||||
|
||||
// Get the path to the modified file
|
||||
let modified_file_path = std::env::current_dir()
|
||||
.unwrap_or_default()
|
||||
.join("kubernetes_swagger.json");
|
||||
|
||||
// Read the expected patch file from the assets directory
|
||||
let patch_file_path = work_dir.path.join("assets").join("kubernetes.patch");
|
||||
if !patch_file_path.exists() {
|
||||
return Err(anyhow::anyhow!("Could not find patch file"));
|
||||
}
|
||||
let patch_content = fs::read_to_string(&patch_file_path)?
|
||||
.lines()
|
||||
.skip(4)
|
||||
.collect::<Vec<&str>>()
|
||||
.join("\n");
|
||||
|
||||
// Run git diff between modified and source files
|
||||
let diff_output = std::process::Command::new("git")
|
||||
.args([
|
||||
"diff",
|
||||
"--no-index",
|
||||
source_file.to_str().unwrap(),
|
||||
modified_file_path.to_str().unwrap(),
|
||||
])
|
||||
.output()?;
|
||||
|
||||
let actual_diff = String::from_utf8_lossy(&diff_output.stdout)
|
||||
.to_string()
|
||||
.lines()
|
||||
.skip(4)
|
||||
.collect::<Vec<&str>>()
|
||||
.join("\n");
|
||||
|
||||
let mut changes_match = true;
|
||||
|
||||
// Compare the remaining lines
|
||||
if actual_diff != patch_content {
|
||||
println!("Diffs don't match!");
|
||||
println!("Expected patch:\n{}", patch_content);
|
||||
println!("Actual diff:\n{}", actual_diff);
|
||||
changes_match = false;
|
||||
}
|
||||
|
||||
metrics.push((
|
||||
"Changes match expected patch".to_string(),
|
||||
EvaluationMetric::Boolean(changes_match),
|
||||
));
|
||||
|
||||
Ok(metrics)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"developer_search_replace"
|
||||
}
|
||||
|
||||
fn required_extensions(&self) -> Vec<String> {
|
||||
vec!["developer".to_string()]
|
||||
}
|
||||
}
|
||||
|
||||
// Registers `DeveloperSearchReplace` with the evaluation factory under the key
// "developer_search_replace".
register_evaluation!("developer_search_replace", DeveloperSearchReplace);
|
||||
79
crates/goose-bench/src/eval_suites/core/web_scrape.rs
Normal file
79
crates/goose-bench/src/eval_suites/core/web_scrape.rs
Normal file
@@ -0,0 +1,79 @@
|
||||
// Ask the agent for the Hacker News headlines and check that it calls the web_scrape tool
|
||||
|
||||
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric};
|
||||
use crate::register_evaluation;
|
||||
use crate::work_dir::WorkDir;
|
||||
use async_trait::async_trait;
|
||||
use goose::message::MessageContent;
|
||||
use mcp_core::role::Role;
|
||||
use serde_json::{self, Value};
|
||||
|
||||
/// Stateless evaluation checking that the agent scrapes Hacker News through the
/// `computercontroller__web_scrape` tool.
#[derive(Debug)]
pub struct ComputerControllerWebScrape {}

impl ComputerControllerWebScrape {
    /// Builds the evaluation; there is no configuration to supply.
    pub fn new() -> Self {
        Self {}
    }
}
|
||||
|
||||
#[async_trait]
|
||||
impl Evaluation for ComputerControllerWebScrape {
|
||||
async fn run(
|
||||
&self,
|
||||
mut agent: Box<dyn BenchAgent>,
|
||||
_work_dir: &mut WorkDir,
|
||||
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
||||
let mut metrics = Vec::new();
|
||||
|
||||
// Send the prompt to list files
|
||||
let messages = agent.prompt(
|
||||
"What are the headlines on hackernews? Organize the list into categories.".to_string(),
|
||||
);
|
||||
let messages = messages.await?;
|
||||
|
||||
let valid_tool_call = messages.iter().any(|msg| {
|
||||
// Check if it's an assistant message
|
||||
msg.role == Role::Assistant &&
|
||||
// Check if any content item is a tool request for creating a file
|
||||
msg.content.iter().any(|content| {
|
||||
if let MessageContent::ToolRequest(tool_req) = content {
|
||||
if let Ok(tool_call) = tool_req.tool_call.as_ref() {
|
||||
// Check tool name is correct
|
||||
if tool_call.name != "computercontroller__web_scrape" {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Parse the arguments as JSON
|
||||
if let Ok(args) = serde_json::from_value::<Value>(tool_call.arguments.clone()) {
|
||||
// Check all required parameters match exactly
|
||||
args.get("url").and_then(Value::as_str).map(|s| s.trim_end_matches('/')) == Some("https://news.ycombinator.com")
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
});
|
||||
|
||||
metrics.push((
|
||||
"Retrieve and scrape web pages".to_string(),
|
||||
EvaluationMetric::Boolean(valid_tool_call),
|
||||
));
|
||||
Ok(metrics)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"computercontroller_web_scrape"
|
||||
}
|
||||
|
||||
fn required_extensions(&self) -> Vec<String> {
|
||||
vec!["computercontroller".to_string()]
|
||||
}
|
||||
}
|
||||
|
||||
// Registers `ComputerControllerWebScrape` with the evaluation factory under the key
// "computercontroller".
// NOTE(review): the key differs from `name()` ("computercontroller_web_scrape") — confirm
// this is intentional.
register_evaluation!("computercontroller", ComputerControllerWebScrape);
|
||||
47
crates/goose-bench/src/eval_suites/evaluation.rs
Normal file
47
crates/goose-bench/src/eval_suites/evaluation.rs
Normal file
@@ -0,0 +1,47 @@
|
||||
use crate::work_dir::WorkDir;
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, Utc};
|
||||
use goose::message::Message;
|
||||
use serde::Serialize;
|
||||
|
||||
pub type Model = (String, String);
|
||||
pub type Extension = String;
|
||||
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
pub struct BenchAgentError {
|
||||
pub message: String,
|
||||
pub level: String, // ERROR, WARN, etc.
|
||||
pub timestamp: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub enum EvaluationMetric {
|
||||
Integer(i64),
|
||||
Float(f64),
|
||||
String(String),
|
||||
Boolean(bool),
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait BenchAgent: Send + Sync {
|
||||
async fn prompt(&mut self, p: String) -> Result<Vec<Message>>;
|
||||
|
||||
// Make get_errors async
|
||||
async fn get_errors(&self) -> Vec<BenchAgentError>;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait Evaluation: Send + Sync {
|
||||
async fn run(
|
||||
&self,
|
||||
agent: Box<dyn BenchAgent>,
|
||||
run_loc: &mut WorkDir,
|
||||
) -> Result<Vec<(String, EvaluationMetric)>>;
|
||||
|
||||
fn name(&self) -> &str;
|
||||
|
||||
fn required_extensions(&self) -> Vec<String> {
|
||||
Vec::new() // Default implementation returns empty vec
|
||||
}
|
||||
}
|
||||
65
crates/goose-bench/src/eval_suites/factory.rs
Normal file
65
crates/goose-bench/src/eval_suites/factory.rs
Normal file
@@ -0,0 +1,65 @@
|
||||
pub use super::Evaluation;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{OnceLock, RwLock};
|
||||
|
||||
type EvaluationConstructor = fn() -> Box<dyn Evaluation>;
|
||||
|
||||
// Use std::sync::RwLock for interior mutability
|
||||
static EVALUATION_REGISTRY: OnceLock<RwLock<HashMap<&'static str, Vec<EvaluationConstructor>>>> =
|
||||
OnceLock::new();
|
||||
|
||||
/// Initialize the registry if it hasn't been initialized
|
||||
fn registry() -> &'static RwLock<HashMap<&'static str, Vec<EvaluationConstructor>>> {
|
||||
EVALUATION_REGISTRY.get_or_init(|| RwLock::new(HashMap::new()))
|
||||
}
|
||||
|
||||
/// Register a new evaluation version
|
||||
pub fn register_evaluation(suite_name: &'static str, constructor: fn() -> Box<dyn Evaluation>) {
|
||||
let registry = registry();
|
||||
if let Ok(mut map) = registry.write() {
|
||||
map.entry(suite_name)
|
||||
.or_insert_with(Vec::new)
|
||||
.push(constructor);
|
||||
}
|
||||
}
|
||||
|
||||
pub struct EvaluationSuiteFactory;
|
||||
|
||||
impl EvaluationSuiteFactory {
|
||||
pub fn create(suite_name: &str) -> Option<Vec<Box<dyn Evaluation>>> {
|
||||
let registry = registry();
|
||||
let map = registry
|
||||
.read()
|
||||
.expect("Failed to read the benchmark evaluation registry.");
|
||||
|
||||
let constructors = map.get(suite_name)?;
|
||||
let instances = constructors
|
||||
.iter()
|
||||
.map(|&constructor| constructor())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Some(instances)
|
||||
}
|
||||
|
||||
pub fn available_evaluations() -> Vec<&'static str> {
|
||||
registry()
|
||||
.read()
|
||||
.map(|map| map.keys().copied().collect())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Registers `$evaluation_type` with the evaluation factory under `$suite_name`.
///
/// Expands to a `#[ctor::ctor]` function whose name is `__register_evaluation_` with
/// `$suite_name` pasted on, and whose body calls
/// `eval_suites::factory::register_evaluation` with a constructor that boxes
/// `<$evaluation_type>::new()`.
// NOTE(review): the generated function name depends only on `$suite_name`, so two
// invocations with the same name in one module would presumably collide — confirm.
#[macro_export]
|
||||
macro_rules! register_evaluation {
|
||||
($suite_name:expr, $evaluation_type:ty) => {
|
||||
paste::paste! {
|
||||
#[ctor::ctor]
|
||||
#[allow(non_snake_case)]
|
||||
fn [<__register_evaluation_ $suite_name>]() {
|
||||
$crate::eval_suites::factory::register_evaluation($suite_name, || {
|
||||
Box::new(<$evaluation_type>::new())
|
||||
});
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
6
crates/goose-bench/src/eval_suites/mod.rs
Normal file
6
crates/goose-bench/src/eval_suites/mod.rs
Normal file
@@ -0,0 +1,6 @@
|
||||
mod core;
|
||||
mod evaluation;
|
||||
mod factory;
|
||||
|
||||
pub use evaluation::*;
|
||||
pub use factory::{register_evaluation, EvaluationSuiteFactory};
|
||||
4
crates/goose-bench/src/lib.rs
Normal file
4
crates/goose-bench/src/lib.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
pub mod error_capture;
|
||||
pub mod eval_suites;
|
||||
pub mod reporting;
|
||||
pub mod work_dir;
|
||||
143
crates/goose-bench/src/reporting.rs
Normal file
143
crates/goose-bench/src/reporting.rs
Normal file
@@ -0,0 +1,143 @@
|
||||
use crate::eval_suites::{BenchAgentError, EvaluationMetric};
|
||||
use chrono::Local;
|
||||
use serde::Serialize;
|
||||
use std::fmt;
|
||||
|
||||
/// Represents a single evaluation result
|
||||
#[derive(Default, Serialize)]
|
||||
pub struct EvaluationResult {
|
||||
pub name: String,
|
||||
pub metrics: Vec<(String, EvaluationMetric)>,
|
||||
pub errors: Vec<BenchAgentError>,
|
||||
}
|
||||
|
||||
/// Represents results for an entire suite
|
||||
#[derive(Default, Serialize)]
|
||||
pub struct SuiteResult {
|
||||
pub name: String,
|
||||
pub evaluations: Vec<EvaluationResult>,
|
||||
}
|
||||
|
||||
/// Contains all benchmark results and metadata
|
||||
#[derive(Default, Serialize)]
|
||||
pub struct BenchmarkResults {
|
||||
pub provider: String,
|
||||
pub start_time: String,
|
||||
pub suites: Vec<SuiteResult>,
|
||||
}
|
||||
|
||||
impl EvaluationResult {
|
||||
pub fn new(name: String) -> Self {
|
||||
Self {
|
||||
name,
|
||||
metrics: Vec::new(),
|
||||
errors: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_metric(&mut self, name: String, metric: EvaluationMetric) {
|
||||
self.metrics.push((name, metric));
|
||||
}
|
||||
|
||||
pub fn add_error(&mut self, error: BenchAgentError) {
|
||||
self.errors.push(error);
|
||||
}
|
||||
}
|
||||
|
||||
impl SuiteResult {
|
||||
pub fn new(name: String) -> Self {
|
||||
Self {
|
||||
name,
|
||||
evaluations: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_evaluation(&mut self, eval: EvaluationResult) {
|
||||
self.evaluations.push(eval);
|
||||
}
|
||||
}
|
||||
|
||||
impl BenchmarkResults {
|
||||
pub fn new(provider: String) -> Self {
|
||||
Self {
|
||||
provider,
|
||||
start_time: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
|
||||
suites: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_suite(&mut self, suite: SuiteResult) {
|
||||
self.suites.push(suite);
|
||||
}
|
||||
|
||||
/// Generate a summary of the benchmark results
|
||||
pub fn summary(&self) -> String {
|
||||
let mut summary = String::new();
|
||||
summary.push_str(&format!("Benchmark Summary - {}\n", self.provider));
|
||||
summary.push_str(&format!("Run at: {}\n\n", self.start_time));
|
||||
|
||||
for suite in &self.suites {
|
||||
summary.push_str(&format!(
|
||||
"Suite: {} ({} evaluations)\n",
|
||||
suite.name,
|
||||
suite.evaluations.len()
|
||||
));
|
||||
|
||||
// Count total metrics and errors
|
||||
let total_metrics: usize = suite.evaluations.iter().map(|e| e.metrics.len()).sum();
|
||||
let total_errors: usize = suite.evaluations.iter().map(|e| e.errors.len()).sum();
|
||||
|
||||
summary.push_str(&format!(" Total metrics: {}\n", total_metrics));
|
||||
if total_errors > 0 {
|
||||
summary.push_str(&format!(" Total errors: {}\n", total_errors));
|
||||
}
|
||||
}
|
||||
|
||||
summary
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for EvaluationMetric {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
EvaluationMetric::Integer(i) => write!(f, "{}", i),
|
||||
EvaluationMetric::Float(fl) => write!(f, "{:.2}", fl),
|
||||
EvaluationMetric::String(s) => write!(f, "{}", s),
|
||||
EvaluationMetric::Boolean(b) => write!(f, "{}", b),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for BenchmarkResults {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
writeln!(f, "Benchmark Results")?;
|
||||
writeln!(f, "Provider: {}", self.provider)?;
|
||||
writeln!(f, "Start Time: {}", self.start_time)?;
|
||||
writeln!(f)?;
|
||||
|
||||
for suite in &self.suites {
|
||||
writeln!(f, "Suite: {}", suite.name)?;
|
||||
|
||||
for eval in &suite.evaluations {
|
||||
writeln!(f, " Evaluation: {}", eval.name)?;
|
||||
for (metric_name, metric_value) in &eval.metrics {
|
||||
writeln!(f, " {}: {}", metric_name, metric_value)?;
|
||||
}
|
||||
if !eval.errors.is_empty() {
|
||||
writeln!(f, " Errors:")?;
|
||||
for error in &eval.errors {
|
||||
writeln!(
|
||||
f,
|
||||
" [{}] {}: {}",
|
||||
error.timestamp.format("%H:%M:%S"),
|
||||
error.level,
|
||||
error.message
|
||||
)?;
|
||||
}
|
||||
}
|
||||
writeln!(f)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
113
crates/goose-bench/src/work_dir.rs
Normal file
113
crates/goose-bench/src/work_dir.rs
Normal file
@@ -0,0 +1,113 @@
|
||||
use std::fs;
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// A directory used as the working location of a benchmark run.
pub struct WorkDir {
    /// Directory this `WorkDir` was created for.
    pub path: PathBuf,
    /// Directories recorded so far; the first entry is the one the `WorkDir` started with.
    traversal: Vec<PathBuf>,
}

impl Default for WorkDir {
    /// Uses the canonicalized current directory.
    ///
    /// # Panics
    /// Panics when `.` cannot be canonicalized.
    fn default() -> Self {
        let cwd = Path::new(".").canonicalize().unwrap();
        Self {
            traversal: vec![cwd.clone()],
            path: cwd,
        }
    }
}
|
||||
impl WorkDir {
|
||||
pub fn new(path: &str) -> Self {
|
||||
let path = PathBuf::from(path);
|
||||
WorkDir {
|
||||
path: path.clone(),
|
||||
traversal: vec![path.clone()],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn at(path: String, include_dirs: Vec<PathBuf>) -> anyhow::Result<WorkDir> {
|
||||
fs::create_dir_all(&path)?;
|
||||
|
||||
let dirs = include_dirs
|
||||
.iter()
|
||||
.map(|d| d.canonicalize().unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let p = PathBuf::from(&path).canonicalize()?;
|
||||
let _: Vec<_> = dirs
|
||||
.iter()
|
||||
.map(|d| WorkDir::deep_copy(d.as_path(), p.as_path()))
|
||||
.collect();
|
||||
|
||||
std::env::set_current_dir(&path)?;
|
||||
|
||||
Ok(WorkDir::new(p.to_string_lossy().to_string().as_str()))
|
||||
}
|
||||
pub fn move_to(&mut self, path: String) -> anyhow::Result<&mut Self> {
|
||||
fs::create_dir_all(&path)?;
|
||||
self.traversal.push(PathBuf::from(&path));
|
||||
std::env::set_current_dir(&path)?;
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
pub fn fs_get(&mut self, path: String) -> anyhow::Result<PathBuf> {
|
||||
let p = Path::new(&path);
|
||||
if !p.exists() {
|
||||
let artifact_at_root = if p.is_dir() {
|
||||
self.traversal[0].clone().join(&path).canonicalize()?
|
||||
} else {
|
||||
self.traversal[0]
|
||||
.clone()
|
||||
.join(p.parent().unwrap_or(Path::new("")))
|
||||
.canonicalize()?
|
||||
};
|
||||
|
||||
let here = PathBuf::from(".").canonicalize()?;
|
||||
|
||||
WorkDir::deep_copy(artifact_at_root.as_path(), here.as_path())?;
|
||||
}
|
||||
|
||||
Ok(PathBuf::from(path))
|
||||
}
|
||||
|
||||
fn deep_copy(src: &Path, dst: &Path) -> io::Result<()> {
|
||||
// Create the destination directory with the source's name
|
||||
let dst_dir = if let Some(src_name) = src.file_name() {
|
||||
dst.join(src_name)
|
||||
} else {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"Source path must have a file name",
|
||||
));
|
||||
};
|
||||
|
||||
// Create the destination directory if it doesn't exist
|
||||
if !dst_dir.exists() {
|
||||
fs::create_dir_all(&dst_dir)?;
|
||||
}
|
||||
|
||||
// Copy each entry in the source directory
|
||||
for entry in fs::read_dir(src)? {
|
||||
let entry = entry?;
|
||||
let ty = entry.file_type()?;
|
||||
let src_path = entry.path();
|
||||
let dst_path = dst_dir.join(entry.file_name());
|
||||
|
||||
if ty.is_dir() {
|
||||
WorkDir::deep_copy(&src_path, dst_path.parent().unwrap())?;
|
||||
} else {
|
||||
fs::copy(&src_path, &dst_path)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for WorkDir {
|
||||
fn drop(&mut self) {
|
||||
self.traversal.pop();
|
||||
std::env::set_current_dir("..").unwrap()
|
||||
}
|
||||
}
|
||||
@@ -13,6 +13,7 @@ path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
goose = { path = "../goose" }
|
||||
goose-bench = { path = "../goose-bench" }
|
||||
goose-mcp = { path = "../goose-mcp" }
|
||||
mcp-client = { path = "../mcp-client" }
|
||||
mcp-server = { path = "../mcp-server" }
|
||||
@@ -48,6 +49,7 @@ tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt", "json",
|
||||
tracing-appender = "0.2"
|
||||
once_cell = "1.20.2"
|
||||
shlex = "1.3.0"
|
||||
async-trait = "0.1.86"
|
||||
|
||||
[target.'cfg(target_os = "windows")'.dependencies]
|
||||
winapi = { version = "0.3", features = ["wincred"] }
|
||||
|
||||
171
crates/goose-cli/src/commands/bench.rs
Normal file
171
crates/goose-cli/src/commands/bench.rs
Normal file
@@ -0,0 +1,171 @@
|
||||
use crate::session::build_session;
|
||||
use crate::Session;
|
||||
use async_trait::async_trait;
|
||||
use chrono::Local;
|
||||
use goose::config::Config;
|
||||
use goose::message::Message;
|
||||
use goose_bench::error_capture::ErrorCaptureLayer;
|
||||
use goose_bench::eval_suites::{BenchAgent, BenchAgentError, Evaluation, EvaluationSuiteFactory};
|
||||
use goose_bench::reporting::{BenchmarkResults, EvaluationResult, SuiteResult};
|
||||
use goose_bench::work_dir::WorkDir;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Once;
|
||||
use tokio::sync::Mutex;
|
||||
use tracing_subscriber::layer::SubscriberExt;
|
||||
|
||||
// Used to ensure we only set up tracing once
|
||||
static INIT: Once = Once::new();
|
||||
|
||||
pub struct BenchSession {
|
||||
session: Session,
|
||||
errors: Arc<Mutex<Vec<BenchAgentError>>>,
|
||||
}
|
||||
|
||||
impl BenchSession {
|
||||
pub fn new(session: Session) -> Self {
|
||||
let errors = Arc::new(Mutex::new(Vec::new()));
|
||||
|
||||
// Create and register the error capture layer only once
|
||||
INIT.call_once(|| {
|
||||
let error_layer = ErrorCaptureLayer::new(errors.clone());
|
||||
let subscriber = tracing_subscriber::Registry::default().with(error_layer);
|
||||
|
||||
tracing::subscriber::set_global_default(subscriber)
|
||||
.expect("Failed to set tracing subscriber");
|
||||
});
|
||||
|
||||
Self { session, errors }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl BenchAgent for BenchSession {
|
||||
async fn prompt(&mut self, p: String) -> anyhow::Result<Vec<Message>> {
|
||||
// Clear previous errors
|
||||
{
|
||||
let mut errors = self.errors.lock().await;
|
||||
errors.clear();
|
||||
}
|
||||
|
||||
self.session.headless(p).await?;
|
||||
Ok(self.session.message_history())
|
||||
}
|
||||
|
||||
async fn get_errors(&self) -> Vec<BenchAgentError> {
|
||||
let errors = self.errors.lock().await;
|
||||
errors.clone()
|
||||
}
|
||||
}
|
||||
|
||||
// Wrapper struct to implement BenchAgent for Arc<Mutex<BenchSession>>
|
||||
struct BenchAgentWrapper(Arc<Mutex<BenchSession>>);
|
||||
|
||||
#[async_trait]
|
||||
impl BenchAgent for BenchAgentWrapper {
|
||||
async fn prompt(&mut self, p: String) -> anyhow::Result<Vec<Message>> {
|
||||
let mut session = self.0.lock().await;
|
||||
session.prompt(p).await
|
||||
}
|
||||
|
||||
async fn get_errors(&self) -> Vec<BenchAgentError> {
|
||||
let session = self.0.lock().await;
|
||||
session.get_errors().await
|
||||
}
|
||||
}
|
||||
|
||||
async fn run_eval(
|
||||
evaluation: Box<dyn Evaluation>,
|
||||
work_dir: &mut WorkDir,
|
||||
) -> anyhow::Result<EvaluationResult> {
|
||||
let mut result = EvaluationResult::new(evaluation.name().to_string());
|
||||
|
||||
if let Ok(work_dir) = work_dir.move_to(format!("./{}", &evaluation.name())) {
|
||||
let required_extensions = evaluation.required_extensions();
|
||||
|
||||
// Create session with error capture
|
||||
let base_session = build_session(None, false, Vec::new(), required_extensions).await;
|
||||
|
||||
let bench_session = Arc::new(Mutex::new(BenchSession::new(base_session)));
|
||||
let bench_session_clone = bench_session.clone();
|
||||
|
||||
if let Ok(metrics) = evaluation
|
||||
.run(Box::new(BenchAgentWrapper(bench_session)), work_dir)
|
||||
.await
|
||||
{
|
||||
for (name, metric) in metrics {
|
||||
result.add_metric(name, metric);
|
||||
}
|
||||
|
||||
// Add any errors that occurred
|
||||
let agent = BenchAgentWrapper(bench_session_clone);
|
||||
for error in agent.get_errors().await {
|
||||
result.add_error(error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn run_suite(suite: &str, work_dir: &mut WorkDir) -> anyhow::Result<SuiteResult> {
|
||||
let mut suite_result = SuiteResult::new(suite.to_string());
|
||||
|
||||
if let Ok(work_dir) = work_dir.move_to(format!("./{}", &suite)) {
|
||||
if let Some(evals) = EvaluationSuiteFactory::create(suite) {
|
||||
for eval in evals {
|
||||
let eval_result = run_eval(eval, work_dir).await?;
|
||||
suite_result.add_evaluation(eval_result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(suite_result)
|
||||
}
|
||||
|
||||
pub async fn run_benchmark(
|
||||
suites: Vec<String>,
|
||||
include_dirs: Vec<PathBuf>,
|
||||
) -> anyhow::Result<BenchmarkResults> {
|
||||
let suites = EvaluationSuiteFactory::available_evaluations()
|
||||
.into_iter()
|
||||
.filter(|&s| suites.contains(&s.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let config = Config::global();
|
||||
let provider_name: String = config
|
||||
.get("GOOSE_PROVIDER")
|
||||
.expect("No provider configured. Run 'goose configure' first");
|
||||
|
||||
let mut results = BenchmarkResults::new(provider_name.clone());
|
||||
|
||||
let current_time = Local::now().format("%H:%M:%S").to_string();
|
||||
let current_date = Local::now().format("%Y-%m-%d").to_string();
|
||||
if let Ok(mut work_dir) = WorkDir::at(
|
||||
format!("./benchmark-{}", &provider_name),
|
||||
include_dirs.clone(),
|
||||
) {
|
||||
if let Ok(work_dir) = work_dir.move_to(format!("./{}-{}", ¤t_date, current_time)) {
|
||||
for suite in suites {
|
||||
let suite_result = run_suite(suite, work_dir).await?;
|
||||
results.add_suite(suite_result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
pub async fn list_suites() -> anyhow::Result<HashMap<String, usize>> {
|
||||
let suites = EvaluationSuiteFactory::available_evaluations();
|
||||
let mut suite_counts = HashMap::new();
|
||||
|
||||
for suite in suites {
|
||||
if let Some(evals) = EvaluationSuiteFactory::create(suite) {
|
||||
suite_counts.insert(suite.to_string(), evals.len());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(suite_counts)
|
||||
}
|
||||
@@ -1,4 +1,5 @@
|
||||
pub mod agent_version;
|
||||
pub mod bench;
|
||||
pub mod configure;
|
||||
pub mod info;
|
||||
pub mod mcp;
|
||||
|
||||
@@ -2,12 +2,15 @@ use anyhow::Result;
|
||||
use clap::{Args, Parser, Subcommand};
|
||||
|
||||
use goose::config::Config;
|
||||
|
||||
use goose_cli::commands::agent_version::AgentCommand;
|
||||
use goose_cli::commands::bench::{list_suites, run_benchmark};
|
||||
use goose_cli::commands::configure::handle_configure;
|
||||
use goose_cli::commands::info::handle_info;
|
||||
use goose_cli::commands::mcp::run_server;
|
||||
use goose_cli::logging::setup_logging;
|
||||
use goose_cli::session;
|
||||
use goose_cli::session::build_session;
|
||||
use goose_cli::{commands::agent_version::AgentCommand, session};
|
||||
use std::io::{self, Read};
|
||||
use std::path::PathBuf;
|
||||
|
||||
@@ -194,6 +197,66 @@ enum Command {
|
||||
#[arg(short, long, help = "Enforce to re-configure goose during update")]
|
||||
reconfigure: bool,
|
||||
},
|
||||
|
||||
Bench {
|
||||
#[arg(
|
||||
short = 's',
|
||||
long = "suites",
|
||||
value_name = "BENCH_SUITE_NAME",
|
||||
help = "Run this list of bench-suites.",
|
||||
long_help = "Specify a comma-separated list of evaluation-suite names to be run.",
|
||||
value_delimiter = ','
|
||||
)]
|
||||
suites: Vec<String>,
|
||||
|
||||
#[arg(
|
||||
short = 'i',
|
||||
long = "include-dir",
|
||||
value_name = "DIR_NAME",
|
||||
action = clap::ArgAction::Append,
|
||||
long_help = "Make one or more dirs available to all bench suites. Specify either a single dir-name, a comma-separated list of dir-names, or use this multiple instances of this flag to specify multiple dirs.",
|
||||
value_delimiter = ','
|
||||
)]
|
||||
include_dirs: Vec<PathBuf>,
|
||||
|
||||
#[arg(
|
||||
long = "repeat",
|
||||
value_name = "QUANTITY",
|
||||
long_help = "Number of times to repeat the benchmark run.",
|
||||
default_value = "1"
|
||||
)]
|
||||
repeat: usize,
|
||||
|
||||
#[arg(
|
||||
long = "list",
|
||||
value_name = "LIST",
|
||||
help = "List all available bench suites."
|
||||
)]
|
||||
list: bool,
|
||||
|
||||
#[arg(
|
||||
long = "output",
|
||||
short = 'o',
|
||||
value_name = "FILE",
|
||||
help = "Save benchmark results to a file"
|
||||
)]
|
||||
output: Option<PathBuf>,
|
||||
|
||||
#[arg(
|
||||
long = "format",
|
||||
value_name = "FORMAT",
|
||||
help = "Output format (text, json)",
|
||||
default_value = "text"
|
||||
)]
|
||||
format: String,
|
||||
|
||||
#[arg(
|
||||
long = "summary",
|
||||
help = "Show only summary results",
|
||||
action = clap::ArgAction::SetTrue
|
||||
)]
|
||||
summary: bool,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(clap::ValueEnum, Clone, Debug)]
|
||||
@@ -232,6 +295,7 @@ async fn main() -> Result<()> {
|
||||
builtin,
|
||||
)
|
||||
.await;
|
||||
|
||||
setup_logging(session.session_file().file_stem().and_then(|s| s.to_str()))?;
|
||||
let _ = session.interactive(None).await;
|
||||
return Ok(());
|
||||
@@ -290,6 +354,56 @@ async fn main() -> Result<()> {
|
||||
goose_cli::commands::update::update(canary, reconfigure)?;
|
||||
return Ok(());
|
||||
}
|
||||
Some(Command::Bench {
|
||||
suites,
|
||||
include_dirs,
|
||||
repeat,
|
||||
list,
|
||||
output,
|
||||
format,
|
||||
summary,
|
||||
}) => {
|
||||
if list {
|
||||
let suites = list_suites().await?;
|
||||
for suite in suites.keys() {
|
||||
println!("{}: {}", suite, suites.get(suite).unwrap());
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
let suites = if suites.is_empty() {
|
||||
vec!["core".to_string()]
|
||||
} else {
|
||||
suites
|
||||
};
|
||||
let current_dir = std::env::current_dir()?;
|
||||
|
||||
for i in 0..repeat {
|
||||
if repeat > 1 {
|
||||
println!("\nRun {} of {}:", i + 1, repeat);
|
||||
}
|
||||
let results = run_benchmark(suites.clone(), include_dirs.clone()).await?;
|
||||
|
||||
// Handle output based on format
|
||||
let output_str = match format.as_str() {
|
||||
"json" => serde_json::to_string_pretty(&results)?,
|
||||
_ => results.to_string(), // Uses Display impl
|
||||
};
|
||||
|
||||
// Save to file if specified
|
||||
if let Some(path) = &output {
|
||||
std::fs::write(current_dir.join(path), &output_str)?;
|
||||
println!("Results saved to: {}", path.display());
|
||||
} else {
|
||||
// Print to console
|
||||
if summary {
|
||||
println!("{}", results.summary());
|
||||
} else {
|
||||
println!("{}", output_str);
|
||||
}
|
||||
}
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
None => {
|
||||
if !Config::global().exists() {
|
||||
let _ = handle_configure().await;
|
||||
|
||||
@@ -622,4 +622,8 @@ impl Session {
|
||||
cache.prompt_info.clear();
|
||||
cache.last_updated = Instant::now();
|
||||
}
|
||||
|
||||
/// Returns a copy of this session's `messages`.
pub fn message_history(&self) -> Vec<Message> {
    self.messages.to_vec()
}
|
||||
}
|
||||
|
||||
83
scripts/README.md
Normal file
83
scripts/README.md
Normal file
@@ -0,0 +1,83 @@
|
||||
# Goose Benchmark Scripts
|
||||
|
||||
This directory contains scripts for running and analyzing Goose benchmarks.
|
||||
|
||||
## run-benchmarks.sh
|
||||
|
||||
This script runs Goose benchmarks across multiple provider:model pairs and analyzes the results.
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Goose CLI must be built or installed
|
||||
- `jq` command-line tool for JSON processing (optional, but recommended for result analysis)
|
||||
|
||||
### Usage
|
||||
|
||||
```bash
|
||||
./scripts/run-benchmarks.sh [options]
|
||||
```
|
||||
|
||||
#### Options
|
||||
|
||||
- `-p, --provider-models`: Comma-separated list of provider:model pairs (e.g., 'openai:gpt-4o,anthropic:claude-3-5-sonnet')
|
||||
- `-s, --suites`: Comma-separated list of benchmark suites to run (e.g., 'core,small_models')
|
||||
- `-o, --output-dir`: Directory to store benchmark results (default: './benchmark-results')
|
||||
- `-d, --debug`: Use debug build instead of release build
|
||||
- `-h, --help`: Show help message
|
||||
|
||||
#### Examples
|
||||
|
||||
```bash
|
||||
# Run with release build (default)
|
||||
./scripts/run-benchmarks.sh --provider-models 'openai:gpt-4o,anthropic:claude-3-5-sonnet' --suites 'core,small_models'
|
||||
|
||||
# Run with debug build
|
||||
./scripts/run-benchmarks.sh --provider-models 'openai:gpt-4o' --suites 'core' --debug
|
||||
```
|
||||
|
||||
### How It Works
|
||||
|
||||
The script:
|
||||
1. Parses the provider:model pairs and benchmark suites
|
||||
2. Determines whether to use the debug or release binary
|
||||
3. For each provider:model pair:
|
||||
- Sets the `GOOSE_PROVIDER` and `GOOSE_MODEL` environment variables
|
||||
- Runs the benchmark with the specified suites
|
||||
- Analyzes the results for failures
|
||||
4. Generates a summary of all benchmark runs
|
||||
|
||||
### Output
|
||||
|
||||
The script creates the following files in the output directory:
|
||||
|
||||
- `summary.md`: A summary of all benchmark results
|
||||
- `{provider}-{model}.json`: Raw JSON output from each benchmark run
|
||||
- `{provider}-{model}-analysis.txt`: Analysis of each benchmark run
|
||||
|
||||
### Exit Codes
|
||||
|
||||
- `0`: All benchmarks completed successfully
|
||||
- `1`: One or more benchmarks failed
|
||||
|
||||
## parse-benchmark-results.sh
|
||||
|
||||
This script analyzes a single benchmark JSON result file and identifies any failures.
|
||||
|
||||
### Usage
|
||||
|
||||
```bash
|
||||
./scripts/parse-benchmark-results.sh path/to/benchmark-results.json
|
||||
```
|
||||
|
||||
### Output
|
||||
|
||||
The script outputs an analysis of the benchmark results to stdout, including:
|
||||
|
||||
- Basic information about the benchmark run
|
||||
- Results for each evaluation in each suite
|
||||
- Summary of passed and failed metrics
|
||||
|
||||
### Exit Codes
|
||||
|
||||
- `0`: All metrics passed successfully
|
||||
- `1`: One or more metrics failed
|
||||
93
scripts/parse-benchmark-results.sh
Executable file
93
scripts/parse-benchmark-results.sh
Executable file
@@ -0,0 +1,93 @@
|
||||
#!/usr/bin/env bash
# Script to parse goose-bench results and check for failures
#
# Usage: parse-benchmark-results.sh <benchmark-result-json-file>
#
# Prints a per-suite / per-evaluation report followed by a summary to stdout.
# Exit status:
#   0  no failing metric was found
#   1  at least one metric failed, or the input could not be processed
#      (wrong usage, missing file, invalid JSON, jq not installed)

set -e

if [ "$#" -ne 1 ]; then
  echo "Usage: $0 <benchmark-result-json-file>" >&2
  exit 1
fi

RESULT_FILE="$1"

if [ ! -f "$RESULT_FILE" ]; then
  echo "Error: Result file not found: $RESULT_FILE" >&2
  exit 1
fi

# Every query below goes through jq, so fail early with a clear message
# instead of a bare "command not found".
if ! command -v jq > /dev/null 2>&1; then
  echo "Error: jq is required to parse benchmark results but was not found" >&2
  exit 1
fi

if ! jq empty "$RESULT_FILE" 2> /dev/null; then
  echo "Error: Result file is not valid JSON: $RESULT_FILE" >&2
  exit 1
fi

# jq program that prints one " - <name>" line per failing metric of the
# evaluation selected by $s (suite index) and $e (evaluation index).
# Each metric is read as a [name, value] pair. Two value shapes are accepted:
#   * a tagged object such as {"Boolean": false} - the shape that
#     run-benchmarks.sh inspects; any false-like Boolean counts as a failure
#   * a bare value - counts as a failure only when the metric name contains
#     "success", "pass" or "correct" (case-insensitive)
# "False-like" means false, "false", 0 or "0".
FAILED_METRIC_FILTER='
  (.suites[$s].evaluations[$e].metrics // [])[]
  | select(type == "array")
  | (.[1] | type) as $kind
  | (if $kind == "object" then .[1].Boolean else .[1] end) as $value
  | select(
      ($value == false or $value == "false" or $value == 0 or $value == "0")
      and ($kind == "object" or (.[0] | tostring | test("success|pass|correct"; "i")))
    )
  | " - " + (.[0] | tostring)
'

# Extract basic information
PROVIDER=$(jq -r '.provider' "$RESULT_FILE")
START_TIME=$(jq -r '.start_time' "$RESULT_FILE")
SUITE_COUNT=$(jq '.suites | length' "$RESULT_FILE")

echo "Benchmark Results Analysis"
echo "-------------------------"
echo "Provider: $PROVIDER"
echo "Start Time: $START_TIME"
echo "Number of Suites: $SUITE_COUNT"
echo ""

# Initialize counters
TOTAL_EVALS=0
TOTAL_METRICS=0
FAILED_METRICS=0
PASSED_METRICS=0

# Process each suite.
# Arithmetic loops are used instead of `seq 0 $((N-1))`: with N = 0 that is
# `seq 0 -1`, which BSD/macOS seq counts downwards (0, -1) instead of
# producing nothing.
for ((i = 0; i < SUITE_COUNT; i++)); do
  SUITE_NAME=$(jq -r --argjson s "$i" '.suites[$s].name' "$RESULT_FILE")
  EVAL_COUNT=$(jq --argjson s "$i" '.suites[$s].evaluations | length' "$RESULT_FILE")
  TOTAL_EVALS=$((TOTAL_EVALS + EVAL_COUNT))

  echo "Suite: $SUITE_NAME ($EVAL_COUNT evaluations)"

  # Process each evaluation in this suite
  for ((j = 0; j < EVAL_COUNT; j++)); do
    EVAL_NAME=$(jq -r --argjson s "$i" --argjson e "$j" \
      '.suites[$s].evaluations[$e].name' "$RESULT_FILE")
    METRIC_COUNT=$(jq --argjson s "$i" --argjson e "$j" \
      '.suites[$s].evaluations[$e].metrics | length' "$RESULT_FILE")
    TOTAL_METRICS=$((TOTAL_METRICS + METRIC_COUNT))

    # Query the failing metrics once; the count is the number of lines.
    # Every emitted line starts with " - ", so an empty result means none.
    FAILING_METRICS=$(jq -r --argjson s "$i" --argjson e "$j" \
      "$FAILED_METRIC_FILTER" "$RESULT_FILE")
    if [ -n "$FAILING_METRICS" ]; then
      FAILURES=$(printf '%s\n' "$FAILING_METRICS" | wc -l | tr -d ' ')
    else
      FAILURES=0
    fi

    # Metrics that did not fail count as passed even when a sibling metric of
    # the same evaluation failed, so Passed + Failed always equals Total.
    FAILED_METRICS=$((FAILED_METRICS + FAILURES))
    PASSED_METRICS=$((PASSED_METRICS + METRIC_COUNT - FAILURES))

    if [ "$FAILURES" -gt 0 ]; then
      echo " ❌ $EVAL_NAME: $FAILURES failures detected"
      # Print the specific failing metrics
      echo "$FAILING_METRICS"
    else
      echo " ✅ $EVAL_NAME: All metrics passed"
    fi
  done
  echo ""
done

# Print summary
echo "Summary:"
echo "-------"
echo "Total Evaluations: $TOTAL_EVALS"
echo "Total Metrics: $TOTAL_METRICS"
echo "Passed Metrics: $PASSED_METRICS"
echo "Failed Metrics: $FAILED_METRICS"

# Set exit code based on failures
if [ "$FAILED_METRICS" -gt 0 ]; then
  echo "❌ Benchmark has $FAILED_METRICS failures"
  exit 1
else
  echo "✅ All metrics passed successfully"
  exit 0
fi
|
||||
286
scripts/run-benchmarks.sh
Executable file
286
scripts/run-benchmarks.sh
Executable file
@@ -0,0 +1,286 @@
|
||||
#!/usr/bin/env bash
# run-benchmarks.sh - Script to run goose benchmarks across multiple provider:model pairs

set -e

# Print the command-line help (options plus one example invocation) to stdout.
# The heredoc is unquoted on purpose so that $0 expands to the script name.
show_usage() {
  cat <<EOF
Usage: $0 [options]

Options:
 -p, --provider-models Comma-separated list of provider:model pairs (e.g., 'openai:gpt-4o,anthropic:claude-3-5-sonnet')
 -s, --suites Comma-separated list of benchmark suites to run (e.g., 'core,small_models')
 -o, --output-dir Directory to store benchmark results (default: './benchmark-results')
 -d, --debug Use debug build instead of release build
 -h, --help Show this help message

Example:
 $0 --provider-models 'openai:gpt-4o,anthropic:claude-3-5-sonnet' --suites 'core,small_models'
EOF
}
|
||||
|
||||
# Parse command line arguments
#
# Resulting variables:
#   PROVIDER_MODELS  comma-separated provider:model pairs (required)
#   SUITES           comma-separated benchmark suites (required)
#   OUTPUT_DIR       results directory (default: ./benchmark-results)
#   DEBUG_MODE       "true" when -d/--debug was given, otherwise "false"
PROVIDER_MODELS=""
SUITES=""
OUTPUT_DIR="./benchmark-results"
DEBUG_MODE=false

# Abort with a usage message when a value-taking option is the last argument.
# Without this check `shift 2` fails and, because the script runs under
# `set -e`, the script ends without printing any explanation.
#   $1 - the option as typed by the user
#   $2 - number of arguments still unparsed (including the option itself)
require_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "Error: Option $1 requires a value"
    show_usage
    exit 1
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    -p|--provider-models)
      require_value "$1" "$#"
      PROVIDER_MODELS="$2"
      shift 2
      ;;
    -s|--suites)
      require_value "$1" "$#"
      SUITES="$2"
      shift 2
      ;;
    -o|--output-dir)
      require_value "$1" "$#"
      OUTPUT_DIR="$2"
      shift 2
      ;;
    -d|--debug)
      DEBUG_MODE=true
      shift
      ;;
    -h|--help)
      show_usage
      exit 0
      ;;
    *)
      echo "Error: Unknown option: $1"
      show_usage
      exit 1
      ;;
  esac
done

# Validate required parameters
if [[ -z "$PROVIDER_MODELS" ]]; then
  echo "Error: Provider-model pairs must be specified"
  show_usage
  exit 1
fi

if [[ -z "$SUITES" ]]; then
  echo "Error: Benchmark suites must be specified"
  show_usage
  exit 1
fi
|
||||
|
||||
# Make sure the results directory exists before anything is written into it
mkdir -p "$OUTPUT_DIR"

# Start a fresh summary file (any previous one is overwritten) with a short
# header: title, run date, requested suites, build mode and a blank line.
SUMMARY_FILE="$OUTPUT_DIR/summary.md"
if [ "$DEBUG_MODE" = true ]; then
  summary_mode="Debug"
else
  summary_mode="Release"
fi
{
  echo "# Benchmark Results Summary"
  echo "Run date: $(date)"
  echo "Suites: $SUITES"
  echo "Mode: $summary_mode"
  echo ""
} > "$SUMMARY_FILE"
|
||||
|
||||
# Choose the goose executable: prefer the locally built binary for the
# requested build profile; if it does not exist, keep plain "goose" so the
# one found on PATH is used.
GOOSE_CMD="goose"
if [ "$DEBUG_MODE" = true ]; then
  build_profile="debug"
  build_label="Debug"
else
  build_profile="release"
  build_label="Release"
fi

local_binary="./target/${build_profile}/goose"
if [ -f "$local_binary" ]; then
  GOOSE_CMD="$local_binary"
  echo "Using ${build_profile} binary: $GOOSE_CMD"
else
  echo "Warning: ${build_label} binary not found at ${local_binary}. Falling back to system-installed goose."
fi
|
||||
|
||||
# Parse provider:model pairs
#
# Splits $PROVIDER_MODELS on commas and each entry on its first colon, filling
# the parallel arrays PROVIDERS and MODELS. Entries missing either half are
# skipped with a warning. The script stops with an error when no usable pair
# remains, because otherwise nothing would run and the script would still
# report success.
PROVIDERS=()
MODELS=()

IFS=',' read -ra PAIRS <<< "$PROVIDER_MODELS"
for pair in "${PAIRS[@]}"; do
  # Trim surrounding whitespace so "a:b, c:d" does not produce a provider
  # named " c". The inner expansion isolates the leading (then trailing)
  # whitespace run; the outer one strips exactly that run.
  pair="${pair#"${pair%%[![:space:]]*}"}"
  pair="${pair%"${pair##*[![:space:]]}"}"

  # Split by colon; everything after the first colon belongs to the model
  IFS=':' read -r provider model <<< "$pair"
  if [[ -n "$provider" && -n "$model" ]]; then
    PROVIDERS+=("$provider")
    MODELS+=("$model")
  else
    echo "Warning: Invalid provider:model pair: $pair. Skipping."
  fi
done

# Track overall success
OVERALL_SUCCESS=true
COUNT=${#PROVIDERS[@]}

if [[ "$COUNT" -eq 0 ]]; then
  echo "Error: No valid provider:model pairs found in: $PROVIDER_MODELS"
  exit 1
fi

echo "Running benchmarks for $COUNT provider:model pairs..."
echo "Benchmark suites: $SUITES"
echo ""
|
||||
|
||||
# Loop through each provider-model pair
#
# For every pair this:
#   1. exports GOOSE_PROVIDER / GOOSE_MODEL and runs `goose bench`
#   2. checks that the JSON output exists and is valid (needs jq)
#   3. writes a per-pair analysis file listing failed metrics and errors
#   4. appends the verdict and the analysis to the summary file
# OVERALL_SUCCESS is set to false as soon as any pair fails.

# jq program printing one " - <name>" line per failing metric of the
# evaluation selected by $s (suite index) and $e (evaluation index).
# A metric is read as a [name, value] pair and fails when value is an object
# whose "Boolean" field is false-like (false, "false", 0 or "0").
FAILED_METRIC_FILTER='
  (.suites[$s].evaluations[$e].metrics // [])[]
  | select(type == "array")
  | (.[1] | if type == "object" then .Boolean else null end) as $flag
  | select($flag == false or $flag == "false" or $flag == 0 or $flag == "0")
  | " - " + (.[0] | tostring)
'

for ((i = 0; i < COUNT; i++)); do
  provider="${PROVIDERS[i]}"
  model="${MODELS[i]}"

  echo "=========================================================="
  echo "Provider: $provider, Model: $model"
  echo "=========================================================="

  echo "## Provider: $provider, Model: $model" >> "$SUMMARY_FILE"

  # Set environment variables for this provider/model instead of using configure
  export GOOSE_PROVIDER="$provider"
  export GOOSE_MODEL="$model"

  # Run the benchmark and save results to JSON
  echo "Running benchmark for $provider/$model with suites: $SUITES"

  # A "/" in a provider or model name would otherwise be read as a directory
  # separator and point the output at a directory that does not exist.
  FILE_STEM="${provider}-${model}"
  FILE_STEM="${FILE_STEM//\//_}"
  OUTPUT_FILE="$OUTPUT_DIR/${FILE_STEM}.json"
  ANALYSIS_FILE="$OUTPUT_DIR/${FILE_STEM}-analysis.txt"

  if ! "$GOOSE_CMD" bench --suites "$SUITES" --output "$OUTPUT_FILE" --format json; then
    echo "❌ Benchmark failed to run" | tee -a "$SUMMARY_FILE"
    OVERALL_SUCCESS=false
  else
    echo "✅ Benchmark completed successfully" | tee -a "$SUMMARY_FILE"

    # Parse the JSON to check for failures
    if [ ! -f "$OUTPUT_FILE" ]; then
      echo "❌ Benchmark output file not found" | tee -a "$SUMMARY_FILE"
      OVERALL_SUCCESS=false
    elif ! command -v jq > /dev/null 2>&1; then
      # Without jq the results cannot be inspected; this is reported but is
      # not treated as a benchmark failure.
      echo "Warning: jq not found. Cannot parse JSON results."
      echo "⚠️ Could not parse results (jq not installed)" >> "$SUMMARY_FILE"
    elif ! jq empty "$OUTPUT_FILE" 2> /dev/null; then
      echo "❌ Invalid JSON in benchmark output" | tee -a "$SUMMARY_FILE"
      OVERALL_SUCCESS=false
    else
      # Extract basic information
      PROVIDER_NAME=$(jq -r '.provider' "$OUTPUT_FILE")
      START_TIME=$(jq -r '.start_time' "$OUTPUT_FILE")
      SUITE_COUNT=$(jq '.suites | length' "$OUTPUT_FILE")

      # Initialize counters
      TOTAL_EVALS=0
      TOTAL_METRICS=0
      FAILED_METRICS=0
      PASSED_METRICS=0
      TOTAL_ERRORS=0

      # Everything echoed inside this group goes to the analysis file. The
      # group runs in the current shell, so the counters stay visible after it.
      {
        echo "Benchmark Results Analysis"
        echo "-------------------------"
        echo "Provider: $PROVIDER_NAME"
        echo "Start Time: $START_TIME"
        echo "Number of Suites: $SUITE_COUNT"
        echo ""

        # Process each suite.
        # Arithmetic loops replace `seq 0 $((N-1))`: with N = 0 that is
        # `seq 0 -1`, which BSD/macOS seq counts downwards (0, -1).
        for ((j = 0; j < SUITE_COUNT; j++)); do
          SUITE_NAME=$(jq -r --argjson s "$j" '.suites[$s].name' "$OUTPUT_FILE")
          EVAL_COUNT=$(jq --argjson s "$j" '.suites[$s].evaluations | length' "$OUTPUT_FILE")
          TOTAL_EVALS=$((TOTAL_EVALS + EVAL_COUNT))

          echo "Suite: $SUITE_NAME ($EVAL_COUNT evaluations)"

          # Process each evaluation in this suite
          for ((k = 0; k < EVAL_COUNT; k++)); do
            EVAL_NAME=$(jq -r --argjson s "$j" --argjson e "$k" \
              '.suites[$s].evaluations[$e].name' "$OUTPUT_FILE")
            METRIC_COUNT=$(jq --argjson s "$j" --argjson e "$k" \
              '.suites[$s].evaluations[$e].metrics | length' "$OUTPUT_FILE")
            TOTAL_METRICS=$((TOTAL_METRICS + METRIC_COUNT))

            # Check for errors in this evaluation
            ERROR_COUNT=$(jq --argjson s "$j" --argjson e "$k" \
              '.suites[$s].evaluations[$e].errors | length' "$OUTPUT_FILE")
            TOTAL_ERRORS=$((TOTAL_ERRORS + ERROR_COUNT))

            # Check for failures in metrics: one query yields the names, and
            # the number of lines is the failure count (each line starts with
            # " - ", so an empty result means no failures).
            FAILING_METRICS=$(jq -r --argjson s "$j" --argjson e "$k" \
              "$FAILED_METRIC_FILTER" "$OUTPUT_FILE")
            if [ -n "$FAILING_METRICS" ]; then
              FAILURES=$(printf '%s\n' "$FAILING_METRICS" | wc -l | tr -d ' ')
            else
              FAILURES=0
            fi

            # Metrics that did not fail count as passed even when the
            # evaluation has failures or errors, so that
            # Passed + Failed always equals Total.
            FAILED_METRICS=$((FAILED_METRICS + FAILURES))
            PASSED_METRICS=$((PASSED_METRICS + METRIC_COUNT - FAILURES))

            if [ "$FAILURES" -gt 0 ] || [ "$ERROR_COUNT" -gt 0 ]; then
              echo " ❌ $EVAL_NAME:"

              if [ "$FAILURES" -gt 0 ]; then
                echo " - $FAILURES metric failures detected"
                # Print the specific failing metrics
                echo " Failed metrics:"
                echo "$FAILING_METRICS"
              fi

              if [ "$ERROR_COUNT" -gt 0 ]; then
                echo " - $ERROR_COUNT errors detected"
                # Print the errors
                jq -r --argjson s "$j" --argjson e "$k" \
                  '.suites[$s].evaluations[$e].errors[] | " [\(.level)] \(.message)"' "$OUTPUT_FILE"
              fi
            else
              echo " ✅ $EVAL_NAME: All metrics passed, no errors"
            fi
          done
          echo ""
        done

        # Print summary
        echo "Summary:"
        echo "-------"
        echo "Total Evaluations: $TOTAL_EVALS"
        echo "Total Metrics: $TOTAL_METRICS"
        echo "Passed Metrics: $PASSED_METRICS"
        echo "Failed Metrics: $FAILED_METRICS"
        echo "Total Errors: $TOTAL_ERRORS"

        if [ "$FAILED_METRICS" -gt 0 ]; then
          echo "❌ Benchmark has $FAILED_METRICS failed metrics"
        fi
        if [ "$TOTAL_ERRORS" -gt 0 ]; then
          echo "❌ Benchmark has $TOTAL_ERRORS errors"
        fi
        if [ "$FAILED_METRICS" -eq 0 ] && [ "$TOTAL_ERRORS" -eq 0 ]; then
          echo "✅ All metrics passed successfully, no errors"
        fi
      } > "$ANALYSIS_FILE"

      # Determine success/failure
      if [ "$FAILED_METRICS" -gt 0 ] || [ "$TOTAL_ERRORS" -gt 0 ]; then
        echo "❌ Tests failed for $provider/$model" | tee -a "$SUMMARY_FILE"
        OVERALL_SUCCESS=false
      else
        echo "✅ All tests passed for $provider/$model" | tee -a "$SUMMARY_FILE"
      fi
      cat "$ANALYSIS_FILE" >> "$SUMMARY_FILE"
    fi
  fi

  echo "" >> "$SUMMARY_FILE"
  echo ""
done
|
||||
|
||||
# Closing banner: tell the user where the results and the summary were written
printf '%s\n' \
  "==========================================================" \
  "Benchmark run completed" \
  "Results saved to: $OUTPUT_DIR" \
  "Summary file: $SUMMARY_FILE"

# Turn the aggregated outcome into the exit status: 0 unless some pair failed
if [ "$OVERALL_SUCCESS" != false ]; then
  echo "✅ All benchmarks completed successfully."
  exit 0
fi

echo "❌ Some benchmarks failed. Check the summary for details."
exit 1
|
||||
Reference in New Issue
Block a user