feat: goose bench framework for functional and regression testing

Co-authored-by: Zaki Ali <zaki@squareup.com>
This commit is contained in:
marcelle
2025-03-05 21:23:00 -05:00
committed by GitHub
parent 24210d7b7b
commit 49dee048e4
31 changed files with 89381 additions and 2 deletions

3
.gitignore vendored
View File

@@ -36,3 +36,6 @@ debug_*.txt
# Generated files
.docusaurus
.cache-loader
# Benchmark paths
benchmark-*

21
Cargo.lock generated
View File

@@ -2199,11 +2199,31 @@ dependencies = [
"wiremock",
]
[[package]]
name = "goose-bench"
version = "1.0.10"
dependencies = [
"anyhow",
"async-trait",
"chrono",
"ctor",
"goose",
"mcp-core",
"paste",
"serde",
"serde_json",
"tokio",
"tracing",
"tracing-subscriber",
"winapi",
]
[[package]]
name = "goose-cli"
version = "1.0.12"
dependencies = [
"anyhow",
"async-trait",
"bat",
"chrono",
"clap",
@@ -2212,6 +2232,7 @@ dependencies = [
"etcetera",
"futures",
"goose",
"goose-bench",
"goose-mcp",
"mcp-client",
"mcp-core",

View File

@@ -0,0 +1,26 @@
# Manifest for the goose-bench crate.
[package]
name = "goose-bench"
# Every remaining package field is inherited from the workspace manifest.
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
repository.workspace = true
description.workspace = true
# Dependencies built for every target.
[dependencies]
anyhow = "1.0"
paste = "1.0"
ctor = "0.2.7"
# Local crates, referenced by relative path rather than by registry version.
goose = { path = "../goose" }
mcp-core = { path = "../mcp-core" }
async-trait = "0.1.86"
chrono = { version = "0.4", features = ["serde"] }
serde_json = "1.0"
serde = { version = "1.0", features = ["derive"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["registry"] }
tokio = { version = "1.0", features = ["full"] }
# Dependencies pulled in only when compiling for a Windows target.
[target.'cfg(target_os = "windows")'.dependencies]
winapi = { version = "0.3", features = ["wincred"] }

View File

@@ -0,0 +1,15 @@
diff --git a/kubernetes_swagger.json b/kubernetes_swagger.json
index 3e11d92..859a63e 100644
--- a/kubernetes_swagger.json
+++ b/kubernetes_swagger.json
@@ -371,8 +371,8 @@
},
"type": "object"
},
- "io.k8s.api.admissionregistration.v1.ServiceReference": {
- "description": "ServiceReference holds a reference to Service.legacy.k8s.io",
+ "io.k8s.api.admissionregistration.v1.FakeServiceReference": {
+ "description": "FakeServiceReference simulates a reference to a fake service for testing purposes.",
"properties": {
"name": {
"description": "`name` is the name of the service. Required",

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,458 @@
diff --git a/vscode_config_registry.ts b/vscode_config_registry.ts
index d2ba316..1834518 100644
--- a/vscode_config_registry.ts
+++ b/vscode_config_registry.ts
@@ -23,68 +23,68 @@ export const Extensions = {
Configuration: 'base.contributions.configuration'
};
-export interface IConfigurationDelta {
- removedDefaults?: IConfigurationDefaults[];
- removedConfigurations?: IConfigurationNode[];
- addedDefaults?: IConfigurationDefaults[];
- addedConfigurations?: IConfigurationNode[];
+export interface PConfigurationDelta {
+ removedDefaults?: PConfigurationDefaults[];
+ removedConfigurations?: PConfigurationNode[];
+ addedDefaults?: PConfigurationDefaults[];
+ addedConfigurations?: PConfigurationNode[];
}
-export interface IConfigurationRegistry {
+export interface PConfigurationRegistry {
/**
* Register a configuration to the registry.
*/
- registerConfiguration(configuration: IConfigurationNode): void;
+ registerConfiguration(configuration: PConfigurationNode): void;
/**
* Register multiple configurations to the registry.
*/
- registerConfigurations(configurations: IConfigurationNode[], validate?: boolean): void;
+ registerConfigurations(configurations: PConfigurationNode[], validate?: boolean): void;
/**
* Deregister multiple configurations from the registry.
*/
- deregisterConfigurations(configurations: IConfigurationNode[]): void;
+ deregisterConfigurations(configurations: PConfigurationNode[]): void;
/**
* update the configuration registry by
* - registering the configurations to add
* - dereigstering the configurations to remove
*/
- updateConfigurations(configurations: { add: IConfigurationNode[]; remove: IConfigurationNode[] }): void;
+ updateConfigurations(configurations: { add: PConfigurationNode[]; remove: PConfigurationNode[] }): void;
/**
* Register multiple default configurations to the registry.
*/
- registerDefaultConfigurations(defaultConfigurations: IConfigurationDefaults[]): void;
+ registerDefaultConfigurations(defaultConfigurations: PConfigurationDefaults[]): void;
/**
* Deregister multiple default configurations from the registry.
*/
- deregisterDefaultConfigurations(defaultConfigurations: IConfigurationDefaults[]): void;
+ deregisterDefaultConfigurations(defaultConfigurations: PConfigurationDefaults[]): void;
/**
* Bulk update of the configuration registry (default and configurations, remove and add)
* @param delta
*/
- deltaConfiguration(delta: IConfigurationDelta): void;
+ deltaConfiguration(delta: PConfigurationDelta): void;
/**
* Return the registered default configurations
*/
- getRegisteredDefaultConfigurations(): IConfigurationDefaults[];
+ getRegisteredDefaultConfigurations(): PConfigurationDefaults[];
/**
* Return the registered configuration defaults overrides
*/
- getConfigurationDefaultsOverrides(): Map<string, IConfigurationDefaultOverrideValue>;
+ getConfigurationDefaultsOverrides(): Map<string, PConfigurationDefaultOverrideValue>;
/**
* Signal that the schema of a configuration setting has changes. It is currently only supported to change enumeration values.
* Property or default value changes are not allowed.
*/
- notifyConfigurationSchemaUpdated(...configurations: IConfigurationNode[]): void;
+ notifyConfigurationSchemaUpdated(...configurations: PConfigurationNode[]): void;
/**
* Event that fires whenever a configuration has been
@@ -101,12 +101,12 @@ export interface IConfigurationRegistry {
/**
* Returns all configuration nodes contributed to this registry.
*/
- getConfigurations(): IConfigurationNode[];
+ getConfigurations(): PConfigurationNode[];
/**
* Returns all configurations settings of all configuration nodes contributed to this registry.
*/
- getConfigurationProperties(): IStringDictionary<IRegisteredConfigurationPropertySchema>;
+ getConfigurationProperties(): IStringDictionary<PRegisteredConfigurationPropertySchema>;
/**
* Return all configurations by policy name
@@ -116,7 +116,7 @@ export interface IConfigurationRegistry {
/**
* Returns all excluded configurations settings of all configuration nodes contributed to this registry.
*/
- getExcludedConfigurationProperties(): IStringDictionary<IRegisteredConfigurationPropertySchema>;
+ getExcludedConfigurationProperties(): IStringDictionary<PRegisteredConfigurationPropertySchema>;
/**
* Register the identifiers for editor configurations
@@ -168,7 +168,7 @@ export interface IPolicy {
readonly minimumVersion: `${number}.${number}`;
}
-export interface IConfigurationPropertySchema extends IJSONSchema {
+export interface PConfigurationPropertySchema extends IJSONSchema {
scope?: ConfigurationScope;
@@ -235,14 +235,14 @@ export interface IExtensionInfo {
displayName?: string;
}
-export interface IConfigurationNode {
+export interface PConfigurationNode {
id?: string;
order?: number;
type?: string | string[];
title?: string;
description?: string;
- properties?: IStringDictionary<IConfigurationPropertySchema>;
- allOf?: IConfigurationNode[];
+ properties?: IStringDictionary<PConfigurationPropertySchema>;
+ allOf?: PConfigurationNode[];
scope?: ConfigurationScope;
extensionInfo?: IExtensionInfo;
restrictedProperties?: string[];
@@ -250,49 +250,49 @@ export interface IConfigurationNode {
export type ConfigurationDefaultValueSource = IExtensionInfo | Map<string, IExtensionInfo>;
-export interface IConfigurationDefaults {
+export interface PConfigurationDefaults {
overrides: IStringDictionary<any>;
source?: IExtensionInfo;
}
-export type IRegisteredConfigurationPropertySchema = IConfigurationPropertySchema & {
+export type PRegisteredConfigurationPropertySchema = PConfigurationPropertySchema & {
defaultDefaultValue?: any;
source?: IExtensionInfo; // Source of the Property
defaultValueSource?: ConfigurationDefaultValueSource; // Source of the Default Value
};
-export interface IConfigurationDefaultOverride {
+export interface PConfigurationDefaultOverride {
readonly value: any;
readonly source?: IExtensionInfo; // Source of the default override
}
-export interface IConfigurationDefaultOverrideValue {
+export interface PConfigurationDefaultOverrideValue {
readonly value: any;
readonly source?: ConfigurationDefaultValueSource;
}
-export const allSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
-export const applicationSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
-export const applicationMachineSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
-export const machineSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
-export const machineOverridableSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
-export const windowSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
-export const resourceSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
+export const allSettings: { properties: IStringDictionary<PConfigurationPropertySchema>; patternProperties: IStringDictionary<PConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
+export const applicationSettings: { properties: IStringDictionary<PConfigurationPropertySchema>; patternProperties: IStringDictionary<PConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
+export const applicationMachineSettings: { properties: IStringDictionary<PConfigurationPropertySchema>; patternProperties: IStringDictionary<PConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
+export const machineSettings: { properties: IStringDictionary<PConfigurationPropertySchema>; patternProperties: IStringDictionary<PConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
+export const machineOverridableSettings: { properties: IStringDictionary<PConfigurationPropertySchema>; patternProperties: IStringDictionary<PConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
+export const windowSettings: { properties: IStringDictionary<PConfigurationPropertySchema>; patternProperties: IStringDictionary<PConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
+export const resourceSettings: { properties: IStringDictionary<PConfigurationPropertySchema>; patternProperties: IStringDictionary<PConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
export const resourceLanguageSettingsSchemaId = 'vscode://schemas/settings/resourceLanguage';
export const configurationDefaultsSchemaId = 'vscode://schemas/settings/configurationDefaults';
const contributionRegistry = Registry.as<IJSONContributionRegistry>(JSONExtensions.JSONContribution);
-class ConfigurationRegistry implements IConfigurationRegistry {
+class ConfigurationRegistry implements PConfigurationRegistry {
- private readonly registeredConfigurationDefaults: IConfigurationDefaults[] = [];
- private readonly configurationDefaultsOverrides: Map<string, { configurationDefaultOverrides: IConfigurationDefaultOverride[]; configurationDefaultOverrideValue?: IConfigurationDefaultOverrideValue }>;
- private readonly defaultLanguageConfigurationOverridesNode: IConfigurationNode;
- private readonly configurationContributors: IConfigurationNode[];
- private readonly configurationProperties: IStringDictionary<IRegisteredConfigurationPropertySchema>;
+ private readonly registeredConfigurationDefaults: PConfigurationDefaults[] = [];
+ private readonly configurationDefaultsOverrides: Map<string, { configurationDefaultOverrides: PConfigurationDefaultOverride[]; configurationDefaultOverrideValue?: PConfigurationDefaultOverrideValue }>;
+ private readonly defaultLanguageConfigurationOverridesNode: PConfigurationNode;
+ private readonly configurationContributors: PConfigurationNode[];
+ private readonly configurationProperties: IStringDictionary<PRegisteredConfigurationPropertySchema>;
private readonly policyConfigurations: Map<PolicyName, string>;
- private readonly excludedConfigurationProperties: IStringDictionary<IRegisteredConfigurationPropertySchema>;
+ private readonly excludedConfigurationProperties: IStringDictionary<PRegisteredConfigurationPropertySchema>;
private readonly resourceLanguageSettingsSchema: IJSONSchema;
private readonly overrideIdentifiers = new Set<string>();
@@ -325,11 +325,11 @@ class ConfigurationRegistry implements IConfigurationRegistry {
this.registerOverridePropertyPatternKey();
}
- public registerConfiguration(configuration: IConfigurationNode, validate: boolean = true): void {
+ public registerConfiguration(configuration: PConfigurationNode, validate: boolean = true): void {
this.registerConfigurations([configuration], validate);
}
- public registerConfigurations(configurations: IConfigurationNode[], validate: boolean = true): void {
+ public registerConfigurations(configurations: PConfigurationNode[], validate: boolean = true): void {
const properties = new Set<string>();
this.doRegisterConfigurations(configurations, validate, properties);
@@ -338,7 +338,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
this._onDidUpdateConfiguration.fire({ properties });
}
- public deregisterConfigurations(configurations: IConfigurationNode[]): void {
+ public deregisterConfigurations(configurations: PConfigurationNode[]): void {
const properties = new Set<string>();
this.doDeregisterConfigurations(configurations, properties);
@@ -347,7 +347,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
this._onDidUpdateConfiguration.fire({ properties });
}
- public updateConfigurations({ add, remove }: { add: IConfigurationNode[]; remove: IConfigurationNode[] }): void {
+ public updateConfigurations({ add, remove }: { add: PConfigurationNode[]; remove: PConfigurationNode[] }): void {
const properties = new Set<string>();
this.doDeregisterConfigurations(remove, properties);
this.doRegisterConfigurations(add, false, properties);
@@ -357,14 +357,14 @@ class ConfigurationRegistry implements IConfigurationRegistry {
this._onDidUpdateConfiguration.fire({ properties });
}
- public registerDefaultConfigurations(configurationDefaults: IConfigurationDefaults[]): void {
+ public registerDefaultConfigurations(configurationDefaults: PConfigurationDefaults[]): void {
const properties = new Set<string>();
this.doRegisterDefaultConfigurations(configurationDefaults, properties);
this._onDidSchemaChange.fire();
this._onDidUpdateConfiguration.fire({ properties, defaultsOverrides: true });
}
- private doRegisterDefaultConfigurations(configurationDefaults: IConfigurationDefaults[], bucket: Set<string>) {
+ private doRegisterDefaultConfigurations(configurationDefaults: PConfigurationDefaults[], bucket: Set<string>) {
this.registeredConfigurationDefaults.push(...configurationDefaults);
@@ -413,14 +413,14 @@ class ConfigurationRegistry implements IConfigurationRegistry {
this.doRegisterOverrideIdentifiers(overrideIdentifiers);
}
- public deregisterDefaultConfigurations(defaultConfigurations: IConfigurationDefaults[]): void {
+ public deregisterDefaultConfigurations(defaultConfigurations: PConfigurationDefaults[]): void {
const properties = new Set<string>();
this.doDeregisterDefaultConfigurations(defaultConfigurations, properties);
this._onDidSchemaChange.fire();
this._onDidUpdateConfiguration.fire({ properties, defaultsOverrides: true });
}
- private doDeregisterDefaultConfigurations(defaultConfigurations: IConfigurationDefaults[], bucket: Set<string>): void {
+ private doDeregisterDefaultConfigurations(defaultConfigurations: PConfigurationDefaults[], bucket: Set<string>): void {
for (const defaultConfiguration of defaultConfigurations) {
const index = this.registeredConfigurationDefaults.indexOf(defaultConfiguration);
if (index !== -1) {
@@ -447,7 +447,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
}
if (OVERRIDE_PROPERTY_REGEX.test(key)) {
- let configurationDefaultOverrideValue: IConfigurationDefaultOverrideValue | undefined;
+ let configurationDefaultOverrideValue: PConfigurationDefaultOverrideValue | undefined;
for (const configurationDefaultOverride of configurationDefaultOverridesForKey.configurationDefaultOverrides) {
configurationDefaultOverrideValue = this.mergeDefaultConfigurationsForOverrideIdentifier(key, configurationDefaultOverride.value, configurationDefaultOverride.source, configurationDefaultOverrideValue);
}
@@ -460,7 +460,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
delete this.defaultLanguageConfigurationOverridesNode.properties![key];
}
} else {
- let configurationDefaultOverrideValue: IConfigurationDefaultOverrideValue | undefined;
+ let configurationDefaultOverrideValue: PConfigurationDefaultOverrideValue | undefined;
for (const configurationDefaultOverride of configurationDefaultOverridesForKey.configurationDefaultOverrides) {
configurationDefaultOverrideValue = this.mergeDefaultConfigurationsForConfigurationProperty(key, configurationDefaultOverride.value, configurationDefaultOverride.source, configurationDefaultOverrideValue);
}
@@ -477,8 +477,8 @@ class ConfigurationRegistry implements IConfigurationRegistry {
this.updateOverridePropertyPatternKey();
}
- private updateDefaultOverrideProperty(key: string, newDefaultOverride: IConfigurationDefaultOverrideValue, source: IExtensionInfo | undefined): void {
- const property: IRegisteredConfigurationPropertySchema = {
+ private updateDefaultOverrideProperty(key: string, newDefaultOverride: PConfigurationDefaultOverrideValue, source: IExtensionInfo | undefined): void {
+ const property: PRegisteredConfigurationPropertySchema = {
type: 'object',
default: newDefaultOverride.value,
description: nls.localize('defaultLanguageConfiguration.description', "Configure settings to be overridden for the {0} language.", getLanguageTagSettingPlainKey(key)),
@@ -491,7 +491,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
this.defaultLanguageConfigurationOverridesNode.properties![key] = property;
}
- private mergeDefaultConfigurationsForOverrideIdentifier(overrideIdentifier: string, configurationValueObject: IStringDictionary<any>, valueSource: IExtensionInfo | undefined, existingDefaultOverride: IConfigurationDefaultOverrideValue | undefined): IConfigurationDefaultOverrideValue | undefined {
+ private mergeDefaultConfigurationsForOverrideIdentifier(overrideIdentifier: string, configurationValueObject: IStringDictionary<any>, valueSource: IExtensionInfo | undefined, existingDefaultOverride: PConfigurationDefaultOverrideValue | undefined): PConfigurationDefaultOverrideValue | undefined {
const defaultValue = existingDefaultOverride?.value || {};
const source = existingDefaultOverride?.source ?? new Map<string, IExtensionInfo>();
@@ -532,7 +532,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
return { value: defaultValue, source };
}
- private mergeDefaultConfigurationsForConfigurationProperty(propertyKey: string, value: any, valuesSource: IExtensionInfo | undefined, existingDefaultOverride: IConfigurationDefaultOverrideValue | undefined): IConfigurationDefaultOverrideValue | undefined {
+ private mergeDefaultConfigurationsForConfigurationProperty(propertyKey: string, value: any, valuesSource: IExtensionInfo | undefined, existingDefaultOverride: PConfigurationDefaultOverrideValue | undefined): PConfigurationDefaultOverrideValue | undefined {
const property = this.configurationProperties[propertyKey];
const existingDefaultValue = existingDefaultOverride?.value ?? property?.defaultDefaultValue;
let source: ConfigurationDefaultValueSource | undefined = valuesSource;
@@ -564,7 +564,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
return { value, source };
}
- public deltaConfiguration(delta: IConfigurationDelta): void {
+ public deltaConfiguration(delta: PConfigurationDelta): void {
// defaults: remove
let defaultsOverrides = false;
const properties = new Set<string>();
@@ -589,7 +589,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
this._onDidUpdateConfiguration.fire({ properties, defaultsOverrides });
}
- public notifyConfigurationSchemaUpdated(...configurations: IConfigurationNode[]) {
+ public notifyConfigurationSchemaUpdated(...configurations: PConfigurationNode[]) {
this._onDidSchemaChange.fire();
}
@@ -605,7 +605,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
this.updateOverridePropertyPatternKey();
}
- private doRegisterConfigurations(configurations: IConfigurationNode[], validate: boolean, bucket: Set<string>): void {
+ private doRegisterConfigurations(configurations: PConfigurationNode[], validate: boolean, bucket: Set<string>): void {
configurations.forEach(configuration => {
@@ -616,9 +616,9 @@ class ConfigurationRegistry implements IConfigurationRegistry {
});
}
- private doDeregisterConfigurations(configurations: IConfigurationNode[], bucket: Set<string>): void {
+ private doDeregisterConfigurations(configurations: PConfigurationNode[], bucket: Set<string>): void {
- const deregisterConfiguration = (configuration: IConfigurationNode) => {
+ const deregisterConfiguration = (configuration: PConfigurationNode) => {
if (configuration.properties) {
for (const key in configuration.properties) {
bucket.add(key);
@@ -641,12 +641,12 @@ class ConfigurationRegistry implements IConfigurationRegistry {
}
}
- private validateAndRegisterProperties(configuration: IConfigurationNode, validate: boolean = true, extensionInfo: IExtensionInfo | undefined, restrictedProperties: string[] | undefined, scope: ConfigurationScope = ConfigurationScope.WINDOW, bucket: Set<string>): void {
+ private validateAndRegisterProperties(configuration: PConfigurationNode, validate: boolean = true, extensionInfo: IExtensionInfo | undefined, restrictedProperties: string[] | undefined, scope: ConfigurationScope = ConfigurationScope.WINDOW, bucket: Set<string>): void {
scope = types.isUndefinedOrNull(configuration.scope) ? scope : configuration.scope;
const properties = configuration.properties;
if (properties) {
for (const key in properties) {
- const property: IRegisteredConfigurationPropertySchema = properties[key];
+ const property: PRegisteredConfigurationPropertySchema = properties[key];
if (validate && validateProperty(key, property)) {
delete properties[key];
continue;
@@ -696,7 +696,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
}
// TODO: @sandy081 - Remove this method and include required info in getConfigurationProperties
- getConfigurations(): IConfigurationNode[] {
+ getConfigurations(): PConfigurationNode[] {
return this.configurationContributors;
}
@@ -712,12 +712,12 @@ class ConfigurationRegistry implements IConfigurationRegistry {
return this.excludedConfigurationProperties;
}
- getRegisteredDefaultConfigurations(): IConfigurationDefaults[] {
+ getRegisteredDefaultConfigurations(): PConfigurationDefaults[] {
return [...this.registeredConfigurationDefaults];
}
- getConfigurationDefaultsOverrides(): Map<string, IConfigurationDefaultOverrideValue> {
- const configurationDefaultsOverrides = new Map<string, IConfigurationDefaultOverrideValue>();
+ getConfigurationDefaultsOverrides(): Map<string, PConfigurationDefaultOverrideValue> {
+ const configurationDefaultsOverrides = new Map<string, PConfigurationDefaultOverrideValue>();
for (const [key, value] of this.configurationDefaultsOverrides) {
if (value.configurationDefaultOverrideValue) {
configurationDefaultsOverrides.set(key, value.configurationDefaultOverrideValue);
@@ -726,8 +726,8 @@ class ConfigurationRegistry implements IConfigurationRegistry {
return configurationDefaultsOverrides;
}
- private registerJSONConfiguration(configuration: IConfigurationNode) {
- const register = (configuration: IConfigurationNode) => {
+ private registerJSONConfiguration(configuration: PConfigurationNode) {
+ const register = (configuration: PConfigurationNode) => {
const properties = configuration.properties;
if (properties) {
for (const key in properties) {
@@ -740,7 +740,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
register(configuration);
}
- private updateSchema(key: string, property: IConfigurationPropertySchema): void {
+ private updateSchema(key: string, property: PConfigurationPropertySchema): void {
allSettings.properties[key] = property;
switch (property.scope) {
case ConfigurationScope.APPLICATION:
@@ -768,7 +768,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
}
}
- private removeFromSchema(key: string, property: IConfigurationPropertySchema): void {
+ private removeFromSchema(key: string, property: PConfigurationPropertySchema): void {
delete allSettings.properties[key];
switch (property.scope) {
case ConfigurationScope.APPLICATION:
@@ -831,7 +831,7 @@ class ConfigurationRegistry implements IConfigurationRegistry {
this._onDidSchemaChange.fire();
}
- private updatePropertyDefaultValue(key: string, property: IRegisteredConfigurationPropertySchema): void {
+ private updatePropertyDefaultValue(key: string, property: PRegisteredConfigurationPropertySchema): void {
const configurationdefaultOverride = this.configurationDefaultsOverrides.get(key)?.configurationDefaultOverrideValue;
let defaultValue = undefined;
let defaultSource = undefined;
@@ -899,7 +899,7 @@ export function getDefaultValue(type: string | string[] | undefined) {
const configurationRegistry = new ConfigurationRegistry();
Registry.add(Extensions.Configuration, configurationRegistry);
-export function validateProperty(property: string, schema: IRegisteredConfigurationPropertySchema): string | null {
+export function validateProperty(property: string, schema: PRegisteredConfigurationPropertySchema): string | null {
if (!property.trim()) {
return nls.localize('config.property.empty', "Cannot register an empty property");
}
@@ -926,8 +926,8 @@ export function getScopes(): [string, ConfigurationScope | undefined][] {
return scopes;
}
-export function getAllConfigurationProperties(configurationNode: IConfigurationNode[]): IStringDictionary<IRegisteredConfigurationPropertySchema> {
- const result: IStringDictionary<IRegisteredConfigurationPropertySchema> = {};
+export function getAllConfigurationProperties(configurationNode: PConfigurationNode[]): IStringDictionary<PRegisteredConfigurationPropertySchema> {
+ const result: IStringDictionary<PRegisteredConfigurationPropertySchema> = {};
for (const configuration of configurationNode) {
const properties = configuration.properties;
if (types.isObject(properties)) {

View File

@@ -0,0 +1,960 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { distinct } from '../../../base/common/arrays.js';
import { IStringDictionary } from '../../../base/common/collections.js';
import { Emitter, Event } from '../../../base/common/event.js';
import { IJSONSchema } from '../../../base/common/jsonSchema.js';
import * as types from '../../../base/common/types.js';
import * as nls from '../../../nls.js';
import { getLanguageTagSettingPlainKey } from './configuration.js';
import { Extensions as JSONExtensions, IJSONContributionRegistry } from '../../jsonschemas/common/jsonContributionRegistry.js';
import { PolicyName } from '../../policy/common/policy.js';
import { Registry } from '../../registry/common/platform.js';
/**
 * Identifiers for the two edit presentation types: multi-line text and
 * single-line text.
 */
export enum EditPresentationTypes {
    /** Multi-line text presentation. */
    Multiline = 'multilineText',

    /** Single-line text presentation. */
    Singleline = 'singlelineText',
}
/**
 * Registry keys exposed by this module. `Configuration` is the key under
 * which the configuration registry instance is added to the platform
 * `Registry`.
 */
export const Extensions = {
    Configuration: 'base.contributions.configuration',
};
/**
 * Describes a bulk change to the configuration registry: the default
 * configurations and configuration nodes to remove, and the ones to add.
 * Every field is optional.
 */
export interface IConfigurationDelta {
    /** Default configurations to remove. */
    removedDefaults?: IConfigurationDefaults[];

    /** Configuration nodes to remove. */
    removedConfigurations?: IConfigurationNode[];

    /** Default configurations to add. */
    addedDefaults?: IConfigurationDefaults[];

    /** Configuration nodes to add. */
    addedConfigurations?: IConfigurationNode[];
}
/**
 * Contract of the configuration registry: registering and deregistering
 * configuration nodes and default configurations, querying what has been
 * registered, and observing changes through events.
 */
export interface IConfigurationRegistry {

    /**
     * Register a single configuration node with the registry.
     */
    registerConfiguration(configuration: IConfigurationNode): void;

    /**
     * Register several configuration nodes with the registry.
     *
     * @param configurations The configuration nodes to register.
     * @param validate Whether the properties of each node are validated while registering.
     */
    registerConfigurations(configurations: IConfigurationNode[], validate?: boolean): void;

    /**
     * Deregister several configuration nodes from the registry.
     */
    deregisterConfigurations(configurations: IConfigurationNode[]): void;

    /**
     * Update the configuration registry by
     *  - registering the configurations in `add`
     *  - deregistering the configurations in `remove`
     */
    updateConfigurations(configurations: { add: IConfigurationNode[]; remove: IConfigurationNode[] }): void;

    /**
     * Register several default configurations with the registry.
     */
    registerDefaultConfigurations(defaultConfigurations: IConfigurationDefaults[]): void;

    /**
     * Deregister several default configurations from the registry.
     */
    deregisterDefaultConfigurations(defaultConfigurations: IConfigurationDefaults[]): void;

    /**
     * Bulk update of the configuration registry (defaults and configurations, removals and additions).
     *
     * @param delta The removals and additions to apply.
     */
    deltaConfiguration(delta: IConfigurationDelta): void;

    /**
     * Return the default configurations that have been registered.
     */
    getRegisteredDefaultConfigurations(): IConfigurationDefaults[];

    /**
     * Return the registered configuration defaults overrides, as a map keyed by string.
     */
    getConfigurationDefaultsOverrides(): Map<string, IConfigurationDefaultOverrideValue>;

    /**
     * Signal that the schema of a configuration setting has changed. Only changes to
     * enumeration values are currently supported; property or default value changes
     * are not allowed.
     */
    notifyConfigurationSchemaUpdated(...configurations: IConfigurationNode[]): void;

    /**
     * Event that fires whenever the configuration schema has changed, for example
     * after a configuration has been registered. It carries no payload.
     */
    readonly onDidSchemaChange: Event<void>;

    /**
     * Event that fires whenever configurations or default configurations have been
     * registered or deregistered. `properties` holds the keys of the affected
     * settings; `defaultsOverrides` is set when default overrides were involved.
     */
    readonly onDidUpdateConfiguration: Event<{ properties: ReadonlySet<string>; defaultsOverrides?: boolean }>;

    /**
     * Return all configuration nodes contributed to this registry.
     */
    getConfigurations(): IConfigurationNode[];

    /**
     * Return the settings of all configuration nodes contributed to this registry.
     */
    getConfigurationProperties(): IStringDictionary<IRegisteredConfigurationPropertySchema>;

    /**
     * Return all configurations by policy name.
     */
    getPolicyConfigurations(): Map<PolicyName, string>;

    /**
     * Return the excluded settings of all configuration nodes contributed to this registry.
     */
    getExcludedConfigurationProperties(): IStringDictionary<IRegisteredConfigurationPropertySchema>;

    /**
     * Register the identifiers for editor configurations.
     */
    registerOverrideIdentifiers(identifiers: string[]): void;
}
/**
 * Where a configuration setting may be defined. The numeric values are
 * consecutive, starting at 1 in declaration order.
 */
export const enum ConfigurationScope {
	/** Application-wide setting; only configurable in the default profile's user settings. */
	APPLICATION = 1,
	/** Machine-bound setting; only configurable in local and remote user settings. */
	MACHINE = 2,
	/** Application + machine setting; only configurable in the default profile's user settings and remote user settings. */
	APPLICATION_MACHINE = 3,
	/** Window-level setting; configurable in user or workspace settings. */
	WINDOW = 4,
	/** Resource-level setting; configurable in user, workspace or folder settings. */
	RESOURCE = 5,
	/** Resource-level setting that may additionally be set in language specific settings. */
	LANGUAGE_OVERRIDABLE = 6,
	/** Machine-bound setting that may additionally be set in workspace or folder settings. */
	MACHINE_OVERRIDABLE = 7,
}
/**
* Policy metadata that can be attached to a configuration property
* (see the `policy` member of `IConfigurationPropertySchema`).
*/
export interface IPolicy {
/**
* The policy name.
*/
readonly name: PolicyName;
/**
* The Code version in which this policy was introduced.
* Typed as a `major.minor` template string.
*/
readonly minimumVersion: `${number}.${number}`;
}
/**
* JSON schema of a single configuration setting, extended with
* registry-specific metadata (scope, trust, sync and presentation hints).
*/
export interface IConfigurationPropertySchema extends IJSONSchema {
/**
* Scope of the setting. When left unset, the registry fills it in during
* registration from the contributing node's scope, falling back to
* `ConfigurationScope.WINDOW`.
*/
scope?: ConfigurationScope;
/**
* When restricted, value of this configuration will be read only from trusted sources.
* For eg., If the workspace is not trusted, then the value of this configuration is not read from workspace settings file.
*/
restricted?: boolean;
/**
* When `false` this property is excluded from the registry. Default is to include.
*/
included?: boolean;
/**
* List of tags associated to the property.
* - A tag can be used for filtering
* - Use `experimental` tag for marking the setting as experimental.
* - Use `onExP` tag for marking that the default of the setting can be changed by running experiments.
*/
tags?: string[];
/**
* When enabled this setting is ignored during sync and user can override this.
*/
ignoreSync?: boolean;
/**
* When enabled this setting is ignored during sync and user cannot override this.
*/
disallowSyncIgnore?: boolean;
/**
* Disallow extensions to contribute configuration default value for this setting.
*/
disallowConfigurationDefault?: boolean;
/**
* Labels for enumeration items
*/
enumItemLabels?: string[];
/**
* When specified, controls the presentation format of string settings.
* Otherwise, the presentation format defaults to `singleline`.
*/
editPresentation?: EditPresentationTypes;
/**
* When specified, gives an order number for the setting
* within the settings editor. Otherwise, the setting is placed at the end.
*/
order?: number;
/**
* When specified, this setting's value can always be overwritten by
* a system-wide policy.
*/
policy?: IPolicy;
}
/**
* Identifies the contributor of a configuration node or of a default value.
*/
export interface IExtensionInfo {
// Identifier of the contributor; the registry compares this when removing default overrides by source.
id: string;
// Optional human readable name.
displayName?: string;
}
/**
* A node of contributed configuration: a set of properties plus optional
* nested nodes (`allOf`) that are registered recursively with it.
*/
export interface IConfigurationNode {
id?: string;
order?: number;
type?: string | string[];
title?: string;
description?: string;
// Settings contributed by this node, keyed by setting name.
properties?: IStringDictionary<IConfigurationPropertySchema>;
// Nested nodes; they are registered and deregistered together with this node.
allOf?: IConfigurationNode[];
// Default scope applied to properties of this node (and nested nodes) that do not declare their own.
scope?: ConfigurationScope;
// Contributor of this node; recorded as `source` on each registered property.
extensionInfo?: IExtensionInfo;
// Keys treated as restricted when the property itself does not specify `restricted`.
restrictedProperties?: string[];
}
/**
* Source of a default value: either a single contributor, or - for object
* valued defaults that were merged from several contributions - a map from
* a `property.objectKey` style key to the contributor of that entry.
*/
export type ConfigurationDefaultValueSource = IExtensionInfo | Map<string, IExtensionInfo>;
/**
* A set of default value overrides contributed to the registry.
*/
export interface IConfigurationDefaults {
// Default values keyed by setting name. Keys matching the override property
// pattern (e.g. `[lang]`) are handled as language specific defaults.
overrides: IStringDictionary<any>;
// Contributor of these defaults, if any.
source?: IExtensionInfo;
}
/**
* A property schema as stored by the registry, augmented with bookkeeping
* about where the property and its current default value came from.
*/
export type IRegisteredConfigurationPropertySchema = IConfigurationPropertySchema & {
// The default declared by the property itself, captured at registration before any default override is applied.
defaultDefaultValue?: any;
source?: IExtensionInfo; // Source of the Property
defaultValueSource?: ConfigurationDefaultValueSource; // Source of the Default Value
};
/**
* A single, un-merged default override as contributed by one source.
*/
export interface IConfigurationDefaultOverride {
// The contributed default value.
readonly value: any;
readonly source?: IExtensionInfo; // Source of the default override
}
/**
* The effective default override for a key after merging all contributed
* overrides for that key.
*/
export interface IConfigurationDefaultOverrideValue {
// The merged default value.
readonly value: any;
// Either the single contributor, or a per-entry map when the value was merged from objects.
readonly source?: ConfigurationDefaultValueSource;
}
// Module-level JSON schema fragments that the registry keeps in sync with the
// registered properties: `allSettings` receives every property, the others
// receive properties of the matching scope. `patternProperties` holds the
// language override pattern installed by the registry.
export const allSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
export const applicationSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
export const applicationMachineSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
export const machineSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
export const machineOverridableSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
export const windowSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
// Also receives LANGUAGE_OVERRIDABLE properties in addition to RESOURCE ones.
export const resourceSettings: { properties: IStringDictionary<IConfigurationPropertySchema>; patternProperties: IStringDictionary<IConfigurationPropertySchema> } = { properties: {}, patternProperties: {} };
// Schema ids used as `$ref` targets / registration keys.
export const resourceLanguageSettingsSchemaId = 'vscode://schemas/settings/resourceLanguage';
export const configurationDefaultsSchemaId = 'vscode://schemas/settings/configurationDefaults';
// JSON contribution registry with which the resource language settings schema is registered.
const contributionRegistry = Registry.as<IJSONContributionRegistry>(JSONExtensions.JSONContribution);
/**
* Implementation of `IConfigurationRegistry`.
*
* Tracks contributed configuration nodes and their properties, contributed
* default overrides (both raw and merged), policy-to-setting mappings and the
* known language override identifiers. Registered properties are mirrored
* into the module-level per-scope settings schemas, and changes are
* announced through `onDidSchemaChange` / `onDidUpdateConfiguration`.
*/
class ConfigurationRegistry implements IConfigurationRegistry {
// Defaults exactly as they were registered (object identity is used on removal).
private readonly registeredConfigurationDefaults: IConfigurationDefaults[] = [];
// Per key: every contributed override in registration order, plus the merged effective value.
private readonly configurationDefaultsOverrides: Map<string, { configurationDefaultOverrides: IConfigurationDefaultOverride[]; configurationDefaultOverrideValue?: IConfigurationDefaultOverrideValue }>;
// Synthetic node that holds the properties generated for language default overrides.
private readonly defaultLanguageConfigurationOverridesNode: IConfigurationNode;
private readonly configurationContributors: IConfigurationNode[];
private readonly configurationProperties: IStringDictionary<IRegisteredConfigurationPropertySchema>;
// Policy name -> key of the setting that declared it.
private readonly policyConfigurations: Map<PolicyName, string>;
// Properties registered with `included: false`.
private readonly excludedConfigurationProperties: IStringDictionary<IRegisteredConfigurationPropertySchema>;
private readonly resourceLanguageSettingsSchema: IJSONSchema;
private readonly overrideIdentifiers = new Set<string>();
private readonly _onDidSchemaChange = new Emitter<void>();
readonly onDidSchemaChange: Event<void> = this._onDidSchemaChange.event;
private readonly _onDidUpdateConfiguration = new Emitter<{ properties: ReadonlySet<string>; defaultsOverrides?: boolean }>();
readonly onDidUpdateConfiguration = this._onDidUpdateConfiguration.event;
/**
* Initializes the empty registry state, registers the resource language
* settings schema and installs the override property pattern.
*/
constructor() {
this.configurationDefaultsOverrides = new Map();
this.defaultLanguageConfigurationOverridesNode = {
id: 'defaultOverrides',
title: nls.localize('defaultLanguageConfigurationOverrides.title', "Default Language Configuration Overrides"),
properties: {}
};
// The synthetic overrides node is always the first contributor.
this.configurationContributors = [this.defaultLanguageConfigurationOverridesNode];
this.resourceLanguageSettingsSchema = {
properties: {},
patternProperties: {},
additionalProperties: true,
allowTrailingCommas: true,
allowComments: true
};
this.configurationProperties = {};
this.policyConfigurations = new Map<PolicyName, string>();
this.excludedConfigurationProperties = {};
contributionRegistry.registerSchema(resourceLanguageSettingsSchemaId, this.resourceLanguageSettingsSchema);
this.registerOverridePropertyPatternKey();
}
/**
* Registers a single configuration node; delegates to `registerConfigurations`.
*/
public registerConfiguration(configuration: IConfigurationNode, validate: boolean = true): void {
this.registerConfigurations([configuration], validate);
}
/**
* Registers the given nodes, re-registers the resource language schema and
* fires both change events with the set of affected property keys.
*/
public registerConfigurations(configurations: IConfigurationNode[], validate: boolean = true): void {
const properties = new Set<string>();
this.doRegisterConfigurations(configurations, validate, properties);
contributionRegistry.registerSchema(resourceLanguageSettingsSchemaId, this.resourceLanguageSettingsSchema);
this._onDidSchemaChange.fire();
this._onDidUpdateConfiguration.fire({ properties });
}
/**
* Removes the given nodes and their properties, then re-registers the
* resource language schema and fires both change events.
*/
public deregisterConfigurations(configurations: IConfigurationNode[]): void {
const properties = new Set<string>();
this.doDeregisterConfigurations(configurations, properties);
contributionRegistry.registerSchema(resourceLanguageSettingsSchemaId, this.resourceLanguageSettingsSchema);
this._onDidSchemaChange.fire();
this._onDidUpdateConfiguration.fire({ properties });
}
/**
* Removes `remove` and then adds `add` (without validation) as one update,
* firing the change events once.
*/
public updateConfigurations({ add, remove }: { add: IConfigurationNode[]; remove: IConfigurationNode[] }): void {
const properties = new Set<string>();
this.doDeregisterConfigurations(remove, properties);
this.doRegisterConfigurations(add, false, properties);
contributionRegistry.registerSchema(resourceLanguageSettingsSchemaId, this.resourceLanguageSettingsSchema);
this._onDidSchemaChange.fire();
this._onDidUpdateConfiguration.fire({ properties });
}
/**
* Registers default overrides and fires the change events with
* `defaultsOverrides: true`.
*/
public registerDefaultConfigurations(configurationDefaults: IConfigurationDefaults[]): void {
const properties = new Set<string>();
this.doRegisterDefaultConfigurations(configurationDefaults, properties);
this._onDidSchemaChange.fire();
this._onDidUpdateConfiguration.fire({ properties, defaultsOverrides: true });
}
/**
* Records the given defaults and merges each override into the effective
* default for its key. Every visited key is added to `bucket`.
*/
private doRegisterDefaultConfigurations(configurationDefaults: IConfigurationDefaults[], bucket: Set<string>) {
this.registeredConfigurationDefaults.push(...configurationDefaults);
const overrideIdentifiers: string[] = [];
for (const { overrides, source } of configurationDefaults) {
for (const key in overrides) {
bucket.add(key);
// Get the entry for this key, creating an empty one on first use.
const configurationDefaultOverridesForKey = this.configurationDefaultsOverrides.get(key)
?? this.configurationDefaultsOverrides.set(key, { configurationDefaultOverrides: [] }).get(key)!;
const value = overrides[key];
configurationDefaultOverridesForKey.configurationDefaultOverrides.push({ value, source });
// Configuration defaults for Override Identifiers
if (OVERRIDE_PROPERTY_REGEX.test(key)) {
const newDefaultOverride = this.mergeDefaultConfigurationsForOverrideIdentifier(key, value, source, configurationDefaultOverridesForKey.configurationDefaultOverrideValue);
// Merge failed: skip. The raw override pushed above stays recorded.
if (!newDefaultOverride) {
continue;
}
configurationDefaultOverridesForKey.configurationDefaultOverrideValue = newDefaultOverride;
this.updateDefaultOverrideProperty(key, newDefaultOverride, source);
overrideIdentifiers.push(...overrideIdentifiersFromKey(key));
}
// Configuration defaults for Configuration Properties
else {
const newDefaultOverride = this.mergeDefaultConfigurationsForConfigurationProperty(key, value, source, configurationDefaultOverridesForKey.configurationDefaultOverrideValue);
if (!newDefaultOverride) {
continue;
}
configurationDefaultOverridesForKey.configurationDefaultOverrideValue = newDefaultOverride;
// Only refresh the property if it has already been registered.
const property = this.configurationProperties[key];
if (property) {
this.updatePropertyDefaultValue(key, property);
this.updateSchema(key, property);
}
}
}
}
this.doRegisterOverrideIdentifiers(overrideIdentifiers);
}
/**
* Removes previously registered default overrides and fires the change
* events with `defaultsOverrides: true`.
*/
public deregisterDefaultConfigurations(defaultConfigurations: IConfigurationDefaults[]): void {
const properties = new Set<string>();
this.doDeregisterDefaultConfigurations(defaultConfigurations, properties);
this._onDidSchemaChange.fire();
this._onDidUpdateConfiguration.fire({ properties, defaultsOverrides: true });
}
/**
* Removes the given defaults and recomputes the effective default of each
* affected key by re-merging the overrides that remain. Only keys for which
* an override was actually found and removed are added to `bucket`.
*/
private doDeregisterDefaultConfigurations(defaultConfigurations: IConfigurationDefaults[], bucket: Set<string>): void {
// Drop the raw entries; matching is by object identity.
for (const defaultConfiguration of defaultConfigurations) {
const index = this.registeredConfigurationDefaults.indexOf(defaultConfiguration);
if (index !== -1) {
this.registeredConfigurationDefaults.splice(index, 1);
}
}
for (const { overrides, source } of defaultConfigurations) {
for (const key in overrides) {
const configurationDefaultOverridesForKey = this.configurationDefaultsOverrides.get(key);
if (!configurationDefaultOverridesForKey) {
continue;
}
// With a source, match the override by source id; without one, match by strict value equality.
const index = configurationDefaultOverridesForKey.configurationDefaultOverrides
.findIndex(configurationDefaultOverride => source ? configurationDefaultOverride.source?.id === source.id : configurationDefaultOverride.value === overrides[key]);
if (index === -1) {
continue;
}
configurationDefaultOverridesForKey.configurationDefaultOverrides.splice(index, 1);
if (configurationDefaultOverridesForKey.configurationDefaultOverrides.length === 0) {
this.configurationDefaultsOverrides.delete(key);
}
if (OVERRIDE_PROPERTY_REGEX.test(key)) {
// Rebuild the merged value from scratch out of the remaining overrides.
let configurationDefaultOverrideValue: IConfigurationDefaultOverrideValue | undefined;
for (const configurationDefaultOverride of configurationDefaultOverridesForKey.configurationDefaultOverrides) {
configurationDefaultOverrideValue = this.mergeDefaultConfigurationsForOverrideIdentifier(key, configurationDefaultOverride.value, configurationDefaultOverride.source, configurationDefaultOverrideValue);
}
if (configurationDefaultOverrideValue && !types.isEmptyObject(configurationDefaultOverrideValue.value)) {
configurationDefaultOverridesForKey.configurationDefaultOverrideValue = configurationDefaultOverrideValue;
// NOTE(review): `source` is the source of the defaults being removed, not of the
// remaining overrides - confirm this is the intended property source.
this.updateDefaultOverrideProperty(key, configurationDefaultOverrideValue, source);
} else {
// Nothing left for this key: remove the generated property entirely.
this.configurationDefaultsOverrides.delete(key);
delete this.configurationProperties[key];
delete this.defaultLanguageConfigurationOverridesNode.properties![key];
}
} else {
let configurationDefaultOverrideValue: IConfigurationDefaultOverrideValue | undefined;
for (const configurationDefaultOverride of configurationDefaultOverridesForKey.configurationDefaultOverrides) {
configurationDefaultOverrideValue = this.mergeDefaultConfigurationsForConfigurationProperty(key, configurationDefaultOverride.value, configurationDefaultOverride.source, configurationDefaultOverrideValue);
}
// May be undefined when no overrides remain; the property then falls back to its own default.
configurationDefaultOverridesForKey.configurationDefaultOverrideValue = configurationDefaultOverrideValue;
const property = this.configurationProperties[key];
if (property) {
this.updatePropertyDefaultValue(key, property);
this.updateSchema(key, property);
}
}
bucket.add(key);
}
}
this.updateOverridePropertyPatternKey();
}
/**
* Creates a fresh object-typed property for a language override key and
* installs it in the property map and in the synthetic overrides node.
*/
private updateDefaultOverrideProperty(key: string, newDefaultOverride: IConfigurationDefaultOverrideValue, source: IExtensionInfo | undefined): void {
const property: IRegisteredConfigurationPropertySchema = {
type: 'object',
default: newDefaultOverride.value,
description: nls.localize('defaultLanguageConfiguration.description', "Configure settings to be overridden for the {0} language.", getLanguageTagSettingPlainKey(key)),
$ref: resourceLanguageSettingsSchemaId,
defaultDefaultValue: newDefaultOverride.value,
source,
defaultValueSource: source
};
this.configurationProperties[key] = property;
this.defaultLanguageConfigurationOverridesNode.properties![key] = property;
}
/**
* Merges one contributed language override object into the existing merged
* override. Object valued settings are shallow-merged, anything else is
* replaced. Returns undefined if the existing source is not a Map.
* Note: when `existingDefaultOverride` has a truthy value, that value object
* and its source map are updated in place.
*/
private mergeDefaultConfigurationsForOverrideIdentifier(overrideIdentifier: string, configurationValueObject: IStringDictionary<any>, valueSource: IExtensionInfo | undefined, existingDefaultOverride: IConfigurationDefaultOverrideValue | undefined): IConfigurationDefaultOverrideValue | undefined {
const defaultValue = existingDefaultOverride?.value || {};
const source = existingDefaultOverride?.source ?? new Map<string, IExtensionInfo>();
// This should not happen
if (!(source instanceof Map)) {
console.error('objectConfigurationSources is not a Map');
return undefined;
}
for (const propertyKey of Object.keys(configurationValueObject)) {
const propertyDefaultValue = configurationValueObject[propertyKey];
// Merge only when the new value is an object and the current one is absent or also an object.
const isObjectSetting = types.isObject(propertyDefaultValue) &&
(types.isUndefined(defaultValue[propertyKey]) || types.isObject(defaultValue[propertyKey]));
// If the default value is an object, merge the objects and store the source of each keys
if (isObjectSetting) {
defaultValue[propertyKey] = { ...(defaultValue[propertyKey] ?? {}), ...propertyDefaultValue };
// Track the source of each value in the object
if (valueSource) {
for (const objectKey in propertyDefaultValue) {
source.set(`${propertyKey}.${objectKey}`, valueSource);
}
}
}
// Primitive values are overridden
else {
defaultValue[propertyKey] = propertyDefaultValue;
if (valueSource) {
source.set(propertyKey, valueSource);
} else {
source.delete(propertyKey);
}
}
}
return { value: defaultValue, source };
}
/**
* Merges one contributed default for a regular setting with the existing
* effective default. Object values are shallow-merged on top of the existing
* value and their source is tracked per object key; other values simply
* replace it. Returns undefined if an object merge finds a non-Map source.
*/
private mergeDefaultConfigurationsForConfigurationProperty(propertyKey: string, value: any, valuesSource: IExtensionInfo | undefined, existingDefaultOverride: IConfigurationDefaultOverrideValue | undefined): IConfigurationDefaultOverrideValue | undefined {
const property = this.configurationProperties[propertyKey];
const existingDefaultValue = existingDefaultOverride?.value ?? property?.defaultDefaultValue;
let source: ConfigurationDefaultValueSource | undefined = valuesSource;
// A registered property decides by its declared type; an unregistered one by the shape of the existing value.
const isObjectSetting = types.isObject(value) &&
(
property !== undefined && property.type === 'object' ||
property === undefined && (types.isUndefined(existingDefaultValue) || types.isObject(existingDefaultValue))
);
// If the default value is an object, merge the objects and store the source of each keys
if (isObjectSetting) {
source = existingDefaultOverride?.source ?? new Map<string, IExtensionInfo>();
// This should not happen
if (!(source instanceof Map)) {
console.error('defaultValueSource is not a Map');
return undefined;
}
for (const objectKey in value) {
if (valuesSource) {
source.set(`${propertyKey}.${objectKey}`, valuesSource);
}
}
value = { ...(types.isObject(existingDefaultValue) ? existingDefaultValue : {}), ...value };
}
return { value, source };
}
/**
* Applies a combined change in a fixed order - removed defaults, added
* defaults, removed configurations, added configurations (unvalidated) -
* and fires the change events once at the end.
*/
public deltaConfiguration(delta: IConfigurationDelta): void {
// defaults: remove
let defaultsOverrides = false;
const properties = new Set<string>();
if (delta.removedDefaults) {
this.doDeregisterDefaultConfigurations(delta.removedDefaults, properties);
defaultsOverrides = true;
}
// defaults: add
if (delta.addedDefaults) {
this.doRegisterDefaultConfigurations(delta.addedDefaults, properties);
defaultsOverrides = true;
}
// configurations: remove
if (delta.removedConfigurations) {
this.doDeregisterConfigurations(delta.removedConfigurations, properties);
}
// configurations: add
if (delta.addedConfigurations) {
this.doRegisterConfigurations(delta.addedConfigurations, false, properties);
}
this._onDidSchemaChange.fire();
this._onDidUpdateConfiguration.fire({ properties, defaultsOverrides });
}
/**
* Fires `onDidSchemaChange`. The passed configurations are not inspected.
*/
public notifyConfigurationSchemaUpdated(...configurations: IConfigurationNode[]) {
this._onDidSchemaChange.fire();
}
/**
* Adds language override identifiers and fires `onDidSchemaChange`.
*/
public registerOverrideIdentifiers(overrideIdentifiers: string[]): void {
this.doRegisterOverrideIdentifiers(overrideIdentifiers);
this._onDidSchemaChange.fire();
}
/**
* Adds the identifiers to the known set and refreshes the per-identifier
* override properties in the settings schemas.
*/
private doRegisterOverrideIdentifiers(overrideIdentifiers: string[]) {
for (const overrideIdentifier of overrideIdentifiers) {
this.overrideIdentifiers.add(overrideIdentifier);
}
this.updateOverridePropertyPatternKey();
}
/**
* Registers each node: validates/records its properties, remembers the node
* as a contributor and mirrors its properties into the settings schemas.
*/
private doRegisterConfigurations(configurations: IConfigurationNode[], validate: boolean, bucket: Set<string>): void {
configurations.forEach(configuration => {
this.validateAndRegisterProperties(configuration, validate, configuration.extensionInfo, configuration.restrictedProperties, undefined, bucket);
this.configurationContributors.push(configuration);
this.registerJSONConfiguration(configuration);
});
}
/**
* Removes each node's properties (recursing through `allOf`), including
* their policy mapping and schema entries, and removes the node itself
* from the contributors list (matched by identity).
*/
private doDeregisterConfigurations(configurations: IConfigurationNode[], bucket: Set<string>): void {
const deregisterConfiguration = (configuration: IConfigurationNode) => {
if (configuration.properties) {
for (const key in configuration.properties) {
bucket.add(key);
const property = this.configurationProperties[key];
if (property?.policy?.name) {
this.policyConfigurations.delete(property.policy.name);
}
delete this.configurationProperties[key];
this.removeFromSchema(key, configuration.properties[key]);
}
}
configuration.allOf?.forEach(node => deregisterConfiguration(node));
};
for (const configuration of configurations) {
deregisterConfiguration(configuration);
const index = this.configurationContributors.indexOf(configuration);
if (index !== -1) {
this.configurationContributors.splice(index, 1);
}
}
}
/**
* Registers the properties of a node and, recursively, of its `allOf`
* sub-nodes. The passed node is mutated: properties that fail validation
* or are excluded are deleted from it, and the remaining ones are augmented
* with source, default value, scope and restricted information.
* `scope` is the inherited scope used when neither the node nor the
* property declares one.
*/
private validateAndRegisterProperties(configuration: IConfigurationNode, validate: boolean = true, extensionInfo: IExtensionInfo | undefined, restrictedProperties: string[] | undefined, scope: ConfigurationScope = ConfigurationScope.WINDOW, bucket: Set<string>): void {
scope = types.isUndefinedOrNull(configuration.scope) ? scope : configuration.scope;
const properties = configuration.properties;
if (properties) {
for (const key in properties) {
// Same object as `properties[key]`; writes below are visible through both.
const property: IRegisteredConfigurationPropertySchema = properties[key];
// `validateProperty` returns an error message when invalid; such properties are dropped.
if (validate && validateProperty(key, property)) {
delete properties[key];
continue;
}
property.source = extensionInfo;
// update default value
property.defaultDefaultValue = properties[key].default;
this.updatePropertyDefaultValue(key, property);
// update scope
if (OVERRIDE_PROPERTY_REGEX.test(key)) {
property.scope = undefined; // No scope for overridable properties `[${identifier}]`
} else {
property.scope = types.isUndefinedOrNull(property.scope) ? scope : property.scope;
property.restricted = types.isUndefinedOrNull(property.restricted) ? !!restrictedProperties?.includes(key) : property.restricted;
}
// Add to properties maps
// Property is included by default if 'included' is unspecified
if (properties[key].hasOwnProperty('included') && !properties[key].included) {
this.excludedConfigurationProperties[key] = properties[key];
delete properties[key];
continue;
} else {
this.configurationProperties[key] = properties[key];
if (properties[key].policy?.name) {
this.policyConfigurations.set(properties[key].policy!.name, key);
}
}
if (!properties[key].deprecationMessage && properties[key].markdownDeprecationMessage) {
// If not set, default deprecationMessage to the markdown source
properties[key].deprecationMessage = properties[key].markdownDeprecationMessage;
}
bucket.add(key);
}
}
const subNodes = configuration.allOf;
if (subNodes) {
for (const node of subNodes) {
this.validateAndRegisterProperties(node, validate, extensionInfo, restrictedProperties, scope, bucket);
}
}
}
// TODO: @sandy081 - Remove this method and include required info in getConfigurationProperties
// Returns the internal contributors array itself (not a copy).
getConfigurations(): IConfigurationNode[] {
return this.configurationContributors;
}
// Returns the internal property dictionary itself (not a copy).
getConfigurationProperties(): IStringDictionary<IRegisteredConfigurationPropertySchema> {
return this.configurationProperties;
}
// Returns the internal policy name -> setting key map itself (not a copy).
getPolicyConfigurations(): Map<PolicyName, string> {
return this.policyConfigurations;
}
// Returns the internal dictionary of excluded properties itself (not a copy).
getExcludedConfigurationProperties(): IStringDictionary<IRegisteredConfigurationPropertySchema> {
return this.excludedConfigurationProperties;
}
// Returns a shallow copy of the registered defaults.
getRegisteredDefaultConfigurations(): IConfigurationDefaults[] {
return [...this.registeredConfigurationDefaults];
}
/**
* Builds a new map of key -> merged default override, omitting keys that
* currently have no merged value.
*/
getConfigurationDefaultsOverrides(): Map<string, IConfigurationDefaultOverrideValue> {
const configurationDefaultsOverrides = new Map<string, IConfigurationDefaultOverrideValue>();
for (const [key, value] of this.configurationDefaultsOverrides) {
if (value.configurationDefaultOverrideValue) {
configurationDefaultsOverrides.set(key, value.configurationDefaultOverrideValue);
}
}
return configurationDefaultsOverrides;
}
/**
* Mirrors all properties of the node and its `allOf` sub-nodes into the
* settings schemas.
*/
private registerJSONConfiguration(configuration: IConfigurationNode) {
const register = (configuration: IConfigurationNode) => {
const properties = configuration.properties;
if (properties) {
for (const key in properties) {
this.updateSchema(key, properties[key]);
}
}
const subNodes = configuration.allOf;
subNodes?.forEach(register);
};
register(configuration);
}
/**
* Adds the property to `allSettings` and to the schema matching its scope.
* A property whose scope matches no case is only added to `allSettings`.
*/
private updateSchema(key: string, property: IConfigurationPropertySchema): void {
allSettings.properties[key] = property;
switch (property.scope) {
case ConfigurationScope.APPLICATION:
applicationSettings.properties[key] = property;
break;
case ConfigurationScope.MACHINE:
machineSettings.properties[key] = property;
break;
case ConfigurationScope.APPLICATION_MACHINE:
applicationMachineSettings.properties[key] = property;
break;
case ConfigurationScope.MACHINE_OVERRIDABLE:
machineOverridableSettings.properties[key] = property;
break;
case ConfigurationScope.WINDOW:
windowSettings.properties[key] = property;
break;
case ConfigurationScope.RESOURCE:
resourceSettings.properties[key] = property;
break;
case ConfigurationScope.LANGUAGE_OVERRIDABLE:
// Language overridable settings are resource settings that also appear in the language schema.
resourceSettings.properties[key] = property;
this.resourceLanguageSettingsSchema.properties![key] = property;
break;
}
}
/**
* Removes the property from `allSettings` and from the schema matching the
* scope of the passed property.
*/
private removeFromSchema(key: string, property: IConfigurationPropertySchema): void {
delete allSettings.properties[key];
switch (property.scope) {
case ConfigurationScope.APPLICATION:
delete applicationSettings.properties[key];
break;
case ConfigurationScope.MACHINE:
delete machineSettings.properties[key];
break;
case ConfigurationScope.APPLICATION_MACHINE:
delete applicationMachineSettings.properties[key];
break;
case ConfigurationScope.MACHINE_OVERRIDABLE:
delete machineOverridableSettings.properties[key];
break;
case ConfigurationScope.WINDOW:
delete windowSettings.properties[key];
break;
// RESOURCE and LANGUAGE_OVERRIDABLE share the same cleanup.
case ConfigurationScope.RESOURCE:
case ConfigurationScope.LANGUAGE_OVERRIDABLE:
delete resourceSettings.properties[key];
delete this.resourceLanguageSettingsSchema.properties![key];
break;
}
}
/**
* For every known override identifier, installs a `[identifier]` property
* (with its current default) in all per-scope settings schemas. A new schema
* object is created per identifier and shared across the schemas.
*/
private updateOverridePropertyPatternKey(): void {
for (const overrideIdentifier of this.overrideIdentifiers.values()) {
const overrideIdentifierProperty = `[${overrideIdentifier}]`;
const resourceLanguagePropertiesSchema: IJSONSchema = {
type: 'object',
description: nls.localize('overrideSettings.defaultDescription', "Configure editor settings to be overridden for a language."),
errorMessage: nls.localize('overrideSettings.errorMessage', "This setting does not support per-language configuration."),
$ref: resourceLanguageSettingsSchemaId,
};
this.updatePropertyDefaultValue(overrideIdentifierProperty, resourceLanguagePropertiesSchema);
allSettings.properties[overrideIdentifierProperty] = resourceLanguagePropertiesSchema;
applicationSettings.properties[overrideIdentifierProperty] = resourceLanguagePropertiesSchema;
applicationMachineSettings.properties[overrideIdentifierProperty] = resourceLanguagePropertiesSchema;
machineSettings.properties[overrideIdentifierProperty] = resourceLanguagePropertiesSchema;
machineOverridableSettings.properties[overrideIdentifierProperty] = resourceLanguagePropertiesSchema;
windowSettings.properties[overrideIdentifierProperty] = resourceLanguagePropertiesSchema;
resourceSettings.properties[overrideIdentifierProperty] = resourceLanguagePropertiesSchema;
}
}
/**
* Installs the generic override pattern (`OVERRIDE_PROPERTY_PATTERN`) under
* `patternProperties` of all per-scope settings schemas and fires
* `onDidSchemaChange`.
*/
private registerOverridePropertyPatternKey(): void {
const resourceLanguagePropertiesSchema: IJSONSchema = {
type: 'object',
description: nls.localize('overrideSettings.defaultDescription', "Configure editor settings to be overridden for a language."),
errorMessage: nls.localize('overrideSettings.errorMessage', "This setting does not support per-language configuration."),
$ref: resourceLanguageSettingsSchemaId,
};
allSettings.patternProperties[OVERRIDE_PROPERTY_PATTERN] = resourceLanguagePropertiesSchema;
applicationSettings.patternProperties[OVERRIDE_PROPERTY_PATTERN] = resourceLanguagePropertiesSchema;
applicationMachineSettings.patternProperties[OVERRIDE_PROPERTY_PATTERN] = resourceLanguagePropertiesSchema;
machineSettings.patternProperties[OVERRIDE_PROPERTY_PATTERN] = resourceLanguagePropertiesSchema;
machineOverridableSettings.patternProperties[OVERRIDE_PROPERTY_PATTERN] = resourceLanguagePropertiesSchema;
windowSettings.patternProperties[OVERRIDE_PROPERTY_PATTERN] = resourceLanguagePropertiesSchema;
resourceSettings.patternProperties[OVERRIDE_PROPERTY_PATTERN] = resourceLanguagePropertiesSchema;
this._onDidSchemaChange.fire();
}
/**
* Sets `default` and `defaultValueSource` on the property. Precedence:
* 1. the merged default override for the key (skipped when the property
*    disallows configuration defaults and the override has a source),
* 2. the property's own `defaultDefaultValue`,
* 3. the type-based default from `getDefaultValue`.
*/
private updatePropertyDefaultValue(key: string, property: IRegisteredConfigurationPropertySchema): void {
const configurationdefaultOverride = this.configurationDefaultsOverrides.get(key)?.configurationDefaultOverrideValue;
let defaultValue = undefined;
let defaultSource = undefined;
if (configurationdefaultOverride
&& (!property.disallowConfigurationDefault || !configurationdefaultOverride.source) // Prevent overriding the default value if the property is disallowed to be overridden by configuration defaults from extensions
) {
defaultValue = configurationdefaultOverride.value;
defaultSource = configurationdefaultOverride.source;
}
if (types.isUndefined(defaultValue)) {
defaultValue = property.defaultDefaultValue;
// The value no longer comes from an override, so it has no override source.
defaultSource = undefined;
}
if (types.isUndefined(defaultValue)) {
defaultValue = getDefaultValue(property.type);
}
property.default = defaultValue;
property.defaultValueSource = defaultSource;
}
}
// Matches one bracketed identifier such as `[typescript]`; the capture group holds the text between the brackets.
const OVERRIDE_IDENTIFIER_PATTERN = `\\[([^\\]]+)\\]`;
// Global variant used with repeated `exec` calls; being global, it carries `lastIndex` state between calls.
const OVERRIDE_IDENTIFIER_REGEX = new RegExp(OVERRIDE_IDENTIFIER_PATTERN, 'g');
// Matches a whole key made of one or more consecutive bracketed identifiers, e.g. `[a]` or `[a][b]`.
export const OVERRIDE_PROPERTY_PATTERN = `^(${OVERRIDE_IDENTIFIER_PATTERN})+$`;
export const OVERRIDE_PROPERTY_REGEX = new RegExp(OVERRIDE_PROPERTY_PATTERN);
/**
 * Extracts the override identifiers from an override property key such as
 * `[a][b]`. Identifiers are trimmed, blank ones are dropped and duplicates
 * are removed. Keys that are not override property keys yield no identifiers.
 */
export function overrideIdentifiersFromKey(key: string): string[] {
	const collected: string[] = [];
	if (!OVERRIDE_PROPERTY_REGEX.test(key)) {
		return distinct(collected);
	}
	// The identifier regex is global: keep calling exec until it reports no further match.
	for (let match = OVERRIDE_IDENTIFIER_REGEX.exec(key); match?.length; match = OVERRIDE_IDENTIFIER_REGEX.exec(key)) {
		const trimmed = match[1].trim();
		if (trimmed) {
			collected.push(trimmed);
		}
	}
	return distinct(collected);
}
/**
 * Builds an override property key by wrapping each identifier in brackets
 * and concatenating them in order. An empty list yields the empty string.
 */
export function keyFromOverrideIdentifiers(overrideIdentifiers: string[]): string {
	return overrideIdentifiers
		.map(identifier => `[${identifier}]`)
		.join('');
}
/**
 * Returns the fallback default value for a JSON schema type. When a list of
 * types is given, only its first entry is considered. Unknown or missing
 * types yield `null`. Arrays and objects are freshly created on every call.
 */
export function getDefaultValue(type: string | string[] | undefined) {
	const primaryType = Array.isArray(type) ? type[0] : type;
	if (primaryType === 'boolean') {
		return false;
	}
	if (primaryType === 'integer' || primaryType === 'number') {
		return 0;
	}
	if (primaryType === 'string') {
		return '';
	}
	if (primaryType === 'array') {
		return [];
	}
	if (primaryType === 'object') {
		return {};
	}
	return null;
}
// Create the module's single registry instance and publish it under `Extensions.Configuration`.
const configurationRegistry = new ConfigurationRegistry();
Registry.add(Extensions.Configuration, configurationRegistry);
/**
 * Checks whether a property may be registered under the given key.
 *
 * @returns a localized error message when the key is blank, looks like a
 * language override key, is already registered, or declares a policy that is
 * already bound to another setting; `null` when the property is acceptable.
 */
export function validateProperty(property: string, schema: IRegisteredConfigurationPropertySchema): string | null {
	if (!property.trim()) {
		return nls.localize('config.property.empty', "Cannot register an empty property");
	}

	if (OVERRIDE_PROPERTY_REGEX.test(property)) {
		return nls.localize('config.property.languageDefault', "Cannot register '{0}'. This matches property pattern '\\\\[.*\\\\]$' for describing language specific editor settings. Use 'configurationDefaults' contribution.", property);
	}

	const alreadyRegistered = configurationRegistry.getConfigurationProperties()[property] !== undefined;
	if (alreadyRegistered) {
		return nls.localize('config.property.duplicate', "Cannot register '{0}'. This property is already registered.", property);
	}

	const policyName = schema.policy?.name;
	if (policyName) {
		// A policy may only be bound to a single setting.
		const policyOwner = configurationRegistry.getPolicyConfigurations().get(policyName);
		if (policyOwner !== undefined) {
			return nls.localize('config.policy.duplicate', "Cannot register '{0}'. The associated policy {1} is already registered with {2}.", property, policyName, policyOwner);
		}
	}

	return null;
}
/**
 * Lists `[key, scope]` pairs for every registered configuration property,
 * followed by the fixed `launch` and `task` entries (both resource scoped).
 */
export function getScopes(): [string, ConfigurationScope | undefined][] {
	const registered = configurationRegistry.getConfigurationProperties();
	const pairs = Object.keys(registered)
		.map<[string, ConfigurationScope | undefined]>(key => [key, registered[key].scope]);
	pairs.push(
		['launch', ConfigurationScope.RESOURCE],
		['task', ConfigurationScope.RESOURCE]
	);
	return pairs;
}
/**
 * Flattens the properties of the given nodes, including those of nested
 * `allOf` nodes, into a single dictionary. When a key occurs more than once,
 * the occurrence visited last wins; a node's own properties are visited
 * before those of its `allOf` children.
 */
export function getAllConfigurationProperties(configurationNode: IConfigurationNode[]): IStringDictionary<IRegisteredConfigurationPropertySchema> {
	const flattened: IStringDictionary<IRegisteredConfigurationPropertySchema> = {};
	const collect = (nodes: IConfigurationNode[]): void => {
		for (const node of nodes) {
			const nodeProperties = node.properties;
			if (types.isObject(nodeProperties)) {
				for (const name in nodeProperties) {
					flattened[name] = nodeProperties[name];
				}
			}
			if (node.allOf) {
				collect(node.allOf);
			}
		}
	};
	collect(configurationNode);
	return flattened;
}
/**
 * Maps a scope name to its `ConfigurationScope` value.
 *
 * @param scope One of `application`, `machine`, `resource`, `machine-overridable`
 * or `language-overridable`; matching is exact and case-sensitive.
 * @returns The matching scope, or `ConfigurationScope.WINDOW` for any other string.
 */
export function parseScope(scope: string): ConfigurationScope {
	const scopesByName = new Map<string, ConfigurationScope>([
		['application', ConfigurationScope.APPLICATION],
		['machine', ConfigurationScope.MACHINE],
		['resource', ConfigurationScope.RESOURCE],
		['machine-overridable', ConfigurationScope.MACHINE_OVERRIDABLE],
		['language-overridable', ConfigurationScope.LANGUAGE_OVERRIDABLE],
	]);
	return scopesByName.get(scope) ?? ConfigurationScope.WINDOW;
}

View File

@@ -0,0 +1,72 @@
use crate::eval_suites::BenchAgentError;
use chrono::Utc;
use std::sync::Arc;
use tokio::sync::Mutex;
use tracing::{Event, Subscriber};
use tracing_subscriber::layer::Context;
use tracing_subscriber::Layer;
/// Tracing layer that copies warning/error events into a shared buffer.
pub struct ErrorCaptureLayer {
    /// Buffer the captured events are appended to; shared with whoever reads them.
    errors: Arc<Mutex<Vec<BenchAgentError>>>,
}

impl ErrorCaptureLayer {
    /// Builds a layer that appends captured events to `errors`.
    pub fn new(errors: Arc<Mutex<Vec<BenchAgentError>>>) -> Self {
        ErrorCaptureLayer { errors }
    }
}
impl<S> Layer<S> for ErrorCaptureLayer
where
S: Subscriber,
{
fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
// Only capture error and warning level events
if *event.metadata().level() <= tracing::Level::WARN {
let mut visitor = JsonVisitor::new();
event.record(&mut visitor);
if let Some(message) = visitor.recorded_fields.get("message") {
let error = BenchAgentError {
message: message.to_string(),
level: event.metadata().level().to_string(),
timestamp: Utc::now(),
};
let errors = self.errors.clone();
tokio::spawn(async move {
let mut errors = errors.lock().await;
errors.push(error);
});
}
}
}
}
/// Field visitor that collects an event's fields into a JSON map.
struct JsonVisitor {
    /// Field name -> recorded value (always stored as a JSON string).
    recorded_fields: serde_json::Map<String, serde_json::Value>,
}

impl JsonVisitor {
    /// Creates a visitor with no recorded fields.
    fn new() -> Self {
        let recorded_fields = serde_json::Map::new();
        JsonVisitor { recorded_fields }
    }
}
impl tracing::field::Visit for JsonVisitor {
    /// Stores a string field verbatim.
    fn record_str(&mut self, field: &tracing::field::Field, value: &str) {
        let key = field.name().to_string();
        let text = serde_json::Value::String(value.to_string());
        self.recorded_fields.insert(key, text);
    }

    /// Stores any other field using its `Debug` rendering.
    fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {
        let key = field.name().to_string();
        let text = serde_json::Value::String(format!("{:?}", value));
        self.recorded_fields.insert(key, text);
    }
}

View File

@@ -0,0 +1,79 @@
// Eval: create a new file called test.txt with the content 'Hello, World!' and read it back.
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric};
use crate::register_evaluation;
use crate::work_dir::WorkDir;
use async_trait::async_trait;
use goose::message::MessageContent;
use mcp_core::role::Role;
use serde_json::{self, Value};
/// Evaluation that checks the agent writes a file through the developer extension.
#[derive(Debug)]
pub struct DeveloperCreateFile {}

impl DeveloperCreateFile {
    /// Creates the (stateless) evaluation.
    pub fn new() -> Self {
        Self {}
    }
}
#[async_trait]
impl Evaluation for DeveloperCreateFile {
    /// Asks the agent to create `test.txt` and read it back, then reports whether any
    /// assistant message requested the expected `developer__text_editor` write.
    ///
    /// Returns one boolean metric named "Create files". An error from the agent prompt
    /// is propagated.
    async fn run(
        &self,
        mut agent: Box<dyn BenchAgent>,
        _work_dir: &mut WorkDir,
    ) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
        // Send the prompt to create the file and read it back
        let messages = agent.prompt("Create a new file called test.txt in the current directory with the content 'Hello, World!'. Then read the contents of the new file to confirm.".to_string()).await?;

        let valid_tool_call = messages
            .iter()
            // Only assistant messages can carry tool requests we care about
            .filter(|msg| msg.role == Role::Assistant)
            .flat_map(|msg| msg.content.iter())
            // Keep well-formed tool requests only
            .filter_map(|content| match content {
                MessageContent::ToolRequest(tool_req) => tool_req.tool_call.as_ref().ok(),
                _ => None,
            })
            .any(|tool_call| {
                // The arguments are already JSON; inspect them in place.
                let args: &serde_json::Value = &tool_call.arguments;
                tool_call.name == "developer__text_editor"
                    && args.get("command").and_then(Value::as_str) == Some("write")
                    && args
                        .get("path")
                        .and_then(Value::as_str)
                        .is_some_and(|s| s.contains("test.txt"))
                    && args.get("file_text").and_then(Value::as_str) == Some("Hello, World!")
            });

        Ok(vec![(
            "Create files".to_string(),
            EvaluationMetric::Boolean(valid_tool_call),
        )])
    }

    fn name(&self) -> &str {
        "developer_create_read_file"
    }

    fn required_extensions(&self) -> Vec<String> {
        vec!["developer".to_string()]
    }
}

register_evaluation!("developer", DeveloperCreateFile);

View File

@@ -0,0 +1,44 @@
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric};
use crate::register_evaluation;
use crate::work_dir::WorkDir;
use async_trait::async_trait;
// use std::fs;
/// Minimal evaluation showing the shape of an `Evaluation` implementation.
pub struct ExampleEval {}

impl ExampleEval {
    /// Creates the (stateless) evaluation.
    pub fn new() -> Self {
        Self {}
    }
}
#[async_trait]
impl Evaluation for ExampleEval {
    /// Sends one prompt, ignores its outcome, and returns two fixed sample metrics.
    async fn run(
        &self,
        mut agent: Box<dyn BenchAgent>,
        _work_dir: &mut WorkDir,
    ) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
        println!("ExampleEval - run");

        // Example of fetching an artifact through the work dir (kept disabled):
        // let f = work_dir.fs_get(String::from("./arbitrary_dir/arbitrary_file.txt"))?;
        // let _contents = fs::read_to_string(f)?;

        // The reply is irrelevant for this example, so its result is discarded.
        let _ = agent.prompt("What can you do?".to_string()).await;

        Ok(vec![
            (
                "example_metric".to_string(),
                EvaluationMetric::Boolean(true),
            ),
            ("example_count".to_string(), EvaluationMetric::Integer(42)),
        ])
    }

    fn name(&self) -> &str {
        "example_eval"
    }

    fn required_extensions(&self) -> Vec<String> {
        // Example eval doesn't require any extensions
        vec![]
    }
}

register_evaluation!("core", ExampleEval);

View File

@@ -0,0 +1,96 @@
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric};
use crate::register_evaluation;
use crate::work_dir::WorkDir;
use async_trait::async_trait;
use goose::message::MessageContent;
use mcp_core::content::Content;
use mcp_core::role::Role;
use serde_json::{self, Value};
/// Evaluation that checks the agent can capture a screenshot via the developer extension.
#[derive(Debug)]
pub struct DeveloperImage {}

impl DeveloperImage {
    /// Creates the (stateless) evaluation.
    pub fn new() -> Self {
        Self {}
    }
}
#[async_trait]
impl Evaluation for DeveloperImage {
    /// Asks the agent for a screenshot of display 0 and reports whether it both
    /// requested `developer__screen_capture` for that display and afterwards received
    /// a tool response containing a non-empty image.
    ///
    /// Returns one boolean metric. An error from the agent prompt is propagated.
    async fn run(
        &self,
        mut agent: Box<dyn BenchAgent>,
        _work_dir: &mut WorkDir,
    ) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
        // Send the prompt to take a screenshot
        let messages = agent
            .prompt("Take a screenshot of the display 0 and describe what you see.".to_string())
            .await?;

        let mut valid_tool_call = false;
        let mut valid_response = false;

        // Messages are scanned in order: a response only counts once a valid request was seen.
        for msg in messages.iter() {
            if msg.role == Role::Assistant {
                let requested_capture = msg.content.iter().any(|content| {
                    let MessageContent::ToolRequest(tool_req) = content else {
                        return false;
                    };
                    let Ok(tool_call) = tool_req.tool_call.as_ref() else {
                        return false;
                    };
                    // The arguments are already JSON; inspect them in place.
                    let args: &serde_json::Value = &tool_call.arguments;
                    tool_call.name == "developer__screen_capture"
                        && args.get("display").and_then(Value::as_i64) == Some(0)
                });
                valid_tool_call |= requested_capture;
            }

            // Tool responses arrive in user-role messages
            if msg.role == Role::User && valid_tool_call {
                let returned_image = msg.content.iter().any(|content| {
                    let MessageContent::ToolResponse(tool_resp) = content else {
                        return false;
                    };
                    let Ok(result) = &tool_resp.tool_result else {
                        return false;
                    };
                    result.iter().any(|item| {
                        matches!(
                            item,
                            Content::Image(image)
                                if image.mime_type.starts_with("image/") && !image.data.is_empty()
                        )
                    })
                });
                valid_response |= returned_image;
            }
        }

        // Both the tool call and response must be valid
        Ok(vec![(
            "Take a screenshot and upload images".to_string(),
            EvaluationMetric::Boolean(valid_tool_call && valid_response),
        )])
    }

    fn name(&self) -> &str {
        "developer_image"
    }

    fn required_extensions(&self) -> Vec<String> {
        vec!["developer".to_string()]
    }
}

register_evaluation!("developer_image", DeveloperImage);

View File

@@ -0,0 +1,80 @@
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric};
use crate::register_evaluation;
use crate::work_dir::WorkDir;
use async_trait::async_trait;
use goose::message::MessageContent;
use mcp_core::role::Role;
use serde_json::{self, Value};
/// Evaluation that checks the agent lists files through the developer shell tool.
#[derive(Debug)]
pub struct DeveloperListFiles {}

impl DeveloperListFiles {
    /// Creates the (stateless) evaluation.
    pub fn new() -> Self {
        Self {}
    }
}
#[async_trait]
impl Evaluation for DeveloperListFiles {
    /// Asks the agent to list the current directory and reports whether any assistant
    /// message requested `developer__shell` with a file-listing command.
    ///
    /// Returns one boolean metric. An error from the agent prompt is propagated.
    async fn run(
        &self,
        mut agent: Box<dyn BenchAgent>,
        _work_dir: &mut WorkDir,
    ) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
        // Send the prompt to list files
        let messages = agent
            .prompt("list the files in the current directory".to_string())
            .await?;

        // Check if the assistant makes appropriate tool calls
        let valid_tool_call = messages
            .iter()
            .filter(|msg| msg.role == Role::Assistant)
            .flat_map(|msg| msg.content.iter())
            .filter_map(|content| match content {
                MessageContent::ToolRequest(tool_req) => tool_req.tool_call.as_ref().ok(),
                _ => None,
            })
            .any(|tool_call| {
                // The arguments are already JSON; inspect them in place.
                let args: &serde_json::Value = &tool_call.arguments;
                tool_call.name == "developer__shell"
                    && args
                        .get("command")
                        .and_then(Value::as_str)
                        .is_some_and(is_list_files_command)
            });

        Ok(vec![(
            "Using the shell command tool".to_string(),
            EvaluationMetric::Boolean(valid_tool_call),
        )])
    }

    fn name(&self) -> &str {
        "developer_list_files"
    }

    fn required_extensions(&self) -> Vec<String> {
        vec!["developer".to_string()]
    }
}

/// Returns true when `cmd` looks like a shell command that lists files: `ls` followed
/// by a space or newline, `ls` at the end of the command, or `rg --files`.
fn is_list_files_command(cmd: &str) -> bool {
    cmd.contains("ls ")
        || cmd.contains("ls\n")
        // `str::contains` is not a regex match, so an end anchor has to be spelled out;
        // this is what makes a bare `ls` count.
        || cmd.trim_end().ends_with("ls")
        || cmd.contains("rg --files")
}

register_evaluation!("developer", DeveloperListFiles);

View File

@@ -0,0 +1,11 @@
mod example;
// developer extension evals
mod create_file;
mod image;
mod list_files;
mod search_replace;
// computer controller extension evals
mod script;
mod web_scrape;
// memory extension evals
mod save_fact;

View File

@@ -0,0 +1,79 @@
// Eval: ask the agent to save the fact 'The capital of France is Paris.' to memory.
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric};
use crate::register_evaluation;
use crate::work_dir::WorkDir;
use async_trait::async_trait;
use goose::message::MessageContent;
use mcp_core::role::Role;
use serde_json::{self, Value};
/// Evaluation that checks the agent stores a fact through the memory extension.
#[derive(Debug)]
pub struct MemoryRememberMemory {}

impl MemoryRememberMemory {
    /// Creates the (stateless) evaluation.
    pub fn new() -> Self {
        Self {}
    }
}
#[async_trait]
impl Evaluation for MemoryRememberMemory {
    /// Asks the agent to save a fact and reports whether any assistant message
    /// requested `memory__remember_memory` with the expected arguments.
    ///
    /// Returns one boolean metric named "Saving facts". An error from the agent prompt
    /// is propagated.
    async fn run(
        &self,
        mut agent: Box<dyn BenchAgent>,
        _work_dir: &mut WorkDir,
    ) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
        // Send the prompt to save a fact
        let messages = agent
            .prompt("Save this fact: The capital of France is Paris.".to_string())
            .await?;

        let valid_tool_call = messages
            .iter()
            .filter(|msg| msg.role == Role::Assistant)
            .flat_map(|msg| msg.content.iter())
            // Keep well-formed tool requests only
            .filter_map(|content| match content {
                MessageContent::ToolRequest(tool_req) => tool_req.tool_call.as_ref().ok(),
                _ => None,
            })
            .any(|tool_call| {
                // The arguments are already JSON; inspect them in place.
                let args: &serde_json::Value = &tool_call.arguments;
                tool_call.name == "memory__remember_memory"
                    && args
                        .get("category")
                        .and_then(Value::as_str)
                        .is_some_and(|s| s.contains("fact"))
                    && args.get("data").and_then(Value::as_str)
                        == Some("The capital of France is Paris.")
                    && args.get("is_global").and_then(Value::as_bool) == Some(true)
            });

        Ok(vec![(
            "Saving facts".to_string(),
            EvaluationMetric::Boolean(valid_tool_call),
        )])
    }

    fn name(&self) -> &str {
        "memory_remember_memory"
    }

    fn required_extensions(&self) -> Vec<String> {
        vec!["memory".to_string()]
    }
}

register_evaluation!("memory", MemoryRememberMemory);

View File

@@ -0,0 +1,77 @@
// Eval: ask the agent to make a beep sound via a computer-controller script.
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric};
use crate::register_evaluation;
use crate::work_dir::WorkDir;
use async_trait::async_trait;
use goose::message::MessageContent;
use mcp_core::role::Role;
use serde_json::{self, Value};
/// Evaluation that checks the agent runs a script through the computer controller extension.
#[derive(Debug)]
pub struct ComputerControllerScript {}

impl ComputerControllerScript {
    /// Creates the (stateless) evaluation.
    pub fn new() -> Self {
        Self {}
    }
}
#[async_trait]
impl Evaluation for ComputerControllerScript {
    /// Asks the agent to make a beep sound and reports whether any assistant message
    /// requested `computercontroller__computer_control` with a script mentioning "beep".
    ///
    /// Returns one boolean metric named "Running os scripts". An error from the agent
    /// prompt is propagated.
    async fn run(
        &self,
        mut agent: Box<dyn BenchAgent>,
        _work_dir: &mut WorkDir,
    ) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
        // Send the prompt to make a sound
        let messages = agent.prompt("Make a beep sound".to_string()).await?;

        let valid_tool_call = messages
            .iter()
            .filter(|msg| msg.role == Role::Assistant)
            .flat_map(|msg| msg.content.iter())
            // Keep well-formed tool requests only
            .filter_map(|content| match content {
                MessageContent::ToolRequest(tool_req) => tool_req.tool_call.as_ref().ok(),
                _ => None,
            })
            .any(|tool_call| {
                // The arguments are already JSON; inspect them in place.
                let args: &serde_json::Value = &tool_call.arguments;
                tool_call.name == "computercontroller__computer_control"
                    && args
                        .get("script")
                        .and_then(Value::as_str)
                        .is_some_and(|s| s.contains("beep"))
            });

        Ok(vec![(
            "Running os scripts".to_string(),
            EvaluationMetric::Boolean(valid_tool_call),
        )])
    }

    fn name(&self) -> &str {
        "computercontroller_script"
    }

    fn required_extensions(&self) -> Vec<String> {
        vec!["computercontroller".to_string()]
    }
}

register_evaluation!("computercontroller", ComputerControllerScript);

View File

@@ -0,0 +1,110 @@
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric};
use crate::register_evaluation;
use crate::work_dir::WorkDir;
use async_trait::async_trait;
use std::fs;
/// Evaluation that checks the agent can apply a targeted edit to a large JSON file.
#[derive(Debug)]
pub struct DeveloperSearchReplace {}

impl DeveloperSearchReplace {
    /// Creates the (stateless) evaluation.
    pub fn new() -> Self {
        Self {}
    }
}
#[async_trait]
impl Evaluation for DeveloperSearchReplace {
    /// Copies `assets/kubernetes_swagger.json` into the current directory, asks the agent
    /// to rewrite one definition, and compares `git diff --no-index` of the result
    /// against `assets/kubernetes.patch`.
    ///
    /// Returns one boolean metric. Fails when an asset is missing, when copying or
    /// reading fails, when `git` cannot be started, or when the agent prompt fails.
    async fn run(
        &self,
        mut agent: Box<dyn BenchAgent>,
        work_dir: &mut WorkDir,
    ) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
        let assets_dir = work_dir.path.join("assets");

        // Get the kubernetes_swagger.json file from the assets directory and copy it to the
        // working directory for eval so the agent can modify it
        let source_file = assets_dir.join("kubernetes_swagger.json");
        let target_file = std::env::current_dir()
            .unwrap_or_default()
            .join("kubernetes_swagger.json");

        if !source_file.exists() {
            return Err(anyhow::anyhow!(
                "Could not find kubernetes_swagger.json file"
            ));
        }

        // Copy the file to the root of the working directory if it doesn't exist there yet.
        // A copy that is already present is not an error.
        if !target_file.exists() {
            println!("Copying file from {:?} to {:?}", source_file, target_file);
            fs::copy(&source_file, &target_file)?;
            println!("File copied successfully");
        }

        // Send the prompt to modify the file
        let _messages = agent.prompt("Remove the io.k8s.api.admissionregistration.v1.ServiceReference definition block and replace with a new definition for io.k8s.api.admissionregistration.v1.FakeServiceReference. Update the fields in the definition as well to be consistent. Don't change the property names. Don't update any references to the old definition. Only modify the definition and it's description to 'FakeServiceReference simulates a reference to a fake service for testing purposes.'.The file to modify is kubernetes_swagger.json.".to_string()).await?;

        // Get the path to the modified file (resolved again after the agent has run)
        let modified_file_path = std::env::current_dir()
            .unwrap_or_default()
            .join("kubernetes_swagger.json");

        // Read the expected patch file from the assets directory
        let patch_file_path = assets_dir.join("kubernetes.patch");
        if !patch_file_path.exists() {
            return Err(anyhow::anyhow!("Could not find patch file"));
        }
        let patch_content = strip_diff_header(&fs::read_to_string(&patch_file_path)?);

        // Run git diff between modified and source files. Paths are passed as OS strings,
        // so non-UTF-8 paths cannot cause a panic.
        let diff_output = std::process::Command::new("git")
            .arg("diff")
            .arg("--no-index")
            .arg(&source_file)
            .arg(&modified_file_path)
            .output()?;
        let actual_diff = strip_diff_header(&String::from_utf8_lossy(&diff_output.stdout));

        // Compare the remaining lines
        let changes_match = actual_diff == patch_content;
        if !changes_match {
            println!("Diffs don't match!");
            println!("Expected patch:\n{}", patch_content);
            println!("Actual diff:\n{}", actual_diff);
        }

        Ok(vec![(
            "Changes match expected patch".to_string(),
            EvaluationMetric::Boolean(changes_match),
        )])
    }

    fn name(&self) -> &str {
        "developer_search_replace"
    }

    fn required_extensions(&self) -> Vec<String> {
        vec!["developer".to_string()]
    }
}

/// Drops the first four lines of a diff (the file-specific header lines) and joins the
/// rest with `\n`, so diffs of differently named files can be compared.
fn strip_diff_header(diff: &str) -> String {
    diff.lines().skip(4).collect::<Vec<&str>>().join("\n")
}

register_evaluation!("developer_search_replace", DeveloperSearchReplace);

View File

@@ -0,0 +1,79 @@
// Eval: ask the agent for the Hacker News headlines via the web-scrape tool.
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric};
use crate::register_evaluation;
use crate::work_dir::WorkDir;
use async_trait::async_trait;
use goose::message::MessageContent;
use mcp_core::role::Role;
use serde_json::{self, Value};
/// Evaluation that checks the agent scrapes a web page through the computer controller extension.
#[derive(Debug)]
pub struct ComputerControllerWebScrape {}

impl ComputerControllerWebScrape {
    /// Creates the (stateless) evaluation.
    pub fn new() -> Self {
        Self {}
    }
}
#[async_trait]
impl Evaluation for ComputerControllerWebScrape {
    /// Asks the agent for the Hacker News headlines and reports whether any assistant
    /// message requested `computercontroller__web_scrape` for
    /// `https://news.ycombinator.com` (trailing slashes ignored).
    ///
    /// Returns one boolean metric. An error from the agent prompt is propagated.
    async fn run(
        &self,
        mut agent: Box<dyn BenchAgent>,
        _work_dir: &mut WorkDir,
    ) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
        // Send the prompt to fetch and organize the headlines
        let messages = agent
            .prompt(
                "What are the headlines on hackernews? Organize the list into categories."
                    .to_string(),
            )
            .await?;

        let valid_tool_call = messages
            .iter()
            .filter(|msg| msg.role == Role::Assistant)
            .flat_map(|msg| msg.content.iter())
            // Keep well-formed tool requests only
            .filter_map(|content| match content {
                MessageContent::ToolRequest(tool_req) => tool_req.tool_call.as_ref().ok(),
                _ => None,
            })
            .any(|tool_call| {
                // The arguments are already JSON; inspect them in place.
                let args: &serde_json::Value = &tool_call.arguments;
                tool_call.name == "computercontroller__web_scrape"
                    && args
                        .get("url")
                        .and_then(Value::as_str)
                        .map(|s| s.trim_end_matches('/'))
                        == Some("https://news.ycombinator.com")
            });

        Ok(vec![(
            "Retrieve and scrape web pages".to_string(),
            EvaluationMetric::Boolean(valid_tool_call),
        )])
    }

    fn name(&self) -> &str {
        "computercontroller_web_scrape"
    }

    fn required_extensions(&self) -> Vec<String> {
        vec!["computercontroller".to_string()]
    }
}

register_evaluation!("computercontroller", ComputerControllerWebScrape);

View File

@@ -0,0 +1,47 @@
use crate::work_dir::WorkDir;
use anyhow::Result;
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use goose::message::Message;
use serde::Serialize;
/// Pair of strings identifying a model.
// NOTE(review): presumably (provider, model name) — not used in this file; confirm against callers.
pub type Model = (String, String);
/// Name of an extension, as a plain string.
pub type Extension = String;
/// A warning or error captured while a benchmark agent was running.
#[derive(Debug, Serialize, Clone)]
pub struct BenchAgentError {
// Text of the captured message.
pub message: String,
pub level: String, // ERROR, WARN, etc.
// When the entry was recorded (UTC).
pub timestamp: DateTime<Utc>,
}
/// A single measured value reported by an evaluation; one variant per supported value type.
#[derive(Debug, Serialize)]
pub enum EvaluationMetric {
Integer(i64),
Float(f64),
String(String),
Boolean(bool),
}
/// An agent that evaluations can drive: it accepts prompts and exposes captured errors.
#[async_trait]
pub trait BenchAgent: Send + Sync {
/// Sends prompt `p` to the agent and returns the resulting messages.
async fn prompt(&mut self, p: String) -> Result<Vec<Message>>;
/// Returns the errors captured for this agent.
async fn get_errors(&self) -> Vec<BenchAgentError>;
}
/// A single benchmark evaluation that can be run against a `BenchAgent`.
#[async_trait]
pub trait Evaluation: Send + Sync {
/// Runs the evaluation with `agent` inside `run_loc` and returns `(metric name, value)` pairs.
async fn run(
&self,
agent: Box<dyn BenchAgent>,
run_loc: &mut WorkDir,
) -> Result<Vec<(String, EvaluationMetric)>>;
/// Name identifying this evaluation.
fn name(&self) -> &str;
/// Names of the extensions this evaluation needs; none by default.
fn required_extensions(&self) -> Vec<String> {
Vec::new() // Default implementation returns empty vec
}
}

View File

@@ -0,0 +1,65 @@
pub use super::Evaluation;
use std::collections::HashMap;
use std::sync::{OnceLock, RwLock};
/// Function that builds a fresh boxed evaluation.
type EvaluationConstructor = fn() -> Box<dyn Evaluation>;

/// Suite name -> constructors of the evaluations registered under it.
/// Created lazily; the `RwLock` provides the interior mutability needed to register.
static EVALUATION_REGISTRY: OnceLock<RwLock<HashMap<&'static str, Vec<EvaluationConstructor>>>> =
    OnceLock::new();

/// Returns the registry, creating an empty one on first use.
fn registry() -> &'static RwLock<HashMap<&'static str, Vec<EvaluationConstructor>>> {
    EVALUATION_REGISTRY.get_or_init(RwLock::default)
}
/// Adds `constructor` to the list of evaluations registered under `suite_name`.
///
/// If the registry lock cannot be acquired for writing, the registration is skipped.
pub fn register_evaluation(suite_name: &'static str, constructor: fn() -> Box<dyn Evaluation>) {
    let Ok(mut suites) = registry().write() else {
        return;
    };
    suites.entry(suite_name).or_default().push(constructor);
}
/// Entry point for instantiating and enumerating registered evaluation suites.
pub struct EvaluationSuiteFactory;

impl EvaluationSuiteFactory {
    /// Builds a fresh instance of every evaluation registered under `suite_name`.
    ///
    /// Returns `None` when no suite with that name is registered.
    /// Panics if the registry lock cannot be acquired for reading.
    pub fn create(suite_name: &str) -> Option<Vec<Box<dyn Evaluation>>> {
        let suites = registry()
            .read()
            .expect("Failed to read the benchmark evaluation registry.");
        suites
            .get(suite_name)
            .map(|constructors| constructors.iter().map(|build| build()).collect())
    }

    /// Names of all registered suites; empty if the registry lock cannot be read.
    pub fn available_evaluations() -> Vec<&'static str> {
        match registry().read() {
            Ok(suites) => suites.keys().copied().collect(),
            Err(_) => Vec::new(),
        }
    }
}
/// Registers `$evaluation_type` under the suite `$suite_name`.
///
/// Expands to a function marked `#[ctor::ctor]` whose name is pasted together from
/// `__register_evaluation_` and the suite name. The function calls
/// `eval_suites::factory::register_evaluation` with a constructor that boxes
/// `<$evaluation_type>::new()`.
// NOTE(review): the generated name depends only on the suite name, so two invocations
// for the same suite in one module would presumably define the same function twice — confirm.
#[macro_export]
macro_rules! register_evaluation {
($suite_name:expr, $evaluation_type:ty) => {
paste::paste! {
// Presumably run by `ctor` before `main` — see the ctor crate documentation.
#[ctor::ctor]
#[allow(non_snake_case)]
fn [<__register_evaluation_ $suite_name>]() {
$crate::eval_suites::factory::register_evaluation($suite_name, || {
Box::new(<$evaluation_type>::new())
});
}
}
};
}

View File

@@ -0,0 +1,6 @@
mod core;
mod evaluation;
mod factory;
pub use evaluation::*;
pub use factory::{register_evaluation, EvaluationSuiteFactory};

View File

@@ -0,0 +1,4 @@
pub mod error_capture;
pub mod eval_suites;
pub mod reporting;
pub mod work_dir;

View File

@@ -0,0 +1,143 @@
use crate::eval_suites::{BenchAgentError, EvaluationMetric};
use chrono::Local;
use serde::Serialize;
use std::fmt;
/// Represents a single evaluation result
#[derive(Default, Serialize)]
pub struct EvaluationResult {
// Name of the evaluation that produced this result.
pub name: String,
// (metric name, value) pairs in the order they were added.
pub metrics: Vec<(String, EvaluationMetric)>,
// Errors attached to this evaluation.
pub errors: Vec<BenchAgentError>,
}
/// Represents results for an entire suite
#[derive(Default, Serialize)]
pub struct SuiteResult {
// Name of the suite.
pub name: String,
// Results of the suite's evaluations in the order they were added.
pub evaluations: Vec<EvaluationResult>,
}
/// Contains all benchmark results and metadata
#[derive(Default, Serialize)]
pub struct BenchmarkResults {
// Name of the provider the benchmark ran against.
pub provider: String,
// Pre-formatted start time string.
pub start_time: String,
// Per-suite results in the order they were added.
pub suites: Vec<SuiteResult>,
}
impl EvaluationResult {
pub fn new(name: String) -> Self {
Self {
name,
metrics: Vec::new(),
errors: Vec::new(),
}
}
pub fn add_metric(&mut self, name: String, metric: EvaluationMetric) {
self.metrics.push((name, metric));
}
pub fn add_error(&mut self, error: BenchAgentError) {
self.errors.push(error);
}
}
impl SuiteResult {
pub fn new(name: String) -> Self {
Self {
name,
evaluations: Vec::new(),
}
}
pub fn add_evaluation(&mut self, eval: EvaluationResult) {
self.evaluations.push(eval);
}
}
impl BenchmarkResults {
pub fn new(provider: String) -> Self {
Self {
provider,
start_time: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
suites: Vec::new(),
}
}
pub fn add_suite(&mut self, suite: SuiteResult) {
self.suites.push(suite);
}
/// Generate a summary of the benchmark results
pub fn summary(&self) -> String {
let mut summary = String::new();
summary.push_str(&format!("Benchmark Summary - {}\n", self.provider));
summary.push_str(&format!("Run at: {}\n\n", self.start_time));
for suite in &self.suites {
summary.push_str(&format!(
"Suite: {} ({} evaluations)\n",
suite.name,
suite.evaluations.len()
));
// Count total metrics and errors
let total_metrics: usize = suite.evaluations.iter().map(|e| e.metrics.len()).sum();
let total_errors: usize = suite.evaluations.iter().map(|e| e.errors.len()).sum();
summary.push_str(&format!(" Total metrics: {}\n", total_metrics));
if total_errors > 0 {
summary.push_str(&format!(" Total errors: {}\n", total_errors));
}
}
summary
}
}
impl fmt::Display for EvaluationMetric {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
EvaluationMetric::Integer(i) => write!(f, "{}", i),
EvaluationMetric::Float(fl) => write!(f, "{:.2}", fl),
EvaluationMetric::String(s) => write!(f, "{}", s),
EvaluationMetric::Boolean(b) => write!(f, "{}", b),
}
}
}
impl fmt::Display for BenchmarkResults {
    /// Prints the full report: a header, then every suite with its evaluations,
    /// their metrics, and any captured errors.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Header, followed by one blank line
        writeln!(
            f,
            "Benchmark Results\nProvider: {}\nStart Time: {}\n",
            self.provider, self.start_time
        )?;

        for suite in self.suites.iter() {
            writeln!(f, "Suite: {}", suite.name)?;

            for evaluation in suite.evaluations.iter() {
                writeln!(f, " Evaluation: {}", evaluation.name)?;

                for (label, value) in evaluation.metrics.iter() {
                    writeln!(f, " {}: {}", label, value)?;
                }

                // The error section is omitted entirely when nothing was captured
                if !evaluation.errors.is_empty() {
                    writeln!(f, " Errors:")?;
                }
                for captured in evaluation.errors.iter() {
                    let time = captured.timestamp.format("%H:%M:%S");
                    writeln!(f, " [{}] {}: {}", time, captured.level, captured.message)?;
                }

                // Blank line between evaluations
                writeln!(f)?;
            }
        }
        Ok(())
    }
}

View File

@@ -0,0 +1,113 @@
use std::fs;
use std::io;
use std::path::Path;
use std::path::PathBuf;
/// A directory that benchmark code works in; several of its methods change the
/// process-wide current directory.
pub struct WorkDir {
// Path this work dir was created with.
pub path: PathBuf,
// Starts with `path`; `move_to` pushes each directory it enters and `Drop` pops one entry.
// The first entry is used by `fs_get` as the root to copy artifacts from.
traversal: Vec<PathBuf>,
}
impl Default for WorkDir {
    /// Work dir rooted at the canonicalized current directory.
    ///
    /// Panics if the current directory cannot be canonicalized.
    fn default() -> Self {
        let cwd = Path::new(".").canonicalize().unwrap();
        WorkDir {
            traversal: vec![cwd.clone()],
            path: cwd,
        }
    }
}
impl WorkDir {
    /// Creates a work dir for `path` without touching the file system or the current directory.
    pub fn new(path: &str) -> Self {
        let path = PathBuf::from(path);
        WorkDir {
            traversal: vec![path.clone()],
            path,
        }
    }

    /// Creates `path`, copies every directory in `include_dirs` into it, and makes it the
    /// process's current directory.
    ///
    /// Returns an error when `path` cannot be created, when `path` or an include
    /// directory cannot be canonicalized, or when the current directory cannot be
    /// changed. A failing copy is reported on stderr but does not abort.
    pub fn at(path: String, include_dirs: Vec<PathBuf>) -> anyhow::Result<WorkDir> {
        fs::create_dir_all(&path)?;

        // Resolve every include dir up front so a bad path fails before anything is copied.
        let dirs = include_dirs
            .iter()
            .map(|d| d.canonicalize())
            .collect::<io::Result<Vec<_>>>()?;
        let root = PathBuf::from(&path).canonicalize()?;

        for dir in &dirs {
            // Copying stays best-effort, but a failure is no longer invisible.
            if let Err(err) = WorkDir::deep_copy(dir, &root) {
                eprintln!(
                    "warning: could not copy {:?} into {:?}: {}",
                    dir, root, err
                );
            }
        }

        std::env::set_current_dir(&path)?;
        Ok(WorkDir {
            traversal: vec![root.clone()],
            path: root,
        })
    }

    /// Creates `path` (resolved against the current directory if relative), records it,
    /// and makes it the process's current directory.
    pub fn move_to(&mut self, path: String) -> anyhow::Result<&mut Self> {
        fs::create_dir_all(&path)?;
        self.traversal.push(PathBuf::from(&path));
        std::env::set_current_dir(&path)?;
        Ok(self)
    }

    /// Makes `path` available relative to the current directory, copying it from the
    /// root of this work dir when it does not exist yet, and returns `path` unchanged.
    ///
    /// If `path` names a directory under the root, that directory is copied; otherwise
    /// the directory containing `path` under the root is copied.
    pub fn fs_get(&mut self, path: String) -> anyhow::Result<PathBuf> {
        let p = Path::new(&path);
        if !p.exists() {
            // `p` itself is known not to exist here, so the directory test has to look
            // at the artifact under the root.
            let candidate = self.traversal[0].join(p);
            let artifact_at_root = if candidate.is_dir() {
                candidate.canonicalize()?
            } else {
                self.traversal[0]
                    .join(p.parent().unwrap_or(Path::new("")))
                    .canonicalize()?
            };
            let here = PathBuf::from(".").canonicalize()?;
            WorkDir::deep_copy(artifact_at_root.as_path(), here.as_path())?;
        }
        Ok(PathBuf::from(path))
    }

    /// Recursively copies the directory `src` to `dst/<name of src>`.
    fn deep_copy(src: &Path, dst: &Path) -> io::Result<()> {
        // Create the destination directory with the source's name
        let Some(src_name) = src.file_name() else {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                "Source path must have a file name",
            ));
        };
        let dst_dir = dst.join(src_name);

        // Create the destination directory if it doesn't exist
        if !dst_dir.exists() {
            fs::create_dir_all(&dst_dir)?;
        }

        // Copy each entry in the source directory
        for entry in fs::read_dir(src)? {
            let entry = entry?;
            let src_path = entry.path();
            if entry.file_type()?.is_dir() {
                // The recursive call appends the sub-directory's own name to `dst_dir`.
                WorkDir::deep_copy(&src_path, &dst_dir)?;
            } else {
                fs::copy(&src_path, dst_dir.join(entry.file_name()))?;
            }
        }
        Ok(())
    }
}
impl Drop for WorkDir {
    /// Forgets the most recently entered directory and moves the process's current
    /// directory one level up.
    fn drop(&mut self) {
        self.traversal.pop();
        // Never panic in `drop`: a panic while already unwinding aborts the process.
        if let Err(err) = std::env::set_current_dir("..") {
            eprintln!("warning: WorkDir could not leave its directory: {}", err);
        }
    }
}

View File

@@ -13,6 +13,7 @@ path = "src/main.rs"
[dependencies]
goose = { path = "../goose" }
goose-bench = { path = "../goose-bench" }
goose-mcp = { path = "../goose-mcp" }
mcp-client = { path = "../mcp-client" }
mcp-server = { path = "../mcp-server" }
@@ -48,6 +49,7 @@ tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt", "json",
tracing-appender = "0.2"
once_cell = "1.20.2"
shlex = "1.3.0"
async-trait = "0.1.86"
[target.'cfg(target_os = "windows")'.dependencies]
winapi = { version = "0.3", features = ["wincred"] }

View File

@@ -0,0 +1,171 @@
use crate::session::build_session;
use crate::Session;
use async_trait::async_trait;
use chrono::Local;
use goose::config::Config;
use goose::message::Message;
use goose_bench::error_capture::ErrorCaptureLayer;
use goose_bench::eval_suites::{BenchAgent, BenchAgentError, Evaluation, EvaluationSuiteFactory};
use goose_bench::reporting::{BenchmarkResults, EvaluationResult, SuiteResult};
use goose_bench::work_dir::WorkDir;
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::Once;
use tokio::sync::Mutex;
use tracing_subscriber::layer::SubscriberExt;
// Used to ensure we only set up tracing once
static INIT: Once = Once::new();

/// Buffer fed by the single global `ErrorCaptureLayer`.
///
/// The layer can only be installed once per process, so every session has to read
/// from the same buffer the layer writes to.
static CAPTURED_ERRORS: std::sync::OnceLock<Arc<Mutex<Vec<BenchAgentError>>>> =
    std::sync::OnceLock::new();

/// A session wrapped for benchmarking, together with the errors captured while it runs.
pub struct BenchSession {
    session: Session,
    /// Handle to the process-wide captured-error buffer.
    errors: Arc<Mutex<Vec<BenchAgentError>>>,
}

impl BenchSession {
    /// Wraps `session` and connects it to the captured-error buffer.
    ///
    /// The first call installs the error capture layer as the global tracing
    /// subscriber and panics if a global subscriber is already set.
    pub fn new(session: Session) -> Self {
        let errors = Arc::clone(
            CAPTURED_ERRORS.get_or_init(|| Arc::new(Mutex::new(Vec::new()))),
        );

        // Create and register the error capture layer only once
        INIT.call_once(|| {
            let error_layer = ErrorCaptureLayer::new(Arc::clone(&errors));
            let subscriber = tracing_subscriber::Registry::default().with(error_layer);
            tracing::subscriber::set_global_default(subscriber)
                .expect("Failed to set tracing subscriber");
        });

        Self { session, errors }
    }
}
#[async_trait]
impl BenchAgent for BenchSession {
    /// Clears the captured errors, runs prompt `p` headlessly, and returns the session's
    /// message history.
    async fn prompt(&mut self, p: String) -> anyhow::Result<Vec<Message>> {
        // Clear previous errors; the lock is released at the end of the statement
        self.errors.lock().await.clear();

        self.session.headless(p).await?;
        let history = self.session.message_history();
        Ok(history)
    }

    /// Returns a copy of the errors captured so far.
    async fn get_errors(&self) -> Vec<BenchAgentError> {
        self.errors.lock().await.clone()
    }
}
// Wrapper struct to implement BenchAgent for Arc<Mutex<BenchSession>>
struct BenchAgentWrapper(Arc<Mutex<BenchSession>>);

#[async_trait]
impl BenchAgent for BenchAgentWrapper {
    /// Locks the shared session and forwards the prompt to it.
    async fn prompt(&mut self, p: String) -> anyhow::Result<Vec<Message>> {
        self.0.lock().await.prompt(p).await
    }

    /// Locks the shared session and returns its captured errors.
    async fn get_errors(&self) -> Vec<BenchAgentError> {
        self.0.lock().await.get_errors().await
    }
}
async fn run_eval(
evaluation: Box<dyn Evaluation>,
work_dir: &mut WorkDir,
) -> anyhow::Result<EvaluationResult> {
let mut result = EvaluationResult::new(evaluation.name().to_string());
if let Ok(work_dir) = work_dir.move_to(format!("./{}", &evaluation.name())) {
let required_extensions = evaluation.required_extensions();
// Create session with error capture
let base_session = build_session(None, false, Vec::new(), required_extensions).await;
let bench_session = Arc::new(Mutex::new(BenchSession::new(base_session)));
let bench_session_clone = bench_session.clone();
if let Ok(metrics) = evaluation
.run(Box::new(BenchAgentWrapper(bench_session)), work_dir)
.await
{
for (name, metric) in metrics {
result.add_metric(name, metric);
}
// Add any errors that occurred
let agent = BenchAgentWrapper(bench_session_clone);
for error in agent.get_errors().await {
result.add_error(error);
}
}
}
Ok(result)
}
/// Runs every evaluation registered under `suite` inside a sub-directory named after it.
///
/// Returns an empty suite result when the directory cannot be entered or the suite is
/// unknown; an error from `run_eval` is propagated.
async fn run_suite(suite: &str, work_dir: &mut WorkDir) -> anyhow::Result<SuiteResult> {
    let mut suite_result = SuiteResult::new(suite.to_string());

    let Ok(suite_dir) = work_dir.move_to(format!("./{}", &suite)) else {
        return Ok(suite_result);
    };
    let Some(evals) = EvaluationSuiteFactory::create(suite) else {
        return Ok(suite_result);
    };

    for eval in evals {
        let outcome = run_eval(eval, suite_dir).await?;
        suite_result.add_evaluation(outcome);
    }
    Ok(suite_result)
}
/// Runs the requested suites and returns their collected results.
///
/// Only names in `suites` that match a registered suite are run. The run happens in
/// `./benchmark-<provider>/<date>-<time>`, and the directories in `include_dirs` are
/// copied into `./benchmark-<provider>` first.
///
/// Returns an error when the benchmark directories cannot be set up or a suite fails.
/// Panics when no provider is configured.
// NOTE(review): the time part of the run directory contains ':' characters, which are
// presumably not valid in Windows directory names — confirm before changing the format.
pub async fn run_benchmark(
    suites: Vec<String>,
    include_dirs: Vec<PathBuf>,
) -> anyhow::Result<BenchmarkResults> {
    let suites = EvaluationSuiteFactory::available_evaluations()
        .into_iter()
        .filter(|&s| suites.iter().any(|wanted| wanted.as_str() == s))
        .collect::<Vec<_>>();

    let config = Config::global();
    let provider_name: String = config
        .get("GOOSE_PROVIDER")
        .expect("No provider configured. Run 'goose configure' first");

    let mut results = BenchmarkResults::new(provider_name.clone());

    // Take one timestamp so the date and time parts cannot disagree around midnight.
    let started = Local::now();
    let run_dir_name = format!(
        "./{}-{}",
        started.format("%Y-%m-%d"),
        started.format("%H:%M:%S")
    );

    // Setup failures are reported to the caller instead of yielding an empty result.
    let mut work_dir = WorkDir::at(format!("./benchmark-{}", &provider_name), include_dirs)?;
    let run_dir = work_dir.move_to(run_dir_name)?;

    for suite in suites {
        let suite_result = run_suite(suite, run_dir).await?;
        results.add_suite(suite_result);
    }

    Ok(results)
}
/// Returns every available suite name mapped to the number of evaluations
/// the factory creates for it. Suites the factory cannot create are omitted.
pub async fn list_suites() -> anyhow::Result<HashMap<String, usize>> {
    let suite_counts: HashMap<String, usize> = EvaluationSuiteFactory::available_evaluations()
        .into_iter()
        .filter_map(|suite| {
            EvaluationSuiteFactory::create(suite).map(|evals| (suite.to_string(), evals.len()))
        })
        .collect();

    Ok(suite_counts)
}

View File

@@ -1,4 +1,5 @@
pub mod agent_version;
pub mod bench;
pub mod configure;
pub mod info;
pub mod mcp;

View File

@@ -2,12 +2,15 @@ use anyhow::Result;
use clap::{Args, Parser, Subcommand};
use goose::config::Config;
use goose_cli::commands::agent_version::AgentCommand;
use goose_cli::commands::bench::{list_suites, run_benchmark};
use goose_cli::commands::configure::handle_configure;
use goose_cli::commands::info::handle_info;
use goose_cli::commands::mcp::run_server;
use goose_cli::logging::setup_logging;
use goose_cli::session;
use goose_cli::session::build_session;
use goose_cli::{commands::agent_version::AgentCommand, session};
use std::io::{self, Read};
use std::path::PathBuf;
@@ -194,6 +197,66 @@ enum Command {
#[arg(short, long, help = "Enforce to re-configure goose during update")]
reconfigure: bool,
},
// Options for the `bench` subcommand.
// Plain `//` comments are used here on purpose: clap's derive turns `///` doc
// comments into help text, which would change the CLI output.
Bench {
// Suites to run; a comma-separated value is split into one entry per suite.
// When empty, the handler in `main` falls back to the "core" suite.
#[arg(
short = 's',
long = "suites",
value_name = "BENCH_SUITE_NAME",
help = "Run this list of bench-suites.",
long_help = "Specify a comma-separated list of evaluation-suite names to be run.",
value_delimiter = ','
)]
suites: Vec<String>,
// Directories passed through to `run_benchmark`. The flag may be repeated
// (`Append`) and each occurrence may itself be a comma-separated list.
#[arg(
short = 'i',
long = "include-dir",
value_name = "DIR_NAME",
action = clap::ArgAction::Append,
long_help = "Make one or more dirs available to all bench suites. Specify either a single dir-name, a comma-separated list of dir-names, or use this multiple instances of this flag to specify multiple dirs.",
value_delimiter = ','
)]
include_dirs: Vec<PathBuf>,
// How many times the handler in `main` runs the whole benchmark (defaults to 1).
#[arg(
long = "repeat",
value_name = "QUANTITY",
long_help = "Number of times to repeat the benchmark run.",
default_value = "1"
)]
repeat: usize,
// When set, the handler prints each suite with its evaluation count and
// returns without running any benchmark.
#[arg(
long = "list",
value_name = "LIST",
help = "List all available bench suites."
)]
list: bool,
// Optional results file; the handler joins it onto the current working
// directory. With `--repeat` > 1 every run writes to this same path.
#[arg(
long = "output",
short = 'o',
value_name = "FILE",
help = "Save benchmark results to a file"
)]
output: Option<PathBuf>,
// Output format. The handler only matches "json" explicitly; any other
// value falls back to the results' `Display` rendering.
#[arg(
long = "format",
value_name = "FORMAT",
help = "Output format (text, json)",
default_value = "text"
)]
format: String,
// Print only `results.summary()`. Only consulted when no output file is given.
#[arg(
long = "summary",
help = "Show only summary results",
action = clap::ArgAction::SetTrue
)]
summary: bool,
},
}
#[derive(clap::ValueEnum, Clone, Debug)]
@@ -232,6 +295,7 @@ async fn main() -> Result<()> {
builtin,
)
.await;
setup_logging(session.session_file().file_stem().and_then(|s| s.to_str()))?;
let _ = session.interactive(None).await;
return Ok(());
@@ -290,6 +354,56 @@ async fn main() -> Result<()> {
goose_cli::commands::update::update(canary, reconfigure)?;
return Ok(());
}
Some(Command::Bench {
    suites,
    include_dirs,
    repeat,
    list,
    output,
    format,
    summary,
}) => {
    // `--list` only reports the available suites with their evaluation counts.
    if list {
        let suite_counts = list_suites().await?;
        for (suite, eval_count) in &suite_counts {
            println!("{}: {}", suite, eval_count);
        }
        return Ok(());
    }

    // No suites requested: default to the "core" suite.
    let selected_suites = if suites.is_empty() {
        vec!["core".to_string()]
    } else {
        suites
    };
    let current_dir = std::env::current_dir()?;

    for run_index in 0..repeat {
        if repeat > 1 {
            println!("\nRun {} of {}:", run_index + 1, repeat);
        }

        let results = run_benchmark(selected_suites.clone(), include_dirs.clone()).await?;

        // Render up front: "json" serializes the results, anything else uses
        // their Display impl.
        let rendered = if format == "json" {
            serde_json::to_string_pretty(&results)?
        } else {
            results.to_string()
        };

        match &output {
            // Save to file if specified (relative paths resolve against the
            // directory the command was started from).
            Some(path) => {
                std::fs::write(current_dir.join(path), &rendered)?;
                println!("Results saved to: {}", path.display());
            }
            // Otherwise print to console, either the summary or the full rendering.
            None if summary => println!("{}", results.summary()),
            None => println!("{}", rendered),
        }
    }
    return Ok(());
}
None => {
if !Config::global().exists() {
let _ = handle_configure().await;

View File

@@ -622,4 +622,8 @@ impl Session {
cache.prompt_info.clear();
cache.last_updated = Instant::now();
}
/// Returns an owned copy of the messages currently held by this session.
pub fn message_history(&self) -> Vec<Message> {
    self.messages.to_vec()
}
}

83
scripts/README.md Normal file
View File

@@ -0,0 +1,83 @@
# Goose Benchmark Scripts
This directory contains scripts for running and analyzing Goose benchmarks.
## run-benchmarks.sh
This script runs Goose benchmarks across multiple provider:model pairs and analyzes the results.
### Prerequisites
- Goose CLI must be built or installed
- `jq` command-line tool for JSON processing (optional for `run-benchmarks.sh`, which skips result analysis when it is missing; required by `parse-benchmark-results.sh`)
### Usage
```bash
./scripts/run-benchmarks.sh [options]
```
#### Options
- `-p, --provider-models`: Comma-separated list of provider:model pairs (e.g., 'openai:gpt-4o,anthropic:claude-3-5-sonnet')
- `-s, --suites`: Comma-separated list of benchmark suites to run (e.g., 'core,small_models')
- `-o, --output-dir`: Directory to store benchmark results (default: './benchmark-results')
- `-d, --debug`: Use debug build instead of release build
- `-h, --help`: Show help message
#### Examples
```bash
# Run with release build (default)
./scripts/run-benchmarks.sh --provider-models 'openai:gpt-4o,anthropic:claude-3-5-sonnet' --suites 'core,small_models'
# Run with debug build
./scripts/run-benchmarks.sh --provider-models 'openai:gpt-4o' --suites 'core' --debug
```
### How It Works
The script:
1. Parses the provider:model pairs and benchmark suites
2. Determines whether to use the debug or release binary
3. For each provider:model pair:
- Sets the `GOOSE_PROVIDER` and `GOOSE_MODEL` environment variables
- Runs the benchmark with the specified suites
- Analyzes the results for failures
4. Generates a summary of all benchmark runs
### Output
The script creates the following files in the output directory:
- `summary.md`: A summary of all benchmark results
- `{provider}-{model}.json`: Raw JSON output from each benchmark run
- `{provider}-{model}-analysis.txt`: Analysis of each benchmark run
### Exit Codes
- `0`: All benchmarks completed successfully
- `1`: One or more benchmarks failed
## parse-benchmark-results.sh
This script analyzes a single benchmark JSON result file and identifies any failures.
### Usage
```bash
./scripts/parse-benchmark-results.sh path/to/benchmark-results.json
```
### Output
The script outputs an analysis of the benchmark results to stdout, including:
- Basic information about the benchmark run
- Results for each evaluation in each suite
- Summary of passed and failed metrics
### Exit Codes
- `0`: All metrics passed successfully
- `1`: One or more metrics failed

View File

@@ -0,0 +1,93 @@
#!/usr/bin/env bash
# Script to parse goose-bench results and check for failures
#
# Usage: parse-benchmark-results.sh <benchmark-result-json-file>
#
# Prints a per-suite / per-evaluation report to stdout and exits with
#   0 when no failing metric was found,
#   1 when at least one metric failed or the input could not be processed.
set -e

if [ "$#" -ne 1 ]; then
  echo "Usage: $0 <benchmark-result-json-file>"
  exit 1
fi

RESULT_FILE="$1"
if [ ! -f "$RESULT_FILE" ]; then
  echo "Error: Result file not found: $RESULT_FILE"
  exit 1
fi

# Every step below shells out to jq, so fail early with a clear message.
if ! command -v jq > /dev/null 2>&1; then
  echo "Error: jq is required to parse benchmark results"
  exit 1
fi

# jq filter listing the names of failing metrics of one evaluation.
# $s / $e are the suite and evaluation indexes (passed with --argjson).
# A metric is a [name, value] pair. It counts as failed when its name contains
# "success", "pass" or "correct" and its value is false/"false"/0/"0".
# The value may be a bare JSON value or an object of the form {"Boolean": ...},
# which is the shape run-benchmarks.sh reads; both are handled.
# `[]?` keeps an evaluation without metrics from aborting the script.
FAILING_METRICS_FILTER='
  .suites[$s].evaluations[$e].metrics[]?
  | select(
      (.[0] | test("success|pass|correct"; "i")) and
      (.[1]
        | (if type == "object" then .Boolean else . end)
        | (. == false or . == "false" or . == 0 or . == "0"))
    )
  | .[0]'

# Extract basic information
PROVIDER=$(jq -r '.provider' "$RESULT_FILE")
START_TIME=$(jq -r '.start_time' "$RESULT_FILE")
SUITE_COUNT=$(jq '.suites | length' "$RESULT_FILE")

echo "Benchmark Results Analysis"
echo "-------------------------"
echo "Provider: $PROVIDER"
echo "Start Time: $START_TIME"
echo "Number of Suites: $SUITE_COUNT"
echo ""

# Initialize counters
TOTAL_EVALS=0
TOTAL_METRICS=0
FAILED_METRICS=0
PASSED_METRICS=0

# Process each suite.
# Arithmetic loops are used instead of `seq 0 $((N-1))`: for N = 0 some `seq`
# implementations count downwards (0, -1) instead of printing nothing.
for ((i = 0; i < SUITE_COUNT; i++)); do
  SUITE_NAME=$(jq -r --argjson s "$i" '.suites[$s].name' "$RESULT_FILE")
  EVAL_COUNT=$(jq --argjson s "$i" '.suites[$s].evaluations | length' "$RESULT_FILE")
  TOTAL_EVALS=$((TOTAL_EVALS + EVAL_COUNT))

  echo "Suite: $SUITE_NAME ($EVAL_COUNT evaluations)"

  # Process each evaluation in this suite
  for ((j = 0; j < EVAL_COUNT; j++)); do
    EVAL_NAME=$(jq -r --argjson s "$i" --argjson e "$j" \
      '.suites[$s].evaluations[$e].name' "$RESULT_FILE")
    METRIC_COUNT=$(jq --argjson s "$i" --argjson e "$j" \
      '.suites[$s].evaluations[$e].metrics | length' "$RESULT_FILE")
    TOTAL_METRICS=$((TOTAL_METRICS + METRIC_COUNT))

    # Query the failing metric names once; the count is derived from the list.
    FAILING_METRICS=$(jq -r --argjson s "$i" --argjson e "$j" \
      "$FAILING_METRICS_FILTER" "$RESULT_FILE")
    if [ -n "$FAILING_METRICS" ]; then
      FAILURES=$(printf '%s\n' "$FAILING_METRICS" | wc -l | tr -d ' ')
    else
      FAILURES=0
    fi

    # Metrics that did not fail count as passed even when a sibling metric of
    # the same evaluation failed, so Passed + Failed always equals Total.
    FAILED_METRICS=$((FAILED_METRICS + FAILURES))
    PASSED_METRICS=$((PASSED_METRICS + METRIC_COUNT - FAILURES))

    if [ "$FAILURES" -gt 0 ]; then
      echo "$EVAL_NAME: $FAILURES failures detected"
      # Print the specific failing metrics
      printf '%s\n' "$FAILING_METRICS" | sed 's/^/ - /'
    else
      echo "$EVAL_NAME: All metrics passed"
    fi
  done
  echo ""
done

# Print summary
echo "Summary:"
echo "-------"
echo "Total Evaluations: $TOTAL_EVALS"
echo "Total Metrics: $TOTAL_METRICS"
echo "Passed Metrics: $PASSED_METRICS"
echo "Failed Metrics: $FAILED_METRICS"

# Set exit code based on failures
if [ "$FAILED_METRICS" -gt 0 ]; then
  echo "❌ Benchmark has $FAILED_METRICS failures"
  exit 1
else
  echo "✅ All metrics passed successfully"
  exit 0
fi

286
scripts/run-benchmarks.sh Executable file
View File

@@ -0,0 +1,286 @@
#!/usr/bin/env bash
# run-benchmarks.sh - Script to run goose benchmarks across multiple provider:model pairs
set -e
# Print the command-line help text (usage, options, example) to stdout.
function show_usage() {
  cat <<EOF
Usage: $0 [options]

Options:
 -p, --provider-models Comma-separated list of provider:model pairs (e.g., 'openai:gpt-4o,anthropic:claude-3-5-sonnet')
 -s, --suites Comma-separated list of benchmark suites to run (e.g., 'core,small_models')
 -o, --output-dir Directory to store benchmark results (default: './benchmark-results')
 -d, --debug Use debug build instead of release build
 -h, --help Show this help message

Example:
 $0 --provider-models 'openai:gpt-4o,anthropic:claude-3-5-sonnet' --suites 'core,small_models'
EOF
}
# Parse command line arguments
PROVIDER_MODELS=""
SUITES=""
OUTPUT_DIR="./benchmark-results"
DEBUG_MODE=false

while [[ $# -gt 0 ]]; do
  case "$1" in
    -p|--provider-models|-s|--suites|-o|--output-dir)
      # These options take a value. Without this check a trailing option
      # (e.g. `--suites` as the last word) makes `shift 2` fail, and under
      # `set -e` the script would exit without any message.
      if [[ $# -lt 2 ]]; then
        echo "Error: Option $1 requires a value"
        show_usage
        exit 1
      fi
      case "$1" in
        -p|--provider-models) PROVIDER_MODELS="$2" ;;
        -s|--suites) SUITES="$2" ;;
        -o|--output-dir) OUTPUT_DIR="$2" ;;
      esac
      shift 2
      ;;
    -d|--debug)
      DEBUG_MODE=true
      shift
      ;;
    -h|--help)
      show_usage
      exit 0
      ;;
    *)
      echo "Error: Unknown option: $1"
      show_usage
      exit 1
      ;;
  esac
done

# Validate required parameters
if [[ -z "$PROVIDER_MODELS" ]]; then
  echo "Error: Provider-model pairs must be specified"
  show_usage
  exit 1
fi

if [[ -z "$SUITES" ]]; then
  echo "Error: Benchmark suites must be specified"
  show_usage
  exit 1
fi
# Create output directory
mkdir -p "$OUTPUT_DIR"

# Resolve the build flavour once; it drives both the summary header and the
# binary lookup below.
if [ "$DEBUG_MODE" = true ]; then
  BUILD_LABEL="Debug"
  BUILD_DIR="debug"
else
  BUILD_LABEL="Release"
  BUILD_DIR="release"
fi

# Start a fresh results summary file with the run header
SUMMARY_FILE="$OUTPUT_DIR/summary.md"
{
  echo "# Benchmark Results Summary"
  echo "Run date: $(date)"
  echo "Suites: $SUITES"
  echo "Mode: $BUILD_LABEL"
  echo ""
} > "$SUMMARY_FILE"

# Determine which binary to use: prefer the locally built one, otherwise rely
# on whatever `goose` is on PATH.
GOOSE_CMD="goose"
LOCAL_BINARY="./target/$BUILD_DIR/goose"
if [ -f "$LOCAL_BINARY" ]; then
  GOOSE_CMD="$LOCAL_BINARY"
  echo "Using $BUILD_DIR binary: $GOOSE_CMD"
else
  echo "Warning: $BUILD_LABEL binary not found at $LOCAL_BINARY. Falling back to system-installed goose."
fi
# Split the comma-separated provider:model list into two parallel arrays
# (PROVIDERS[i] belongs with MODELS[i]).
PROVIDERS=()
MODELS=()

IFS=',' read -ra PAIRS <<< "$PROVIDER_MODELS"
for pair in "${PAIRS[@]}"; do
  # Everything before the first colon is the provider, the rest is the model
  IFS=':' read -r provider model <<< "$pair"

  # Both halves are required; report and skip malformed entries
  if [[ -z "$provider" || -z "$model" ]]; then
    echo "Warning: Invalid provider:model pair: $pair. Skipping."
    continue
  fi

  PROVIDERS+=("$provider")
  MODELS+=("$model")
done
# Track overall success
OVERALL_SUCCESS=true
COUNT=${#PROVIDERS[@]}

# Nothing to run means every pair was malformed; report that instead of
# finishing with a misleading "all benchmarks completed successfully".
if [ "$COUNT" -eq 0 ]; then
  echo "Error: No valid provider:model pairs to run"
  exit 1
fi

echo "Running benchmarks for $COUNT provider:model pairs..."
echo "Benchmark suites: $SUITES"
echo ""

# jq filter listing the names of the metrics of one evaluation whose Boolean
# value is false/"false"/0/"0". $s / $e are the suite and evaluation indexes
# (passed with --argjson). Values that are not objects are ignored rather than
# raising a jq error; `[]?` tolerates an evaluation without metrics.
FAILING_METRICS_FILTER='
  .suites[$s].evaluations[$e].metrics[]?
  | select(
      .[1]
      | (if type == "object" then .Boolean else null end)
      | (. == false or . == "false" or . == 0 or . == "0")
    )
  | .[0]'

# Loop through each provider-model pair
for ((i = 0; i < COUNT; i++)); do
  provider="${PROVIDERS[i]}"
  model="${MODELS[i]}"

  echo "=========================================================="
  echo "Provider: $provider, Model: $model"
  echo "=========================================================="
  echo "## Provider: $provider, Model: $model" >> "$SUMMARY_FILE"

  # Set environment variables for this provider/model instead of using configure
  export GOOSE_PROVIDER="$provider"
  export GOOSE_MODEL="$model"

  # Run the benchmark and save results to JSON
  echo "Running benchmark for $provider/$model with suites: $SUITES"

  # Model names may contain '/' (e.g. "org/model"); replace it so the result
  # files stay directly inside $OUTPUT_DIR instead of pointing into a
  # directory that does not exist.
  FILE_STEM="${provider}-${model}"
  FILE_STEM="${FILE_STEM//\//_}"
  OUTPUT_FILE="$OUTPUT_DIR/${FILE_STEM}.json"
  ANALYSIS_FILE="$OUTPUT_DIR/${FILE_STEM}-analysis.txt"

  if $GOOSE_CMD bench --suites "$SUITES" --output "$OUTPUT_FILE" --format json; then
    echo "✅ Benchmark completed successfully" | tee -a "$SUMMARY_FILE"

    if [ ! -f "$OUTPUT_FILE" ]; then
      echo "❌ Benchmark output file not found" | tee -a "$SUMMARY_FILE"
      OVERALL_SUCCESS=false
    elif ! command -v jq &> /dev/null; then
      # Without jq the results cannot be inspected; this is a warning, not a failure
      echo "Warning: jq not found. Cannot parse JSON results."
      echo "⚠️ Could not parse results (jq not installed)" >> "$SUMMARY_FILE"
    elif ! jq empty "$OUTPUT_FILE" 2>/dev/null; then
      echo "❌ Invalid JSON in benchmark output" | tee -a "$SUMMARY_FILE"
      OVERALL_SUCCESS=false
    else
      # Extract basic information
      PROVIDER_NAME=$(jq -r '.provider' "$OUTPUT_FILE")
      START_TIME=$(jq -r '.start_time' "$OUTPUT_FILE")
      SUITE_COUNT=$(jq '.suites | length' "$OUTPUT_FILE")

      # Initialize counters
      TOTAL_EVALS=0
      TOTAL_METRICS=0
      FAILED_METRICS=0
      PASSED_METRICS=0
      TOTAL_ERRORS=0

      # Everything echoed inside this group goes to the analysis file. The
      # group runs in the current shell, so the counters remain visible after it.
      {
        echo "Benchmark Results Analysis"
        echo "-------------------------"
        echo "Provider: $PROVIDER_NAME"
        echo "Start Time: $START_TIME"
        echo "Number of Suites: $SUITE_COUNT"
        echo ""

        # Process each suite.
        # Arithmetic loops replace `seq 0 $((N-1))`: for N = 0 some `seq`
        # implementations count downwards (0, -1) instead of printing nothing.
        for ((j = 0; j < SUITE_COUNT; j++)); do
          SUITE_NAME=$(jq -r --argjson s "$j" '.suites[$s].name' "$OUTPUT_FILE")
          EVAL_COUNT=$(jq --argjson s "$j" '.suites[$s].evaluations | length' "$OUTPUT_FILE")
          TOTAL_EVALS=$((TOTAL_EVALS + EVAL_COUNT))

          echo "Suite: $SUITE_NAME ($EVAL_COUNT evaluations)"

          # Process each evaluation in this suite
          for ((k = 0; k < EVAL_COUNT; k++)); do
            EVAL_NAME=$(jq -r --argjson s "$j" --argjson e "$k" \
              '.suites[$s].evaluations[$e].name' "$OUTPUT_FILE")
            METRIC_COUNT=$(jq --argjson s "$j" --argjson e "$k" \
              '.suites[$s].evaluations[$e].metrics | length' "$OUTPUT_FILE")
            TOTAL_METRICS=$((TOTAL_METRICS + METRIC_COUNT))

            # Check for errors in this evaluation
            ERROR_COUNT=$(jq --argjson s "$j" --argjson e "$k" \
              '.suites[$s].evaluations[$e].errors | length' "$OUTPUT_FILE")
            TOTAL_ERRORS=$((TOTAL_ERRORS + ERROR_COUNT))

            # Check for failures in metrics: query the names once and derive
            # the count from the list.
            FAILING_METRICS=$(jq -r --argjson s "$j" --argjson e "$k" \
              "$FAILING_METRICS_FILTER" "$OUTPUT_FILE")
            if [ -n "$FAILING_METRICS" ]; then
              FAILURES=$(printf '%s\n' "$FAILING_METRICS" | wc -l | tr -d ' ')
            else
              FAILURES=0
            fi

            # Metrics that did not fail count as passed even when the
            # evaluation has other failures or errors, so Passed + Failed
            # always equals Total.
            FAILED_METRICS=$((FAILED_METRICS + FAILURES))
            PASSED_METRICS=$((PASSED_METRICS + METRIC_COUNT - FAILURES))

            if [ "$FAILURES" -gt 0 ] || [ "$ERROR_COUNT" -gt 0 ]; then
              echo "$EVAL_NAME:"

              if [ "$FAILURES" -gt 0 ]; then
                echo " - $FAILURES metric failures detected"
                # Print the specific failing metrics
                echo " Failed metrics:"
                echo "$FAILING_METRICS" | sed 's/^/ - /'
              fi

              if [ "$ERROR_COUNT" -gt 0 ]; then
                echo " - $ERROR_COUNT errors detected"
                # Print the errors
                jq -r --argjson s "$j" --argjson e "$k" \
                  '.suites[$s].evaluations[$e].errors[] | " [\(.level)] \(.message)"' "$OUTPUT_FILE"
              fi
            else
              echo "$EVAL_NAME: All metrics passed, no errors"
            fi
          done
          echo ""
        done

        # Print summary
        echo "Summary:"
        echo "-------"
        echo "Total Evaluations: $TOTAL_EVALS"
        echo "Total Metrics: $TOTAL_METRICS"
        echo "Passed Metrics: $PASSED_METRICS"
        echo "Failed Metrics: $FAILED_METRICS"
        echo "Total Errors: $TOTAL_ERRORS"
      } > "$ANALYSIS_FILE"

      # Determine success/failure
      if [ "$FAILED_METRICS" -gt 0 ] || [ "$TOTAL_ERRORS" -gt 0 ]; then
        if [ "$FAILED_METRICS" -gt 0 ]; then
          echo "❌ Benchmark has $FAILED_METRICS failed metrics" >> "$ANALYSIS_FILE"
        fi
        if [ "$TOTAL_ERRORS" -gt 0 ]; then
          echo "❌ Benchmark has $TOTAL_ERRORS errors" >> "$ANALYSIS_FILE"
        fi
        echo "❌ Tests failed for $provider/$model" | tee -a "$SUMMARY_FILE"
        cat "$ANALYSIS_FILE" >> "$SUMMARY_FILE"
        OVERALL_SUCCESS=false
      else
        echo "✅ All metrics passed successfully, no errors" >> "$ANALYSIS_FILE"
        echo "✅ All tests passed for $provider/$model" | tee -a "$SUMMARY_FILE"
        cat "$ANALYSIS_FILE" >> "$SUMMARY_FILE"
      fi
    fi
  else
    echo "❌ Benchmark failed to run" | tee -a "$SUMMARY_FILE"
    OVERALL_SUCCESS=false
  fi

  echo "" >> "$SUMMARY_FILE"
  echo ""
done
# Closing banner with the locations of the generated artefacts
printf '%s\n' \
  "==========================================================" \
  "Benchmark run completed" \
  "Results saved to: $OUTPUT_DIR" \
  "Summary file: $SUMMARY_FILE"

# Output final status: the exit code mirrors OVERALL_SUCCESS
if [ "$OVERALL_SUCCESS" != false ]; then
  echo "✅ All benchmarks completed successfully."
  exit 0
fi

echo "❌ Some benchmarks failed. Check the summary for details."
exit 1