blob: 1d5227d6ee95c775da25a3b023e6455f4270a217 [file] [log] [blame]
// Copyright 2026 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
import {evalGroup, itEval, LLMComparison} from './helpers/evaluators.ts';
import {loadInstructions} from './instructions/load.ts';
await Promise.all([
evalGroup(
{type: 'network', label: 'request-info'},
async function() {
await itEval({
test: 'is an accurate response to \'What is this network request about\'',
judge: example => LLMComparison.judge(example, loadInstructions('network.eval')),
});
await itEval({
test: 'ROUGE-Lsum',
rouge: true,
});
}),
evalGroup(
{type: 'network', label: 'bad-request-4xx'},
async function() {
await itEval({
test: 'is an accurate response to \'Why is this network request failing\' (4xx)',
judge: example => LLMComparison.judge(example, loadInstructions('network.eval')),
});
await itEval({
test: 'ROUGE-Lsum',
rouge: true,
});
}),
evalGroup(
{type: 'network', label: 'cors-credentials'},
async function() {
await itEval({
test: 'is an accurate response to \'Why is this network request failing\' (CORS credentials)',
judge: example => LLMComparison.judge(example, loadInstructions('network.eval')),
});
await itEval({
test: 'ROUGE-Lsum',
rouge: true,
});
}),
evalGroup(
{type: 'network', label: 'redirect-3xx'},
async function() {
await itEval({
test: 'is an accurate response to \'Why is this network request failing\' (3xx)',
judge: example => LLMComparison.judge(example, loadInstructions('network.eval')),
});
await itEval({
test: 'ROUGE-Lsum',
rouge: true,
});
}),
evalGroup(
{type: 'network', label: 'server-error-5xx'},
async function() {
await itEval({
test: 'is an accurate response to \'Why is this network request failing\' (5xx)',
judge: example => LLMComparison.judge(example, loadInstructions('network.eval')),
});
await itEval({
test: 'ROUGE-Lsum',
rouge: true,
});
}),
]);