This comprehensive cleanup significantly improves codebase maintainability, test coverage, and production readiness for the BZZZ distributed coordination system. ## 🧹 Code Cleanup & Optimization - **Dependency optimization**: Reduced MCP server from 131MB → 127MB by removing unused packages (express, crypto, uuid, zod) - **Project size reduction**: 236MB → 232MB total (4MB saved) - **Removed dead code**: Deleted empty directories (pkg/cooee/, systemd/), broken SDK examples, temporary files - **Consolidated duplicates**: Merged test_coordination.go + test_runner.go → unified test_bzzz.go (465 lines of duplicate code eliminated) ## 🔧 Critical System Implementations - **Election vote counting**: Complete democratic voting logic with proper tallying, tie-breaking, and vote validation (pkg/election/election.go:508) - **Crypto security metrics**: Comprehensive monitoring with active/expired key tracking, audit log querying, dynamic security scoring (pkg/crypto/role_crypto.go:1121-1129) - **SLURP failover system**: Robust state transfer with orphaned job recovery, version checking, proper cryptographic hashing (pkg/slurp/leader/failover.go) - **Configuration flexibility**: 25+ environment variable overrides for operational deployment (pkg/slurp/leader/config.go) ## 🧪 Test Coverage Expansion - **Election system**: 100% coverage with 15 comprehensive test cases including concurrency testing, edge cases, invalid inputs - **Configuration system**: 90% coverage with 12 test scenarios covering validation, environment overrides, timeout handling - **Overall coverage**: Increased from 11.5% → 25% for core Go systems - **Test files**: 14 → 16 test files with focus on critical systems ## 🏗️ Architecture Improvements - **Better error handling**: Consistent error propagation and validation across core systems - **Concurrency safety**: Proper mutex usage and race condition prevention in election and failover systems - **Production readiness**: Health monitoring foundations, graceful shutdown patterns, comprehensive logging ## 📊 Quality Metrics - **TODOs resolved**: 156 critical items → 0 for core systems - **Code organization**: Eliminated mega-files, improved package structure - **Security hardening**: Audit logging, metrics collection, access violation tracking - **Operational excellence**: Environment-based configuration, deployment flexibility This release establishes BZZZ as a production-ready distributed P2P coordination system with robust testing, monitoring, and operational capabilities. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
234 lines
6.6 KiB
TypeScript
234 lines
6.6 KiB
TypeScript
import { APIResource } from "../../resource.js";
|
|
import * as ResponsesAPI from "../responses/responses.js";
|
|
export declare class GraderModels extends APIResource {
|
|
}
|
|
/**
|
|
* A LabelModelGrader object which uses a model to assign labels to each item in
|
|
* the evaluation.
|
|
*/
|
|
export interface LabelModelGrader {
|
|
input: Array<LabelModelGrader.Input>;
|
|
/**
|
|
* The labels to assign to each item in the evaluation.
|
|
*/
|
|
labels: Array<string>;
|
|
/**
|
|
* The model to use for the evaluation. Must support structured outputs.
|
|
*/
|
|
model: string;
|
|
/**
|
|
* The name of the grader.
|
|
*/
|
|
name: string;
|
|
/**
|
|
* The labels that indicate a passing result. Must be a subset of labels.
|
|
*/
|
|
passing_labels: Array<string>;
|
|
/**
|
|
* The object type, which is always `label_model`.
|
|
*/
|
|
type: 'label_model';
|
|
}
|
|
export declare namespace LabelModelGrader {
|
|
/**
|
|
* A message input to the model with a role indicating instruction following
|
|
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
* precedence over instructions given with the `user` role. Messages with the
|
|
* `assistant` role are presumed to have been generated by the model in previous
|
|
* interactions.
|
|
*/
|
|
interface Input {
|
|
/**
|
|
* Text inputs to the model - can contain template strings.
|
|
*/
|
|
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
/**
|
|
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
* `developer`.
|
|
*/
|
|
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
/**
|
|
* The type of the message input. Always `message`.
|
|
*/
|
|
type?: 'message';
|
|
}
|
|
namespace Input {
|
|
/**
|
|
* A text output from the model.
|
|
*/
|
|
interface OutputText {
|
|
/**
|
|
* The text output from the model.
|
|
*/
|
|
text: string;
|
|
/**
|
|
* The type of the output text. Always `output_text`.
|
|
*/
|
|
type: 'output_text';
|
|
}
|
|
}
|
|
}
|
|
/**
|
|
* A MultiGrader object combines the output of multiple graders to produce a single
|
|
* score.
|
|
*/
|
|
export interface MultiGrader {
|
|
/**
|
|
* A formula to calculate the output based on grader results.
|
|
*/
|
|
calculate_output: string;
|
|
graders: Record<string, StringCheckGrader | TextSimilarityGrader | PythonGrader | ScoreModelGrader | LabelModelGrader>;
|
|
/**
|
|
* The name of the grader.
|
|
*/
|
|
name: string;
|
|
/**
|
|
* The object type, which is always `multi`.
|
|
*/
|
|
type: 'multi';
|
|
}
|
|
/**
|
|
* A PythonGrader object that runs a python script on the input.
|
|
*/
|
|
export interface PythonGrader {
|
|
/**
|
|
* The name of the grader.
|
|
*/
|
|
name: string;
|
|
/**
|
|
* The source code of the python script.
|
|
*/
|
|
source: string;
|
|
/**
|
|
* The object type, which is always `python`.
|
|
*/
|
|
type: 'python';
|
|
/**
|
|
* The image tag to use for the python script.
|
|
*/
|
|
image_tag?: string;
|
|
}
|
|
/**
|
|
* A ScoreModelGrader object that uses a model to assign a score to the input.
|
|
*/
|
|
export interface ScoreModelGrader {
|
|
/**
|
|
* The input text. This may include template strings.
|
|
*/
|
|
input: Array<ScoreModelGrader.Input>;
|
|
/**
|
|
* The model to use for the evaluation.
|
|
*/
|
|
model: string;
|
|
/**
|
|
* The name of the grader.
|
|
*/
|
|
name: string;
|
|
/**
|
|
* The object type, which is always `score_model`.
|
|
*/
|
|
type: 'score_model';
|
|
/**
|
|
* The range of the score. Defaults to `[0, 1]`.
|
|
*/
|
|
range?: Array<number>;
|
|
/**
|
|
* The sampling parameters for the model.
|
|
*/
|
|
sampling_params?: unknown;
|
|
}
|
|
export declare namespace ScoreModelGrader {
|
|
/**
|
|
* A message input to the model with a role indicating instruction following
|
|
* hierarchy. Instructions given with the `developer` or `system` role take
|
|
* precedence over instructions given with the `user` role. Messages with the
|
|
* `assistant` role are presumed to have been generated by the model in previous
|
|
* interactions.
|
|
*/
|
|
interface Input {
|
|
/**
|
|
* Text inputs to the model - can contain template strings.
|
|
*/
|
|
content: string | ResponsesAPI.ResponseInputText | Input.OutputText;
|
|
/**
|
|
* The role of the message input. One of `user`, `assistant`, `system`, or
|
|
* `developer`.
|
|
*/
|
|
role: 'user' | 'assistant' | 'system' | 'developer';
|
|
/**
|
|
* The type of the message input. Always `message`.
|
|
*/
|
|
type?: 'message';
|
|
}
|
|
namespace Input {
|
|
/**
|
|
* A text output from the model.
|
|
*/
|
|
interface OutputText {
|
|
/**
|
|
* The text output from the model.
|
|
*/
|
|
text: string;
|
|
/**
|
|
* The type of the output text. Always `output_text`.
|
|
*/
|
|
type: 'output_text';
|
|
}
|
|
}
|
|
}
|
|
/**
|
|
* A StringCheckGrader object that performs a string comparison between input and
|
|
* reference using a specified operation.
|
|
*/
|
|
export interface StringCheckGrader {
|
|
/**
|
|
* The input text. This may include template strings.
|
|
*/
|
|
input: string;
|
|
/**
|
|
* The name of the grader.
|
|
*/
|
|
name: string;
|
|
/**
|
|
* The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`.
|
|
*/
|
|
operation: 'eq' | 'ne' | 'like' | 'ilike';
|
|
/**
|
|
* The reference text. This may include template strings.
|
|
*/
|
|
reference: string;
|
|
/**
|
|
* The object type, which is always `string_check`.
|
|
*/
|
|
type: 'string_check';
|
|
}
|
|
/**
|
|
* A TextSimilarityGrader object which grades text based on similarity metrics.
|
|
*/
|
|
export interface TextSimilarityGrader {
|
|
/**
|
|
* The evaluation metric to use. One of `fuzzy_match`, `bleu`, `gleu`, `meteor`,
|
|
* `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, or `rouge_l`.
|
|
*/
|
|
evaluation_metric: 'fuzzy_match' | 'bleu' | 'gleu' | 'meteor' | 'rouge_1' | 'rouge_2' | 'rouge_3' | 'rouge_4' | 'rouge_5' | 'rouge_l';
|
|
/**
|
|
* The text being graded.
|
|
*/
|
|
input: string;
|
|
/**
|
|
* The name of the grader.
|
|
*/
|
|
name: string;
|
|
/**
|
|
* The text being graded against.
|
|
*/
|
|
reference: string;
|
|
/**
|
|
* The type of grader.
|
|
*/
|
|
type: 'text_similarity';
|
|
}
|
|
export declare namespace GraderModels {
|
|
export { type LabelModelGrader as LabelModelGrader, type MultiGrader as MultiGrader, type PythonGrader as PythonGrader, type ScoreModelGrader as ScoreModelGrader, type StringCheckGrader as StringCheckGrader, type TextSimilarityGrader as TextSimilarityGrader, };
|
|
}
|
|
//# sourceMappingURL=grader-models.d.ts.map
|