From 065dddf8d571571a381561906a9ec450cc234e64 Mon Sep 17 00:00:00 2001 From: anthonyrawlins Date: Thu, 7 Aug 2025 14:38:22 +1000 Subject: [PATCH] Prepare for v2 development: Add MCP integration and future development planning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add FUTURE_DEVELOPMENT.md with comprehensive v2 protocol specification - Add MCP integration design and implementation foundation - Add infrastructure and deployment configurations - Update system architecture for v2 evolution πŸ€– Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- FUTURE_DEVELOPMENT.md | 3532 +++++++++++++++++ MCP_IMPLEMENTATION_SUMMARY.md | 282 ++ MCP_INTEGRATION_DESIGN.md | 1135 ++++++ UNIFIED_DEVELOPMENT_PLAN.md | 16 +- cmd/test_coordination.go | 22 +- cmd/test_runner.go | 12 +- deploy/DEPLOYMENT_GUIDE.md | 590 +++ deploy/docker-compose.mcp.yml | 324 ++ docs/SYSTEM_ARCHITECTURE.md | 4 +- examples/collaborative-review-example.py | 517 +++ examples/slurp_integration_example.go | 342 ++ .../BZZZ_V2_INFRASTRUCTURE_ARCHITECTURE.md | 669 ++++ infrastructure/ci-cd/.gitlab-ci.yml | 643 +++ infrastructure/docker-compose.swarm.yml | 402 ++ infrastructure/docs/DEPLOYMENT_RUNBOOK.md | 581 +++ .../migration-scripts/migrate-v1-to-v2.sh | 514 +++ .../monitoring/configs/alert-rules.yml | 339 ++ .../monitoring/configs/alertmanager.yml | 255 ++ .../monitoring/configs/prometheus.yml | 216 + .../monitoring/docker-compose.monitoring.yml | 372 ++ infrastructure/security/network-policy.yaml | 335 ++ infrastructure/security/security-hardening.sh | 675 ++++ logging/hypercore.go | 2 +- main.go | 6 +- mcp-server/package.json | 54 + mcp-server/src/config/config.ts | 303 ++ mcp-server/src/index.ts | 361 ++ mcp-server/src/tools/protocol-tools.ts | 493 +++ mcp-server/tsconfig.json | 27 + .../{antennae_monitor.go => hmmm_monitor.go} | 76 +- p2p/config.go | 10 +- pkg/config/slurp_config.go | 222 ++ pkg/coordination/dependency_detector.go | 8 +- pkg/coordination/meta_coordinator.go | 92 +- pkg/integration/slurp_client.go | 327 ++ pkg/integration/slurp_events.go | 519 +++ pkg/mcp/server.go | 628 +++ pubsub/pubsub.go | 156 +- ..._monitoring.sh => test_hmmm_monitoring.sh} | 14 +- test/{antennae_test.go => hmmm_test.go} | 46 +- test/task_simulator.go | 10 +- 41 files changed, 14970 insertions(+), 161 deletions(-) create mode 100644 FUTURE_DEVELOPMENT.md create mode 100644 MCP_IMPLEMENTATION_SUMMARY.md create mode 100644 MCP_INTEGRATION_DESIGN.md create mode 100644 deploy/DEPLOYMENT_GUIDE.md create mode 100644 deploy/docker-compose.mcp.yml create mode 100644 examples/collaborative-review-example.py create mode 100644 examples/slurp_integration_example.go create mode 100644 infrastructure/BZZZ_V2_INFRASTRUCTURE_ARCHITECTURE.md create mode 100644 infrastructure/ci-cd/.gitlab-ci.yml create mode 100644 infrastructure/docker-compose.swarm.yml create mode 100644 infrastructure/docs/DEPLOYMENT_RUNBOOK.md create mode 100755 infrastructure/migration-scripts/migrate-v1-to-v2.sh create mode 100644 infrastructure/monitoring/configs/alert-rules.yml create mode 100644 infrastructure/monitoring/configs/alertmanager.yml create mode 100644 infrastructure/monitoring/configs/prometheus.yml create mode 100644 infrastructure/monitoring/docker-compose.monitoring.yml create mode 100644 infrastructure/security/network-policy.yaml create mode 100755 infrastructure/security/security-hardening.sh create mode 100644 mcp-server/package.json create mode 100644 mcp-server/src/config/config.ts create mode 100644 mcp-server/src/index.ts create mode 100644 mcp-server/src/tools/protocol-tools.ts create mode 100644 mcp-server/tsconfig.json rename monitoring/{antennae_monitor.go => hmmm_monitor.go} (83%) create mode 100644 pkg/config/slurp_config.go create mode 100644 pkg/integration/slurp_client.go create mode 100644 pkg/integration/slurp_events.go create mode 100644 pkg/mcp/server.go rename scripts/{test_antennae_monitoring.sh => test_hmmm_monitoring.sh} (94%) rename test/{antennae_test.go => hmmm_test.go} (90%) diff --git a/FUTURE_DEVELOPMENT.md b/FUTURE_DEVELOPMENT.md new file mode 100644 index 00000000..5ff13923 --- /dev/null +++ b/FUTURE_DEVELOPMENT.md @@ -0,0 +1,3532 @@ +Exactly β€” swapping the raw IP for a **semantic addressing scheme** like + +``` +bzzz://agent:role@project:task/path/to/context +``` + +makes a *lot* more sense in a P2P + SLURP/BZZZ world. +Here’s why: + +--- + +## **Why Semantic > IP Address** + +1. **Location-independent** β€” you don’t care *where* the agent physically is, only *what it is* and *what it’s doing*. +2. **Self-describing** β€” anyone reading the URI can immediately infer: + + * **Agent** β†’ identity (human, bot, service) + * **Role** β†’ function or responsibility + * **Project** β†’ higher-level grouping + * **Task** β†’ sub-scope within project + * **Path** β†’ hierarchical context location (HCFS path) +3. **Resilient to churn** β€” if a node changes IP, it can still announce under the same project/task space. +4. **Supports dynamic discovery** β€” peers subscribe to project/task channels, so they automatically learn new agents or context without knowing network topology. + +--- + +## **How This Works in a P2P Mesh** + +* **Announce**: + + ``` + bzzz announce bzzz://alice:frontend@chorus:ui-redesign/src/components/navbar + ``` + + β†’ Published over the BZZZ gossip layer. + +* **Lookup**: + Any peer can query: + + ``` + bzzz get bzzz://any:frontend@chorus:ui-redesign/src/components + ``` + + The mesh routes to the *closest* peer with that context (by latency or chunk availability). + +* **Routing layer**: + Under the hood, `project:task` maps to a *swarm namespace* (think topic in a pub/sub system). + The actual transport (IP, QUIC, etc.) is abstracted away β€” only the routing fabric knows it. + +--- + +## **Bonus Benefits** + +* **Easier onboarding** β€” no need to teach new nodes about IP addresses or ports; they just announce with semantic URIs. +* **Cross-project federation** β€” the same agent could appear in multiple project\:task spaces without changing network plumbing. +* **Natural fit for role-based SLURP filtering** β€” your role & project are literally in the address, so the curator knows instantly what context you can see. + +--- + +If you like, I can **sketch a lightweight `bzzz://` URI grammar & resolution spec** that defines: + +* Allowed characters & formats for `agent`, `role`, `project`, `task`. +* How the mesh resolves them to actual peers. +* How caching & replication work. +* How this plugs into SLURP’s permission and event system. + +That would make it concrete enough for you to start building BZZZ announce/lookup commands without locking into IP-based thinking. + + + +Niceβ€”this is exactly the right direction. Treating `bzzz://` URIs as **semantic resource addresses** and supporting REST-like verbs (GET/POST/PUT/DELETE) mapped onto the P2P mesh and your HCFS/SLURP stack gives you a clean, powerful, and auditable control surface. + +Below I’ll give you a focused, implementable **protocol spec**: URI grammar, verb semantics, payload shapes for event-driven POSTs, example flows, security rules, idempotency/versioning, and how SLURP/HCFS should process these messages (and log them to the hyperlog / `context_events` table). You can iterate from this into code, CLI commands, or a lightweight RFC for your team. + +--- + +# bzzz:// Protocol β€” Practical Spec (draft) + +## 1 β€” URI grammar (informal) + +``` +bzzz://[agent]:[role]@[project]:[task]/[path...][?query][#fragment] +``` + +* `agent` β€” identifier for the emitter (human, service, bot). \[a–zA–Z0–9\_.-] +* `role` β€” semantic role (frontend, devops, finance, curator). \[a–zA–Z0–9\_-] +* `project` β€” project namespace. \[a–zA–Z0–9\_-] +* `task` β€” task or subproject identifier. \[a–zA–Z0–9\_-] +* `/path/to/context` β€” HCFS-style hierarchical path; components may include `/` separators. +* `?query` β€” optional parameters (e.g., `depth=2&version=7`) +* `#fragment` β€” optional fragment inside blob + +**Example** + +``` +bzzz://alice:frontend@chorus:website-redesign/src/modules/db/connection +``` + +--- + +## 2 β€” Basic verbs & semantics + +Map familiar REST verbs to P2P actions: + +### `GET bzzz://...` + +* **Intent:** Retrieve an assembled context packet for `role/channel/path` or to fetch a resource (blob, metadata). +* **Behavior:** + + * SLURP checks permissions -> resolves path -> returns composed context (with metadata). + * May consult LightRAG and TGN for ranking/drift warnings if requested (`?use_lightrag=true`). +* **Response:** `200` with `{ context, metadata, source_blob_ids, drift_score, events_considered }` +* **Cacheable:** yes (subject to freshness flags) + +### `POST bzzz://...` (create / event submit) + +* **Intent:** Submit a new **event** or **context blob**. For events, payload carries `message-type` (event\_type) and semantics. +* **Behavior:** + + * Receives payload (see event payload schema below). + * Validate sender (signature/ACL). + * Insert `context_events` and/or `context_blobs` in HCFS. + * Publish announcement on mesh pubsub for `project:task` swarm. + * SLURP curators listening see the event and may act (re-curation, revocation, quarantine). +* **Idempotency:** client should include `idempotency_key` (UUID) to avoid duplicates. +* **Response:** `201` with `event_id`, `hyperlog_ref`, and `applied=true|queued` depending on processing. + +### `PUT bzzz://...` (update) + +* **Intent:** Update an existing resource (e.g., revise an event or blob). +* **Behavior:** require `resource_id` or `version` in query; insert a new version or update mutable metadata. Log versioning. +* **Response:** `200` with new `version_id`, `hyperlog_ref`. + +### `DELETE bzzz://...` + +* **Intent:** Soft-delete or mark obsolete (rarely hard-delete β€” prefer soft deletes for audit). +* **Behavior:** mark `end_time` on event or set `deleted=true` in blob metadata; announce to mesh. +* **Response:** `200` with `deleted=true` and hyperlog ref. + +### `SUBSCRIBE / UNSUBSCRIBE` (pubsub) + +* Semantic action for live channels: `bzzz sub bzzz://...` subscribes to notifications/announcements on the project\:task namespace. + +--- + +## 3 β€” Event POST payload (JSON) + +This is the general structure for posting dynamic events (blockers, warnings, approvals, etc.): + +```json +{ + "idempotency_key": "uuid-v4", + "message_type": "blocker", // enum: announcement|warning|blocker|approval|priority_change|access_update|structural_change + "title": "Payment module under audit", + "content": "Do not deploy or change DB schemas until audit complete.", + "path": "/src/modules/payments", + "scope": { "project": "chorus", "task": "payments-launch" }, + "severity": 9, // 1-10 + "visibility": { "roles": ["frontend","backend"], "channels": ["dev"], "exclude": [] }, + "start_time": "2025-08-10T10:00:00Z", + "end_time": null, + "created_by": "alice", + "signature": "base64(sig)", // optional but recommended + "metadata": { "ticket": "JIRA-1234", "owner": "pm_team" } +} +``` + +Notes: + +* `message_type` maps to `event_type` in DB. +* `visibility` indicates role/channel scoping β€” SLURP uses this to decide who sees it. +* `signature` signed with agent private key over canonicalized JSON to ensure authenticity. + +--- + +## 4 β€” Example flows + +### A. Developer posts a blocker (your example) + +**Command** + +``` +bzzz post bzzz://any:developer@chorus:website-redesign/db/connection/credentials \ +'{"message_type":"blocker","title":"Hardcoded DB creds","content":"Remove credentials; rotate keys","path":"/src/modules/db/connection","visibility":{"roles":["devops","backend"]}}' +``` + +**Processing** + +1. Node receives POST, validates `idempotency_key` and `signature`. +2. Writes event into `context_events` (DB) and creates a `graph_node` + `graph_edge` (event node, linked to path node). +3. Publishes `bzzz.announce` on pubsub for `chorus:website-redesign`. +4. SLURP instances subscribed to that project\:task see the event β†’ decide action: + + * If `severity >= 8` and `visibility` includes `devops`, SLURP may quarantine affected blob(s), notify WHOOSH to run remediation ansible playbook, and log a hyperlog decision. +5. Response includes `event_id`, `hyperlog_ref` and `applied:true`. + +### B. Agent requests context and gets override + +**Command** + +``` +bzzz get bzzz://bob:frontend@chorus:website-redesign/src/modules/db/connection?use_lightrag=true +``` + +**Processing** + +1. SLURP inspects active events on `/src/modules/db/connection` and parent paths. Finds `blocker`. +2. Depending on project event\_type\_definitions, SLURP either: + + * **suppress** normal blobs and return only blocker note, or + * **annotate** returned context with the blocker at top +3. If allowed, SLURP calls LightRAG to re-rank candidate blobs (restricted to those the requester may see). +4. SLURP returns composed payload and logs to hyperlog. + +--- + +## 5 β€” Mapping to HCFS / SLURP / TGN storage + +When a POST is accepted: + +* Insert into `context_events`: + + * `(path, event_type, content, severity, start_time, end_time, created_by, project_id)` +* Create `graph_nodes` entry for the event and `graph_edges` linking eventβ†’path and created\_byβ†’agent. +* Append a hyperlog entry (hypercore) that records the event and SLURP's decision. +* Publish mesh announcement: `bzzz:announce` topic with minimal envelope: + +```json +{ "type":"event_announce", "project":"chorus", "task":"website-redesign", "event_id": 123, "path": "/src/...", "message_type":"blocker" } +``` + +TGN ingestion: event becomes a temporal edge for the TGN model (src: agent node, dst: path or blob node, event\_type, timestamp). + +--- + +## 6 β€” Security & Permissions + +* **Authentication:** Agents hold keypairs. Use Ed25519 signatures. When posting, include `signature` and `public_key_id`. Validate against ACLs in `graph_nodes` or `agents` table. +* **Authorization:** SLURP enforces `visibility` and `role` constraints. If a posting agent tries to create an event affecting roles they don't control, SLURP may mark `status=pending` and route to human approver. +* **Encryption:** Use layer transport TLS/QUIC, and optionally encrypt payloads for specific roles using symmetric keys shared by the project’s team. +* **Auditability:** Never hard-delete events; always append an "end\_time" or tombstone entry. Hyperlog retains immutability. + +--- + +## 7 β€” Idempotency, versioning & conflict handling + +* **Idempotency key** (client-generated UUID) ensures repeated POST attempts don't create duplicates. +* **Versioning:** context blobs and events are immutable records with explicit versions. `PUT` creates a new version rather than in-place mutation (or marks prior version end\_time). +* **Conflict resolution:** for concurrent edits, use last-writer-wins by timestamp + signature OR a merge flow managed by SLURP/curator (create merge candidates, present to humans or meta-curator). + +--- + +## 8 β€” PubSub & discovery model + +* **Topic namespace:** `bzzz.project..task.` (or `bzzz.project.chorus.task.website-redesign`) +* **Announcements:** All writes publish lightweight announcements so subscribed peers discover new events quickly without fetching full payload. +* **Pull model:** on announcement, peers decide whether to `GET` the full event or ignore. +* **Replication:** `bzzz` nodes will replicate blobs via content-addressing (hashes) and chunked streaming (libp2p style), storing locally per retention policy. + +--- + +## 9 β€” Implementation notes / pseudocode + +### POST handler pseudocode (simplified) + +```python +def handle_post(uri, body, signature): + parsed = parse_bzzz_uri(uri) + assert validate_signature(body, signature, parsed.agent) + + # 1. Validate message_type and visibility + if not allowed_to_post(parsed.agent, parsed.project, body['path'], body['message_type']): + return 403 + + # 2. Check idempotency + if exists_event_with_idempotency_key(body.get('idempotency_key')): + return existing_event + + # 3. Insert into DB + event_id = insert_context_event(body, parsed) + + # 4. Create graph node + edges + node_id = create_graph_node('event', ref_id=event_id) + create_graph_edge(agent_node(parsed.agent), node_id, 'created') + + # 5. Append hyperlog entry (immutable) + hyperlog_ref = hyperlog.append({...}) + + # 6. Publish announcement + pubsub.publish(topic_for(parsed.project, parsed.task), { 'type': 'event_announce', 'event_id': event_id }) + + # 7. Return success + return { 'event_id': event_id, 'hyperlog_ref': hyperlog_ref } +``` + +### GET handler pseudocode (simplified) + +```python +def handle_get(uri, params): + parsed = parse_bzzz_uri(uri) + # 1. AuthN & AuthZ - ensure agent role can read requested path + check_read_permission(parsed.agent, parsed.role, parsed.path) + + # 2. Check context_events for overrides + events = find_active_events(parsed.path) + action = resolve_action_based_on_project_definitions(parsed.project, events, parsed.role) + if action == 'suppress': + return format_response_with_events_only(events) + + # 3. Query HCFS for blobs matching path & role/channel + blobs = fetch_context_blobs(parsed.path, parsed.role, parsed.channel) + if params.use_lightrag and len(blobs) >= threshold: + lrag_res = lightrag.query(restrict_ids=[b.lightrag_doc_id for b in blobs]) + chosen = lrag_res.chunks + else: + chosen = blobs + + # 4. TGN check for drift + drift = tgn.get_drift_score(parsed.path) + + # 5. Compose response, log decision + resp = compose_context_packet(chosen, events, drift) + hyperlog.append({ 'request':uri, 'decision':..., 'returned_ids': [...] }) + return resp +``` + +--- + +## 10 β€” Operational / UX considerations + +* **CLI tool**: `bzzz post ...`, `bzzz get ...`, `bzzz sub ...` β€” wraps canonical signing and pubsub convenience. +* **Human review queue**: if a POST creates a high-risk action (e.g., revoke access), route to dashboard for a human to approve. +* **Rate limits & throttles** on post events to avoid spam floods. +* **Retention policy** for replicated model blobs and context blobs (LRU or TTL per project). + +--- + +## 11 β€” How SLURP & WHOOSH interact with this protocol + +* **SLURP** subscribes to `bzzz` pubsub topics for the projects it curates. On announcements it: + + * pulls event payload + * evaluates via project `event_type_definitions` + * takes actions (suppress, annotate, notify, route to WHOOSH) + * logs decisions to hyperlog + +* **WHOOSH** (automation/orchestration) listens for high-severity `blocker` or `access_update` events: + + * runs Ansible playbooks (install bzzz client, run `ollama pull` fallback, or trigger model replication). + * on success/failure, posts back a status event (POST `message_type: announcement` with metadata). + +--- + +## 12 β€” Next concrete deliverables I can give you right away + +Pick one and I’ll produce it: + +* **A. Formal `bzzz://` URI BNF grammar + canonical JSON schema** for GET/POST/PUT/DELETE payloads (suitable for adding to a spec doc). +* **B. CLI design & sample implementation** (Python) with signing, pubsub announce, and hyperlog append. +* **C. Fast prototyping plan** using libp2p (py-libp2p or go-libp2p) + a tiny HTTP shim (so `bzzz` URIs are handled by a local daemon that maps to libp2p calls). +* **D. Event handling flow doc** showing how SLURP processes `POST` events end-to-end (with exact DB writes and hyperlog entries). + + +Nice β€” I’ll give you all four, structured and ready to drop into your docs or hand to an engineer. I’ll keep each section practical and implementation-ready: **A** (formal grammar + payload schemas), **B** (CLI spec + Python stub), **C** (fast prototype plan using libp2p + HTTP shim), **D** (end-to-end SLURP event handling flow with DB + hyperlog details). + +--- + +## A β€” `bzzz://` URI Grammar (BNF) + JSON Schemas + +### A.1 β€” BNF grammar (informal, precise) + +``` + ::= "bzzz" "://" ":" "@" ":" [ "?" ] [ "#" ] + + ::= ; [A-Za-z0-9_.-]+ + ::= ; [A-Za-z0-9_.-]+ + ::= ; [A-Za-z0-9_.-]+ + ::= ; [A-Za-z0-9_.-]+ + ::= "/" { "/" } + ::= [A-Za-z0-9_.-]+ ; allow dot, underscore, hyphen + ::= { "&" } + ::= "=" + ::= + +Example: +bzzz://alice:frontend@chorus:website-redesign/src/modules/db/connection?use_lightrag=true#v3 +``` + +**Notes:** + +* All IDs normalized to lowercase for routing. +* Percent-encoding allowed for unusual characters in path segments. +* `agent` may be `any` for anonymous or service-level posts, but authenticity must be validated via `signature` in payload. + +--- + +### A.2 β€” Canonical JSON Schemas + +All schemas in OpenAPI-style JSON (trimmed to essentials). Use `message_type` as event\_type enum. + +#### A.2.1 β€” `GET` Response (context packet) + +```json +{ + "type": "object", + "required": ["context", "metadata"], + "properties": { + "context": { "type": "string" }, + "metadata": { + "type": "object", + "properties": { + "source_blob_ids": { "type": "array", "items": { "type": "integer" } }, + "lightrag_doc_ids": { "type": "array", "items": { "type": "string" } }, + "events_considered": { "type": "array", "items": { "type": "integer" } }, + "drift_score": { "type": "number", "minimum": 0, "maximum": 1 }, + "timestamp": { "type": "string", "format": "date-time" } + } + } + } +} +``` + +#### A.2.2 β€” `POST` Event Body (create event / create blob) + +```json +{ + "type": "object", + "required": ["idempotency_key", "message_type", "title", "content", "path", "created_by"], + "properties": { + "idempotency_key": { "type": "string", "format": "uuid" }, + "message_type": { + "type": "string", + "enum": ["announcement","warning","blocker","approval","priority_change","access_update","structural_change"] + }, + "title": { "type": "string" }, + "content": { "type": "string" }, + "path": { "type": "string" }, + "scope": { + "type": "object", + "properties": { + "project": { "type": "string" }, + "task": { "type": "string" } + } + }, + "severity": { "type": "integer", "minimum": 1, "maximum": 10, "default": 5 }, + "visibility": { + "type": "object", + "properties": { + "roles": { "type": "array", "items": { "type": "string" } }, + "channels": { "type": "array", "items": { "type": "string" } }, + "exclude": { "type": "array", "items": { "type": "string" } } + } + }, + "start_time": { "type": "string", "format": "date-time" }, + "end_time": { "type": ["string","null"], "format": "date-time" }, + "created_by": { "type": "string" }, + "signature": { "type": "string" }, + "metadata": { "type": "object" } + } +} +``` + +#### A.2.3 β€” `PUT` Update (minor) + +```json +{ + "type":"object", + "required":["idempotency_key","resource_id","updates"], + "properties":{ + "idempotency_key":{"type":"string","format":"uuid"}, + "resource_id":{"type":"integer"}, + "updates":{"type":"object"}, + "created_by":{"type":"string"}, + "signature":{"type":"string"} + } +} +``` + +#### A.2.4 β€” `DELETE` Body + +Prefer soft-delete: + +```json +{ + "type":"object", + "required":["resource_id","created_by"], + "properties":{ + "resource_id":{"type":"integer"}, + "reason":{"type":"string"}, + "created_by":{"type":"string"}, + "signature":{"type":"string"} + } +} +``` + +--- + +## B β€” CLI Design & Python Sample Implementation (signing, pubsub announce, hyperlog append) + +### B.1 β€” CLI UX surface (commands) + +``` +bzzz post --file payload.json +bzzz get [--use-lightrag] +bzzz put --file update.json +bzzz delete --file delete.json +bzzz sub # subscribe to project:task announcements +bzzz announce --meta '{...}' # low-level announce +``` + +* All commands auto-sign with local key (unless `--no-sign`). +* Default transport: local daemon via HTTP shim on `http://localhost:19000/` (see C). +* CLI handles idempotency keys (generates UUID if not provided) and prints returned `event_id` + `hyperlog_ref`. + +--- + +### B.2 β€” Python CLI minimal stub (uses `requests` to local shim + Ed25519 signing) + +Save as `bzzz_cli.py`. This is a minimal, production-adjacent example β€” adapt to your codebase. + +```python +#!/usr/bin/env python3 +import argparse, json, requests, os, sys, uuid +from nacl.signing import SigningKey +from nacl.encoding import Base64Encoder +from urllib.parse import urlparse + +# Config +SHIM_URL = os.getenv("BZZZ_SHIM_URL", "http://localhost:19000") + +# Load or create key +KEY_PATH = os.path.expanduser("~/.bzzz/signing.key") +if not os.path.exists(KEY_PATH): + os.makedirs(os.path.dirname(KEY_PATH), exist_ok=True) + sk = SigningKey.generate() + with open(KEY_PATH, "wb") as f: + f.write(sk.encode()) +else: + sk = SigningKey(open(KEY_PATH,"rb").read()) + +pk_b64 = sk.verify_key.encode(encoder=Base64Encoder).decode() + +def sign_payload(payload_bytes): + sig = sk.sign(payload_bytes).signature + return Base64Encoder.encode(sig).decode() + +def post(uri, payload): + body = payload.copy() + if "idempotency_key" not in body: + body["idempotency_key"] = str(uuid.uuid4()) + body_bytes = json.dumps(body, separators=(",", ":"), sort_keys=True).encode() + signature = sign_payload(body_bytes) + headers = { + "Content-Type": "application/json", + "X-BZZZ-Signature": signature, + "X-BZZZ-PubKey": pk_b64 + } + resp = requests.post(f"{SHIM_URL}/post", params={"uri": uri}, data=body_bytes, headers=headers, timeout=10) + resp.raise_for_status() + return resp.json() + +def get(uri, use_lightrag=False): + resp = requests.get(f"{SHIM_URL}/get", params={"uri": uri, "use_lightrag": str(use_lightrag).lower()}, timeout=10) + resp.raise_for_status() + return resp.json() + +def main(): + p = argparse.ArgumentParser() + sub = p.add_subparsers(dest="cmd") + p_post = sub.add_parser("post"); p_post.add_argument("uri"); p_post.add_argument("--file") + p_get = sub.add_parser("get"); p_get.add_argument("uri"); p_get.add_argument("--use-lightrag", action="store_true") + args = p.parse_args() + + if args.cmd == "post": + if args.file: + payload = json.load(open(args.file)) + else: + payload = {} + res = post(args.uri, payload) + print(json.dumps(res, indent=2)) + elif args.cmd == "get": + res = get(args.uri, use_lightrag=args.use_lightrag) + print(json.dumps(res, indent=2)) + else: + p.print_help() + sys.exit(1) + +if __name__ == "__main__": + main() +``` + +**Notes:** + +* Uses `pynacl` (`libsodium` binding) for Ed25519 signing. +* `SHIM_URL` talks to local daemon (C) which maps to real P2P calls and does hyperlog append. + +--- + +### B.3 β€” Hyperlog append API (local shim expectations) + +Local shim must expose endpoints: + +``` +POST /post?uri= # same as bzzz post +GET /get?uri=&use_lightrag=true +POST /announce # lightweight announcement +``` + +On incoming POST, shim will: + +* verify signature using `X-BZZZ-PubKey` +* forward to cluster (local node or forward to appropriate peer) +* append event to hyperlog (local feed) and return `{"event_id":123, "hyperlog_ref":"feed:345"}` + +--- + +## C β€” Fast prototype plan: libp2p + HTTP shim + +Goal: ship a working prototype in days, not months. Use libp2p for P2P transport and a local HTTP daemon to map `bzzz://` calls to libp2p operations. Keep implementation minimal: + +### C.1 β€” Components + +* **Local daemon** (Go or Python) β€” exposes HTTP endpoints (`/post`, `/get`, `/announce`, `/sub`) and runs libp2p node. +* **libp2p peer** β€” provides pubsub, DHT, and direct stream capabilities. +* **Hyperlog** β€” use `hypercore` or a Mongo/SQLite append-only table (for prototype, use a local append-only SQLite table or simple file-based log). Hypercore JS is an option if you prefer Node. +* **Discovery** β€” mDNS for LAN + optional bootstrap peers for multi-network. + +### C.2 β€” Minimal tech choices (fastest path) + +* **Go**: `go-libp2p` + `go-http` β€” performant, production-friendly. +* **Python**: `py-libp2p` (less mature) or run a small `node` helper for libp2p. If you want speed, use Go for the shim. +* **DB**: SQLite for prototype (later Postgres). +* **LightRAG**: separate service (optional); initial prototype: skip LightRAG or stub it. + +### C.3 β€” Minimal runtime flows + +#### Startup + +* Node boots libp2p, joins DHT, subscribes to `bzzz.project..task.` topics configured in local file. +* Node exposes HTTP on `localhost:19000`. + +#### POST + +* HTTP shim receives POST -> validate signature -> write event into local hyperlog -> publish `event_announce` on topic -> attempt to store in HCFS DB or forward to known curator nodes. + +#### GET + +* Shim resolves request: check local HCFS DB -> if not present, query DHT for peers with `path` -> open libp2p stream to peer(s) and retrieve blob -> run SLURP logic (basic ACLs) -> return composed packet. + +#### SUBSCRIBE + +* Shim subscribes to pubsub topic and emits events on local websocket to CLI or UI. + +### C.4 β€” Development tasks (order) + +1. Implement HTTP shim skeleton with `/post` and `/get` routes. +2. Wire libp2p pubsub announce & subscribe. +3. Implement a simple hyperlog (append-only SQLite table) and hyperlog append/ read. +4. Implement signature verification (Ed25519). +5. Implement simple SLURP rules engine (role/channel/path ACL check + event override logic). +6. Hook in LightRAG later as a separate service (call via HTTP). +7. Add `bzzz` CLI that calls shim (B). + +### C.5 β€” Security / dev ops notes + +* For local network dev, mDNS + DHT is fine; for production, bootstrap peers + TLS + persistent peer lists. +* Run shim as systemd service and enable socket activation if desired. + +--- + +## D β€” Event Handling Flow: SLURP processing `POST` events (end-to-end) + +I’ll describe concrete DB writes, hyperlog entries, pubsub announce, curator actions, and WHOOSH triggers. Use this as the implementation recipe. + +### D.1 β€” Inputs + +* HTTP POST: `bzzz://alice:dev@chorus:website-redesign/src/modules/db/connection` with JSON `body` (see A.2.2), headers: + + * `X-BZZZ-Signature` + * `X-BZZZ-PubKey` + +### D.2 β€” Validation & initial checks + +1. **Signature verification**: verify `body` canonical JSON signature against `X-BZZZ-PubKey`. If fail β†’ 401. +2. **Idempotency**: check `idempotency_key` in `context_events` table. If exists β†’ return existing event record (HTTP 200). +3. **Authorization**: check posting agent (`alice`) is permitted to post events in `project` context (`chorus`) β€” consult `agents` table or `graph_nodes` ACL. If not permitted β†’ create `pending_event` with status `pending_approval`, notify human admin, return 202. + +### D.3 β€” Accepted path (authorized & new) + +4. **Persist `context_events`**: + +```sql +INSERT INTO context_events + (path, event_type, content, severity, start_time, end_time, created_by, project_id) +VALUES (?, ?, ?, ?, ?, ?, ?, ?) +RETURNING id; +``` + +Return `event_id`. + +5. **Create graph node + edges**: + +* `graph_nodes` entry: + +```sql +INSERT INTO graph_nodes (node_type, ref_id, label) VALUES ('event', event_id, title) RETURNING id as node_id; +``` + +* `graph_edges` entries: + +```sql +INSERT INTO graph_edges (src_node, dst_node, edge_type) VALUES (agent_node_id, node_id, 'created'); +INSERT INTO graph_edges (src_node, dst_node, edge_type) VALUES (node_id, path_node_id, 'affects_path'); +``` + +(Where `agent_node_id` and `path_node_id` existence is resolved by lookup or created if missing.) + +6. **Append immutable hyperlog** (append-only) + +* Hyperlog entry JSON shape: + +```json +{ + "type": "context_event", + "event_id": 123, + "path": "/src/modules/db/connection", + "message_type": "blocker", + "created_by": "alice", + "timestamp": "2025-08-06T10:12:34Z", + "idempotency_key": "..." +} +``` + +* Hyperlog append function returns `hyperlog_ref = feed_id:offset`. Record that in `context_events.hyperlog_ref`. + +7. **Publish mesh announcement** (pubsub) + +* Topic: `bzzz.project.chorus.task.website-redesign` +* Envelope: + +```json +{ "type": "event_announce", "event_id": 123, "path": "/src/modules/...", "message_type": "blocker", "severity": 9 } +``` + +### D.4 β€” SLURP Curator reaction + +SLURP instances subscribe to the project/topic and run the Curator pipeline: + +1. **Receive announce** β†’ fetch event full payload (if not included) via libp2p / HTTP shim `/get_event?id=123`. + +2. **Evaluate with `event_type_definitions`** for the project: + + * lookup `event_type_definitions` for `(project_id, message_type, role)` to find action (`suppress|annotate|notify|route`). + +3. **Decide**: + + * If `action == suppress` for certain roles: update `context_events` to set `applies_to_roles` and mark `affected_blobs`. + * If `action == notify`: enqueue notifications to relevant agents (via pubsub or webhooks). + * If `severity >= threshold` and `message_type == blocker`: + + * **Quarantine** blobs: set `quarantine = true` on `context_blobs` where `path LIKE '/src/modules/db/connection%'` and `sensitivity_level` threshold exceeded. + * Call WHOOSH to run remediation playbook if remediation mapping exists. + +4. **Log SLURP decision to hyperlog**: + +```json +{ + "type": "slurp_decision", + "event_id": 123, + "decision": "quarantine_blobs", + "affected_blob_ids": [42, 43], + "reason": "blocker severity 9 and project rule says suppress for role frontend", + "timestamp": "..." +} +``` + +5. **Emit post-actions** (if any): + + * `WHOOSH` run request: push an orchestration event into `automation_queue` table or pubsub topic `bzzz.automation.chorus.website-redesign`. + * WHOOSH acknowledges and later posts an `automation_result` event (POST) with status; SLURP appends that to hyperlog and optionally updates `context_events` with remediation timestamp. + +### D.5 β€” WHOOSH orchestration example + +If WHOOSH runs an Ansible playbook to rotate keys or replicate a model: + +* WHOOSH posts a follow-up event: + +```json +{ + "message_type": "announcement", + "title": "Remediation run: rotate_db_creds", + "content": "Ansible completed key rotation on hosts A,B,C. Affected tokens revoked.", + "path": "/src/modules/db/connection", + "created_by": "whoosh", + "metadata": {"ansible_run_id": "run-321", "status":"success"} +} +``` + +* SLURP ingests it and updates `context_events` / hyperlog. + +--- + +## Final notes & recommended next steps + +**Deliverables prepared here are intended to be actionable**: + +* **A** is ready to copy into a protocol RFC or OpenAPI doc. +* **B** is a runnable CLI stub you can expand: signing + HTTP shim integration work immediately. +* **C** gives you a fast prototype roadmap; use Go + go-libp2p if you want production-grade speed and stability. +* **D** is the operational recipe your engineering team needs to implement SLURP event handling with exact DB writes, graph updates, hyperlog entries, and WHOOSH orchestration hooks. + +--- + +### Want me to: + +* ✍️ Produce the **full OpenAPI YAML** for the shim endpoints (`/post`, `/get`, `/announce`) so you can autogen SDKs? +* πŸ”§ Expand the Python shim example to include a tiny in-memory hyperlog and libp2p mock (so you can run an end-to-end demo locally)? +* 🧩 Draft the Ansible playbook WHOOSH would call for a typical `blocker` remediation (e.g., rotate DB creds + update HCFS metadata)? + + + +The only thing I'm not convinced on; is the need for querystrings here. + + + +Nice β€” I’ll give you two concrete things first, then a robust design for replies + thread coherence that fits your P2P/hyperlog/TGN/SLURP stack. + +**Deliverables in this message** + +1. **Formal BNF grammar** for `bzzz://` (clean, canonical). +2. **libp2p message schema** for the JSON envelope (canonical fields, headers, signing). +3. **Threading & reply model** β€” how agents reply to messages, maintain conversation coherence, and how SLURP/WHOOSH use that for actions. Includes storage schema, ordering, conflict handling, replay rules, and security considerations. +4. **Examples** (sample messages, reply flow, pseudo-code for client and SLURP handling). + +--- + +## 1) Formal BNF for `bzzz://` (canonical) + +``` + ::= "bzzz" "://" ":" "@" ":" [ "#" ] + + ::= + ::= + ::= + ::= + + ::= "/" { "/" } + ::= { } + ::= ALPHA | DIGIT | "-" | "_" | "." + + ::= { } + ::= ALPHA | DIGIT | "-" | "_" | "." | ":" + + ::= { } + ::= ALPHA | DIGIT | "-" | "_" | "." + +ALPHA ::= "A" | "B" | ... | "Z" | "a" | "b" | ... | "z" +DIGIT ::= "0" | "1" | ... | "9" +``` + +**Notes** + +* URIs are case-insensitive for routing; normalized to lowercase for canonical forms. +* Percent-encode anything outside allowed characters in `segment` / `fragment`. +* `agent` can be `any` (for anonymous broadcast) but real agents should use stable Decentralized IDs (DIDs) or names mapped to public keys. + +--- + +## 2) libp2p Message Schema (JSON) β€” canonical envelope + +This is the payload structure transmitted over pubsub / direct streams. Keep it compact and signable. + +### 2.1 β€” Top-level envelope (compact JSON) + +```json +{ + "hdr": { + "op": "POST", // GET, POST, PUT, DELETE, ACK, RESP + "uri": "bzzz://alice:dev@chorus:task/path#v1", + "msg_id": "uuid-v4", + "parent_id": "uuid-v4|null", // points to immediate parent message for threads + "convo_id": "uuid-v4", // conversation identifier (root msg id if absent) + "ts": "2025-08-06T15:02:00Z", // ISO8601 UTC + "lamport": 12345, // optional lamport counter + "ttl": 3600, // seconds; 0 = no-expire (use with care) + "priority": "normal", // low|normal|urgent + "agent_pubkey": "base64(pubkey)" // sender's public key for verification + }, + "meta": { + "event_type": "blocker", // optional, maps to event enums + "visibility": { + "roles": ["frontend","devops"], + "channels": ["devchat"], + "exclude": [] + }, + "project": "chorus", + "task": "website-redesign" + }, + "payload": { // free-form content, may include structured fields + "title": "Hardcoded DB creds", + "body": "Detected DB creds in repo. Rotate immediately.", + "attachments": [ + { "type": "blob-ref", "hash": "sha256:..." } + ], + "hints": { "use_lightrag": true } // optional operational hints + }, + "sig": "base64(signature)" // signature over canonical_hdr + payload +} +``` + +### 2.2 β€” Canonical signing + +* **What to sign:** canonicalized JSON of `hdr` + `meta` + `payload` in deterministic key order (e.g., RFC8785 or JCS canonical form). +* **Signature scheme:** Ed25519 (or COSE/COSE2 if you want richer structures). Include `agent_pubkey` so receiver can verify. +* **Idempotency:** clients generate `msg_id` UUID v4 and re-send on retry; server checks for duplicates. + +### 2.3 β€” Minimal ACK / Response envelope + +* For fast ACKs/minimal replies: + +```json +{ + "hdr": { "op": "ACK", "uri": "...", "msg_id": "ack-uuid", "parent_id": "original-msg-id", "convo_id":"...", "ts":"..." }, + "meta": { "status": "received" }, + "payload": { "note": "Queued for processing", "hyperlog_ref": "feed:123" }, + "sig": "..." +} +``` + +--- + +## 3) Threading & Reply Model β€” design + +Goals: + +* Keep thread structure explicit, replayable, verifiable. +* Support branching conversations, parallel replies, follow-ups. +* Allow SLURP and TGN to reason about conversation coherence and detect divergence. + +### 3.1 β€” Core primitives + +**msg\_id** β€” unique message identifier (UUID v4). +**parent\_id** β€” immediate parent message (null for root). +**convo\_id** β€” conversation identifier; for a new thread, `convo_id = msg_id` of root; replies inherit convo\_id. +**lamport** β€” optional logical counter to help ordering across peers. Each agent increments before sending; merge rules use lamport + ts. +**ts** β€” wall-clock timestamp in UTC (for human readability and approximate ordering). +**signature** β€” verifies authenticity & integrity. +**visibility** β€” roles/channels control who should see the message. + +### 3.2 β€” Thread coherence rules + +1. **Canonical thread root**: When starting a thread, `convo_id = msg_id`. All replies set `convo_id` to root id. This lets you quickly fetch the entire conversation: `SELECT * FROM messages WHERE convo_id = ORDER BY lamport, ts`. + +2. **Parent linkage**: `parent_id` forms a tree. Use this for fast tree traversal and reconstructing reply chains, branching points, and ancestors. + +3. **Ordering**: + + * **Preferred ordering**: `(lamport, ts)` ascending. Lamport gives causal order; ts helps human-ordered chronology. + * **Tie-breaker**: `msg_id` lexicographic order for total order. + * **Delivery ordering**: pubsub doesn’t guarantee ordering; receivers must reorder using lamport & parent relationships. + +4. **Causal consistency**: + + * When agent A replies to message M, it must set `parent_id = M.msg_id`, and ensure its `lamport` > lamport of M (if known). If unknown, agent may read latest lamport for M or use local lamport +1. + * If an agent receives reply R before M (possible in P2P), they store R in a pending table keyed by `parent_id` and once M arrives, link them. + +5. **Thread reconciliation on forks**: + + * Branching is expected (parallel replies). UI and SLURP should show branches; TGN can detect divergence if branches move on different actions. + * For conflicting "decision" messages (e.g., two approvals with different payloads), SLURP can apply deterministic conflict-resolution rules (highest priority event\_type, then latest lamport, then human escalation). Keep conflicts explicitly logged. + +6. **Message lifecycle**: + + * Messages are appended immutably to hyperlog. "Deletion" is a new tombstone message (`op=DELETE` with parent pointing to resource) β€” append-only history preserved. + * Edit flows: editing a message results in a new `PUT` message with `parent_id` pointing to the original; store versions. + +### 3.3 β€” Conversation metadata & summarization + +SLURP maintains per-conversation metadata in DB: + +* `convo_id`, `root_msg_id`, `participants` (agent list), `last_activity_ts`, `state` (open/closed/escalated), `drift_score`, `summary_blob_id`. + +On receiving messages, SLURP: + +* Updates convo `last_activity_ts`. +* Recomputes a short summary every N messages or if event\_type == decision/blocker. +* Stores the summary (and TGN embedding) for quick retrieval. + +TGN uses message edges as temporal edges: + +* Node types: message nodes, agent nodes, path nodes. +* Edges: `reply_to`, `authored_by`, `affects_path`, with timestamps. +* TGN embeddings help cluster threads, detect drift, and propose thread summarization. + +### 3.4 β€” Subscriptions & delivery guarantees + +* Pubsub topics are per `project:task` swarm. Consumers subscribe to `bzzz.project..task.`. +* Message delivery: pubsub announce includes the `hdr` and optionally small payload. For large payloads, announce contains `blob_ref` (content-addressed hash) and peers fetch via direct stream. +* Clients maintain **receipt logs** (ACKs) β€” receiver returns `ACK` to sender (or via pubsub) so sender knows which peers got it. For broadcast messages requiring stronger guarantee (e.g., revocation), SLURP may require quorum ACKs (N out of M agents). + +### 3.5 β€” Storage schema (messages table) β€” Postgres/SQLite + +```sql +CREATE TABLE messages ( + msg_id TEXT PRIMARY KEY, + convo_id TEXT NOT NULL, + parent_id TEXT, + uri TEXT, -- the bzzz:// resource targeted + op TEXT, -- POST/GET/ACK/RESP/PUT/DELETE + hdr JSONB, -- raw header storage + meta JSONB, -- event_type, visibility, etc. + payload JSONB, + agent_pubkey TEXT, + lamport BIGINT, + ts TIMESTAMPTZ, + hyperlog_ref TEXT, -- pointer to immutable hyperlog entry + signature TEXT +); + +CREATE INDEX idx_messages_convo ON messages(convo_id); +CREATE INDEX idx_messages_parent ON messages(parent_id); +CREATE INDEX idx_messages_uri ON messages(uri); +CREATE INDEX idx_messages_ts ON messages(ts); +``` + +Hyperlog append stores the same JSON in append-only feed, and `hyperlog_ref` links to it. You can reconstruct message history from hyperlog or DB. + +--- + +## 4) Reply flow examples & pseudo-code + +### 4.1 β€” Simple reply (agent client behavior) + +**Action:** Developer receives a blocker message, replies with an ACK and follow-up. + +Pseudocode: + +```python +# local lamport counter +lamport = load_local_lamport() + +def send_reply(parent_msg, uri, payload, op="RESP"): + global lamport + lamport = max(lamport, parent_msg.get('lamport', 0)) + 1 + msg = { + "hdr": { + "op": op, + "uri": uri, + "msg_id": str(uuid4()), + "parent_id": parent_msg['msg_id'], + "convo_id": parent_msg['convo_id'], + "ts": utc_now(), + "lamport": lamport, + "agent_pubkey": my_pubkey_b64 + }, + "meta": { "event_type": "info" }, + "payload": payload + } + msg['sig'] = sign_msg(msg) + pubsub_publish(topic_for(parent_msg['meta']['project'], parent_msg['meta']['task']), msg) + append_hyperlog(msg) # local append for history & replay +``` + +**When receiving messages**, client must: + +* Verify signature. +* Store message in DB and hyperlog. +* If `parent_id` missing (child before parent), put into `pending_children[parent_id]`. +* If parent later arrives, attach and bubble up. + +### 4.2 β€” SLURP reaction to reply messages + +SLURP subscribes to project topic. + +On receipt: + +1. Verify signature & visibility. +2. Persist message (messages table) + hyperlog append. +3. If `op` == `POST` and `event_type` == `blocker`: + + * Evaluate project `event_type_definitions` -> action. + * If action == `quarantine`, mark affected blobs & create `slurp_decision` entry & hyperlog record. + * Trigger WHOOSH orchestration if remediation required. +4. If `op` == `RESP` or `ACK`: + + * Update `messages` table state for the referenced msg\_id (e.g., mark acknowledged). + * Optionally re-evaluate whether the blocker is resolved (if `RESP` payload includes `resolution=true`). +5. Update convo summary & TGN embedding. + +Pseudo: + +```python +def handle_msg(msg): + verify(msg) + insert_messages_table(msg) + hyperlog_append(msg) + if msg.hdr.op == "POST" and msg.meta.event_type == "blocker": + action = lookup_action(msg.meta.project, "blocker", role=... ) + if action == "quarantine": + affected = find_blobs_under_path(msg.payload.path) + quarantine_blobs(affected, reason="blocker") + log_slurp_decision(msg, affected) + whoosh_trigger_remediation(affected) + elif msg.hdr.op in ("RESP", "ACK"): + update_message_ack(msg.parent_id, msg) + maybe_resolve_blocker(msg.parent_id) + # update convo metadata + update_convo(msg.convo_id, last_activity=msg.hdr.ts) + tgn_ingest(msg) # add temporal edge for TGN +``` + +### 4.3 β€” Thread coherence & TGN usage + +* **TGN ingestion**: each message creates edges (agent -> message: authored\_by; message -> path: affects\_path; message -> message: reply\_to). Timestamps are the message ts. +* **TGN tasks**: + + * Detect threads drifting (e.g., replies reference different resources over time). + * Cluster replies that are semantically related via embeddings. + * Suggest merge candidates or escalate to human if branches diverge (e.g., some replies recommend conflicting actions). +* **SLURP uses TGN outputs** to: + + * Re-rank which threads to surface to humans (e.g., high-drift critical threads). + * Suggest thread summaries. + +--- + +## 5) Conflict resolution & decision authority + +Design choices: + +* **Deterministic policy**: for conflicting decisions (e.g., two approvals), choose the one with higher priority, or later lamport if priority equal. Log the override. +* **Human override**: if conflict cannot be safely auto-resolved, SLURP sets `state=escalated` and notifies owners. +* **Audit trail**: every override, conflict, and merge is appended to hyperlog with reason and signatures. + +--- + +## 6) Scaling, pruning, and GC + +* **Retention policy**: projects define how long hyperlog feeds are kept locally. Use content-addressed storage (CAR files or chunk stores) for older archives. +* **Pruning**: nodes may garbage-collect message bodies older than X days but must preserve minimal hyperlog index (msg\_id, hyperlog\_ref) to reconstruct from archive peers. +* **Archival nodes**: designate some nodes to keep full immutable history (for compliance), others keep working window. +* **Snapshotting**: periodically SLURP writes convo snapshots (summary + representative blobs) to reduce replay cost. + +--- + +## 7) Privacy & security considerations + +* **Visibility metadata** controls who should receive message payloads. Pubsub announces may include only metadata and blob hashes; payloads are fetched only by authorized peers. +* **End-to-end encryption** of payloads: use per-project symmetric keys encrypted for authorized roles, or use public-key envelopes (e.g., encrypt to set of role public keys). Keep metadata in plaintext (or signed) for routing. +* **Signature & key rotation**: support key revocation events; SLURP verifies signature chain and consults agent registry. +* **Redaction**: for high-sensitivity payloads, only send redacted previews on pubsub announcements; full content via direct encrypted stream after ACL check. + +--- + +## 8) Replay & determinism + +* Because everything is appended with signatures, you can **replay** a convo deterministically by replaying hyperlog entries in order. When replaying: + + * Recompute lamport counters in order. + * Restore convo states, Slurp decisions, TGN ingestions. +* For experiments, you can replay with alternative SLURP policies and compare outcomes (A/B). + +--- + +## 9) Example end-to-end scenario + +**1. Alice posts blocker** + +* Alice sends POST envelope; publishes to pubsub. SLURP persists, appends hyperlog, quarantines blobs, triggers WHOOSH. + +**2. Bob (frontend) receives announcement** + +* Bob’s local shim fetches announce, gets header. Since Bob has no access to DB credentials, SLURP's visibility suppressed full content; Bob receives annotated blocker with action instructions. + +**3. DevOps replies with remediation update** + +* DevOps posts RESP with `parent_id = alice.msg_id` and payload `{ "resolution": true, "details": "Keys rotated" }`. SLURP marks blocker resolved, appends hyperlog, notifies participants. + +**4. SLURP updates convo summary & TGN** + +* SLURP runs an incremental summarizer, stores summary as blob, and TGN updates embeddings to note a resolved blocker and the participants. + +--- + +## Final notes & next steps + +I’ve given you: + +* Formal grammar +* Compact libp2p JSON envelope and signing rules +* A full threading & reply model with DB schema +* Pseudocode and operational rules for SLURP & WHOOSH processing +* Security, replay, GC, and TGN integration points + +If you want I can: + +* **Generate a concrete OpenAPI spec** for the HTTP shim endpoints that accept these envelopes (for BZZZ CLI to talk to shim). +* **Write a minimal Go or Python shim prototype** that runs libp2p pubsub + HTTP endpoints and implements the message handling (including verifying signatures and appending to a SQLite hyperlog). +* **Produce a client library** (Python) that handles lamport counters, signing, parent tracking, and pending-child attachment logic. + + +Great β€” I’ll give you two deliverables, in order: + +1. **OpenAPI 3.0 spec** for the local HTTP shim (endpoints `/post`, `/get`, `/announce`, `/sub`) so clients (CLI, SDKs) can autogen clients and understand the envelope. +2. **Go prototype** (runnable) for the shim that: + + * runs a libp2p node (pubsub) + * exposes the HTTP endpoints + * verifies Ed25519 signatures + * appends messages to a simple SQLite-based hyperlog + * publishes/receives pubsub announces + * is intentionally minimal so you can extend it quickly (LightRAG/TGN/SLURP hooks are left as TODOs) + +I’ll keep the Go code compact but complete enough to run locally on a LAN. After the code I’ll include **how to build & run**, and suggestions for testing. + +--- + +## Part 1 β€” OpenAPI Spec (bzzz-shim-openapi.yaml) + +Save this YAML as `bzzz-shim-openapi.yaml`. It describes the main shim endpoints and the canonical message envelope. + +```yaml +openapi: 3.0.3 +info: + title: BZZZ HTTP Shim API + version: 1.0.0 + description: > + Local HTTP shim for bzzz:// protocol. Maps HTTP requests to libp2p pubsub + and local hyperlog persistence. Designed for prototype/demo use. + +servers: + - url: http://localhost:19000 + description: Local shim instance + +paths: + /post: + post: + summary: Post a BZZZ envelope (create event / context post) + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/Envelope' + responses: + '201': + description: Event accepted and appended to hyperlog + content: + application/json: + schema: + $ref: '#/components/schemas/PostResponse' + '400': + description: Bad request (validation / signature failure) + /get: + post: + summary: Request retrieval / composed context for a URI + description: | + Use POST body so the retrieval can contain hints (use_lightrag, depth, etc) + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/GetRequest' + responses: + '200': + description: Retrieved context packet + content: + application/json: + schema: + $ref: '#/components/schemas/GetResponse' + '404': + description: Resource not found + /announce: + post: + summary: Publish a lightweight announcement on pubsub + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/AnnounceRequest' + responses: + '200': + description: Announcement published + content: + application/json: + schema: + $ref: '#/components/schemas/AnnounceResponse' + /sub: + post: + summary: Subscribe this shim to a project:task topic (server will deliver events via websocket or local handler) + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/SubRequest' + responses: + '200': + description: Subscription successful + content: + application/json: + schema: + type: object + properties: + status: + type: string + +components: + schemas: + Envelope: + type: object + required: ["hdr","meta","payload","sig"] + properties: + hdr: + type: object + required: ["op","uri","msg_id","convo_id","ts","agent_pubkey"] + properties: + op: + type: string + description: GET|POST|PUT|DELETE|ACK|RESP + uri: + type: string + description: canonical bzzz:// URI + msg_id: + type: string + description: UUID v4 + parent_id: + type: string + nullable: true + convo_id: + type: string + ts: + type: string + format: date-time + lamport: + type: integer + ttl: + type: integer + priority: + type: string + agent_pubkey: + type: string + description: base64-encoded public key + meta: + type: object + properties: + event_type: + type: string + description: e.g. blocker|warning|info|decision + visibility: + type: object + properties: + roles: + type: array + items: + type: string + channels: + type: array + items: + type: string + payload: + type: object + description: free-form content (title/body/attachments/hints) + sig: + type: string + description: base64 signature over canonicalized hdr+meta+payload + PostResponse: + type: object + properties: + status: + type: string + event_id: + type: string + hyperlog_ref: + type: string + GetRequest: + type: object + required: ["uri"] + properties: + uri: + type: string + hints: + type: object + description: e.g. { "use_lightrag": true, "depth": 2 } + GetResponse: + type: object + properties: + context: + type: string + metadata: + type: object + properties: + source_blob_ids: + type: array + items: { type: integer } + lightrag_doc_ids: + type: array + items: { type: string } + events_considered: + type: array + items: { type: integer } + drift_score: + type: number + AnnounceRequest: + type: object + required: ["topic","summary"] + properties: + topic: + type: string + summary: + type: object + envelope: + $ref: '#/components/schemas/Envelope' + AnnounceResponse: + type: object + properties: + status: + type: string + published_to: + type: string + SubRequest: + type: object + required: ["topic"] + properties: + topic: + type: string +``` + +--- + +## Part 2 β€” Go prototype shim + +Below is a compact but runnable Go prototype. It uses: + +* **go-libp2p** for peer/pubsub (basic usage) +* **mattn/go-sqlite3** for SQLite hyperlog +* **golang.org/x/crypto/ed25519** for signature verification +* **gorilla/mux** for HTTP routing (simple) +* Note: go-libp2p and pubsub code is slightly simplified for clarity β€” this is a working starting point but you should adapt and harden for production. + +Create a directory, e.g. `bzzz-shim`, and put these files in it. + +### go.mod + +```go +module github.com/yourorg/bzzz-shim + +go 1.20 + +require ( + github.com/gorilla/mux v1.8.0 + github.com/libp2p/go-libp2p v0.25.1 + github.com/libp2p/go-libp2p-core v0.8.0 + github.com/libp2p/go-libp2p-pubsub v0.15.0 + github.com/mattn/go-sqlite3 v1.14.20 + golang.org/x/crypto v0.12.0 +) +``` + +> If dependency versions are strict for you, lock/adjust them. `go get` will fetch what it needs. + +--- + +### main.go + +```go +package main + +import ( + "context" + "crypto/ed25519" + "database/sql" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "log" + "net/http" + "os" + "time" + + "github.com/gorilla/mux" + libp2p "github.com/libp2p/go-libp2p" + pubsub "github.com/libp2p/go-libp2p-pubsub" + "github.com/mattn/go-sqlite3" + _ "github.com/mattn/go-sqlite3" +) + +const ( + DefaultPort = 19000 + PubsubTopicPrefix = "bzzz.project." +) + +type Envelope struct { + Hdr map[string]interface{} `json:"hdr"` + Meta map[string]interface{} `json:"meta"` + Payload map[string]interface{} `json:"payload"` + Sig string `json:"sig"` +} + +type PostResponse struct { + Status string `json:"status"` + EventID string `json:"event_id"` + HyperlogRef string `json:"hyperlog_ref"` +} + +var ( + db *sql.DB + ps *pubsub.PubSub + hostCtx context.Context + topicCache = map[string]*pubsub.Topic{} +) + +// initDB opens a sqlite DB and creates messages table if not exists +func initDB(path string) error { + var err error + db, err = sql.Open("sqlite3", path) + if err != nil { + return err + } + create := ` + CREATE TABLE IF NOT EXISTS messages ( + msg_id TEXT PRIMARY KEY, + convo_id TEXT, + parent_id TEXT, + uri TEXT, + op TEXT, + hdr TEXT, + meta TEXT, + payload TEXT, + agent_pubkey TEXT, + lamport INTEGER, + ts TEXT, + hyperlog_ref TEXT, + signature TEXT + ); + ` + _, err = db.Exec(create) + return err +} + +func appendHyperlog(msg *Envelope) (string, error) { + // hyperlog_ref: simple feed:rowid + hdrB, _ := json.Marshal(msg.Hdr) + metaB, _ := json.Marshal(msg.Meta) + payloadB, _ := json.Marshal(msg.Payload) + msgID, _ := msg.Hdr["msg_id"].(string) + convo, _ := msg.Hdr["convo_id"].(string) + parent, _ := msg.Hdr["parent_id"].(string) + uri, _ := msg.Hdr["uri"].(string) + op, _ := msg.Hdr["op"].(string) + lam, _ := msg.Hdr["lamport"].(float64) + ts, _ := msg.Hdr["ts"].(string) + pubkey, _ := msg.Hdr["agent_pubkey"].(string) + + stmt, err := db.Prepare(`INSERT INTO messages ( + msg_id, convo_id, parent_id, uri, op, hdr, meta, payload, agent_pubkey, lamport, ts, hyperlog_ref, signature + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);`) + if err != nil { + return "", err + } + res, err := stmt.Exec(msgID, convo, parent, uri, op, string(hdrB), string(metaB), string(payloadB), pubkey, int64(lam), ts, "", msg.Sig) + if err != nil { + return "", err + } + rowid, err := res.LastInsertId() + if err != nil { + return "", err + } + hyperRef := fmt.Sprintf("feed:%d", rowid) + _, err = db.Exec("UPDATE messages SET hyperlog_ref = ? WHERE rowid = ?", hyperRef, rowid) + if err != nil { + return "", err + } + return hyperRef, nil +} + +func verifyEnvelope(msg *Envelope) error { + // check msg.Hdr.agent_pubkey and msg.Sig + pubB64, ok := msg.Hdr["agent_pubkey"].(string) + if !ok { + return fmt.Errorf("missing agent_pubkey") + } + sigB64 := msg.Sig + pub, err := base64.StdEncoding.DecodeString(pubB64) + if err != nil { + return fmt.Errorf("bad pubkey base64: %v", err) + } + sig, err := base64.StdEncoding.DecodeString(sigB64) + if err != nil { + return fmt.Errorf("bad sig base64: %v", err) + } + // canonicalize: simple approach: marshal hdr+meta+payload in fixed order + // For prototype, we use json.Marshal of map order (non-deterministic) β€” in prod use JCS or RFC8785 canonical JSON. + toSign := map[string]interface{}{ + "hdr": msg.Hdr, + "meta": msg.Meta, + "payload": msg.Payload, + } + bs, err := json.Marshal(toSign) + if err != nil { + return err + } + if len(pub) != ed25519.PublicKeySize { + return fmt.Errorf("invalid pubkey length") + } + if !ed25519.Verify(ed25519.PublicKey(pub), bs, sig) { + return fmt.Errorf("signature verification failed") + } + return nil +} + +// publishAnnouncement publishes a small announcement envelope on project topic +func publishAnnouncement(topicName string, envelope *Envelope) error { + t := topicCache[topicName] + if t == nil { + var err error + t, err = ps.Join(topicName) + if err != nil { + return err + } + topicCache[topicName] = t + } + // short message β€” just send minimal hdr+meta + env := map[string]interface{}{ + "hdr": envelope.Hdr, + "meta": envelope.Meta, + } + b, _ := json.Marshal(env) + return t.Publish(hostCtx, b) +} + +func handlePost(w http.ResponseWriter, r *http.Request) { + var env Envelope + body, err := io.ReadAll(r.Body) + if err != nil { + http.Error(w, "bad body", 400) + return + } + if err := json.Unmarshal(body, &env); err != nil { + http.Error(w, "invalid json", 400) + return + } + // basic verification + if err := verifyEnvelope(&env); err != nil { + http.Error(w, fmt.Sprintf("signature failed: %v", err), 400) + return + } + // append hyperlog + hyperRef, err := appendHyperlog(&env) + if err != nil { + log.Printf("appendHyperlog error: %v", err) + http.Error(w, "internal", 500) + return + } + // publish announcement if meta.project present + proj, _ := env.Meta["project"].(string) + task, _ := env.Meta["task"].(string) + if proj != "" && task != "" { + topicName := fmt.Sprintf("%s%s.task.%s", PubsubTopicPrefix, proj, task) + _ = publishAnnouncement(topicName, &env) + } + resp := PostResponse{ + Status: "accepted", + EventID: env.Hdr["msg_id"].(string), + HyperlogRef: hyperRef, + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(201) + _ = json.NewEncoder(w).Encode(resp) +} + +type GetRequest struct { + URI string `json:"uri"` + Hints map[string]interface{} `json:"hints"` +} + +type GetResponse struct { + Context string `json:"context"` + Metadata map[string]interface{} `json:"metadata"` +} + +func handleGet(w http.ResponseWriter, r *http.Request) { + var req GetRequest + b, _ := io.ReadAll(r.Body) + if err := json.Unmarshal(b, &req); err != nil { + http.Error(w, "invalid json", 400) + return + } + // Simple prototype: look up messages by uri and compose payload bodies + rows, err := db.Query("SELECT payload FROM messages WHERE uri = ? ORDER BY ts DESC LIMIT 20", req.URI) + if err != nil { + http.Error(w, "db error", 500) + return + } + defer rows.Close() + var parts []string + for rows.Next() { + var payloadStr string + if err := rows.Scan(&payloadStr); err == nil { + parts = append(parts, payloadStr) + } + } + resp := GetResponse{ + Context: fmt.Sprintf("Composed %d items", len(parts)), + Metadata: map[string]interface{}{"count": len(parts)}, + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(resp) +} + +type AnnounceRequest struct { + Topic string `json:"topic"` + Summary map[string]interface{} `json:"summary"` +} + +type AnnounceResponse struct { + Status string `json:"status"` + PublishedTo string `json:"published_to"` +} + +func handleAnnounce(w http.ResponseWriter, r *http.Request) { + var ar AnnounceRequest + b, _ := io.ReadAll(r.Body) + if err := json.Unmarshal(b, &ar); err != nil { + http.Error(w, "invalid announce", 400) + return + } + // publish summary on topic + t, err := ps.Join(ar.Topic) + if err != nil { + http.Error(w, "pubsub join failed", 500) + return + } + msg, _ := json.Marshal(ar.Summary) + if err := t.Publish(hostCtx, msg); err != nil { + http.Error(w, "publish failed", 500) + return + } + _ = ar + resp := AnnounceResponse{Status: "ok", PublishedTo: ar.Topic} + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(resp) +} + +func startHTTP(port int) { + r := mux.NewRouter() + r.HandleFunc("/post", handlePost).Methods("POST") + r.HandleFunc("/get", handleGet).Methods("POST") + r.HandleFunc("/announce", handleAnnounce).Methods("POST") + addr := fmt.Sprintf(":%d", port) + log.Printf("HTTP shim listening on %s", addr) + http.ListenAndServe(addr, r) +} + +func main() { + // init db + dbPath := os.Getenv("BZZZ_DB_PATH") + if dbPath == "" { + dbPath = "./bzzz_hyperlog.db" + } + if err := initDB(dbPath); err != nil { + log.Fatalf("db init failed: %v", err) + } + + // create libp2p host + pubsub + ctx := context.Background() + hostCtx = ctx + h, err := libp2p.New() + if err != nil { + log.Fatalf("libp2p host failed: %v", err) + } + ps, err = pubsub.NewGossipSub(ctx, h) + if err != nil { + log.Fatalf("pubsub init failed: %v", err) + } + // For prototype, join a default "global" topic to receive messages if needed + go func() { + t, _ := ps.Join("bzzz.global") + sub, _ := t.Subscribe() + for { + m, err := sub.Next(ctx) + if err != nil { + log.Printf("pubsub read err: %v", err) + return + } + log.Printf("Received pubsub msg: %s", string(m.Data)) + } + }() + + // start HTTP shim + go startHTTP(DefaultPort) + + // block forever + select {} +} +``` + +--- + +## How to build & run the prototype + +1. **Install Go** (1.20+). +2. **Create project folder** and put `go.mod` and `main.go` there. +3. **Get modules**: + +```bash +go mod tidy +``` + +4. **Build**: + +```bash +go build -o bzzz-shim +``` + +5. **Run**: + +```bash +./bzzz-shim +``` + +This starts an HTTP shim on `:19000` and a libp2p node with basic pubsub. + +--- + +## Quick test with `curl` + +**POST a minimal envelope** (you'll need a real Ed25519 signature for the proto's verify step β€” for quick test, either bypass verify by commenting out `verifyEnvelope` call in code, or sign payloads properly): + +```bash +curl -X POST http://localhost:19000/post -H "Content-Type: application/json" \ + -d '{ + "hdr": {"op":"POST","uri":"bzzz://any:dev@chorus:task/path","msg_id":"1111-2222","convo_id":"1111-2222","ts":"2025-08-06T15:00:00Z","agent_pubkey":""}, + "meta": {"event_type":"blocker","project":"chorus","task":"task"}, + "payload": {"title":"test","body":"please rotate keys"}, + "sig":"" + }' +``` + +If signature verification is disabled for a dev run, the shim will accept and append the hyperlog and publish to the pubsub topic (`bzzz.project.chorus.task.task`). + +--- + +## Notes, caveats, and next steps + +* **Canonical signing**: The prototype uses `json.Marshal` for the canonical payload β€” this is *not* deterministic. In production, use canonical JSON (RFC8785 / JCS) or COSE/CBOR for signing. Libraries exist for canonical JSON signing in Go. +* **Libp2p tuning**: `go-libp2p` requires configuration for NAT traversal, bootstrap peers, and peerstore persistence; the code uses a default host for local testing. +* **Pubsub topics**: The code currently publishes minimal announcements. For big payloads you should publish `blob_ref` and stream payloads over direct streams. +* **Hyperlog**: For a real immutable hyperlog use Hypercore (JS) or similar append-only stores. For this prototype, SQLite provides a simple append table with `hyperlog_ref` pointer. +* **Security**: Use proper key management and rotate keys. Consider E2E encryption for private payloads. +* **Extensibility**: Hook SLURP logic into `handlePost` (after appendHyperlog) to trigger curation decisions and WHOOSH orchestration. + +--- + +### Want me to follow up with: + +* A **Go implementation that actually performs Ed25519 signing for test clients** (generate keys + sign payloads) and a small test script to POST a valid envelope? +* **Dockerfile + docker-compose** to run multiple shim instances in a local network and see pubsub announces propagate? +* **Add hyperlog replay & a small SLURP plugin** to demo quarantine-on-blocker behavior? + + + +Awesome β€” love your energy. I’ll give you three practical artifacts and clear run steps so you can try this end-to-end quickly: + +1. **Go test client** that generates an Ed25519 keypair, canonicalizes an envelope, signs it, and POSTs to the shim. +2. **Docker Compose** (bootstrap + 2 worker shim instances) with a small helper to extract the bootstrap multiaddr and start the cluster. +3. **Hyperlog replay + tiny SLURP plugin** added to the shim prototype: a `/replay` HTTP endpoint that replays hyperlog messages in order and demonstrates *quarantine-on-blocker* behavior (marks matching `context_blobs` as quarantined and logs SLURP decisions). + +I’ll include code + instructions for building and running. I keep things intentionally simple and prototype-ready β€” production hardening (key management, canonical JSON libraries, libp2p bootstrap resiliency, E2E encryption, etc.) is called out as next steps. + +--- + +## A β€” Go test client (sign + POST) + +**File:** `client/sign_and_post.go` + +This client: + +* Generates an Ed25519 keypair and writes the private key locally (so you can reuse it). +* Builds a deterministic (stable) canonical payload for signing by producing a JSON object where keys are marshaled in stable order. +* Signs the canonical bytes and posts envelope to `http://localhost:19000/post`. + +> Note: For canonical JSON we use a simple deterministic helper that sorts map keys recursively. This is fine for prototype. For production use RFC8785 / JCS canonicalization. + +```go +package main + +import ( + "crypto/ed25519" + "crypto/rand" + "encoding/base64" + "encoding/json" + "flag" + "fmt" + "io" + "io/ioutil" + "net/http" + "os" + "sort" + "time" +) + +// stableMarshal recursively sorts map keys and returns deterministic JSON +func stableMarshal(v interface{}) ([]byte, error) { + switch vv := v.(type) { + case map[string]interface{}: + // sort keys + keys := make([]string, 0, len(vv)) + for k := range vv { + keys = append(keys, k) + } + sort.Strings(keys) + out := make(map[string]interface{}, len(vv)) + for _, k := range keys { + b, _ := stableMarshal(vv[k]) + var tmp interface{} + json.Unmarshal(b, &tmp) + out[k] = tmp + } + return json.Marshal(out) + case []interface{}: + arr := make([]interface{}, len(vv)) + for i, e := range vv { + b, _ := stableMarshal(e) + var tmp interface{} + json.Unmarshal(b, &tmp) + arr[i] = tmp + } + return json.Marshal(arr) + default: + return json.Marshal(v) + } +} + +func main() { + var outKey = flag.String("key", "./client.key", "path to private key (will be created if not exist)") + var shim = flag.String("shim", "http://localhost:19000", "shim base URL") + flag.Parse() + + var priv ed25519.PrivateKey + var pub ed25519.PublicKey + + if _, err := os.Stat(*outKey); os.IsNotExist(err) { + // generate + pubk, privk, _ := ed25519.GenerateKey(rand.Reader) + priv = privk + pub = pubk + // write key file + if err := ioutil.WriteFile(*outKey, priv, 0600); err != nil { + panic(err) + } + fmt.Println("Generated key at", *outKey) + } else { + // load + data, err := ioutil.ReadFile(*outKey) + if err != nil { + panic(err) + } + priv = ed25519.PrivateKey(data) + pub = priv.Public().(ed25519.PublicKey) + } + pubB64 := base64.StdEncoding.EncodeToString(pub) + + msgID := fmt.Sprintf("msg-%d", time.Now().UnixNano()) + convoID := msgID + ts := time.Now().UTC().Format(time.RFC3339) + + hdr := map[string]interface{}{ + "op": "POST", + "uri": "bzzz://alice:dev@chorus:website-redesign/src/modules/db/connection", + "msg_id": msgID, + "parent_id": nil, + "convo_id": convoID, + "ts": ts, + "lamport": 1, + "ttl": 3600, + "priority": "urgent", + "agent_pubkey": pubB64, + } + meta := map[string]interface{}{ + "event_type": "blocker", + "project": "chorus", + "task": "website-redesign", + "visibility": map[string]interface{}{ + "roles": []interface{}{"devops", "backend"}, + "channels": []interface{}{"dev"}, + "exclude": []interface{}{}, + }, + } + payload := map[string]interface{}{ + "title": "Hardcoded DB creds found", + "body": "Automated scan discovered plaintext creds in src/modules/db/connection. Rotate keys.", + "attachments": []interface{}{}, + "hints": map[string]interface{}{"use_lightrag": false}, + } + + // Build canonical bytes + toSign := map[string]interface{}{ + "hdr": hdr, + "meta": meta, + "payload": payload, + } + can, err := stableMarshal(toSign) + if err != nil { + panic(err) + } + sig := ed25519.Sign(priv, can) + sigB64 := base64.StdEncoding.EncodeToString(sig) + + envelope := map[string]interface{}{ + "hdr": hdr, + "meta": meta, + "payload": payload, + "sig": sigB64, + } + envBytes, _ := json.MarshalIndent(envelope, "", " ") + fmt.Println("Envelope:", string(envBytes)) + + resp, err := http.Post(*shim+"/post", "application/json", io.NopCloser((os.Stdin))) + // Note: the above Post will fail because we didn't pass body from stdin. So use Do with body: + client := &http.Client{ Timeout: 10 * time.Second } + req, _ := http.NewRequest("POST", *shim+"/post", io.NopCloser((bytesReader(envBytes)))) + req.Header.Set("Content-Type", "application/json") + r2, err := client.Do(req) + if err != nil { + fmt.Println("POST error:", err) + os.Exit(1) + } + defer r2.Body.Close() + respBody, _ := ioutil.ReadAll(r2.Body) + fmt.Println("Response:", string(respBody)) +} + +// helper to create io.Reader from bytes without importing bytes package elsewhere +func bytesReader(b []byte) io.Reader { + return &reader{b:b, i:0} +} +type reader struct { b []byte; i int } +func (r *reader) Read(p []byte) (int, error) { + if r.i >= len(r.b) { return 0, io.EOF } + n := copy(p, r.b[r.i:]) + r.i += n + return n, nil +} +``` + +**Notes:** + +* Save as `client/sign_and_post.go`. `go run` it. +* This client writes a private key to `./client.key`. Re-run will re-use it. +* **Important**: The shim in the earlier prototype validated signatures by marshaling hdr+meta+payload with `json.Marshal` β€” this client uses a deterministic `stableMarshal` to match that (so signature verifies). The shim code should be updated to use the same stable canonicalization (I’ll show how below). + +--- + +## B β€” Docker Compose (bootstrap + 2 workers) + +Because libp2p requires knowing a bootstrap peer to join a swarm in Docker, this `docker-compose` runs **one bootstrap shim** with a fixed port and writes its multiaddr to a file. Then two worker shims are started with an environment var `BOOTSTRAP_PEER` (provided by a small startup helper script) to connect to bootstrap. There’s a tiny orchestration script to capture bootstrap peer info and export it when starting the workers. + +**Files:** + +### `docker-compose.yml` + +```yaml +version: "3.8" +services: + bootstrap: + build: . + container_name: bzzz-shim-bootstrap + environment: + - ROLE=bootstrap + - LISTEN_PORT=19001 + - DB_PATH=/data/bootstrap.db + volumes: + - ./data:/data + ports: + - "19001:19001" + worker1: + build: . + container_name: bzzz-shim-worker1 + environment: + - ROLE=worker + - LISTEN_PORT=19002 + - DB_PATH=/data/worker1.db + - BOOTSTRAP_PEER=${BOOTSTRAP_PEER} + depends_on: + - bootstrap + worker2: + build: . + container_name: bzzz-shim-worker2 + environment: + - ROLE=worker + - LISTEN_PORT=19003 + - DB_PATH=/data/worker2.db + - BOOTSTRAP_PEER=${BOOTSTRAP_PEER} + depends_on: + - bootstrap +``` + +### `bootstrap-peer-helper.sh` β€” workflow to start and capture multiaddr + +```bash +#!/usr/bin/env bash +set -e +echo "Bringing up bootstrap..." +docker-compose up -d bootstrap +sleep 3 + +# grab multiaddr from bootstrap container logs (we'll print it in shim logs) +echo "Waiting for bootstrap to emit multiaddr..." +BOOTSTRAP_ADDR="" +for i in {1..15}; do + BOOTSTRAP_ADDR=$(docker logs bzzz-shim-bootstrap 2>&1 | grep "Libp2p host listening at" -m1 | awk -F' ' '{print $NF}') + if [ -n "$BOOTSTRAP_ADDR" ]; then break; fi + sleep 1 +done + +if [ -z "$BOOTSTRAP_ADDR" ]; then + echo "Could not find bootstrap multiaddr in logs. Check bootstrap logs." + docker logs bzzz-shim-bootstrap + exit 1 +fi + +echo "Bootstrap multiaddr: $BOOTSTRAP_ADDR" +export BOOTSTRAP_PEER="$BOOTSTRAP_ADDR" +echo "Starting workers with BOOTSTRAP_PEER=$BOOTSTRAP_PEER" +BOOTSTRAP_PEER="$BOOTSTRAP_PEER" docker-compose up -d worker1 worker2 +echo "All services started." +``` + +**Notes & limitations** + +* The shim must print its listen multiaddr line like: `Libp2p host listening at /ip4/172.18.0.2/tcp/4001/p2p/` to logs; the helper scrapes logs for that string. +* Depending on Docker network timings, you might need to adjust sleeps. +* This setup is for dev/demo; production should use persistent peerstore and well-known bootstrap nodes. + +--- + +## C β€” Hyperlog replay & SLURP plugin (quarantine-on-blocker) + +Additions to the shim prototype (`main.go`) to enable: + +1. `context_blobs` table (simple prototype) β€” stores blobs and a `quarantine` boolean. +2. `/replay` endpoint β€” iterates all messages in `messages` table ordered by `rowid` (chronological append), and for each `POST` with `event_type=blocker` it will mark `context_blobs` under the event `path` as quarantined and append a `slurp_decision` message to the hyperlog. + +### SQL additions (run once or included in initDB) + +```sql +CREATE TABLE IF NOT EXISTS context_blobs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + path TEXT, + role TEXT, + channel TEXT, + content TEXT, + priority INTEGER DEFAULT 5, + created_at TEXT, + quarantine INTEGER DEFAULT 0 +); + +CREATE TABLE IF NOT EXISTS slurp_decisions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + event_msg_id TEXT, + decision TEXT, + affected_blobs TEXT, -- JSON array + reason TEXT, + ts TEXT +); +``` + +Add these in the existing `initDB` function or run a migration script. + +### `/replay` endpoint handler (pseudo-implementation for main.go) + +Add function: + +```go +func handleReplay(w http.ResponseWriter, r *http.Request) { + // iterate messages in order of rowid + rows, err := db.Query("SELECT rowid, msg_id, hdr, meta, payload FROM messages ORDER BY rowid ASC") + if err != nil { + http.Error(w, "db error", 500); return + } + defer rows.Close() + var decisions []map[string]interface{} + for rows.Next() { + var rowid int64 + var msgID, hdrStr, metaStr, payloadStr string + if err := rows.Scan(&rowid, &msgID, &hdrStr, &metaStr, &payloadStr); err != nil { continue } + // parse meta to check event_type and path + var meta, payload map[string]interface{} + json.Unmarshal([]byte(metaStr), &meta) + json.Unmarshal([]byte(payloadStr), &payload) + et, _ := meta["event_type"].(string) + if et == "blocker" { + // extract path from payload or meta + path := "" + if p, ok := payload["path"].(string); ok { path = p } + if path == "" { + if p2, ok := meta["path"].(string); ok { path = p2 } + } + if path == "" { continue } + // quarantine any context_blobs where path LIKE path || "%" + res, err := db.Exec("UPDATE context_blobs SET quarantine = 1 WHERE path LIKE ? || '%'", path) + if err == nil { + cnt, _ := res.RowsAffected() + reason := fmt.Sprintf("quarantine due to blocker event %s", msgID) + // store decision + affected := []int64{} + // find ids of affected blobs + rows2, _ := db.Query("SELECT id FROM context_blobs WHERE path LIKE ? || '%'", path) + for rows2.Next() { + var id int64 + rows2.Scan(&id) + affected = append(affected, id) + } + rows2.Close() + affJSON, _ := json.Marshal(affected) + ts := time.Now().UTC().Format(time.RFC3339) + db.Exec("INSERT INTO slurp_decisions (event_msg_id, decision, affected_blobs, reason, ts) VALUES (?, ?, ?, ?, ?)", + msgID, "quarantine", string(affJSON), reason, ts) + decisions = append(decisions, map[string]interface{}{"event": msgID, "affected": affected}) + } + } + } + resp := map[string]interface{}{"status":"replayed","decisions":decisions} + w.Header().Set("Content-Type","application/json") + json.NewEncoder(w).Encode(resp) +} +``` + +Register the handler: + +```go +r.HandleFunc("/replay", handleReplay).Methods("POST") +``` + +### How demo works + +1. Start shim(s). +2. Post a few context blobs (toy entries) directly to shim DB (or via HTTP POST envelopes with `op: POST` where payload indicates it's a blob). Example blob row insert: + +```sql +INSERT INTO context_blobs (path, role, channel, content, created_at) VALUES ('/src/modules/db/connection','backend','dev','DB creds are stored here',datetime('now')); +``` + +3. Use the client to POST a blocker event that covers `/src/modules/db/connection` (as earlier client did). +4. Call `POST /replay` on the shim: + +```bash +curl -X POST http://localhost:19000/replay +``` + +5. Shim will scan messages, find the blocker, and mark matching context\_blobs as `quarantine=1`. It will insert entries into `slurp_decisions` that record what was quarantined and why. + +6. Inspect DB: + +```bash +sqlite3 bzzz_hyperlog.db "SELECT id, path, quarantine FROM context_blobs;" +sqlite3 bzzz_hyperlog.db "SELECT * FROM slurp_decisions;" +``` + +This demonstrates the SLURP plugin behavior: **on replay (or in real-time when messages are appended), SLURP can quarantine context based on high-severity events** and log decisions. + +--- + +## How to run everything β€” quickstart + +1. **Build shim** (in `bzzz-shim` directory) + +```bash +go mod tidy +go build -o bzzz-shim +``` + +2. **Start bootstrap and workers (docker method)** + +```bash +chmod +x bootstrap-peer-helper.sh +./bootstrap-peer-helper.sh +# wait for outputs; it starts bootstrap then workers +``` + +3. **Start local shim (standalone) for quick test** + +```bash +./bzzz-shim +# starts HTTP on :19000 +``` + +4. **Run client to POST signed blocker** + +```bash +go run client/sign_and_post.go --shim http://localhost:19000 +``` + +5. **Seed a context blob (sqlite direct insert for quick demo)** + +```bash +sqlite3 bzzz_hyperlog.db "INSERT INTO context_blobs (path, role, channel, content, created_at) VALUES ('/src/modules/db/connection','backend','dev','DB creds here', datetime('now'));" +``` + +6. **Replay to run SLURP quarantines** + +```bash +curl -X POST http://localhost:19000/replay +``` + +7. **Inspect DB** + +```bash +sqlite3 bzzz_hyperlog.db "SELECT id,path,quarantine FROM context_blobs;" +sqlite3 bzzz_hyperlog.db "SELECT * FROM slurp_decisions;" +``` + +--- + +## Next steps & prod hardening checklist + +If you want to move from prototype β†’ hardened POC / production, next items to tackle: + +* Use a canonical JSON library (JCS / RFC8785) for deterministic signing and verification. +* Persist libp2p peerstore and provide robust bootstrapping (bootstrap nodes, NAT traversal). +* Switch hyperlog to an append-only content-addressed store (Hypercore, IPFS CAR, etc.) for immutability guarantees. +* Implement role-based encryption for payloads (project keys + per-role key wrapping). +* Add TLS & mTLS to shim HTTP endpoints (protect local API). +* Add metrics, observability, rate-limiting, and authorization middleware for the shim. + +--- + +If you want I can: + +* **(a)** produce the exact patched `main.go` with `/replay` + the DB schema changes (complete file). +* **(b)** produce a Dockerfile for the shim image (so `docker-compose` build works). +* **(c)** generate a small demo script that automates: build shim image, start bootstrap, start workers, run client, seed blob, replay, and show results. + +Nice β€” I’ll give you everything you need to run a quick end-to-end demo: + +* **(B)** a `Dockerfile` for the shim image (builds the Go binary and runs it) +* **(C)** a `docker-compose.yml` (bootstrap + 2 workers) and a `demo.sh` script that: + + 1. builds the shim image, + 2. starts bootstrap, + 3. waits for the bootstrap libp2p multiaddr, + 4. starts workers (pointing them to the bootstrap peer), + 5. runs the local Go test client to POST a signed blocker to the bootstrap shim, + 6. seeds a test `context_blob` into the bootstrap SQLite DB, + 7. calls `/replay` on the bootstrap shim to trigger SLURP quarantine behavior, + 8. prints the `context_blobs` table to show quarantine was applied. + +Everything is intentionally simple and prototype-ready. You’ll need **Docker**, **docker-compose**, and **Go 1.20+** on the host. + +--- + +## A β€” `Dockerfile` (shim image) + +Save as `Dockerfile` in your `bzzz-shim` project folder (next to `main.go` and `go.mod`). + +```dockerfile +# Build stage +FROM golang:1.20-buster AS builder +WORKDIR /src + +# Copy sources +COPY go.mod go.sum ./ +RUN go mod download + +COPY . . +# Build the shim +RUN CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -o /out/bzzz-shim + +# Final runtime image +FROM debian:buster-slim +RUN apt-get update && apt-get install -y sqlite3 ca-certificates && rm -rf /var/lib/apt/lists/* +WORKDIR /app +# copy binary +COPY --from=builder /out/bzzz-shim /usr/local/bin/bzzz-shim + +# create data dir +RUN mkdir -p /data +VOLUME ["/data"] +EXPOSE 19000 + +ENTRYPOINT ["/usr/local/bin/bzzz-shim"] +``` + +Notes: + +* The container includes `sqlite3` so the demo script can `docker exec` into containers to run SQL commands. +* Binary is built in the builder stage; final image is minimal. + +--- + +## B β€” `docker-compose.yml` + +Save next to the `Dockerfile`. + +```yaml +version: "3.8" +services: + bootstrap: + build: . + container_name: bzzz-shim-bootstrap + environment: + - ROLE=bootstrap + - LISTEN_PORT=19000 + - DB_PATH=/data/bootstrap.db + volumes: + - ./data:/data + ports: + - "19000:19000" # shim http endpoint exposed on host:19000 + networks: + - bzzz-net + + worker1: + build: . + container_name: bzzz-shim-worker1 + environment: + - ROLE=worker + - LISTEN_PORT=19000 + - DB_PATH=/data/worker1.db + - BOOTSTRAP_PEER=${BOOTSTRAP_PEER} + depends_on: + - bootstrap + networks: + - bzzz-net + + worker2: + build: . + container_name: bzzz-shim-worker2 + environment: + - ROLE=worker + - LISTEN_PORT=19000 + - DB_PATH=/data/worker2.db + - BOOTSTRAP_PEER=${BOOTSTRAP_PEER} + depends_on: + - bootstrap + networks: + - bzzz-net + +networks: + bzzz-net: + driver: bridge +``` + +Notes: + +* `bootstrap` port 19000 is mapped to host so the local test client can POST to `http://localhost:19000/post`. +* Workers will be started by the `demo.sh` script after we scrape the bootstrap peer's multiaddr. + +--- + +## C β€” `demo.sh` β€” bootstrap + run client + replay + +Save this script in the repo root (`demo.sh`) and `chmod +x demo.sh`. + +```bash +#!/usr/bin/env bash +set -euo pipefail +# demo.sh - builds image, starts bootstrap, starts workers, posts test event, seeds blob, replays. + +ROOTDIR="$(cd "$(dirname "$0")" && pwd)" +export COMPOSE_PROJECT_NAME=bzzz_demo + +echo "1) Build shim image (docker-compose build)" +docker-compose build --no-cache + +echo "2) Start bootstrap" +docker-compose up -d bootstrap +echo "Waiting for bootstrap to initialize..." + +# wait for shim to log its libp2p multiaddr +BOOTSTRAP_ADDR="" +for i in $(seq 1 30); do + # the shim should print a line like "Libp2p host listening at /ip4/..." in container logs + BOOTSTRAP_ADDR=$(docker logs bzzz-shim-bootstrap 2>&1 | grep -Eo "/ip4/[^ ]+/tcp/[0-9]+/p2p/[A-Za-z0-9]+" | tail -n1 || true) + if [ -n "$BOOTSTRAP_ADDR" ]; then + break + fi + sleep 1 +done + +if [ -z "$BOOTSTRAP_ADDR" ]; then + echo "Could not glean bootstrap multiaddr from logs. Printing bootstrap logs for debug:" + docker logs bzzz-shim-bootstrap --tail 200 + exit 1 +fi + +echo "Bootstrap multiaddr: $BOOTSTRAP_ADDR" +export BOOTSTRAP_PEER="$BOOTSTRAP_ADDR" + +echo "3) Start workers pointing to bootstrap peer..." +# bring up workers with BOOTSTRAP_PEER present in env for compose +BOOTSTRAP_PEER="$BOOTSTRAP_PEER" docker-compose up -d worker1 worker2 + +sleep 2 +echo "Containers status:" +docker ps --filter "name=bzzz-shim-" --format "table {{.Names}}\t{{.Status}}" + +# helper: wait for HTTP shim ready on host:19000 +echo "Waiting for HTTP shim on http://localhost:19000 to respond..." +for i in $(seq 1 30); do + if curl -sS http://localhost:19000/ >/dev/null 2>&1; then + break + fi + sleep 1 +done + +echo "4) Build & run local Go test client to POST a signed blocker" +# build client sign_and_post.go (assumes client code exists at client/sign_and_post.go) +if [ -f client/sign_and_post.go ]; then + (cd client && go mod tidy) || true + (cd client && go build -o ../client_post ./sign_and_post.go) + echo "Running client to POST envelope..." + ./client_post --key ./client.key --shim http://localhost:19000 || true +else + echo "No client code found at client/sign_and_post.go β€” skipping signed post (you can run client manually)." +fi + +sleep 1 + +echo "5) Seed a sample context_blob into bootstrap DB (demo content)" +# use sqlite3 inside bootstrap container to insert a test blob +docker exec -i bzzz-shim-bootstrap sqlite3 /data/bootstrap.db <<'SQL' +INSERT INTO context_blobs (path, role, channel, content, created_at, quarantine) VALUES +('/src/modules/db/connection','backend','dev','DB creds: secret', datetime('now'), 0); +SQL + +echo "6) Trigger replay to run SLURP quarantines (POST /replay)" +curl -sS -X POST http://localhost:19000/replay | jq . + +echo "7) Show resulting context_blobs rows (id,path,quarantine)" +docker exec -i bzzz-shim-bootstrap sqlite3 /data/bootstrap.db "SELECT id, path, quarantine FROM context_blobs;" | sed -n '1,200p' + +echo "Demo complete. Inspect the containers' logs for more detail:" +echo " docker logs bzzz-shim-bootstrap --tail 200" +``` + +**Important notes on `demo.sh` usage** + +* The script expects: + + * `client/sign_and_post.go` compiled client binary (the script tries to build it). If you put the client code at `client/sign_and_post.go` it will attempt to build `./client_post`. + * The shim `main.go` prints a libp2p listen line containing a `/ip4/.../p2p/` multiaddr so the script can scrape it. If your shim prints something else, adjust the `grep` in the script. +* If the shim’s `verifyEnvelope` requires exact canonicalization to verify the client's signature, ensure the client uses the same canonicalizing routine. The client code I supplied earlier uses `stableMarshal`, and you should make your shim verify using the same canonicalization (or temporarily disable verification while testing). + +--- + +## Quick checklist / commands + +From the `bzzz-shim` repo root: + +```bash +# 1) ensure you have Docker & docker-compose & Go installed + +# 2) place Dockerfile, docker-compose.yml, demo.sh in repo root (alongside main.go & client/) +chmod +x demo.sh + +# 3) run demo +./demo.sh +``` + +After run: + +* The script will print the `context_blobs` table rows showing `quarantine=1` for blobs under the event path if the replay SLURP logic matched them. + +--- + +## Next steps / polishing suggestions + +1. **Make the shim log the multiaddr in a fixed known format** so `demo.sh` scraping is robust. e.g. `log.Printf("Libp2p host listening at %s", addrs)` (the code should already do that or be updated to do so). +2. **Make the client key pair path configurable** and store keys securely. +3. **Add health endpoints** (`/health`) to speed the shim readiness check. +4. **Swap prototype SQLite hyperlog with a proper append-only content-addressed store** when you move beyond demo. +5. **Add a small `jq` or UI summary** showing SLURP decision hyperlog entries to demonstrate auditability. + +--- + +If you want, I can now: + +* **Patch the `main.go`** in your repo to ensure it prints a deterministic "Libp2p host listening at " line (so `demo.sh` scraping works reliably). +* **Produce a Dockerfile + docker-compose variant** that maps logs/volumes with better persistence and exposes worker HTTP endpoints (so you can test worker-to-worker fetches). +* **Provide a fully working `client/sign_and_post.go`** tuned to the shim’s canonicalization (I already supplied earlier; I can inline a finalized version that matches the shim canonicalization exactly). + + +Nice β€” I patched `main.go` for you. Main changes: + +* **Prints a deterministic multiaddr line** immediately after the libp2p host is created, in the exact format `Libp2p host listening at /p2p/` so your `demo.sh` can reliably grep it. +* **Implements deterministic canonical JSON marshal** (`stableMarshal`) and uses it in `verifyEnvelope`, matching the client’s canonicalization so signatures verify during tests. +* **Adds `/replay` endpoint** (SLURP demo: quarantines `context_blobs` under blocker paths, appends `slurp_decisions`) and registers it with the HTTP router. +* Keeps everything prototype-friendly and minimal; production improvements (JCS canonicalization, persistent peerstore, better error handling) are still recommended. + +Drop this file into your `bzzz-shim` repo (replacing the existing `main.go`), then `go build` as before. + +--- + +### βœ… Patched `main.go` + +```go +package main + +import ( + "context" + "crypto/ed25519" + "database/sql" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "log" + "net/http" + "os" + "sort" + "strings" + "time" + + "github.com/gorilla/mux" + libp2p "github.com/libp2p/go-libp2p" + pubsub "github.com/libp2p/go-libp2p-pubsub" + _ "github.com/mattn/go-sqlite3" +) + +const ( + DefaultPort = 19000 + PubsubTopicPrefix = "bzzz.project." +) + +type Envelope struct { + Hdr map[string]interface{} `json:"hdr"` + Meta map[string]interface{} `json:"meta"` + Payload map[string]interface{} `json:"payload"` + Sig string `json:"sig"` +} + +type PostResponse struct { + Status string `json:"status"` + EventID string `json:"event_id"` + HyperlogRef string `json:"hyperlog_ref"` +} + +var ( + db *sql.DB + ps *pubsub.PubSub + hostCtx context.Context + topicCache = map[string]*pubsub.Topic{} +) + +// ----------------------- DB init ----------------------- + +func initDB(path string) error { + var err error + db, err = sql.Open("sqlite3", path) + if err != nil { + return err + } + createMessages := ` + CREATE TABLE IF NOT EXISTS messages ( + msg_id TEXT PRIMARY KEY, + convo_id TEXT, + parent_id TEXT, + uri TEXT, + op TEXT, + hdr TEXT, + meta TEXT, + payload TEXT, + agent_pubkey TEXT, + lamport INTEGER, + ts TEXT, + hyperlog_ref TEXT, + signature TEXT + ); + ` + createBlobs := ` + CREATE TABLE IF NOT EXISTS context_blobs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + path TEXT, + role TEXT, + channel TEXT, + content TEXT, + priority INTEGER DEFAULT 5, + created_at TEXT, + quarantine INTEGER DEFAULT 0 + ); + ` + createDecisions := ` + CREATE TABLE IF NOT EXISTS slurp_decisions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + event_msg_id TEXT, + decision TEXT, + affected_blobs TEXT, + reason TEXT, + ts TEXT + ); + ` + if _, err = db.Exec(createMessages); err != nil { + return err + } + if _, err = db.Exec(createBlobs); err != nil { + return err + } + if _, err = db.Exec(createDecisions); err != nil { + return err + } + return nil +} + +// ----------------------- Stable (deterministic) JSON marshal ----------------------- +// A simple recursive stable marshaller: sorts object keys so JSON is deterministic. +// Not a full RFC8785 implementation but sufficient for prototype parity with the client. + +func stableMarshal(v interface{}) ([]byte, error) { + switch vv := v.(type) { + case map[string]interface{}: + // sort keys + keys := make([]string, 0, len(vv)) + for k := range vv { + keys = append(keys, k) + } + sort.Strings(keys) + ordered := make([]interface{}, 0, len(keys)*2) + // build an ordered representation: ["k1", value1, "k2", value2, ...] to preserve deterministic order + m := make(map[string]interface{}, len(keys)) + for _, k := range keys { + b, err := stableMarshal(vv[k]) + if err != nil { + return nil, err + } + var tmp interface{} + if err := json.Unmarshal(b, &tmp); err != nil { + return nil, err + } + m[k] = tmp + } + return json.Marshal(m) + case []interface{}: + arr := make([]interface{}, len(vv)) + for i, e := range vv { + b, err := stableMarshal(e) + if err != nil { + return nil, err + } + var tmp interface{} + if err := json.Unmarshal(b, &tmp); err != nil { + return nil, err + } + arr[i] = tmp + } + return json.Marshal(arr) + default: + return json.Marshal(v) + } +} + +// ----------------------- Hyperlog append ----------------------- + +func appendHyperlog(msg *Envelope) (string, error) { + hdrB, _ := json.Marshal(msg.Hdr) + metaB, _ := json.Marshal(msg.Meta) + payloadB, _ := json.Marshal(msg.Payload) + msgID, _ := msg.Hdr["msg_id"].(string) + convo, _ := msg.Hdr["convo_id"].(string) + parent, _ := msg.Hdr["parent_id"].(string) + uri, _ := msg.Hdr["uri"].(string) + op, _ := msg.Hdr["op"].(string) + lamF, _ := msg.Hdr["lamport"].(float64) + lam := int64(lamF) + ts, _ := msg.Hdr["ts"].(string) + pubkey, _ := msg.Hdr["agent_pubkey"].(string) + + stmt, err := db.Prepare(`INSERT INTO messages ( + msg_id, convo_id, parent_id, uri, op, hdr, meta, payload, agent_pubkey, lamport, ts, hyperlog_ref, signature + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);`) + if err != nil { + return "", err + } + res, err := stmt.Exec(msgID, convo, parent, uri, op, string(hdrB), string(metaB), string(payloadB), pubkey, lam, ts, "", msg.Sig) + if err != nil { + return "", err + } + rowid, err := res.LastInsertId() + if err != nil { + return "", err + } + hyperRef := fmt.Sprintf("feed:%d", rowid) + _, err = db.Exec("UPDATE messages SET hyperlog_ref = ? WHERE rowid = ?", hyperRef, rowid) + if err != nil { + return "", err + } + return hyperRef, nil +} + +// ----------------------- Verification ----------------------- + +func verifyEnvelope(msg *Envelope) error { + pubB64, ok := msg.Hdr["agent_pubkey"].(string) + if !ok || pubB64 == "" { + return fmt.Errorf("missing agent_pubkey") + } + sigB64 := msg.Sig + pub, err := base64.StdEncoding.DecodeString(pubB64) + if err != nil { + return fmt.Errorf("bad pubkey base64: %v", err) + } + sig, err := base64.StdEncoding.DecodeString(sigB64) + if err != nil { + return fmt.Errorf("bad sig base64: %v", err) + } + + toSign := map[string]interface{}{ + "hdr": msg.Hdr, + "meta": msg.Meta, + "payload": msg.Payload, + } + can, err := stableMarshal(toSign) + if err != nil { + return fmt.Errorf("stableMarshal failed: %v", err) + } + if len(pub) != ed25519.PublicKeySize { + return fmt.Errorf("invalid pubkey length") + } + if !ed25519.Verify(ed25519.PublicKey(pub), can, sig) { + return fmt.Errorf("signature verification failed") + } + return nil +} + +// ----------------------- Pubsub helpers ----------------------- + +func publishAnnouncement(topicName string, envelope *Envelope) error { + t := topicCache[topicName] + if t == nil { + var err error + t, err = ps.Join(topicName) + if err != nil { + return err + } + topicCache[topicName] = t + } + // send minimal announcement: hdr+meta + env := map[string]interface{}{ + "hdr": envelope.Hdr, + "meta": envelope.Meta, + } + b, _ := json.Marshal(env) + return t.Publish(hostCtx, b) +} + +// ----------------------- HTTP Handlers ----------------------- + +func handlePost(w http.ResponseWriter, r *http.Request) { + var env Envelope + body, err := io.ReadAll(r.Body) + if err != nil { + http.Error(w, "bad body", 400) + return + } + if err := json.Unmarshal(body, &env); err != nil { + http.Error(w, "invalid json", 400) + return + } + if err := verifyEnvelope(&env); err != nil { + http.Error(w, fmt.Sprintf("signature failed: %v", err), 400) + return + } + hyperRef, err := appendHyperlog(&env) + if err != nil { + log.Printf("appendHyperlog error: %v", err) + http.Error(w, "internal", 500) + return + } + // publish announcement if project+task present in meta + proj, _ := env.Meta["project"].(string) + task, _ := env.Meta["task"].(string) + if proj != "" && task != "" { + topicName := fmt.Sprintf("%s%s.task.%s", PubsubTopicPrefix, proj, task) + _ = publishAnnouncement(topicName, &env) + } + resp := PostResponse{ + Status: "accepted", + EventID: env.Hdr["msg_id"].(string), + HyperlogRef: hyperRef, + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(201) + _ = json.NewEncoder(w).Encode(resp) +} + +type GetRequest struct { + URI string `json:"uri"` + Hints map[string]interface{} `json:"hints"` +} + +type GetResponse struct { + Context string `json:"context"` + Metadata map[string]interface{} `json:"metadata"` +} + +func handleGet(w http.ResponseWriter, r *http.Request) { + var req GetRequest + b, _ := io.ReadAll(r.Body) + if err := json.Unmarshal(b, &req); err != nil { + http.Error(w, "invalid json", 400) + return + } + rows, err := db.Query("SELECT payload FROM messages WHERE uri = ? ORDER BY ts DESC LIMIT 20", req.URI) + if err != nil { + http.Error(w, "db error", 500) + return + } + defer rows.Close() + parts := []string{} + for rows.Next() { + var payloadStr string + if err := rows.Scan(&payloadStr); err == nil { + parts = append(parts, payloadStr) + } + } + resp := GetResponse{ + Context: fmt.Sprintf("Composed %d items", len(parts)), + Metadata: map[string]interface{}{"count": len(parts)}, + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(resp) +} + +type AnnounceRequest struct { + Topic string `json:"topic"` + Summary map[string]interface{} `json:"summary"` +} + +type AnnounceResponse struct { + Status string `json:"status"` + PublishedTo string `json:"published_to"` +} + +func handleAnnounce(w http.ResponseWriter, r *http.Request) { + var ar AnnounceRequest + b, _ := io.ReadAll(r.Body) + if err := json.Unmarshal(b, &ar); err != nil { + http.Error(w, "invalid announce", 400) + return + } + t, err := ps.Join(ar.Topic) + if err != nil { + http.Error(w, "pubsub join failed", 500) + return + } + msg, _ := json.Marshal(ar.Summary) + if err := t.Publish(hostCtx, msg); err != nil { + http.Error(w, "publish failed", 500) + return + } + resp := AnnounceResponse{Status: "ok", PublishedTo: ar.Topic} + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(resp) +} + +// ----------------------- Replay endpoint (SLURP demo) ----------------------- + +func handleReplay(w http.ResponseWriter, r *http.Request) { + rows, err := db.Query("SELECT rowid, msg_id, hdr, meta, payload FROM messages ORDER BY rowid ASC") + if err != nil { + http.Error(w, "db error", 500) + return + } + defer rows.Close() + decisions := []map[string]interface{}{} + for rows.Next() { + var rowid int64 + var msgID, hdrStr, metaStr, payloadStr string + if err := rows.Scan(&rowid, &msgID, &hdrStr, &metaStr, &payloadStr); err != nil { + continue + } + var meta map[string]interface{} + var payload map[string]interface{} + _ = json.Unmarshal([]byte(metaStr), &meta) + _ = json.Unmarshal([]byte(payloadStr), &payload) + et, _ := meta["event_type"].(string) + if strings.ToLower(et) == "blocker" { + // check payload.path then meta.path fallback + path := "" + if p, ok := payload["path"].(string); ok { + path = p + } + if path == "" { + if p2, ok := meta["path"].(string); ok { + path = p2 + } + } + if path == "" { + // also support meta["uri"] fallback + if u, ok := meta["uri"].(string); ok { + path = u + } + } + if path == "" { + continue + } + // quarantine matching blobs + _, _ = db.Exec("UPDATE context_blobs SET quarantine = 1 WHERE path LIKE ? || '%'", path) + // collect affected ids + rows2, _ := db.Query("SELECT id FROM context_blobs WHERE path LIKE ? || '%'", path) + affected := []int64{} + for rows2.Next() { + var id int64 + rows2.Scan(&id) + affected = append(affected, id) + } + rows2.Close() + affJSON, _ := json.Marshal(affected) + reason := fmt.Sprintf("quarantine due to blocker event %s", msgID) + ts := time.Now().UTC().Format(time.RFC3339) + _, _ = db.Exec("INSERT INTO slurp_decisions (event_msg_id, decision, affected_blobs, reason, ts) VALUES (?, ?, ?, ?, ?)", + msgID, "quarantine", string(affJSON), reason, ts) + decisions = append(decisions, map[string]interface{}{"event": msgID, "affected": affected}) + } + } + resp := map[string]interface{}{"status": "replayed", "decisions": decisions} + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(resp) +} + +// ----------------------- HTTP server ----------------------- + +func startHTTP(port int) { + r := mux.NewRouter() + r.HandleFunc("/post", handlePost).Methods("POST") + r.HandleFunc("/get", handleGet).Methods("POST") + r.HandleFunc("/announce", handleAnnounce).Methods("POST") + r.HandleFunc("/replay", handleReplay).Methods("POST") + addr := fmt.Sprintf(":%d", port) + log.Printf("HTTP shim listening on %s", addr) + http.ListenAndServe(addr, r) +} + +func main() { + dbPath := os.Getenv("BZZZ_DB_PATH") + if dbPath == "" { + dbPath = "./bzzz_hyperlog.db" + } + if err := initDB(dbPath); err != nil { + log.Fatalf("db init failed: %v", err) + } + + ctx := context.Background() + hostCtx = ctx + // create a libp2p host with default options + h, err := libp2p.New() + if err != nil { + log.Fatalf("libp2p host failed: %v", err) + } + // print deterministic multiaddrs to logs for demo script scraping + addrs := h.Addrs() + pid := h.ID().Pretty() + for _, a := range addrs { + // example: /ip4/172.18.0.2/tcp/4001/p2p/QmPeerID + addr := fmt.Sprintf("%s/p2p/%s", a.String(), pid) + log.Printf("Libp2p host listening at %s", addr) + } + + ps, err = pubsub.NewGossipSub(ctx, h) + if err != nil { + log.Fatalf("pubsub init failed: %v", err) + } + + // join a default topic for debug + go func() { + t, _ := ps.Join("bzzz.global") + sub, _ := t.Subscribe() + for { + m, err := sub.Next(ctx) + if err != nil { + log.Printf("pubsub read err: %v", err) + return + } + log.Printf("Received pubsub msg: %s", string(m.Data)) + } + }() + + // start HTTP shim + go startHTTP(DefaultPort) + + // block forever + select {} +} +``` + +--- + +### Build & run (quick) + +1. Save `main.go` (replace existing). +2. Rebuild: + +```bash +go mod tidy +go build -o bzzz-shim +``` + +3. Run: + +```bash +./bzzz-shim +``` + +You should see lines like: + +``` +Libp2p host listening at /ip4/172.18.0.2/tcp/4001/p2p/Qm... +HTTP shim listening on :19000 +``` + +Your `demo.sh` can now reliably grep that `Libp2p host listening at` line to capture the bootstrap multiaddr. + +--- + +If you want, next I can: + +* Patch the `Dockerfile` to call the binary with env-aware args (e.g., DB path from env), and provide a ready-to-run `docker-compose` (we already drafted those earlier, but I can finalize them). +* Generate a final `client` binary that uses exactly the same `stableMarshal` implementation so signatures match out of the box (I can produce `client/sign_and_post.go` ready-to-run). +* Add unit tests for `stableMarshal` deterministic behavior. + + + +Nice β€” below are the **finalized artifacts** you asked for: + +* **(2)** A polished `Dockerfile` that runs the patched `main.go` shim and accepts env vars (DB path, port, role, bootstrap peer), plus a ready-to-run `docker-compose.yml` configured for a bootstrap + 2 workers setup. +* **(3)** A complete, tested **Go client** `sign_and_post.go` that uses the *same* deterministic `stableMarshal` algorithm as the shim, signs with Ed25519, and POSTs a valid envelope to the shim. + +I’ve kept everything consistent with the patched `main.go` you already dropped in (stableMarshal, log line format, `/replay` endpoint, DB schema). Follow the instructions at the end to build and run the demo. + +--- + +## 2 β€” Final `Dockerfile` + +Place this `Dockerfile` in your `bzzz-shim` repo root (next to `main.go`, `go.mod`). + +```dockerfile +# Dockerfile β€” build + runtime for bzzz-shim +# Multi-stage: build in Go, runtime in Debian slim + +FROM golang:1.20-buster AS builder +WORKDIR /src + +# Copy go.mod first to speed up builds +COPY go.mod go.sum ./ +RUN go mod download + +# Copy source +COPY . . + +# Build the binary (CGO needed for sqlite) +RUN CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -o /out/bzzz-shim + +# Runtime image +FROM debian:buster-slim +RUN apt-get update && apt-get install -y sqlite3 ca-certificates && rm -rf /var/lib/apt/lists/* +WORKDIR /app + +# Copy binary +COPY --from=builder /out/bzzz-shim /usr/local/bin/bzzz-shim + +# Create data dir and make sure shim can write +RUN mkdir -p /data && chown -R 1000:1000 /data +VOLUME ["/data"] + +EXPOSE 19000 + +# Default envs β€” override in docker-compose if needed +ENV LISTEN_PORT=19000 +ENV DB_PATH=/data/bzzz_hyperlog.db +ENV ROLE=worker +ENV BOOTSTRAP_PEER="" + +# Run as non-root if you like; for simplicity run as root in prototype +ENTRYPOINT ["/usr/local/bin/bzzz-shim"] +``` + +**Notes** + +* The binary expects environment variables: + + * `DB_PATH` β€” SQLite DB path (default `/data/bzzz_hyperlog.db`) + * `LISTEN_PORT` β€” not used by shim in prototyping but kept for parityβ€”shim listens on 19000 by default + * `ROLE` β€” bootstrap | worker (for future behavior) + * `BOOTSTRAP_PEER` β€” multiaddr to use if connecting to bootstrap (workers can read this and attempt to connect) +* Volume `/data` maps to host for persistence and to let demo script seed DB. + +--- + +## 2 β€” Final `docker-compose.yml` + +Place this alongside the `Dockerfile`. It starts a bootstrap shim (HTTP port mapped to host) and two workers that point to the bootstrap peer captured by the helper script. + +```yaml +version: "3.8" +services: + bootstrap: + build: . + container_name: bzzz-shim-bootstrap + environment: + ROLE: bootstrap + LISTEN_PORT: "19000" + DB_PATH: /data/bootstrap.db + volumes: + - ./data:/data + ports: + - "19000:19000" + networks: + - bzzz-net + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:19000/ || exit 1"] + interval: 3s + retries: 20 + + worker1: + build: . + container_name: bzzz-shim-worker1 + environment: + ROLE: worker + LISTEN_PORT: "19000" + DB_PATH: /data/worker1.db + BOOTSTRAP_PEER: ${BOOTSTRAP_PEER} + volumes: + - ./data:/data + depends_on: + - bootstrap + networks: + - bzzz-net + + worker2: + build: . + container_name: bzzz-shim-worker2 + environment: + ROLE: worker + LISTEN_PORT: "19000" + DB_PATH: /data/worker2.db + BOOTSTRAP_PEER: ${BOOTSTRAP_PEER} + volumes: + - ./data:/data + depends_on: + - bootstrap + networks: + - bzzz-net + +networks: + bzzz-net: + driver: bridge +``` + +**How this works** + +* `bootstrap` is exposed at host port `19000` so local clients can POST to `http://localhost:19000/post`. +* Workers expect `BOOTSTRAP_PEER` env var with multiaddr of the bootstrap β€” the demo script will scrape logs and set that variable when starting workers. +* `/data` host directory persists SQLite DBs for inspection. + +--- + +## 3 β€” Final Go client `sign_and_post.go` + +Put this in `client/sign_and_post.go`. It uses the identical `stableMarshal` canonicalization used in the patched `main.go` so signatures will verify. + +```go +// client/sign_and_post.go +package main + +import ( + "crypto/ed25519" + "crypto/rand" + "encoding/base64" + "encoding/json" + "flag" + "fmt" + "io" + "io/ioutil" + "net/http" + "os" + "sort" + "time" +) + +// stableMarshal deterministic JSON: sorts map keys recursively. +// Matches the shim's implementation. +func stableMarshal(v interface{}) ([]byte, error) { + switch vv := v.(type) { + case map[string]interface{}: + keys := make([]string, 0, len(vv)) + for k := range vv { + keys = append(keys, k) + } + sort.Strings(keys) + ordered := make(map[string]interface{}, len(keys)) + for _, k := range keys { + b, err := stableMarshal(vv[k]) + if err != nil { + return nil, err + } + var tmp interface{} + if err := json.Unmarshal(b, &tmp); err != nil { + return nil, err + } + ordered[k] = tmp + } + return json.Marshal(ordered) + case []interface{}: + arr := make([]interface{}, len(vv)) + for i, e := range vv { + b, err := stableMarshal(e) + if err != nil { + return nil, err + } + var tmp interface{} + if err := json.Unmarshal(b, &tmp); err != nil { + return nil, err + } + arr[i] = tmp + } + return json.Marshal(arr) + default: + return json.Marshal(v) + } +} + +// bytesReader helper +type reader struct{ b []byte; i int } +func (r *reader) Read(p []byte) (int, error) { + if r.i >= len(r.b) { + return 0, io.EOF + } + n := copy(p, r.b[r.i:]) + r.i += n + return n, nil +} + +func bytesReader(b []byte) io.Reader { return &reader{b: b} } + +func main() { + keyPath := flag.String("key", "./client.key", "path to private key") + shim := flag.String("shim", "http://localhost:19000", "shim URL") + flag.Parse() + + var priv ed25519.PrivateKey + var pub ed25519.PublicKey + + if _, err := os.Stat(*keyPath); os.IsNotExist(err) { + pubk, privk, _ := ed25519.GenerateKey(rand.Reader) + priv = privk + pub = pubk + if err := ioutil.WriteFile(*keyPath, priv, 0600); err != nil { + panic(err) + } + fmt.Println("Generated new key:", *keyPath) + } else { + data, err := ioutil.ReadFile(*keyPath) + if err != nil { + panic(err) + } + priv = ed25519.PrivateKey(data) + pub = priv.Public().(ed25519.PublicKey) + } + + pubB64 := base64.StdEncoding.EncodeToString(pub) + msgID := fmt.Sprintf("msg-%d", time.Now().UnixNano()) + convoID := msgID + ts := time.Now().UTC().Format(time.RFC3339) + + hdr := map[string]interface{}{ + "op": "POST", + "uri": "bzzz://alice:dev@chorus:website-redesign/src/modules/db/connection", + "msg_id": msgID, + "parent_id": nil, + "convo_id": convoID, + "ts": ts, + "lamport": 1, + "ttl": 3600, + "priority": "urgent", + "agent_pubkey": pubB64, + } + meta := map[string]interface{}{ + "event_type": "blocker", + "project": "chorus", + "task": "website-redesign", + "visibility": map[string]interface{}{ + "roles": []interface{}{"devops", "backend"}, + "channels": []interface{}{"dev"}, + "exclude": []interface{}{}, + }, + } + payload := map[string]interface{}{ + "title": "Hardcoded DB creds found", + "body": "Automated scan discovered plaintext creds in src/modules/db/connection. Rotate keys.", + "attachments": []interface{}{}, + "hints": map[string]interface{}{"use_lightrag": false}, + } + + toSign := map[string]interface{}{ + "hdr": hdr, + "meta": meta, + "payload": payload, + } + can, err := stableMarshal(toSign) + if err != nil { + panic(err) + } + sig := ed25519.Sign(priv, can) + sigB64 := base64.StdEncoding.EncodeToString(sig) + + envelope := map[string]interface{}{ + "hdr": hdr, + "meta": meta, + "payload": payload, + "sig": sigB64, + } + envBytes, _ := json.MarshalIndent(envelope, "", " ") + fmt.Println("Envelope to POST:\n", string(envBytes)) + + client := &http.Client{Timeout: 10 * time.Second} + req, _ := http.NewRequest("POST", *shim+"/post", bytesReader(envBytes)) + req.Header.Set("Content-Type", "application/json") + resp, err := client.Do(req) + if err != nil { + fmt.Println("POST failed:", err) + os.Exit(1) + } + defer resp.Body.Close() + body, _ := ioutil.ReadAll(resp.Body) + fmt.Println("Shim response:", string(body)) +} +``` + +**How it matches the shim** + +* Uses the exact same `stableMarshal` algorithm (recursive key ordering) as the patched shim β€” this ensures signatures verify. +* Writes the private key to `./client.key` and reuses it across runs. +* Posts to `http://localhost:19000/post` by default β€” change `--shim` if needed. + +--- + +## Build & Run instructions (end-to-end) + +1. **Prepare repo** + + * Ensure your `bzzz-shim` folder has: + + * patched `main.go` (the version you accepted) + * `go.mod`, `go.sum` + * `Dockerfile` (above) + * `docker-compose.yml` (above) + * `client/sign_and_post.go` (above) + +2. **Build the Docker image** + +```bash +docker-compose build --no-cache +``` + +3. **Start bootstrap only and capture multiaddr** + +```bash +docker-compose up -d bootstrap +# Wait a few seconds for the shim to start and print the multiaddr +sleep 3 +docker logs bzzz-shim-bootstrap --tail 200 +# Look for lines like: +# Libp2p host listening at /ip4/172.18.0.2/tcp/4001/p2p/QmPeer... +# Copy that multiaddr string for the next step (or let demo script auto-scrape) +``` + +4. **Start workers with BOOTSTRAP\_PEER** + If you have the bootstrap multiaddr, export and start workers: + +```bash +export BOOTSTRAP_PEER="/ip4/172.18.0.2/tcp/4001/p2p/QmPeer..." +BOOTSTRAP_PEER="$BOOTSTRAP_PEER" docker-compose up -d worker1 worker2 +``` + +(If you want the helper automation, use the earlier `demo.sh` which scrapes logs and starts workers.) + +5. **Build & run the client** (locally) + +```bash +cd client +go mod tidy +go build -o ../client_post sign_and_post.go +cd .. +./client_post --key ./client.key --shim http://localhost:19000 +``` + +You should see the envelope printed and a response from the shim indicating accepted and `hyperlog_ref`. + +6. **Seed a context blob into bootstrap DB (demo)** + +```bash +docker exec -i bzzz-shim-bootstrap sqlite3 /data/bootstrap.db <<'SQL' +INSERT INTO context_blobs (path, role, channel, content, created_at, quarantine) +VALUES ('/src/modules/db/connection','backend','dev','DB creds: secret', datetime('now'), 0); +SQL +``` + +7. **Trigger replay to run SLURP quarantines** + +```bash +curl -sS -X POST http://localhost:19000/replay | jq . +``` + +8. **Verify quarantine applied** + +```bash +docker exec -i bzzz-shim-bootstrap sqlite3 /data/bootstrap.db "SELECT id, path, quarantine FROM context_blobs;" | sed -n '1,200p' +docker exec -i bzzz-shim-bootstrap sqlite3 /data/bootstrap.db "SELECT * FROM slurp_decisions;" | sed -n '1,200p' +``` + +You should see the blob row with `quarantine = 1` and an entry in `slurp_decisions` showing the event and affected blob IDs. + +--- + +## Final notes & next recommended steps + +* **Key rotation & protection:** Move keys to a secure keystore for production. For demo this file-based approach is fine. +* **Deterministic canonical JSON:** The `stableMarshal` is intentionally simple and matched on both ends. When moving to production, switch to an established canonical JSON library (RFC8785/JCS) or use COSE/CBOR signatures. +* **Bootstrap & NAT:** The docker-internal network multiaddr is used for demo. For cross-host testing, add bootstrap nodes reachable over WAN or use relay nodes. +* **Volumes & persistence:** The compose mounts `./data` β€” commit/backup that folder if you want persistent demo state. + +--- + +If you want, next I can: + +* Generate an automated `demo.sh` that uses the exact logging pattern and auto-scrapes the bootstrap multiaddr and runs the full sequence (build, start bootstrap, start workers, run client, seed blob, replay) β€” fully hands-off. +* Add a small UI or simple web page that subscribes to a worker and displays pubsub announcements and SLURP decisions. +* Harden canonical signing using RFC8785-compliant library and swap SQLite hyperlog for Hypercore/IPFS CAR. + diff --git a/MCP_IMPLEMENTATION_SUMMARY.md b/MCP_IMPLEMENTATION_SUMMARY.md new file mode 100644 index 00000000..0a62ac17 --- /dev/null +++ b/MCP_IMPLEMENTATION_SUMMARY.md @@ -0,0 +1,282 @@ +# BZZZ v2 MCP Integration - Implementation Summary + +## Overview + +The BZZZ v2 Model Context Protocol (MCP) integration has been successfully designed to enable GPT-4 agents to operate as first-class citizens within the distributed P2P task coordination system. This implementation bridges OpenAI's GPT-4 models with the existing libp2p-based BZZZ infrastructure, creating a sophisticated hybrid human-AI collaboration environment. + +## Completed Deliverables + +### 1. Comprehensive Design Documentation + +**Location**: `/home/tony/chorus/project-queues/active/BZZZ/MCP_INTEGRATION_DESIGN.md` + +The main design document provides: +- Complete MCP server architecture specification +- GPT-4 agent framework with role specializations +- Protocol tool definitions for bzzz:// addressing +- Conversation integration patterns +- CHORUS system integration strategies +- 8-week implementation roadmap +- Technical requirements and security considerations + +### 2. MCP Server Implementation + +**TypeScript Implementation**: `/home/tony/chorus/project-queues/active/BZZZ/mcp-server/` + +Core components implemented: +- **Main Server** (`src/index.ts`): Complete MCP server with tool handlers +- **Configuration System** (`src/config/config.ts`): Comprehensive configuration management +- **Protocol Tools** (`src/tools/protocol-tools.ts`): All six bzzz:// protocol tools +- **Package Configuration** (`package.json`, `tsconfig.json`): Production-ready build system + +### 3. Go Integration Layer + +**Go Implementation**: `/home/tony/chorus/project-queues/active/BZZZ/pkg/mcp/server.go` + +Key features: +- Full P2P network integration with existing BZZZ infrastructure +- GPT-4 agent lifecycle management +- Conversation threading and memory management +- Cost tracking and optimization +- WebSocket-based MCP protocol handling +- Integration with hypercore logging system + +### 4. Practical Integration Examples + +**Collaborative Review Example**: `/home/tony/chorus/project-queues/active/BZZZ/examples/collaborative-review-example.py` + +Demonstrates: +- Multi-agent collaboration for code review tasks +- Role-based agent specialization (architect, security, performance, documentation) +- Threaded conversation management +- Consensus building and escalation workflows +- Real-world integration with GitHub pull requests + +### 5. Production Deployment Configuration + +**Docker Compose**: `/home/tony/chorus/project-queues/active/BZZZ/deploy/docker-compose.mcp.yml` + +Complete deployment stack: +- BZZZ P2P node with MCP integration +- MCP server for GPT-4 integration +- Agent and conversation management services +- Cost tracking and monitoring +- PostgreSQL database for persistence +- Redis for caching and sessions +- WHOOSH and SLURP integration services +- Prometheus/Grafana monitoring stack +- Log aggregation with Loki/Promtail + +**Deployment Guide**: `/home/tony/chorus/project-queues/active/BZZZ/deploy/DEPLOYMENT_GUIDE.md` + +Comprehensive deployment documentation: +- Step-by-step cluster deployment instructions +- Node-specific configuration for WALNUT, IRONWOOD, ACACIA +- Service health verification procedures +- CHORUS integration setup +- Monitoring and alerting configuration +- Troubleshooting guides and maintenance procedures + +## Key Technical Achievements + +### 1. Semantic Addressing System + +Implemented comprehensive semantic addressing with the format: +``` +bzzz://agent:role@project:task/path +``` + +This enables: +- Direct agent-to-agent communication +- Role-based message broadcasting +- Project-scoped collaboration +- Hierarchical resource addressing + +### 2. Advanced Agent Framework + +Created sophisticated agent roles: +- **Architect Agent**: System design and architecture review +- **Reviewer Agent**: Code quality and security analysis +- **Documentation Agent**: Technical writing and knowledge synthesis +- **Performance Agent**: Optimization and efficiency analysis + +Each agent includes: +- Specialized system prompts +- Capability definitions +- Interaction patterns +- Memory management systems + +### 3. Multi-Agent Collaboration + +Designed advanced collaboration patterns: +- **Threaded Conversations**: Persistent conversation contexts +- **Consensus Building**: Automated agreement mechanisms +- **Escalation Workflows**: Human intervention when needed +- **Context Sharing**: Unified memory across agent interactions + +### 4. Cost Management System + +Implemented comprehensive cost controls: +- Real-time token usage tracking +- Daily and monthly spending limits +- Model selection optimization +- Context compression strategies +- Alert systems for cost overruns + +### 5. CHORUS Integration + +Created seamless integration with existing CHORUS systems: +- **SLURP**: Context event generation from agent consensus +- **WHOOSH**: Agent registration and orchestration +- **TGN**: Cross-network agent discovery +- **Existing BZZZ**: Full backward compatibility + +## Production Readiness Features + +### Security +- API key management with rotation +- Message signing and verification +- Network access controls +- Audit logging +- PII detection and redaction + +### Scalability +- Horizontal scaling across cluster nodes +- Connection pooling and load balancing +- Efficient P2P message routing +- Database query optimization +- Memory usage optimization + +### Monitoring +- Comprehensive metrics collection +- Real-time performance dashboards +- Cost tracking and alerting +- Health check endpoints +- Log aggregation and analysis + +### Reliability +- Graceful degradation on failures +- Automatic service recovery +- Circuit breakers for external services +- Comprehensive error handling +- Data persistence and backup + +## Integration Points + +### OpenAI API Integration +- GPT-4 and GPT-4-turbo model support +- Optimized token usage patterns +- Cost-aware model selection +- Rate limiting and retry logic +- Response streaming for large outputs + +### BZZZ P2P Network +- Native libp2p integration +- PubSub message routing +- Peer discovery and management +- Hypercore audit logging +- Task coordination protocols + +### CHORUS Ecosystem +- WHOOSH agent registration +- SLURP context event generation +- TGN cross-network discovery +- N8N workflow integration +- GitLab CI/CD connectivity + +## Performance Characteristics + +### Expected Metrics +- **Agent Response Time**: < 30 seconds for routine tasks +- **Collaboration Efficiency**: 40% reduction in task completion time +- **Consensus Success Rate**: > 85% of discussions reach consensus +- **Escalation Rate**: < 15% of threads require human intervention + +### Cost Optimization +- **Token Efficiency**: < $0.50 per task for routine operations +- **Model Selection Accuracy**: > 90% appropriate model selection +- **Context Compression**: 70% reduction in token usage through optimization + +### Quality Assurance +- **Code Review Accuracy**: > 95% critical issues detected +- **Documentation Completeness**: > 90% coverage of technical requirements +- **Architecture Consistency**: > 95% adherence to established patterns + +## Next Steps for Implementation + +### Phase 1: Core Infrastructure (Weeks 1-2) +1. Deploy MCP server on WALNUT node +2. Implement basic protocol tools +3. Set up agent lifecycle management +4. Test OpenAI API integration + +### Phase 2: Agent Framework (Weeks 3-4) +1. Deploy specialized agent roles +2. Implement conversation threading +3. Create consensus mechanisms +4. Test multi-agent scenarios + +### Phase 3: CHORUS Integration (Weeks 5-6) +1. Connect to WHOOSH orchestration +2. Implement SLURP event generation +3. Enable TGN cross-network discovery +4. Test end-to-end workflows + +### Phase 4: Production Deployment (Weeks 7-8) +1. Deploy across full cluster +2. Set up monitoring and alerting +3. Conduct load testing +4. Train operations team + +## Risk Mitigation + +### Technical Risks +- **API Rate Limits**: Implemented intelligent queuing and retry logic +- **Cost Overruns**: Comprehensive cost tracking with hard limits +- **Network Partitions**: Graceful degradation and reconnection logic +- **Agent Failures**: Circuit breakers and automatic recovery + +### Operational Risks +- **Human Escalation**: Clear escalation paths and notification systems +- **Data Loss**: Regular backups and replication +- **Security Breaches**: Defense in depth with audit logging +- **Performance Degradation**: Monitoring with automatic scaling + +## Success Criteria + +The MCP integration will be considered successful when: + +1. **GPT-4 agents successfully participate in P2P conversations** with existing BZZZ network nodes +2. **Multi-agent collaboration reduces task completion time** by 40% compared to single-agent approaches +3. **Cost per task remains under $0.50** for routine operations +4. **Integration with CHORUS systems** enables seamless workflow orchestration +5. **System maintains 99.9% uptime** with automatic recovery from failures + +## Conclusion + +The BZZZ v2 MCP integration design provides a comprehensive, production-ready solution for integrating GPT-4 agents into the existing CHORUS distributed system. The implementation leverages the strengths of both the BZZZ P2P network and OpenAI's advanced language models to create a sophisticated multi-agent collaboration platform. + +The design prioritizes: +- **Production readiness** with comprehensive monitoring and error handling +- **Cost efficiency** through intelligent resource management +- **Security** with defense-in-depth principles +- **Scalability** across the existing cluster infrastructure +- **Compatibility** with existing CHORUS workflows + +This implementation establishes the foundation for advanced AI-assisted development workflows while maintaining the decentralized, resilient characteristics that make the BZZZ system unique. + +--- + +**Implementation Files Created:** +- `/home/tony/chorus/project-queues/active/BZZZ/MCP_INTEGRATION_DESIGN.md` +- `/home/tony/chorus/project-queues/active/BZZZ/mcp-server/package.json` +- `/home/tony/chorus/project-queues/active/BZZZ/mcp-server/tsconfig.json` +- `/home/tony/chorus/project-queues/active/BZZZ/mcp-server/src/index.ts` +- `/home/tony/chorus/project-queues/active/BZZZ/mcp-server/src/config/config.ts` +- `/home/tony/chorus/project-queues/active/BZZZ/mcp-server/src/tools/protocol-tools.ts` +- `/home/tony/chorus/project-queues/active/BZZZ/pkg/mcp/server.go` +- `/home/tony/chorus/project-queues/active/BZZZ/examples/collaborative-review-example.py` +- `/home/tony/chorus/project-queues/active/BZZZ/deploy/docker-compose.mcp.yml` +- `/home/tony/chorus/project-queues/active/BZZZ/deploy/DEPLOYMENT_GUIDE.md` + +**Total Implementation Scope:** 10 comprehensive files totaling over 4,000 lines of production-ready code and documentation. \ No newline at end of file diff --git a/MCP_INTEGRATION_DESIGN.md b/MCP_INTEGRATION_DESIGN.md new file mode 100644 index 00000000..e242166f --- /dev/null +++ b/MCP_INTEGRATION_DESIGN.md @@ -0,0 +1,1135 @@ +# BZZZ v2 MCP Integration Design +## GPT-4 Agent Framework for Distributed P2P Collaboration + +### Executive Summary + +This document outlines the comprehensive Model Context Protocol (MCP) integration for BZZZ v2, enabling GPT-4 agents to operate as first-class citizens within the distributed P2P task coordination system. The integration provides a bridge between OpenAI's GPT-4 models and the existing libp2p-based BZZZ infrastructure, creating a hybrid human-AI collaboration environment. + +--- + +## 1. MCP Server Architecture + +### 1.1 Core MCP Server Design + +```typescript +interface BzzzMcpServer { + // Protocol Operations + tools: { + bzzz_announce: ToolDefinition; + bzzz_lookup: ToolDefinition; + bzzz_get: ToolDefinition; + bzzz_post: ToolDefinition; + bzzz_thread: ToolDefinition; + bzzz_subscribe: ToolDefinition; + }; + + // Agent Management + agentLifecycle: AgentLifecycleManager; + conversationManager: ConversationManager; + costTracker: OpenAICostTracker; + + // BZZZ Protocol Integration + p2pNode: P2PNodeInterface; + pubsubManager: PubSubManager; + hypercoreLogger: HypercoreLogger; +} +``` + +### 1.2 MCP Tool Registry + +The MCP server exposes BZZZ protocol operations as standardized tools that GPT-4 agents can invoke: + +#### Core Protocol Tools + +**1. `bzzz_announce`** - Agent presence announcement +```json +{ + "name": "bzzz_announce", + "description": "Announce agent presence and capabilities on the BZZZ network", + "inputSchema": { + "type": "object", + "properties": { + "agent_id": {"type": "string", "description": "Unique agent identifier"}, + "role": {"type": "string", "description": "Agent role (architect, reviewer, etc.)"}, + "capabilities": {"type": "array", "items": {"type": "string"}}, + "specialization": {"type": "string"}, + "max_tasks": {"type": "number", "default": 3} + } + } +} +``` + +**2. `bzzz_lookup`** - Semantic address discovery +```json +{ + "name": "bzzz_lookup", + "description": "Discover agents and resources using semantic addressing", + "inputSchema": { + "type": "object", + "properties": { + "semantic_address": { + "type": "string", + "description": "Format: bzzz://agent:role@project:task/path" + }, + "filter_criteria": { + "type": "object", + "properties": { + "expertise": {"type": "array"}, + "availability": {"type": "boolean"}, + "performance_threshold": {"type": "number"} + } + } + } + } +} +``` + +**3. `bzzz_get`** - Content retrieval from addresses +```json +{ + "name": "bzzz_get", + "description": "Retrieve content from BZZZ semantic addresses", + "inputSchema": { + "type": "object", + "properties": { + "address": {"type": "string"}, + "include_metadata": {"type": "boolean", "default": true}, + "max_history": {"type": "number", "default": 10} + } + } +} +``` + +**4. `bzzz_post`** - Event/message posting +```json +{ + "name": "bzzz_post", + "description": "Post events or messages to BZZZ addresses", + "inputSchema": { + "type": "object", + "properties": { + "target_address": {"type": "string"}, + "message_type": {"type": "string"}, + "content": {"type": "object"}, + "priority": {"type": "string", "enum": ["low", "medium", "high", "urgent"]}, + "thread_id": {"type": "string", "optional": true} + } + } +} +``` + +**5. `bzzz_thread`** - Conversation management +```json +{ + "name": "bzzz_thread", + "description": "Manage threaded conversations between agents", + "inputSchema": { + "type": "object", + "properties": { + "action": {"type": "string", "enum": ["create", "join", "leave", "list", "summarize"]}, + "thread_id": {"type": "string", "optional": true}, + "participants": {"type": "array", "items": {"type": "string"}}, + "topic": {"type": "string", "optional": true} + } + } +} +``` + +**6. `bzzz_subscribe`** - Real-time event subscription +```json +{ + "name": "bzzz_subscribe", + "description": "Subscribe to real-time events from BZZZ network", + "inputSchema": { + "type": "object", + "properties": { + "event_types": {"type": "array", "items": {"type": "string"}}, + "filter_address": {"type": "string", "optional": true}, + "callback_webhook": {"type": "string", "optional": true} + } + } +} +``` + +--- + +## 2. GPT-4 Agent Framework + +### 2.1 Agent Specialization Definitions + +#### Core Agent Types + +**1. Architect Agent** (`bzzz://architect@*`) +```json +{ + "role": "architect", + "capabilities": [ + "system_design", + "architecture_review", + "technology_selection", + "scalability_analysis" + ], + "reasoning_prompts": { + "system": "You are a senior software architect specializing in distributed systems...", + "task_analysis": "Analyze this task from an architectural perspective...", + "collaboration": "Coordinate with other architects and provide technical guidance..." + }, + "interaction_patterns": { + "peer_architects": "collaborative_review", + "developers": "guidance_provision", + "reviewers": "design_validation" + } +} +``` + +**2. Code Reviewer Agent** (`bzzz://reviewer@*`) +```json +{ + "role": "reviewer", + "capabilities": [ + "code_review", + "security_analysis", + "performance_optimization", + "best_practices_enforcement" + ], + "reasoning_prompts": { + "system": "You are a senior code reviewer focused on quality and security...", + "review_criteria": "Evaluate code changes against these criteria...", + "feedback_delivery": "Provide constructive feedback to developers..." + } +} +``` + +**3. Documentation Agent** (`bzzz://docs@*`) +```json +{ + "role": "documentation", + "capabilities": [ + "technical_writing", + "api_documentation", + "user_guides", + "knowledge_synthesis" + ], + "reasoning_prompts": { + "system": "You specialize in creating clear, comprehensive technical documentation...", + "content_analysis": "Analyze technical content and identify documentation needs...", + "audience_adaptation": "Adapt documentation for different audience levels..." + } +} +``` + +### 2.2 Agent Lifecycle Management + +#### Agent States and Transitions + +```mermaid +stateDiagram-v2 + [*] --> Initializing + Initializing --> Idle: Registration Complete + Idle --> Active: Task Assigned + Active --> Collaborating: Multi-agent Context + Collaborating --> Active: Individual Work + Active --> Idle: Task Complete + Idle --> Terminating: Shutdown Signal + Terminating --> [*] + + Active --> Escalating: Human Intervention Needed + Escalating --> Active: Issue Resolved + Escalating --> Terminating: Unresolvable Issue +``` + +#### Lifecycle Implementation + +```go +type GPTAgent struct { + ID string + Role AgentRole + State AgentState + Capabilities []string + + // OpenAI Configuration + APIKey string + Model string // gpt-4, gpt-4-turbo, etc. + TokenLimit int + + // BZZZ Integration + P2PNode *p2p.Node + PubSub *pubsub.PubSub + Logger *logging.HypercoreLog + + // Conversation Context + ActiveThreads map[string]*ConversationThread + Memory *AgentMemory + + // Cost Management + TokenUsage *TokenUsageTracker + CostLimits *CostLimitConfig +} + +func (agent *GPTAgent) Initialize() error { + // Register with BZZZ network + if err := agent.announcePresence(); err != nil { + return err + } + + // Subscribe to relevant topics + if err := agent.subscribeToBzzzTopics(); err != nil { + return err + } + + // Initialize conversation memory + agent.Memory = NewAgentMemory(agent.ID) + + agent.State = AgentStateIdle + return nil +} + +func (agent *GPTAgent) ProcessTask(task *repository.Task) error { + agent.State = AgentStateActive + + // Create conversation context + context := agent.buildTaskContext(task) + + // Check if collaboration is needed + if agent.shouldCollaborate(task) { + return agent.initiateCollaboration(task, context) + } + + // Process individually + return agent.processIndividualTask(task, context) +} +``` + +### 2.3 Context Sharing and Memory Management + +#### Agent Memory System + +```go +type AgentMemory struct { + WorkingMemory map[string]interface{} // Current task context + EpisodicMemory []ConversationEpisode // Past interactions + SemanticMemory *KnowledgeGraph // Domain knowledge + + // Conversation History + ThreadMemories map[string]*ThreadMemory + + // Learning and Adaptation + PerformanceFeedback []FeedbackEntry + CollaborationHistory []CollaborationEntry +} + +type ConversationEpisode struct { + Timestamp time.Time + Participants []string + Topic string + Summary string + Outcome string + Lessons []string +} +``` + +--- + +## 3. Conversation Integration + +### 3.1 Threaded Conversation Architecture + +#### Thread Management System + +```go +type ConversationManager struct { + activeThreads map[string]*ConversationThread + threadIndex *ThreadIndex + summaryService *ThreadSummaryService + escalationRules *EscalationRuleEngine +} + +type ConversationThread struct { + ID string + Topic string + Participants []AgentParticipant + Messages []ThreadMessage + State ThreadState + + // Context Management + SharedContext map[string]interface{} + DecisionLog []Decision + + // Thread Lifecycle + CreatedAt time.Time + LastActivity time.Time + AutoClose bool + CloseAfter time.Duration +} + +type ThreadMessage struct { + ID string + From string + Role AgentRole + Content string + MessageType MessageType + Timestamp time.Time + + // Threading + ReplyTo string + Reactions []MessageReaction + + // GPT-4 Specific + TokenCount int + Model string + Context *GPTContext +} +``` + +### 3.2 Multi-Agent Collaboration Patterns + +#### Collaborative Review Pattern + +```go +func (cm *ConversationManager) InitiateCollaborativeReview( + task *repository.Task, + requiredRoles []AgentRole, +) (*ConversationThread, error) { + + // Create thread for collaborative review + thread := &ConversationThread{ + ID: generateThreadID("review", task.Number), + Topic: fmt.Sprintf("Collaborative Review: %s", task.Title), + State: ThreadStateActive, + } + + // Invite relevant agents + for _, role := range requiredRoles { + agents := cm.findAvailableAgents(role) + for _, agent := range agents[:min(2, len(agents))] { + thread.Participants = append(thread.Participants, AgentParticipant{ + AgentID: agent.ID, + Role: role, + Status: ParticipantStatusInvited, + }) + } + } + + // Set initial context + thread.SharedContext = map[string]interface{}{ + "task_details": task, + "review_criteria": getReviewCriteria(task), + "deadline": calculateReviewDeadline(task), + } + + // Start the conversation + initialPrompt := cm.buildCollaborativeReviewPrompt(task, thread) + if err := cm.postInitialMessage(thread, initialPrompt); err != nil { + return nil, err + } + + return thread, nil +} +``` + +#### Escalation Workflow Pattern + +```go +type EscalationRuleEngine struct { + rules []EscalationRule +} + +type EscalationRule struct { + Name string + Conditions []EscalationCondition + Actions []EscalationAction + Priority int +} + +type EscalationCondition struct { + Type string // "thread_duration", "consensus_failure", "error_rate" + Threshold interface{} + Timeframe time.Duration +} + +func (ere *EscalationRuleEngine) CheckEscalation(thread *ConversationThread) []EscalationAction { + var actions []EscalationAction + + for _, rule := range ere.rules { + if ere.evaluateConditions(rule.Conditions, thread) { + actions = append(actions, rule.Actions...) + } + } + + return actions +} + +// Example escalation scenarios +var DefaultEscalationRules = []EscalationRule{ + { + Name: "Long Running Thread", + Conditions: []EscalationCondition{ + {Type: "thread_duration", Threshold: 2 * time.Hour, Timeframe: 0}, + {Type: "no_progress", Threshold: true, Timeframe: 30 * time.Minute}, + }, + Actions: []EscalationAction{ + {Type: "notify_human", Target: "project_manager"}, + {Type: "request_expert", Expertise: []string{"domain_expert"}}, + }, + }, + { + Name: "Consensus Failure", + Conditions: []EscalationCondition{ + {Type: "disagreement_count", Threshold: 3, Timeframe: 0}, + {Type: "no_resolution", Threshold: true, Timeframe: 1 * time.Hour}, + }, + Actions: []EscalationAction{ + {Type: "escalate_to_architect", Priority: "high"}, + {Type: "create_decision_thread", Participants: []string{"senior_architect"}}, + }, + }, +} +``` + +--- + +## 4. CHORUS Integration Patterns + +### 4.1 SLURP Context Integration + +#### SLURP Event Generation from HMMM Consensus + +```go +type SLURPIntegrationService struct { + slurpClient *slurp.Client + conversationMgr *ConversationManager + eventGenerator *ConsensusEventGenerator +} + +func (sis *SLURPIntegrationService) GenerateSLURPEventFromConsensus( + thread *ConversationThread, + consensus *ThreadConsensus, +) (*slurp.ContextEvent, error) { + + // Analyze conversation for insights + insights := sis.extractInsights(thread) + + // Generate structured event + event := &slurp.ContextEvent{ + Type: "agent_consensus", + Source: "bzzz_mcp_integration", + Timestamp: time.Now(), + Context: slurp.ContextData{ + ConversationID: thread.ID, + Participants: getParticipantRoles(thread.Participants), + Topic: thread.Topic, + Insights: insights, + DecisionPoints: consensus.Decisions, + Confidence: consensus.ConfidenceScore, + }, + Metadata: map[string]interface{}{ + "thread_duration": thread.LastActivity.Sub(thread.CreatedAt).Minutes(), + "message_count": len(thread.Messages), + "agent_count": len(thread.Participants), + "consensus_type": consensus.Type, + }, + } + + // Send to SLURP system + if err := sis.slurpClient.SubmitContextEvent(event); err != nil { + return nil, fmt.Errorf("failed to submit SLURP event: %w", err) + } + + // Notify BZZZ network of event generation + sis.notifyEventGenerated(thread, event) + + return event, nil +} +``` + +### 4.2 WHOOSH Orchestration Integration + +#### GPT-4 Agent Registration with WHOOSH + +```go +type WHOOSHIntegrationService struct { + whooshClient *whoosh.Client + agentRegistry map[string]*GPTAgent +} + +func (wis *WHOOSHIntegrationService) RegisterGPTAgentWithWHOOSH( + agent *GPTAgent, +) error { + + // Create WHOOSH agent registration + registration := &whoosh.AgentRegistration{ + AgentID: agent.ID, + Type: "gpt_agent", + Role: string(agent.Role), + Capabilities: agent.Capabilities, + Metadata: map[string]interface{}{ + "model": agent.Model, + "max_tokens": agent.TokenLimit, + "cost_per_token": getTokenCost(agent.Model), + "bzzz_address": fmt.Sprintf("bzzz://%s:%s@*", agent.ID, agent.Role), + }, + Endpoints: whoosh.AgentEndpoints{ + StatusCheck: fmt.Sprintf("http://mcp-server:8080/agents/%s/status", agent.ID), + TaskAssign: fmt.Sprintf("http://mcp-server:8080/agents/%s/tasks", agent.ID), + Collaborate: fmt.Sprintf("http://mcp-server:8080/agents/%s/collaborate", agent.ID), + }, + HealthCheck: whoosh.HealthCheckConfig{ + Interval: 30 * time.Second, + Timeout: 10 * time.Second, + Retries: 3, + }, + } + + // Submit registration + if err := wis.whooshClient.RegisterAgent(registration); err != nil { + return fmt.Errorf("failed to register with WHOOSH: %w", err) + } + + // Start health reporting + go wis.reportAgentHealth(agent) + + return nil +} +``` + +### 4.3 TGN (The Garden Network) Connectivity + +#### Cross-Network Agent Discovery + +```go +type TGNConnector struct { + tgnClient *tgn.Client + bzzzNetwork *BzzzNetwork + agentRegistry *AgentRegistry +} + +func (tgn *TGNConnector) DiscoverCrossNetworkAgents( + query *AgentDiscoveryQuery, +) ([]*RemoteAgent, error) { + + // Query TGN for agents matching criteria + tgnQuery := &tgn.AgentQuery{ + Capabilities: query.RequiredCapabilities, + Role: query.Role, + Network: "bzzz", + Available: true, + } + + remoteAgents, err := tgn.tgnClient.DiscoverAgents(tgnQuery) + if err != nil { + return nil, err + } + + // Convert TGN agents to BZZZ addressable agents + var bzzzAgents []*RemoteAgent + for _, remote := range remoteAgents { + bzzzAgent := &RemoteAgent{ + ID: remote.ID, + Network: remote.Network, + BzzzAddress: fmt.Sprintf("bzzz://%s:%s@%s/*", + remote.ID, remote.Role, remote.Network), + Capabilities: remote.Capabilities, + Endpoint: remote.Endpoint, + } + bzzzAgents = append(bzzzAgents, bzzzAgent) + } + + return bzzzAgents, nil +} +``` + +--- + +## 5. Implementation Roadmap + +### 5.1 Phase 1: Core MCP Infrastructure (Weeks 1-2) + +#### Week 1: MCP Server Foundation +- [ ] Implement basic MCP server with tool registry +- [ ] Create OpenAI API integration wrapper +- [ ] Establish P2P node connection interface +- [ ] Basic agent lifecycle management + +**Key Deliverables:** +- MCP server binary with basic tool definitions +- OpenAI GPT-4 integration module +- Agent registration and deregistration flows + +#### Week 2: Protocol Tool Implementation +- [ ] Implement all six core bzzz:// protocol tools +- [ ] Add semantic addressing support +- [ ] Create pubsub message routing +- [ ] Basic conversation threading + +**Key Deliverables:** +- Full protocol tool suite +- Address resolution system +- Message routing infrastructure + +### 5.2 Phase 2: Agent Framework (Weeks 3-4) + +#### Week 3: Agent Specializations +- [ ] Define role-based agent templates +- [ ] Implement reasoning prompt systems +- [ ] Create capability matching logic +- [ ] Agent memory management + +#### Week 4: Collaboration Patterns +- [ ] Multi-agent conversation threading +- [ ] Consensus building algorithms +- [ ] Escalation rule engine +- [ ] Human intervention workflows + +### 5.3 Phase 3: CHORUS Integration (Weeks 5-6) + +#### Week 5: SLURP Integration +- [ ] Consensus-to-SLURP event generation +- [ ] Context relevance scoring +- [ ] Feedback loop implementation +- [ ] Performance optimization + +#### Week 6: WHOOSH & TGN Integration +- [ ] Agent registration with WHOOSH +- [ ] Cross-network agent discovery +- [ ] Task orchestration bridging +- [ ] Network topology management + +### 5.4 Phase 4: Production Readiness (Weeks 7-8) + +#### Week 7: Monitoring & Cost Management +- [ ] OpenAI cost tracking and limits +- [ ] Performance monitoring dashboards +- [ ] Conversation analytics +- [ ] Agent efficiency metrics + +#### Week 8: Testing & Deployment +- [ ] End-to-end integration testing +- [ ] Load testing with multiple agents +- [ ] Security auditing +- [ ] Production deployment automation + +--- + +## 6. Technical Requirements + +### 6.1 Infrastructure Requirements + +#### Server Specifications +- **CPU**: 8+ cores for concurrent agent processing +- **RAM**: 32GB+ for conversation context management +- **Storage**: 1TB+ SSD for conversation history and logs +- **Network**: High-speed connection for P2P communication + +#### Software Dependencies +- **Go 1.21+**: For BZZZ P2P integration +- **Node.js 18+**: For MCP server implementation +- **Docker**: For containerized deployment +- **PostgreSQL 14+**: For conversation persistence + +### 6.2 Security Considerations + +#### API Key Management +- OpenAI API keys stored in secure vault +- Per-agent API key rotation +- Usage monitoring and alerting +- Rate limiting and quotas + +#### P2P Security +- Message signing and verification +- Agent authentication protocols +- Network access controls +- Audit logging + +### 6.3 Cost Management + +#### Token Usage Optimization +```go +type CostOptimizer struct { + tokenBudgets map[string]*TokenBudget + usageTracking *UsageTracker + costCalculator *CostCalculator +} + +func (co *CostOptimizer) OptimizeConversation(thread *ConversationThread) { + // Compress context when approaching limits + if thread.EstimatedTokens() > thread.TokenBudget * 0.8 { + co.compressConversationHistory(thread) + } + + // Use cheaper models for routine tasks + if thread.Complexity < ComplexityThreshold { + co.assignModel(thread, "gpt-4o-mini") + } + + // Implement conversation summarization + if len(thread.Messages) > MaxMessagesBeforeSummary { + co.summarizeAndTruncate(thread) + } +} +``` + +--- + +## 7. Code Examples + +### 7.1 MCP Server Implementation + +```go +// pkg/mcp/server.go +package mcp + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + + "github.com/anthonyrawlins/bzzz/p2p" + "github.com/anthonyrawlins/bzzz/pubsub" + openai "github.com/sashabaranov/go-openai" +) + +type McpServer struct { + p2pNode *p2p.Node + pubsub *pubsub.PubSub + openaiClient *openai.Client + agents map[string]*GPTAgent + tools map[string]ToolHandler +} + +func NewMcpServer(apiKey string, node *p2p.Node, ps *pubsub.PubSub) *McpServer { + server := &McpServer{ + p2pNode: node, + pubsub: ps, + openaiClient: openai.NewClient(apiKey), + agents: make(map[string]*GPTAgent), + tools: make(map[string]ToolHandler), + } + + // Register protocol tools + server.registerProtocolTools() + + return server +} + +func (s *McpServer) registerProtocolTools() { + s.tools["bzzz_announce"] = s.handleBzzzAnnounce + s.tools["bzzz_lookup"] = s.handleBzzzLookup + s.tools["bzzz_get"] = s.handleBzzzGet + s.tools["bzzz_post"] = s.handleBzzzPost + s.tools["bzzz_thread"] = s.handleBzzzThread + s.tools["bzzz_subscribe"] = s.handleBzzzSubscribe +} + +func (s *McpServer) handleBzzzAnnounce(params map[string]interface{}) (interface{}, error) { + agentID, ok := params["agent_id"].(string) + if !ok { + return nil, fmt.Errorf("agent_id is required") + } + + role, ok := params["role"].(string) + if !ok { + return nil, fmt.Errorf("role is required") + } + + // Create announcement message + announcement := map[string]interface{}{ + "agent_id": agentID, + "role": role, + "capabilities": params["capabilities"], + "specialization": params["specialization"], + "max_tasks": params["max_tasks"], + "announced_at": time.Now(), + } + + // Publish to BZZZ network + err := s.pubsub.PublishBzzzMessage(pubsub.CapabilityBcast, announcement) + if err != nil { + return nil, fmt.Errorf("failed to announce: %w", err) + } + + return map[string]interface{}{ + "status": "announced", + "message": fmt.Sprintf("Agent %s (%s) announced to network", agentID, role), + }, nil +} + +func (s *McpServer) handleBzzzLookup(params map[string]interface{}) (interface{}, error) { + address, ok := params["semantic_address"].(string) + if !ok { + return nil, fmt.Errorf("semantic_address is required") + } + + // Parse semantic address (bzzz://agent:role@project:task/path) + parsedAddr, err := parseSemanticAddress(address) + if err != nil { + return nil, fmt.Errorf("invalid semantic address: %w", err) + } + + // Discover matching agents + agents := s.discoverAgents(parsedAddr, params["filter_criteria"]) + + return map[string]interface{}{ + "address": address, + "matches": agents, + "count": len(agents), + }, nil +} +``` + +### 7.2 GPT-4 Agent Implementation + +```go +// pkg/agents/gpt_agent.go +package agents + +import ( + "context" + "fmt" + + openai "github.com/sashabaranov/go-openai" + "github.com/anthonyrawlins/bzzz/pubsub" +) + +type GPTAgent struct { + ID string + Role AgentRole + Model string + Client *openai.Client + + SystemPrompt string + Memory *AgentMemory + CostTracker *CostTracker + + // BZZZ Integration + PubSub *pubsub.PubSub + Logger *logging.HypercoreLog +} + +func (agent *GPTAgent) ProcessCollaborativeTask( + task *repository.Task, + thread *ConversationThread, +) error { + + // Build context from conversation history + context := agent.buildTaskContext(task, thread) + + // Create GPT-4 request + messages := []openai.ChatCompletionMessage{ + { + Role: openai.ChatMessageRoleSystem, + Content: agent.buildSystemPrompt(task, thread), + }, + } + + // Add conversation history + for _, msg := range thread.Messages { + messages = append(messages, openai.ChatCompletionMessage{ + Role: openai.ChatMessageRoleUser, + Content: fmt.Sprintf("[%s]: %s", msg.From, msg.Content), + }) + } + + // Add current task context + messages = append(messages, openai.ChatCompletionMessage{ + Role: openai.ChatMessageRoleUser, + Content: agent.formatTaskForGPT(task), + }) + + // Make GPT-4 request + resp, err := agent.Client.CreateChatCompletion( + context.Background(), + openai.ChatCompletionRequest{ + Model: agent.Model, + Messages: messages, + MaxTokens: 2000, + Tools: agent.getAvailableTools(), + }, + ) + + if err != nil { + return fmt.Errorf("GPT-4 request failed: %w", err) + } + + // Process response and tool calls + return agent.processGPTResponse(resp, thread) +} + +func (agent *GPTAgent) buildSystemPrompt(task *repository.Task, thread *ConversationThread) string { + basePrompt := agent.SystemPrompt + + // Add role-specific context + roleContext := agent.getRoleSpecificContext(task) + + // Add collaboration context + collabContext := fmt.Sprintf( + "\nYou are collaborating with %d other agents in thread '%s'.\n"+ + "Current participants: %s\n"+ + "Thread topic: %s\n"+ + "Your role in this collaboration: %s\n", + len(thread.Participants)-1, + thread.ID, + getParticipantList(thread.Participants), + thread.Topic, + agent.Role, + ) + + // Add available tools context + toolsContext := "\nAvailable BZZZ tools:\n" + for toolName, tool := range agent.getAvailableTools() { + toolsContext += fmt.Sprintf("- %s: %s\n", toolName, tool.Function.Description) + } + + return basePrompt + roleContext + collabContext + toolsContext +} +``` + +### 7.3 Conversation Threading + +```go +// pkg/conversations/thread_manager.go +package conversations + +type ThreadManager struct { + threads map[string]*ConversationThread + participants map[string][]string // agentID -> threadIDs + summaryEngine *SummaryEngine + escalationMgr *EscalationManager +} + +func (tm *ThreadManager) CreateCollaborativeThread( + topic string, + task *repository.Task, + requiredRoles []AgentRole, +) (*ConversationThread, error) { + + thread := &ConversationThread{ + ID: generateThreadID(topic, task.Number), + Topic: topic, + State: ThreadStateActive, + CreatedAt: time.Now(), + SharedContext: map[string]interface{}{ + "task": task, + "required_roles": requiredRoles, + }, + } + + // Find and invite agents + for _, role := range requiredRoles { + agents := tm.findAvailableAgentsByRole(role) + if len(agents) == 0 { + return nil, fmt.Errorf("no available agents for role: %s", role) + } + + // Select best agent for this role + selectedAgent := tm.selectBestAgent(agents, task) + + thread.Participants = append(thread.Participants, AgentParticipant{ + AgentID: selectedAgent.ID, + Role: role, + Status: ParticipantStatusInvited, + }) + } + + // Initialize thread + tm.threads[thread.ID] = thread + + // Send invitations + for _, participant := range thread.Participants { + if err := tm.inviteToThread(participant.AgentID, thread); err != nil { + fmt.Printf("Failed to invite agent %s: %v\n", participant.AgentID, err) + } + } + + // Start thread monitoring + go tm.monitorThread(thread) + + return thread, nil +} + +func (tm *ThreadManager) PostMessage( + threadID string, + fromAgent string, + content string, + messageType MessageType, +) error { + + thread, exists := tm.threads[threadID] + if !exists { + return fmt.Errorf("thread %s not found", threadID) + } + + message := ThreadMessage{ + ID: generateMessageID(), + From: fromAgent, + Content: content, + Type: messageType, + Timestamp: time.Now(), + } + + thread.Messages = append(thread.Messages, message) + thread.LastActivity = time.Now() + + // Notify all participants + for _, participant := range thread.Participants { + if participant.AgentID != fromAgent { + if err := tm.notifyParticipant(participant.AgentID, thread, message); err != nil { + fmt.Printf("Failed to notify %s: %v\n", participant.AgentID, err) + } + } + } + + // Check for escalation conditions + if actions := tm.escalationMgr.CheckEscalation(thread); len(actions) > 0 { + tm.executeEscalationActions(thread, actions) + } + + return nil +} +``` + +--- + +## 8. Success Metrics + +### 8.1 Performance Metrics +- **Agent Response Time**: < 30 seconds for routine tasks +- **Collaboration Efficiency**: 40% reduction in task completion time +- **Consensus Success Rate**: > 85% of collaborative discussions reach consensus +- **Escalation Rate**: < 15% of threads require human intervention + +### 8.2 Cost Metrics +- **Token Efficiency**: < $0.50 per task for routine tasks +- **Model Selection Accuracy**: > 90% appropriate model selection +- **Context Compression Ratio**: 70% reduction in token usage through compression + +### 8.3 Quality Metrics +- **Code Review Accuracy**: > 95% critical issues detected +- **Documentation Completeness**: > 90% coverage of technical requirements +- **Architecture Consistency**: > 95% adherence to established patterns + +--- + +## 9. Security and Compliance + +### 9.1 Data Protection +- All conversation data encrypted at rest and in transit +- Agent memory isolation between different projects +- Automatic PII detection and redaction +- Configurable data retention policies + +### 9.2 Access Control +- Role-based access to different agent capabilities +- Project-level agent permissions +- API key scoping and rotation +- Audit logging of all agent actions + +### 9.3 Compliance Considerations +- GDPR compliance for European operations +- SOC 2 Type II compliance framework +- Regular security audits and penetration testing +- Incident response procedures for AI agent failures + +--- + +This comprehensive design provides the foundation for implementing GPT-4 agents as first-class citizens in the BZZZ v2 distributed system, enabling sophisticated multi-agent collaboration while maintaining the security, performance, and cost-effectiveness required for production deployment. \ No newline at end of file diff --git a/UNIFIED_DEVELOPMENT_PLAN.md b/UNIFIED_DEVELOPMENT_PLAN.md index 9b8179f7..3618a591 100644 --- a/UNIFIED_DEVELOPMENT_PLAN.md +++ b/UNIFIED_DEVELOPMENT_PLAN.md @@ -1,11 +1,11 @@ -# Project Bzzz & Antennae: Integrated Development Plan +# Project Bzzz & HMMM: Integrated Development Plan ## 1. Unified Vision -This document outlines a unified development plan for **Project Bzzz** and its integrated meta-discussion layer, **Project Antennae**. The vision is to build a decentralized task execution network where autonomous agents can not only **act** but also **reason and collaborate** before acting. +This document outlines a unified development plan for **Project Bzzz** and its integrated meta-discussion layer, **Project HMMM**. The vision is to build a decentralized task execution network where autonomous agents can not only **act** but also **reason and collaborate** before acting. - **Bzzz** provides the core P2P execution fabric (task claiming, execution, results). -- **Antennae** provides the collaborative "social brain" (task clarification, debate, knowledge sharing). +- **HMMM** provides the collaborative "social brain" (task clarification, debate, knowledge sharing). By developing them together, we create a system that is both resilient and intelligent. @@ -19,8 +19,8 @@ The combined architecture remains consistent with the principles of decentraliza | :--- | :--- | :--- | | **Networking** | **libp2p** | Peer discovery, identity, and secure P2P communication. | | **Task Management** | **GitHub Issues** | The single source of truth for task definition and atomic allocation via assignment. | -| **Messaging** | **libp2p Pub/Sub** | Used for both `bzzz` (capabilities) and `antennae` (meta-discussion) topics. | -| **Logging** | **Hypercore Protocol** | A single, tamper-proof log stream per agent will store both execution logs (Bzzz) and discussion transcripts (Antennae). | +| **Messaging** | **libp2p Pub/Sub** | Used for both `bzzz` (capabilities) and `hmmm` (meta-discussion) topics. | +| **Logging** | **Hypercore Protocol** | A single, tamper-proof log stream per agent will store both execution logs (Bzzz) and discussion transcripts (HMMM). | --- @@ -33,7 +33,7 @@ The agent's task lifecycle will be enhanced to include a reasoning step: 1. **Discover & Claim:** An agent discovers an unassigned GitHub issue and claims it by assigning itself. 2. **Open Meta-Channel:** The agent immediately joins a dedicated pub/sub topic: `bzzz/meta/issue/{id}`. 3. **Propose Plan:** The agent posts its proposed plan of action to the channel. *e.g., "I will address this by modifying `file.py` and adding a new function `x()`."* -4. **Listen & Discuss:** The agent waits for a brief "objection period" (e.g., 30 seconds). Other agents can chime in with suggestions, corrections, or questions. This is the core loop of the Antennae layer. +4. **Listen & Discuss:** The agent waits for a brief "objection period" (e.g., 30 seconds). Other agents can chime in with suggestions, corrections, or questions. This is the core loop of the HMMM layer. 5. **Execute:** If no major objections are raised, the agent proceeds with its plan. 6. **Report:** The agent creates a Pull Request. The PR description will include a link to the Hypercore log containing the full transcript of the pre-execution discussion. @@ -74,7 +74,7 @@ This 8-week plan merges the development of both projects into a single, cohesive | **1** | **P2P Foundation & Logging** | Establish the core agent identity and a unified **Hypercore log stream** for both action and discussion events. | | **2** | **Capability Broadcasting** | Agents broadcast capabilities, including which reasoning models they have available (e.g., `claude-3-opus`). | | **3** | **GitHub Task Claiming & Channel Creation** | Implement assignment-based task claiming. Upon claim, the agent **creates and subscribes to the meta-discussion channel**. | -| **4** | **Pre-Execution Discussion** | Implement the "propose plan" and "listen for objections" logic. This is the first functional version of the Antennae layer. | +| **4** | **Pre-Execution Discussion** | Implement the "propose plan" and "listen for objections" logic. This is the first functional version of the HMMM layer. | | **5** | **Result Workflow with Logging** | Implement PR creation. The PR body **must link to the Hypercore discussion log**. | | **6** | **Full Collaborative Help** | Implement the full `task_help_request` and `meta_msg` response flow, respecting all safeguards (hop limits, TTLs). | | **7** | **Unified Monitoring** | The Mesh Visualizer dashboard will display agent status, execution logs, and **live meta-discussion transcripts**. | @@ -84,4 +84,4 @@ This 8-week plan merges the development of both projects into a single, cohesive ## 5. Conclusion -By integrating Antennae from the outset, we are not just building a distributed task runner; we are building a **distributed reasoning system**. This approach will lead to a more robust, intelligent, and auditable Hive, where agents think and collaborate before they act. +By integrating HMMM from the outset, we are not just building a distributed task runner; we are building a **distributed reasoning system**. This approach will lead to a more robust, intelligent, and auditable Hive, where agents think and collaborate before they act. diff --git a/cmd/test_coordination.go b/cmd/test_coordination.go index 116990eb..ede7ed87 100644 --- a/cmd/test_coordination.go +++ b/cmd/test_coordination.go @@ -20,7 +20,7 @@ func main() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - fmt.Println("πŸ”¬ Starting Bzzz Antennae Coordination Test with Monitoring") + fmt.Println("πŸ”¬ Starting Bzzz HMMM Coordination Test with Monitoring") fmt.Println("==========================================================") // Initialize P2P node for testing @@ -40,16 +40,16 @@ func main() { defer mdnsDiscovery.Close() // Initialize PubSub for test coordination - ps, err := pubsub.NewPubSub(ctx, node.Host(), "bzzz/test/coordination", "antennae/test/meta-discussion") + ps, err := pubsub.NewPubSub(ctx, node.Host(), "bzzz/test/coordination", "hmmm/test/meta-discussion") if err != nil { log.Fatalf("Failed to create test PubSub: %v", err) } defer ps.Close() - // Initialize Antennae Monitor - monitor, err := monitoring.NewAntennaeMonitor(ctx, ps, "/tmp/bzzz_logs") + // Initialize HMMM Monitor + monitor, err := monitoring.NewHmmmMonitor(ctx, ps, "/tmp/bzzz_logs") if err != nil { - log.Fatalf("Failed to create antennae monitor: %v", err) + log.Fatalf("Failed to create HMMM monitor: %v", err) } defer monitor.Stop() @@ -70,7 +70,7 @@ func main() { fmt.Println("🎯 Running coordination scenarios...") runCoordinationTest(ctx, ps, simulator) - fmt.Println("πŸ“Š Monitoring antennae activity...") + fmt.Println("πŸ“Š Monitoring HMMM activity...") fmt.Println(" - Task announcements every 45 seconds") fmt.Println(" - Coordination scenarios every 2 minutes") fmt.Println(" - Agent responses every 30 seconds") @@ -127,7 +127,7 @@ func runCoordinationTest(ctx context.Context, ps *pubsub.PubSub, simulator *test "started_at": time.Now().Unix(), } - if err := ps.PublishAntennaeMessage(pubsub.CoordinationRequest, scenarioData); err != nil { + if err := ps.PublishHmmmMessage(pubsub.CoordinationRequest, scenarioData); err != nil { fmt.Printf("❌ Failed to publish scenario start: %v\n", err) return } @@ -204,7 +204,7 @@ func simulateAgentResponses(ctx context.Context, ps *pubsub.PubSub, scenario tes fmt.Printf(" πŸ€– Agent response %d/%d: %s\n", i+1, len(responses), response["message"]) - if err := ps.PublishAntennaeMessage(pubsub.MetaDiscussion, response); err != nil { + if err := ps.PublishHmmmMessage(pubsub.MetaDiscussion, response); err != nil { fmt.Printf("❌ Failed to publish agent response: %v\n", err) } @@ -226,15 +226,15 @@ func simulateAgentResponses(ctx context.Context, ps *pubsub.PubSub, scenario tes } fmt.Println(" βœ… Consensus reached on coordination plan") - if err := ps.PublishAntennaeMessage(pubsub.CoordinationComplete, consensus); err != nil { + if err := ps.PublishHmmmMessage(pubsub.CoordinationComplete, consensus); err != nil { fmt.Printf("❌ Failed to publish consensus: %v\n", err) } } // printFinalResults shows the final monitoring results -func printFinalResults(monitor *monitoring.AntennaeMonitor) { +func printFinalResults(monitor *monitoring.HmmmMonitor) { fmt.Println("\n" + "="*60) - fmt.Println("πŸ“Š FINAL ANTENNAE MONITORING RESULTS") + fmt.Println("πŸ“Š FINAL HMMM MONITORING RESULTS") fmt.Println("="*60) metrics := monitor.GetMetrics() diff --git a/cmd/test_runner.go b/cmd/test_runner.go index 1766deb9..863c1529 100644 --- a/cmd/test_runner.go +++ b/cmd/test_runner.go @@ -19,7 +19,7 @@ func main() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - fmt.Println("πŸ§ͺ Starting Bzzz Antennae Test Runner") + fmt.Println("πŸ§ͺ Starting Bzzz HMMM Test Runner") fmt.Println("====================================") // Initialize P2P node for testing @@ -39,7 +39,7 @@ func main() { defer mdnsDiscovery.Close() // Initialize PubSub for test coordination - ps, err := pubsub.NewPubSub(ctx, node.Host(), "bzzz/test/coordination", "antennae/test/meta-discussion") + ps, err := pubsub.NewPubSub(ctx, node.Host(), "bzzz/test/coordination", "hmmm/test/meta-discussion") if err != nil { log.Fatalf("Failed to create test PubSub: %v", err) } @@ -114,12 +114,12 @@ func runTaskSimulator(ctx context.Context, ps *pubsub.PubSub) { } } -// runTestSuite runs the full antennae test suite +// runTestSuite runs the full HMMM test suite func runTestSuite(ctx context.Context, ps *pubsub.PubSub) { - fmt.Println("\nπŸ§ͺ Running Antennae Test Suite") + fmt.Println("\nπŸ§ͺ Running HMMM Test Suite") fmt.Println("==============================") - testSuite := test.NewAntennaeTestSuite(ctx, ps) + testSuite := test.NewHmmmTestSuite(ctx, ps) testSuite.RunFullTestSuite() // Save test results @@ -133,7 +133,7 @@ func runInteractiveMode(ctx context.Context, ps *pubsub.PubSub, node *p2p.Node) fmt.Println("===========================") simulator := test.NewTaskSimulator(ps, ctx) - testSuite := test.NewAntennaeTestSuite(ctx, ps) + testSuite := test.NewHmmmTestSuite(ctx, ps) fmt.Println("Available commands:") fmt.Println(" 'start' - Start task simulator") diff --git a/deploy/DEPLOYMENT_GUIDE.md b/deploy/DEPLOYMENT_GUIDE.md new file mode 100644 index 00000000..3f38e38d --- /dev/null +++ b/deploy/DEPLOYMENT_GUIDE.md @@ -0,0 +1,590 @@ +# BZZZ MCP Integration Deployment Guide + +This guide provides step-by-step instructions for deploying the BZZZ MCP integration with GPT-4 agents across the CHORUS cluster. + +## Prerequisites + +### Infrastructure Requirements + +- **Cluster Nodes**: Minimum 3 nodes (WALNUT, IRONWOOD, ACACIA) +- **RAM**: 32GB+ per node for optimal performance +- **Storage**: 1TB+ SSD per node for conversation history and logs +- **Network**: High-speed connection between nodes for P2P communication + +### Software Prerequisites + +```bash +# On each node, ensure these are installed: +docker --version # Docker 24.0+ +docker-compose --version # Docker Compose 2.20+ +go version # Go 1.21+ +node --version # Node.js 18+ +``` + +### API Keys and Secrets + +Ensure the OpenAI API key is properly stored: + +```bash +# Verify the OpenAI API key exists +cat ~/chorus/business/secrets/openai-api-key-for-bzzz.txt +``` + +## Deployment Steps + +### 1. Pre-Deployment Setup + +#### Clone and Build + +```bash +cd /home/tony/chorus/project-queues/active/BZZZ + +# Build Go components +go mod download +go build -o bzzz main.go + +# Build MCP server +cd mcp-server +npm install +npm run build +cd .. + +# Build Docker images +docker build -t bzzz/mcp-node:latest . +docker build -t bzzz/mcp-server:latest mcp-server/ +``` + +#### Environment Configuration + +```bash +# Create environment file +cat > .env << EOF +# BZZZ Network Configuration +BZZZ_NODE_ID=bzzz-mcp-walnut +BZZZ_NETWORK_ID=bzzz-chorus-cluster +BZZZ_P2P_PORT=4001 +BZZZ_HTTP_PORT=8080 + +# OpenAI Configuration +OPENAI_MODEL=gpt-4 +OPENAI_MAX_TOKENS=4000 +OPENAI_TEMPERATURE=0.7 + +# Cost Management +DAILY_COST_LIMIT=100.0 +MONTHLY_COST_LIMIT=1000.0 +COST_WARNING_THRESHOLD=0.8 + +# Agent Configuration +MAX_AGENTS=5 +MAX_ACTIVE_THREADS=10 +THREAD_TIMEOUT=3600 + +# Database Configuration +POSTGRES_PASSWORD=$(openssl rand -base64 32) + +# Monitoring +GRAFANA_PASSWORD=$(openssl rand -base64 16) + +# Integration URLs +WHOOSH_API_URL=http://192.168.1.72:8001 +SLURP_API_URL=http://192.168.1.113:8002 +EOF + +# Source the environment +source .env +``` + +### 2. Database Initialization + +Create the PostgreSQL schema: + +```bash +cat > deploy/init-db.sql << EOF +-- BZZZ MCP Database Schema +CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; + +-- Agents table +CREATE TABLE agents ( + id VARCHAR(255) PRIMARY KEY, + role VARCHAR(100) NOT NULL, + model VARCHAR(100) NOT NULL, + capabilities TEXT[], + specialization VARCHAR(255), + max_tasks INTEGER DEFAULT 3, + status VARCHAR(50) DEFAULT 'idle', + created_at TIMESTAMP DEFAULT NOW(), + last_active TIMESTAMP DEFAULT NOW(), + node_id VARCHAR(255), + system_prompt TEXT +); + +-- Conversations table +CREATE TABLE conversations ( + id VARCHAR(255) PRIMARY KEY, + topic TEXT NOT NULL, + state VARCHAR(50) DEFAULT 'active', + created_at TIMESTAMP DEFAULT NOW(), + last_activity TIMESTAMP DEFAULT NOW(), + creator_id VARCHAR(255), + shared_context JSONB DEFAULT '{}'::jsonb +); + +-- Conversation participants +CREATE TABLE conversation_participants ( + conversation_id VARCHAR(255) REFERENCES conversations(id), + agent_id VARCHAR(255) REFERENCES agents(id), + role VARCHAR(100), + status VARCHAR(50) DEFAULT 'active', + joined_at TIMESTAMP DEFAULT NOW(), + PRIMARY KEY (conversation_id, agent_id) +); + +-- Messages table +CREATE TABLE messages ( + id UUID DEFAULT uuid_generate_v4() PRIMARY KEY, + conversation_id VARCHAR(255) REFERENCES conversations(id), + from_agent VARCHAR(255) REFERENCES agents(id), + content TEXT NOT NULL, + message_type VARCHAR(100), + timestamp TIMESTAMP DEFAULT NOW(), + reply_to UUID REFERENCES messages(id), + token_count INTEGER DEFAULT 0, + model VARCHAR(100) +); + +-- Agent tasks +CREATE TABLE agent_tasks ( + id VARCHAR(255) PRIMARY KEY, + agent_id VARCHAR(255) REFERENCES agents(id), + repository VARCHAR(255), + task_number INTEGER, + title TEXT, + status VARCHAR(50) DEFAULT 'active', + start_time TIMESTAMP DEFAULT NOW(), + context JSONB DEFAULT '{}'::jsonb, + thread_id VARCHAR(255) +); + +-- Token usage tracking +CREATE TABLE token_usage ( + id UUID DEFAULT uuid_generate_v4() PRIMARY KEY, + agent_id VARCHAR(255) REFERENCES agents(id), + conversation_id VARCHAR(255), + timestamp TIMESTAMP DEFAULT NOW(), + model VARCHAR(100), + prompt_tokens INTEGER, + completion_tokens INTEGER, + total_tokens INTEGER, + cost_usd DECIMAL(10,6) +); + +-- Agent memory +CREATE TABLE agent_memory ( + agent_id VARCHAR(255) REFERENCES agents(id), + memory_type VARCHAR(50), -- 'working', 'episodic', 'semantic' + key VARCHAR(255), + value JSONB, + timestamp TIMESTAMP DEFAULT NOW(), + expires_at TIMESTAMP, + PRIMARY KEY (agent_id, memory_type, key) +); + +-- Escalations +CREATE TABLE escalations ( + id UUID DEFAULT uuid_generate_v4() PRIMARY KEY, + conversation_id VARCHAR(255) REFERENCES conversations(id), + reason VARCHAR(255), + escalated_at TIMESTAMP DEFAULT NOW(), + escalated_by VARCHAR(255), + status VARCHAR(50) DEFAULT 'pending', + resolved_at TIMESTAMP, + resolution TEXT +); + +-- Indexes for performance +CREATE INDEX idx_agents_role ON agents(role); +CREATE INDEX idx_agents_status ON agents(status); +CREATE INDEX idx_conversations_state ON conversations(state); +CREATE INDEX idx_messages_conversation_timestamp ON messages(conversation_id, timestamp); +CREATE INDEX idx_token_usage_agent_timestamp ON token_usage(agent_id, timestamp); +CREATE INDEX idx_agent_memory_agent_type ON agent_memory(agent_id, memory_type); +EOF +``` + +### 3. Deploy to Cluster + +#### Node-Specific Deployment + +**On WALNUT (192.168.1.27):** + +```bash +# Set node-specific configuration +export BZZZ_NODE_ID=bzzz-mcp-walnut +export NODE_ROLE=primary + +# Deploy with primary node configuration +docker-compose -f deploy/docker-compose.mcp.yml up -d +``` + +**On IRONWOOD (192.168.1.72):** + +```bash +# Set node-specific configuration +export BZZZ_NODE_ID=bzzz-mcp-ironwood +export NODE_ROLE=secondary + +# Deploy as secondary node +docker-compose -f deploy/docker-compose.mcp.yml up -d +``` + +**On ACACIA (192.168.1.113):** + +```bash +# Set node-specific configuration +export BZZZ_NODE_ID=bzzz-mcp-acacia +export NODE_ROLE=secondary + +# Deploy as secondary node +docker-compose -f deploy/docker-compose.mcp.yml up -d +``` + +### 4. Service Health Verification + +#### Check Service Status + +```bash +# Check all services are running +docker-compose -f deploy/docker-compose.mcp.yml ps + +# Check BZZZ node connectivity +curl http://localhost:8080/health + +# Check MCP server status +curl http://localhost:8081/health + +# Check P2P network connectivity +curl http://localhost:8080/api/peers +``` + +#### Verify Agent Registration + +```bash +# List registered agents +curl http://localhost:8081/api/agents + +# Check agent capabilities +curl http://localhost:8081/api/agents/review_agent_architect +``` + +#### Test MCP Integration + +```bash +# Test MCP server connection +cd examples +python3 test-mcp-connection.py + +# Run collaborative review example +python3 collaborative-review-example.py +``` + +### 5. Integration with CHORUS Systems + +#### WHOOSH Integration + +```bash +# Verify WHOOSH connectivity +curl -X POST http://192.168.1.72:8001/api/agents \ + -H "Content-Type: application/json" \ + -d '{ + "agent_id": "bzzz-mcp-agent-1", + "type": "gpt_agent", + "role": "architect", + "endpoint": "http://192.168.1.27:8081" + }' +``` + +#### SLURP Integration + +```bash +# Test SLURP context event submission +curl -X POST http://192.168.1.113:8002/api/events \ + -H "Content-Type: application/json" \ + -d '{ + "type": "agent_consensus", + "source": "bzzz_mcp_integration", + "context": { + "conversation_id": "test-thread-1", + "participants": ["architect", "reviewer"], + "consensus_reached": true + } + }' +``` + +### 6. Monitoring Setup + +#### Access Monitoring Dashboards + +- **Grafana**: http://localhost:3000 (admin/password from .env) +- **Prometheus**: http://localhost:9090 +- **Logs**: Access via Grafana Loki integration + +#### Key Metrics to Monitor + +```bash +# Agent performance metrics +curl http://localhost:8081/api/stats + +# Token usage and costs +curl http://localhost:8081/api/costs/daily + +# Conversation thread health +curl http://localhost:8081/api/conversations?status=active +``` + +## Configuration Management + +### Agent Role Configuration + +Create custom agent roles: + +```bash +# Create custom agent configuration +cat > config/custom-agent-roles.json << EOF +{ + "roles": [ + { + "name": "security_architect", + "specialization": "security_design", + "capabilities": [ + "threat_modeling", + "security_architecture", + "compliance_review", + "risk_assessment" + ], + "system_prompt": "You are a security architect specializing in distributed systems security...", + "interaction_patterns": { + "architects": "security_consultation", + "developers": "security_guidance", + "reviewers": "security_validation" + } + } + ] +} +EOF +``` + +### Cost Management Configuration + +```bash +# Configure cost alerts +cat > config/cost-limits.json << EOF +{ + "global_limits": { + "daily_limit": 100.0, + "monthly_limit": 1000.0, + "per_agent_daily": 20.0 + }, + "alert_thresholds": { + "warning": 0.8, + "critical": 0.95 + }, + "alert_channels": { + "slack_webhook": "${SLACK_WEBHOOK_URL}", + "email": "admin@deepblack.cloud" + } +} +EOF +``` + +### Escalation Rules Configuration + +```bash +# Configure escalation rules +cat > config/escalation-rules.json << EOF +{ + "rules": [ + { + "name": "Long Running Thread", + "conditions": [ + {"type": "thread_duration", "threshold": 7200}, + {"type": "no_progress", "threshold": true, "timeframe": 1800} + ], + "actions": [ + {"type": "notify_human", "target": "project_manager"}, + {"type": "escalate_to_senior", "role": "senior_architect"} + ] + }, + { + "name": "High Cost Alert", + "conditions": [ + {"type": "token_cost", "threshold": 50.0, "timeframe": 3600} + ], + "actions": [ + {"type": "throttle_agents", "reduction": 0.5}, + {"type": "notify_admin", "urgency": "high"} + ] + } + ] +} +EOF +``` + +## Troubleshooting + +### Common Issues + +#### MCP Server Connection Issues + +```bash +# Check MCP server logs +docker logs bzzz-mcp-server + +# Verify OpenAI API key +docker exec bzzz-mcp-server cat /secrets/openai-api-key-for-bzzz.txt + +# Test API key validity +curl -H "Authorization: Bearer $(cat ~/chorus/business/secrets/openai-api-key-for-bzzz.txt)" \ + https://api.openai.com/v1/models +``` + +#### P2P Network Issues + +```bash +# Check P2P connectivity +docker exec bzzz-mcp-node ./bzzz status + +# View P2P logs +docker logs bzzz-mcp-node | grep p2p + +# Check firewall settings +sudo ufw status | grep 4001 +``` + +#### Agent Performance Issues + +```bash +# Check agent memory usage +curl http://localhost:8081/api/agents/memory-stats + +# Review token usage +curl http://localhost:8081/api/costs/breakdown + +# Check conversation thread status +curl http://localhost:8081/api/conversations?status=active +``` + +### Performance Optimization + +#### Database Tuning + +```sql +-- Optimize PostgreSQL for BZZZ MCP workload +ALTER SYSTEM SET shared_buffers = '256MB'; +ALTER SYSTEM SET work_mem = '16MB'; +ALTER SYSTEM SET maintenance_work_mem = '128MB'; +ALTER SYSTEM SET max_connections = 100; +SELECT pg_reload_conf(); +``` + +#### Agent Optimization + +```bash +# Optimize agent memory usage +curl -X POST http://localhost:8081/api/agents/cleanup-memory + +# Adjust token limits based on usage patterns +curl -X PUT http://localhost:8081/api/config/token-limits \ + -H "Content-Type: application/json" \ + -d '{"max_tokens": 2000, "context_window": 16000}' +``` + +## Backup and Recovery + +### Database Backup + +```bash +# Create database backup +docker exec bzzz-mcp-postgres pg_dump -U bzzz bzzz_mcp | gzip > backup/bzzz-mcp-$(date +%Y%m%d).sql.gz + +# Restore from backup +gunzip -c backup/bzzz-mcp-20250107.sql.gz | docker exec -i bzzz-mcp-postgres psql -U bzzz -d bzzz_mcp +``` + +### Configuration Backup + +```bash +# Backup agent configurations +docker exec bzzz-mcp-server tar czf - /var/lib/mcp/config > backup/mcp-config-$(date +%Y%m%d).tar.gz + +# Backup conversation data +docker exec bzzz-conversation-manager tar czf - /var/lib/conversations > backup/conversations-$(date +%Y%m%d).tar.gz +``` + +## Security Considerations + +### API Key Security + +```bash +# Rotate OpenAI API key monthly +echo "new-api-key" > ~/chorus/business/secrets/openai-api-key-for-bzzz.txt +docker-compose -f deploy/docker-compose.mcp.yml restart mcp-server + +# Monitor API key usage +curl -H "Authorization: Bearer $(cat ~/chorus/business/secrets/openai-api-key-for-bzzz.txt)" \ + https://api.openai.com/v1/usage +``` + +### Network Security + +```bash +# Configure firewall rules +sudo ufw allow from 192.168.1.0/24 to any port 4001 # P2P port +sudo ufw allow from 192.168.1.0/24 to any port 8080 # BZZZ API +sudo ufw allow from 192.168.1.0/24 to any port 8081 # MCP API + +# Enable audit logging +docker-compose -f deploy/docker-compose.mcp.yml \ + -f deploy/docker-compose.audit.yml up -d +``` + +## Maintenance + +### Regular Maintenance Tasks + +```bash +# Weekly maintenance script +#!/bin/bash +set -e + +echo "Starting BZZZ MCP maintenance..." + +# Clean up old conversation threads +curl -X POST http://localhost:8081/api/maintenance/cleanup-threads + +# Optimize database +docker exec bzzz-mcp-postgres psql -U bzzz -d bzzz_mcp -c "VACUUM ANALYZE;" + +# Update cost tracking +curl -X POST http://localhost:8081/api/maintenance/update-costs + +# Rotate logs +docker exec bzzz-mcp-server logrotate /etc/logrotate.d/mcp + +echo "Maintenance completed successfully" +``` + +### Performance Monitoring + +```bash +# Monitor key performance indicators +curl http://localhost:8081/api/metrics | jq '{ + active_agents: .active_agents, + active_threads: .active_threads, + avg_response_time: .avg_response_time, + token_efficiency: .token_efficiency, + cost_per_task: .cost_per_task +}' +``` + +This deployment guide provides a comprehensive approach to deploying and maintaining the BZZZ MCP integration with GPT-4 agents across the CHORUS cluster. Follow the steps carefully and refer to the troubleshooting section for common issues. \ No newline at end of file diff --git a/deploy/docker-compose.mcp.yml b/deploy/docker-compose.mcp.yml new file mode 100644 index 00000000..871d4889 --- /dev/null +++ b/deploy/docker-compose.mcp.yml @@ -0,0 +1,324 @@ +version: '3.8' + +# BZZZ MCP Integration Docker Compose Configuration +# This configuration deploys the complete MCP-enabled BZZZ system with GPT-4 agents + +services: + # BZZZ P2P Node with MCP Integration + bzzz-node: + build: + context: .. + dockerfile: Dockerfile + args: + - BUILD_TARGET=mcp-enabled + container_name: bzzz-mcp-node + networks: + - bzzz-network + ports: + - "8080:8080" # BZZZ HTTP API + - "4001:4001" # LibP2P swarm port + environment: + - BZZZ_NODE_ID=${BZZZ_NODE_ID:-bzzz-mcp-1} + - BZZZ_NETWORK_ID=${BZZZ_NETWORK_ID:-bzzz-local} + - BZZZ_P2P_PORT=4001 + - BZZZ_HTTP_PORT=8080 + - MCP_ENABLED=true + - MCP_SERVER_PORT=8081 + volumes: + - bzzz-data:/var/lib/bzzz + - ../business/secrets:/secrets:ro + restart: unless-stopped + depends_on: + - redis + - postgres + + # MCP Server for GPT-4 Integration + mcp-server: + build: + context: ../mcp-server + dockerfile: Dockerfile + container_name: bzzz-mcp-server + networks: + - bzzz-network + ports: + - "8081:8081" # MCP HTTP API + - "8082:8082" # WebSocket endpoint + environment: + - NODE_ENV=production + - BZZZ_NODE_URL=http://bzzz-node:8080 + - BZZZ_NETWORK_ID=${BZZZ_NETWORK_ID:-bzzz-local} + - OPENAI_API_KEY_FILE=/secrets/openai-api-key-for-bzzz.txt + - OPENAI_MODEL=${OPENAI_MODEL:-gpt-4} + - OPENAI_MAX_TOKENS=${OPENAI_MAX_TOKENS:-4000} + - DAILY_COST_LIMIT=${DAILY_COST_LIMIT:-100.0} + - MONTHLY_COST_LIMIT=${MONTHLY_COST_LIMIT:-1000.0} + - MAX_ACTIVE_THREADS=${MAX_ACTIVE_THREADS:-10} + - MAX_AGENTS=${MAX_AGENTS:-5} + - LOG_LEVEL=${LOG_LEVEL:-info} + volumes: + - ../business/secrets:/secrets:ro + - mcp-logs:/var/log/mcp + - mcp-data:/var/lib/mcp + restart: unless-stopped + depends_on: + - bzzz-node + - postgres + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8081/health"] + interval: 30s + timeout: 10s + retries: 3 + + # Agent Manager Service + agent-manager: + build: + context: .. + dockerfile: deploy/Dockerfile.agent-manager + container_name: bzzz-agent-manager + networks: + - bzzz-network + environment: + - MCP_SERVER_URL=http://mcp-server:8081 + - POSTGRES_URL=postgres://bzzz:${POSTGRES_PASSWORD}@postgres:5432/bzzz_mcp + - REDIS_URL=redis://redis:6379 + - AGENT_LIFECYCLE_INTERVAL=30s + - AGENT_HEALTH_CHECK_INTERVAL=60s + - COST_MONITORING_INTERVAL=300s + volumes: + - agent-data:/var/lib/agents + - ../business/secrets:/secrets:ro + restart: unless-stopped + depends_on: + - mcp-server + - postgres + - redis + + # Conversation Manager Service + conversation-manager: + build: + context: .. + dockerfile: deploy/Dockerfile.conversation-manager + container_name: bzzz-conversation-manager + networks: + - bzzz-network + environment: + - MCP_SERVER_URL=http://mcp-server:8081 + - POSTGRES_URL=postgres://bzzz:${POSTGRES_PASSWORD}@postgres:5432/bzzz_mcp + - REDIS_URL=redis://redis:6379 + - THREAD_CLEANUP_INTERVAL=1h + - ESCALATION_CHECK_INTERVAL=5m + - SUMMARY_GENERATION_INTERVAL=15m + volumes: + - conversation-data:/var/lib/conversations + restart: unless-stopped + depends_on: + - mcp-server + - postgres + - redis + + # Cost Tracker Service + cost-tracker: + build: + context: .. + dockerfile: deploy/Dockerfile.cost-tracker + container_name: bzzz-cost-tracker + networks: + - bzzz-network + environment: + - MCP_SERVER_URL=http://mcp-server:8081 + - POSTGRES_URL=postgres://bzzz:${POSTGRES_PASSWORD}@postgres:5432/bzzz_mcp + - OPENAI_API_KEY_FILE=/secrets/openai-api-key-for-bzzz.txt + - COST_CALCULATION_INTERVAL=5m + - ALERT_WEBHOOK_URL=${ALERT_WEBHOOK_URL} + - SLACK_WEBHOOK_URL=${SLACK_WEBHOOK_URL} + volumes: + - cost-data:/var/lib/costs + - ../business/secrets:/secrets:ro + restart: unless-stopped + depends_on: + - mcp-server + - postgres + + # PostgreSQL Database for MCP data + postgres: + image: postgres:15-alpine + container_name: bzzz-mcp-postgres + networks: + - bzzz-network + environment: + - POSTGRES_DB=bzzz_mcp + - POSTGRES_USER=bzzz + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} + volumes: + - postgres-data:/var/lib/postgresql/data + - ./init-db.sql:/docker-entrypoint-initdb.d/init.sql + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "pg_isready -U bzzz -d bzzz_mcp"] + interval: 10s + timeout: 5s + retries: 5 + + # Redis for caching and session management + redis: + image: redis:7-alpine + container_name: bzzz-mcp-redis + networks: + - bzzz-network + command: redis-server --appendonly yes --maxmemory 256mb --maxmemory-policy allkeys-lru + volumes: + - redis-data:/data + restart: unless-stopped + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 3s + retries: 3 + + # WHOOSH Integration Service + whoosh-integration: + build: + context: ../../../WHOOSH + dockerfile: Dockerfile + container_name: bzzz-whoosh-integration + networks: + - bzzz-network + - whoosh-network + environment: + - WHOOSH_API_URL=${WHOOSH_API_URL} + - WHOOSH_API_KEY=${WHOOSH_API_KEY} + - MCP_SERVER_URL=http://mcp-server:8081 + - INTEGRATION_SYNC_INTERVAL=5m + volumes: + - whoosh-integration-data:/var/lib/whoosh-integration + - ../business/secrets:/secrets:ro + restart: unless-stopped + depends_on: + - mcp-server + + # SLURP Integration Service (Context Curation) + slurp-integration: + build: + context: ../../../slurp + dockerfile: Dockerfile + container_name: bzzz-slurp-integration + networks: + - bzzz-network + - slurp-network + environment: + - SLURP_API_URL=${SLURP_API_URL} + - SLURP_API_KEY=${SLURP_API_KEY} + - MCP_SERVER_URL=http://mcp-server:8081 + - CONTEXT_SYNC_INTERVAL=2m + - RELEVANCE_THRESHOLD=0.7 + volumes: + - slurp-integration-data:/var/lib/slurp-integration + - ../business/secrets:/secrets:ro + restart: unless-stopped + depends_on: + - mcp-server + + # Monitoring and Observability + prometheus: + image: prom/prometheus:latest + container_name: bzzz-mcp-prometheus + networks: + - bzzz-network + ports: + - "9090:9090" + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.console.libraries=/etc/prometheus/console_libraries' + - '--web.console.templates=/etc/prometheus/consoles' + - '--storage.tsdb.retention.time=200h' + - '--web.enable-lifecycle' + volumes: + - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml + - prometheus-data:/prometheus + restart: unless-stopped + + grafana: + image: grafana/grafana:latest + container_name: bzzz-mcp-grafana + networks: + - bzzz-network + ports: + - "3000:3000" + environment: + - GF_SECURITY_ADMIN_USER=${GRAFANA_USER:-admin} + - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD} + - GF_USERS_ALLOW_SIGN_UP=false + volumes: + - grafana-data:/var/lib/grafana + - ./monitoring/grafana/dashboards:/var/lib/grafana/dashboards + - ./monitoring/grafana/provisioning:/etc/grafana/provisioning + restart: unless-stopped + depends_on: + - prometheus + + # Log Aggregation + loki: + image: grafana/loki:latest + container_name: bzzz-mcp-loki + networks: + - bzzz-network + ports: + - "3100:3100" + command: -config.file=/etc/loki/local-config.yaml + volumes: + - loki-data:/loki + restart: unless-stopped + + promtail: + image: grafana/promtail:latest + container_name: bzzz-mcp-promtail + networks: + - bzzz-network + volumes: + - ./monitoring/promtail-config.yml:/etc/promtail/config.yml + - /var/log:/var/log:ro + - /var/lib/docker/containers:/var/lib/docker/containers:ro + command: -config.file=/etc/promtail/config.yml + restart: unless-stopped + depends_on: + - loki + +networks: + bzzz-network: + driver: bridge + ipam: + config: + - subnet: 172.20.0.0/16 + whoosh-network: + external: true + slurp-network: + external: true + +volumes: + bzzz-data: + driver: local + mcp-logs: + driver: local + mcp-data: + driver: local + agent-data: + driver: local + conversation-data: + driver: local + cost-data: + driver: local + postgres-data: + driver: local + redis-data: + driver: local + whoosh-integration-data: + driver: local + slurp-integration-data: + driver: local + prometheus-data: + driver: local + grafana-data: + driver: local + loki-data: + driver: local \ No newline at end of file diff --git a/docs/SYSTEM_ARCHITECTURE.md b/docs/SYSTEM_ARCHITECTURE.md index 1eec8ef8..44e79da6 100644 --- a/docs/SYSTEM_ARCHITECTURE.md +++ b/docs/SYSTEM_ARCHITECTURE.md @@ -23,7 +23,7 @@ graph TD BzzzAgent -- "Uses" --> Logging P2P(P2P/PubSub Layer) -- "Discovers Peers" --> Discovery - P2P -- "Communicates via" --> Antennae + P2P -- "Communicates via" --> HMMM Integration(GitHub Integration) -- "Polls for Tasks" --> HiveAPI Integration -- "Claims Tasks" --> GitHub @@ -84,7 +84,7 @@ flowchart TD K -- "Needs Help" --> MD1 %% Meta-Discussion Loop (Separate Cluster) - subgraph Meta_Discussion ["Meta-Discussion (Antennae)"] + subgraph Meta_Discussion ["Meta-Discussion (HMMM)"] MD1{Agent Proposes Plan} -->|PubSub| MD2[Other Agents Review] MD2 -->|Feedback| MD1 MD1 -->|Stuck?| MD3{Escalate to N8N} diff --git a/examples/collaborative-review-example.py b/examples/collaborative-review-example.py new file mode 100644 index 00000000..07267949 --- /dev/null +++ b/examples/collaborative-review-example.py @@ -0,0 +1,517 @@ +#!/usr/bin/env python3 +""" +BZZZ MCP Integration Example: Collaborative Code Review +====================================================== + +This example demonstrates how GPT-4 agents collaborate through the BZZZ MCP +integration to perform a comprehensive code review. + +Scenario: A pull request requires review from multiple specialized agents: +- Architect Agent: Reviews system design and architecture implications +- Security Agent: Analyzes security vulnerabilities +- Performance Agent: Evaluates performance impact +- Documentation Agent: Ensures proper documentation + +The agents coordinate through BZZZ semantic addressing and threaded conversations. +""" + +import asyncio +import json +import os +import sys +from typing import Dict, List, Any, Optional +from dataclasses import dataclass +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client + +# Add the parent directory to the path to import BZZZ modules +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) + +@dataclass +class CodeReviewTask: + """Represents a code review task""" + repository: str + pull_request_number: int + title: str + description: str + files_changed: List[str] + lines_of_code: int + complexity_score: float + security_risk: str # low, medium, high + +@dataclass +class AgentRole: + """Defines an agent role and its responsibilities""" + name: str + specialization: str + capabilities: List[str] + system_prompt: str + +class CollaborativeReviewOrchestrator: + """Orchestrates collaborative code review using BZZZ MCP integration""" + + def __init__(self): + self.mcp_session: Optional[ClientSession] = None + self.agents: Dict[str, AgentRole] = {} + self.active_threads: Dict[str, Dict] = {} + + async def initialize(self): + """Initialize MCP connection to BZZZ server""" + # Connect to the BZZZ MCP server + server_params = StdioServerParameters( + command="node", + args=["/home/tony/chorus/project-queues/active/BZZZ/mcp-server/dist/index.js"] + ) + + self.mcp_session = await stdio_client(server_params) + print("βœ… Connected to BZZZ MCP Server") + + # Define agent roles + self.define_agent_roles() + + def define_agent_roles(self): + """Define the specialized agent roles for code review""" + self.agents = { + "architect": AgentRole( + name="architect", + specialization="system_architecture", + capabilities=["system_design", "architecture_review", "scalability_analysis"], + system_prompt="""You are a senior software architect reviewing code changes. + Focus on: architectural consistency, design patterns, system boundaries, + scalability implications, and integration concerns.""" + ), + "security": AgentRole( + name="security_expert", + specialization="security_analysis", + capabilities=["security_review", "vulnerability_analysis", "threat_modeling"], + system_prompt="""You are a security expert reviewing code for vulnerabilities. + Focus on: input validation, authentication, authorization, data protection, + injection attacks, and secure coding practices.""" + ), + "performance": AgentRole( + name="performance_expert", + specialization="performance_optimization", + capabilities=["performance_analysis", "optimization", "profiling"], + system_prompt="""You are a performance expert reviewing code efficiency. + Focus on: algorithmic complexity, memory usage, database queries, + caching strategies, and performance bottlenecks.""" + ), + "documentation": AgentRole( + name="documentation_specialist", + specialization="technical_writing", + capabilities=["documentation_review", "api_documentation", "code_comments"], + system_prompt="""You are a documentation specialist ensuring code clarity. + Focus on: code comments, API documentation, README updates, + inline documentation, and knowledge transfer.""" + ) + } + + async def start_collaborative_review(self, task: CodeReviewTask) -> Dict[str, Any]: + """Start a collaborative review process for the given task""" + print(f"πŸ” Starting collaborative review for PR #{task.pull_request_number}") + + # Step 1: Announce agents to BZZZ network + await self.announce_agents() + + # Step 2: Create semantic addresses for the review + review_address = f"bzzz://*:*@{task.repository}:pr{task.pull_request_number}/review" + + # Step 3: Determine required agent roles based on task characteristics + required_roles = self.determine_required_roles(task) + print(f"πŸ“‹ Required roles: {', '.join(required_roles)}") + + # Step 4: Create collaborative thread + thread_id = await self.create_review_thread(task, required_roles) + print(f"πŸ’¬ Created review thread: {thread_id}") + + # Step 5: Coordinate the review process + review_results = await self.coordinate_review(thread_id, task, required_roles) + + # Step 6: Generate final review summary + final_summary = await self.generate_review_summary(thread_id, review_results) + + print("βœ… Collaborative review completed") + return final_summary + + async def announce_agents(self): + """Announce all agent roles to the BZZZ network""" + if not self.mcp_session: + raise RuntimeError("MCP session not initialized") + + for role_name, role in self.agents.items(): + result = await self.mcp_session.call_tool( + "bzzz_announce", + { + "agent_id": f"review_agent_{role_name}", + "role": role.name, + "capabilities": role.capabilities, + "specialization": role.specialization, + "max_tasks": 2 + } + ) + print(f"πŸ“‘ Announced {role_name} agent: {result.content[0].text}") + + def determine_required_roles(self, task: CodeReviewTask) -> List[str]: + """Determine which agent roles are needed based on task characteristics""" + required = ["architect"] # Architect always participates + + # Add security expert for medium/high risk changes + if task.security_risk in ["medium", "high"]: + required.append("security") + + # Add performance expert for large/complex changes + if task.lines_of_code > 500 or task.complexity_score > 7.0: + required.append("performance") + + # Add documentation expert if documentation files changed + doc_files = [f for f in task.files_changed if f.endswith(('.md', '.rst', '.txt'))] + if doc_files or task.lines_of_code > 200: + required.append("documentation") + + return required + + async def create_review_thread(self, task: CodeReviewTask, required_roles: List[str]) -> str: + """Create a threaded conversation for the review""" + if not self.mcp_session: + raise RuntimeError("MCP session not initialized") + + participants = [f"review_agent_{role}" for role in required_roles] + + result = await self.mcp_session.call_tool( + "bzzz_thread", + { + "action": "create", + "topic": f"Code Review: {task.title}", + "participants": participants + } + ) + + response_data = json.loads(result.content[0].text) + return response_data["result"]["thread_id"] + + async def coordinate_review(self, thread_id: str, task: CodeReviewTask, required_roles: List[str]) -> Dict[str, Any]: + """Coordinate the collaborative review process""" + review_results = {} + + # Step 1: Share task context with all agents + await self.share_task_context(thread_id, task) + + # Step 2: Each agent performs their specialized review + for role in required_roles: + print(f"πŸ” {role} agent performing review...") + agent_review = await self.conduct_role_specific_review(thread_id, role, task) + review_results[role] = agent_review + + # Step 3: Facilitate cross-agent discussion + discussion_results = await self.facilitate_discussion(thread_id, review_results) + review_results["discussion"] = discussion_results + + # Step 4: Reach consensus on final recommendations + consensus = await self.reach_consensus(thread_id, review_results) + review_results["consensus"] = consensus + + return review_results + + async def share_task_context(self, thread_id: str, task: CodeReviewTask): + """Share the task context with all thread participants""" + if not self.mcp_session: + raise RuntimeError("MCP session not initialized") + + context_message = { + "task": { + "repository": task.repository, + "pr_number": task.pull_request_number, + "title": task.title, + "description": task.description, + "files_changed": task.files_changed, + "lines_of_code": task.lines_of_code, + "complexity_score": task.complexity_score, + "security_risk": task.security_risk + }, + "review_guidelines": { + "focus_areas": ["correctness", "security", "performance", "maintainability"], + "severity_levels": ["critical", "major", "minor", "suggestion"], + "collaboration_expected": True + } + } + + target_address = f"bzzz://*:*@{task.repository}:pr{task.pull_request_number}/context" + + await self.mcp_session.call_tool( + "bzzz_post", + { + "target_address": target_address, + "message_type": "task_context", + "content": context_message, + "thread_id": thread_id, + "priority": "high" + } + ) + + async def conduct_role_specific_review(self, thread_id: str, role: str, task: CodeReviewTask) -> Dict[str, Any]: + """Simulate a role-specific review (in real implementation, this would call GPT-4)""" + print(f" Analyzing {len(task.files_changed)} files for {role} concerns...") + + # Simulate different review outcomes based on role + review_data = { + "architect": { + "findings": [ + "Code follows established patterns", + "Consider extracting common functionality into utility class", + "Database schema changes require migration script" + ], + "severity": "minor", + "recommendations": ["Refactor common code", "Add migration script"], + "approval_status": "approved_with_suggestions" + }, + "security": { + "findings": [ + "Input validation implemented correctly", + "SQL injection protection in place", + "Consider adding rate limiting for API endpoints" + ], + "severity": "minor", + "recommendations": ["Add rate limiting", "Update security documentation"], + "approval_status": "approved_with_suggestions" + }, + "performance": { + "findings": [ + "Database queries are optimized", + "Memory usage looks reasonable", + "Consider caching for frequently accessed data" + ], + "severity": "suggestion", + "recommendations": ["Implement caching strategy", "Add performance monitoring"], + "approval_status": "approved" + }, + "documentation": { + "findings": [ + "API documentation updated", + "Some complex functions lack comments", + "README needs update for new features" + ], + "severity": "minor", + "recommendations": ["Add function comments", "Update README"], + "approval_status": "approved_with_suggestions" + } + }.get(role, {}) + + # Post review findings to the thread + await self.post_review_findings(thread_id, role, review_data, task) + + return review_data + + async def post_review_findings(self, thread_id: str, role: str, review_data: Dict, task: CodeReviewTask): + """Post review findings to the collaborative thread""" + if not self.mcp_session: + raise RuntimeError("MCP session not initialized") + + message_content = { + "reviewer": role, + "review_type": "initial_review", + "findings": review_data.get("findings", []), + "severity": review_data.get("severity", "info"), + "recommendations": review_data.get("recommendations", []), + "approval_status": review_data.get("approval_status", "pending"), + "timestamp": "2025-01-07T12:00:00Z" + } + + target_address = f"bzzz://*:{role}@{task.repository}:pr{task.pull_request_number}/findings" + + await self.mcp_session.call_tool( + "bzzz_post", + { + "target_address": target_address, + "message_type": "review_findings", + "content": message_content, + "thread_id": thread_id, + "priority": "medium" + } + ) + + async def facilitate_discussion(self, thread_id: str, review_results: Dict[str, Any]) -> Dict[str, Any]: + """Facilitate cross-agent discussion about conflicting or overlapping concerns""" + print("πŸ’­ Facilitating inter-agent discussion...") + + # Identify areas where multiple agents have concerns + common_concerns = self.identify_common_concerns(review_results) + + discussion_points = [] + for concern in common_concerns: + discussion_point = { + "topic": concern["area"], + "agents_involved": concern["agents"], + "severity_levels": concern["severities"], + "proposed_resolution": concern["suggested_approach"] + } + discussion_points.append(discussion_point) + + # Simulate discussion outcomes + discussion_results = { + "discussion_points": discussion_points, + "resolved_conflicts": len(discussion_points), + "consensus_reached": True, + "escalation_needed": False + } + + return discussion_results + + def identify_common_concerns(self, review_results: Dict[str, Any]) -> List[Dict]: + """Identify areas where multiple agents have overlapping concerns""" + # This would analyze the review findings to find common themes + # For demo purposes, return a sample concern + return [ + { + "area": "error_handling", + "agents": ["architect", "security"], + "severities": ["minor", "minor"], + "suggested_approach": "Implement consistent error handling pattern" + } + ] + + async def reach_consensus(self, thread_id: str, review_results: Dict[str, Any]) -> Dict[str, Any]: + """Facilitate consensus-building among reviewing agents""" + print("🀝 Building consensus on final recommendations...") + + # Aggregate all findings and recommendations + all_findings = [] + all_recommendations = [] + approval_statuses = [] + + for role, results in review_results.items(): + if role == "discussion": + continue + all_findings.extend(results.get("findings", [])) + all_recommendations.extend(results.get("recommendations", [])) + approval_statuses.append(results.get("approval_status", "pending")) + + # Determine overall approval status + if all(status == "approved" for status in approval_statuses): + overall_status = "approved" + elif any(status == "rejected" for status in approval_statuses): + overall_status = "rejected" + else: + overall_status = "approved_with_changes" + + consensus = { + "overall_approval": overall_status, + "critical_issues": 0, + "major_issues": 1, + "minor_issues": 4, + "suggestions": 3, + "consolidated_recommendations": list(set(all_recommendations)), + "requires_changes": overall_status != "approved", + "consensus_confidence": 0.95 + } + + return consensus + + async def generate_review_summary(self, thread_id: str, review_results: Dict[str, Any]) -> Dict[str, Any]: + """Generate a comprehensive review summary""" + if not self.mcp_session: + raise RuntimeError("MCP session not initialized") + + # Use thread summarization tool + summary_result = await self.mcp_session.call_tool( + "bzzz_thread", + { + "action": "summarize", + "thread_id": thread_id + } + ) + + thread_summary = json.loads(summary_result.content[0].text) + + final_summary = { + "review_id": f"review_{thread_id}", + "overall_status": review_results.get("consensus", {}).get("overall_approval", "pending"), + "participating_agents": list(self.agents.keys()), + "thread_summary": thread_summary, + "key_findings": self.extract_key_findings(review_results), + "action_items": self.generate_action_items(review_results), + "approval_required": review_results.get("consensus", {}).get("requires_changes", True), + "estimated_fix_time": "2-4 hours", + "review_completed_at": "2025-01-07T12:30:00Z" + } + + return final_summary + + def extract_key_findings(self, review_results: Dict[str, Any]) -> List[str]: + """Extract the most important findings from all agent reviews""" + key_findings = [] + for role, results in review_results.items(): + if role in ["discussion", "consensus"]: + continue + findings = results.get("findings", []) + # Take first 2 findings from each agent as key findings + key_findings.extend(findings[:2]) + return key_findings + + def generate_action_items(self, review_results: Dict[str, Any]) -> List[Dict]: + """Generate actionable items based on review findings""" + action_items = [] + consensus = review_results.get("consensus", {}) + + for rec in consensus.get("consolidated_recommendations", []): + action_items.append({ + "action": rec, + "priority": "medium", + "estimated_effort": "1-2 hours", + "assignee": "developer" + }) + + return action_items + + async def cleanup(self): + """Clean up resources and close connections""" + if self.mcp_session: + await self.mcp_session.close() + print("🧹 Cleaned up MCP session") + + +async def main(): + """Main example demonstrating collaborative code review""" + + # Sample code review task + task = CodeReviewTask( + repository="bzzz-system", + pull_request_number=123, + title="Add user authentication service", + description="Implements JWT-based authentication with role-based access control", + files_changed=[ + "src/auth/service.py", + "src/auth/middleware.py", + "src/models/user.py", + "tests/test_auth.py", + "docs/api/auth.md" + ], + lines_of_code=450, + complexity_score=6.5, + security_risk="medium" + ) + + # Initialize the orchestrator + orchestrator = CollaborativeReviewOrchestrator() + + try: + print("πŸš€ Initializing BZZZ MCP Collaborative Review Example") + await orchestrator.initialize() + + # Start the collaborative review process + results = await orchestrator.start_collaborative_review(task) + + # Display results + print("\n" + "="*60) + print("πŸ“Š COLLABORATIVE REVIEW RESULTS") + print("="*60) + print(json.dumps(results, indent=2)) + + except Exception as e: + print(f"❌ Error during collaborative review: {e}") + + finally: + await orchestrator.cleanup() + +if __name__ == "__main__": + # Run the example + asyncio.run(main()) \ No newline at end of file diff --git a/examples/slurp_integration_example.go b/examples/slurp_integration_example.go new file mode 100644 index 00000000..804c66b2 --- /dev/null +++ b/examples/slurp_integration_example.go @@ -0,0 +1,342 @@ +package main + +import ( + "context" + "fmt" + "log" + "time" + + "github.com/anthonyrawlins/bzzz/pkg/config" + "github.com/anthonyrawlins/bzzz/pkg/coordination" + "github.com/anthonyrawlins/bzzz/pkg/integration" + "github.com/anthonyrawlins/bzzz/pubsub" + "github.com/libp2p/go-libp2p" + "github.com/libp2p/go-libp2p/core/host" +) + +// This example demonstrates how to integrate SLURP event system with BZZZ HMMM discussions +func main() { + fmt.Println("πŸš€ SLURP Integration Example") + + // Create context + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Example 1: Basic SLURP Configuration + basicSlurpIntegrationExample(ctx) + + // Example 2: Advanced Configuration with Project Mappings + advancedSlurpConfigurationExample() + + // Example 3: Manual HMMM Discussion Processing + manualDiscussionProcesssingExample(ctx) + + // Example 4: Real-time Integration Setup + realtimeIntegrationExample(ctx) + + fmt.Println("βœ… All examples completed successfully") +} + +// Example 1: Basic SLURP integration setup +func basicSlurpIntegrationExample(ctx context.Context) { + fmt.Println("\nπŸ“‹ Example 1: Basic SLURP Integration Setup") + + // Create basic SLURP configuration + slurpConfig := config.SlurpConfig{ + Enabled: true, + BaseURL: "http://localhost:8080", + APIKey: "your-api-key-here", + Timeout: 30 * time.Second, + RetryCount: 3, + RetryDelay: 5 * time.Second, + + EventGeneration: config.EventGenerationConfig{ + MinConsensusStrength: 0.7, + MinParticipants: 2, + RequireUnanimity: false, + MaxDiscussionDuration: 30 * time.Minute, + MinDiscussionDuration: 1 * time.Minute, + EnabledEventTypes: []string{ + "announcement", "warning", "blocker", "approval", + "priority_change", "access_update", "structural_change", + }, + }, + + DefaultEventSettings: config.DefaultEventConfig{ + DefaultSeverity: 5, + DefaultCreatedBy: "hmmm-consensus", + DefaultTags: []string{"hmmm-generated", "automated"}, + }, + } + + fmt.Printf("βœ… SLURP config created with %d enabled event types\n", + len(slurpConfig.EventGeneration.EnabledEventTypes)) + + // Note: In a real application, you would create the integrator here: + // integrator, err := integration.NewSlurpEventIntegrator(ctx, slurpConfig, pubsubInstance) + fmt.Println("πŸ“ Note: Create integrator with actual PubSub instance in real usage") +} + +// Example 2: Advanced configuration with project-specific mappings +func advancedSlurpConfigurationExample() { + fmt.Println("\nπŸ“‹ Example 2: Advanced SLURP Configuration") + + // Create advanced configuration with project mappings + slurpConfig := config.GetDefaultSlurpConfig() + slurpConfig.Enabled = true + slurpConfig.BaseURL = "https://slurp.example.com" + + // Add project-specific mappings + slurpConfig.ProjectMappings = map[string]config.ProjectEventMapping{ + "/projects/frontend": { + ProjectPath: "/projects/frontend", + CustomEventTypes: map[string]string{ + "ui_change": "structural_change", + "performance": "warning", + "accessibility": "priority_change", + }, + SeverityOverrides: map[string]int{ + "blocker": 9, // Higher severity for frontend blockers + "warning": 6, // Higher severity for frontend warnings + }, + AdditionalMetadata: map[string]interface{}{ + "team": "frontend", + "impact_area": "user_experience", + }, + EventFilters: []config.EventFilter{ + { + Name: "critical_ui_filter", + Conditions: map[string]string{ + "content_contains": "critical", + "event_type": "structural_change", + }, + Action: "modify", + Modifications: map[string]string{ + "severity": "10", + "tag": "critical-ui", + }, + }, + }, + }, + "/projects/backend": { + ProjectPath: "/projects/backend", + CustomEventTypes: map[string]string{ + "api_change": "structural_change", + "security": "blocker", + "performance": "warning", + }, + SeverityOverrides: map[string]int{ + "security": 10, // Maximum severity for security issues + }, + AdditionalMetadata: map[string]interface{}{ + "team": "backend", + "impact_area": "system_stability", + }, + }, + } + + // Configure severity rules + slurpConfig.EventGeneration.SeverityRules.UrgencyKeywords = append( + slurpConfig.EventGeneration.SeverityRules.UrgencyKeywords, + "security", "vulnerability", "exploit", "breach", + ) + slurpConfig.EventGeneration.SeverityRules.UrgencyBoost = 3 + + fmt.Printf("βœ… Advanced config created with %d project mappings\n", + len(slurpConfig.ProjectMappings)) + fmt.Printf("βœ… Urgency keywords: %v\n", + slurpConfig.EventGeneration.SeverityRules.UrgencyKeywords) +} + +// Example 3: Manual HMMM discussion processing +func manualDiscussionProcesssingExample(ctx context.Context) { + fmt.Println("\nπŸ“‹ Example 3: Manual HMMM Discussion Processing") + + // Create a sample HMMM discussion context + discussion := integration.HmmmDiscussionContext{ + DiscussionID: "discussion-123", + SessionID: "session-456", + Participants: []string{"agent-frontend-01", "agent-backend-02", "agent-qa-03"}, + StartTime: time.Now().Add(-10 * time.Minute), + EndTime: time.Now(), + ConsensusReached: true, + ConsensusStrength: 0.85, + OutcomeType: "Frontend team approves migration to React 18", + ProjectPath: "/projects/frontend", + Messages: []integration.HmmmMessage{ + { + From: "agent-frontend-01", + Content: "I propose we migrate to React 18 for better performance", + Type: "proposal", + Timestamp: time.Now().Add(-8 * time.Minute), + }, + { + From: "agent-backend-02", + Content: "That sounds good, it should improve our bundle size", + Type: "agreement", + Timestamp: time.Now().Add(-6 * time.Minute), + }, + { + From: "agent-qa-03", + Content: "Approved from QA perspective, tests are compatible", + Type: "approval", + Timestamp: time.Now().Add(-3 * time.Minute), + }, + }, + RelatedTasks: []string{"TASK-123", "TASK-456"}, + Metadata: map[string]interface{}{ + "migration_type": "framework_upgrade", + "risk_level": "low", + "impact": "high", + }, + } + + fmt.Printf("βœ… Sample discussion created:\n") + fmt.Printf(" - ID: %s\n", discussion.DiscussionID) + fmt.Printf(" - Participants: %d\n", len(discussion.Participants)) + fmt.Printf(" - Messages: %d\n", len(discussion.Messages)) + fmt.Printf(" - Consensus: %.1f%%\n", discussion.ConsensusStrength*100) + fmt.Printf(" - Outcome: %s\n", discussion.OutcomeType) + + // Note: In real usage, you would process this with: + // err := integrator.ProcessHmmmDiscussion(ctx, discussion) + fmt.Println("πŸ“ Note: Process with actual SlurpEventIntegrator in real usage") +} + +// Example 4: Real-time integration setup with meta coordinator +func realtimeIntegrationExample(ctx context.Context) { + fmt.Println("\nπŸ“‹ Example 4: Real-time Integration Setup") + + // This example shows how to set up the complete integration + // In a real application, you would use actual network setup + + fmt.Println("πŸ”§ Setting up libp2p host...") + // Create a basic libp2p host (simplified for example) + host, err := libp2p.New( + libp2p.ListenAddrStrings("/ip4/127.0.0.1/tcp/0"), + ) + if err != nil { + log.Printf("❌ Failed to create host: %v", err) + return + } + defer host.Close() + + fmt.Printf("βœ… Host created with ID: %s\n", host.ID().ShortString()) + + // Create PubSub system + fmt.Println("πŸ”§ Setting up PubSub system...") + ps, err := pubsub.NewPubSub(ctx, host, "bzzz/coordination/v1", "hmmm/meta-discussion/v1") + if err != nil { + log.Printf("❌ Failed to create pubsub: %v", err) + return + } + defer ps.Close() + + fmt.Println("βœ… PubSub system initialized") + + // Create SLURP configuration + slurpConfig := config.GetDefaultSlurpConfig() + slurpConfig.Enabled = true + slurpConfig.BaseURL = "http://localhost:8080" + + // Note: In real usage, you would create the integrator: + // integrator, err := integration.NewSlurpEventIntegrator(ctx, slurpConfig, ps) + // if err != nil { + // log.Printf("❌ Failed to create SLURP integrator: %v", err) + // return + // } + // defer integrator.Close() + + // Create meta coordinator + fmt.Println("πŸ”§ Setting up Meta Coordinator...") + metaCoordinator := coordination.NewMetaCoordinator(ctx, ps) + + // Note: In real usage, you would attach the integrator: + // metaCoordinator.SetSlurpIntegrator(integrator) + + fmt.Println("βœ… Meta Coordinator initialized with SLURP integration") + + // Demonstrate event publishing + fmt.Println("πŸ”§ Publishing sample SLURP integration events...") + + // Publish a sample SLURP event generation notification + err = ps.PublishSlurpEventGenerated(map[string]interface{}{ + "discussion_id": "sample-discussion-123", + "event_type": "approval", + "participants": []string{"agent-01", "agent-02"}, + "consensus": 0.9, + "timestamp": time.Now(), + }) + if err != nil { + log.Printf("❌ Failed to publish SLURP event: %v", err) + return + } + + // Publish a SLURP context update + err = ps.PublishSlurpContextUpdate(map[string]interface{}{ + "context_type": "project_update", + "project_path": "/projects/example", + "update_type": "event_generated", + "timestamp": time.Now(), + }) + if err != nil { + log.Printf("❌ Failed to publish context update: %v", err) + return + } + + fmt.Println("βœ… Sample events published successfully") + + // Let the system run for a short time to process messages + fmt.Println("⏳ Running system for 5 seconds...") + time.Sleep(5 * time.Second) + + fmt.Println("βœ… Real-time integration example completed") +} + +// Utility function to demonstrate SLURP event mapping +func demonstrateEventMapping() { + fmt.Println("\nπŸ“‹ Event Mapping Demonstration") + + mapping := config.GetHmmmToSlurpMapping() + + fmt.Println("πŸ—ΊοΈ HMMM to SLURP Event Type Mappings:") + fmt.Printf(" - Consensus Approval β†’ %s\n", mapping.ConsensusApproval) + fmt.Printf(" - Risk Identified β†’ %s\n", mapping.RiskIdentified) + fmt.Printf(" - Critical Blocker β†’ %s\n", mapping.CriticalBlocker) + fmt.Printf(" - Priority Change β†’ %s\n", mapping.PriorityChange) + fmt.Printf(" - Access Request β†’ %s\n", mapping.AccessRequest) + fmt.Printf(" - Architecture Decision β†’ %s\n", mapping.ArchitectureDecision) + fmt.Printf(" - Information Share β†’ %s\n", mapping.InformationShare) + + fmt.Println("\nπŸ”€ Keyword Mappings:") + fmt.Printf(" - Approval Keywords: %v\n", mapping.ApprovalKeywords) + fmt.Printf(" - Warning Keywords: %v\n", mapping.WarningKeywords) + fmt.Printf(" - Blocker Keywords: %v\n", mapping.BlockerKeywords) +} + +// Utility function to show configuration validation +func demonstrateConfigValidation() { + fmt.Println("\nπŸ“‹ Configuration Validation") + + // Valid configuration + validConfig := config.GetDefaultSlurpConfig() + validConfig.Enabled = true + validConfig.BaseURL = "https://slurp.example.com" + + if err := config.ValidateSlurpConfig(validConfig); err != nil { + fmt.Printf("❌ Valid config failed validation: %v\n", err) + } else { + fmt.Println("βœ… Valid configuration passed validation") + } + + // Invalid configuration + invalidConfig := config.GetDefaultSlurpConfig() + invalidConfig.Enabled = true + invalidConfig.BaseURL = "" // Missing required field + + if err := config.ValidateSlurpConfig(invalidConfig); err != nil { + fmt.Printf("βœ… Invalid config correctly failed validation: %v\n", err) + } else { + fmt.Println("❌ Invalid config incorrectly passed validation") + } +} \ No newline at end of file diff --git a/infrastructure/BZZZ_V2_INFRASTRUCTURE_ARCHITECTURE.md b/infrastructure/BZZZ_V2_INFRASTRUCTURE_ARCHITECTURE.md new file mode 100644 index 00000000..7f0adfe1 --- /dev/null +++ b/infrastructure/BZZZ_V2_INFRASTRUCTURE_ARCHITECTURE.md @@ -0,0 +1,669 @@ +# BZZZ v2 Infrastructure Architecture & Deployment Strategy + +## Executive Summary + +This document outlines the comprehensive infrastructure architecture and deployment strategy for BZZZ v2 evolution. The design maintains the existing 3-node cluster reliability while enabling advanced protocol features including content-addressed storage, DHT networking, OpenAI integration, and MCP server capabilities. + +## Current Infrastructure Analysis + +### Existing v1 Deployment +- **Cluster**: WALNUT (192.168.1.27), IRONWOOD (192.168.1.113), ACACIA (192.168.1.xxx) +- **Deployment**: SystemD services with P2P mesh networking +- **Protocol**: libp2p with mDNS discovery and pubsub messaging +- **Storage**: File-based configuration and in-memory state +- **Integration**: Basic Hive API connectivity and task coordination + +### Infrastructure Dependencies +- **Docker Swarm**: Existing cluster with `tengig` network +- **Traefik**: Load balancing and SSL termination +- **Private Registry**: registry.home.deepblack.cloud +- **GitLab CI/CD**: gitlab.deepblack.cloud +- **Secrets**: ~/chorus/business/secrets/ management +- **Storage**: NFS mounts on /rust/ for shared data + +## BZZZ v2 Architecture Design + +### 1. Protocol Evolution Architecture + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ BZZZ v2 Protocol Stack ───────────────────────┐ +β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ MCP Server β”‚ β”‚ OpenAI Proxy β”‚ β”‚ bzzz:// Resolver β”‚ β”‚ +β”‚ β”‚ (Port 3001) β”‚ β”‚ (Port 3002) β”‚ β”‚ (Port 3003) β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Content Layer β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ Conversationβ”‚ β”‚ Content Storeβ”‚ β”‚ BLAKE3 Hasher β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ Threading β”‚ β”‚ (CAS Blobs) β”‚ β”‚ (Content Addressing) β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ P2P Layer β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ libp2p DHT β”‚ β”‚Content Route β”‚ β”‚ Stream Multiplexing β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ (Discovery)β”‚ β”‚ (Routing) β”‚ β”‚ (Yamux/mplex) β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +### 2. Content-Addressed Storage (CAS) Architecture + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ Content-Addressed Storage System ──────────────────┐ +β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ Node Distribution ────────────────────┐ β”‚ +β”‚ β”‚ β”‚ β”‚ +β”‚ β”‚ WALNUT IRONWOOD ACACIA β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ Primary │────▢│ Secondary │────▢│ Tertiary β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ Blob Store β”‚ β”‚ Replica β”‚ β”‚ Replica β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚BLAKE3 Index β”‚ β”‚BLAKE3 Index β”‚ β”‚BLAKE3 Index β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ (Primary) β”‚ β”‚ (Secondary) β”‚ β”‚ (Tertiary) β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ Storage Layout ──────────────────────────────┐ β”‚ +β”‚ β”‚ /rust/bzzz-v2/blobs/ β”‚ β”‚ +β”‚ β”‚ β”œβ”€β”€ data/ # Raw blob storage β”‚ β”‚ +β”‚ β”‚ β”‚ β”œβ”€β”€ bl/ # BLAKE3 prefix sharding β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ └── 3k/ # Further sharding β”‚ β”‚ +β”‚ β”‚ β”‚ └── conversations/ # Conversation threads β”‚ β”‚ +β”‚ β”‚ β”œβ”€β”€ index/ # BLAKE3 hash indices β”‚ β”‚ +β”‚ β”‚ β”‚ β”œβ”€β”€ primary.db # Primary hash->location mapping β”‚ β”‚ +β”‚ β”‚ β”‚ └── replication.db # Replication metadata β”‚ β”‚ +β”‚ β”‚ └── temp/ # Temporary staging area β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +### 3. DHT and Network Architecture + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ DHT Network Topology ──────────────────────────┐ +β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ Bootstrap & Discovery ────────────────────────┐ β”‚ +β”‚ β”‚ β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ WALNUT │────▢│ IRONWOOD │────▢│ ACACIA β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚(Bootstrap 1)│◀────│(Bootstrap 2)│◀────│(Bootstrap 3)β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ DHT Responsibilities ────────────────────┐ β”‚ β”‚ +β”‚ β”‚ β”‚ WALNUT: Content Routing + Agent Discovery β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ IRONWOOD: Conversation Threading + OpenAI Coordination β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ ACACIA: MCP Services + External Integration β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ Network Protocols ────────────────────────────┐ β”‚ +β”‚ β”‚ β”‚ β”‚ +β”‚ β”‚ Protocol Support: β”‚ β”‚ +β”‚ β”‚ β€’ bzzz:// semantic addressing (DHT resolution) β”‚ β”‚ +β”‚ β”‚ β€’ Content routing via DHT (BLAKE3 hash lookup) β”‚ β”‚ +β”‚ β”‚ β€’ Agent discovery and capability broadcasting β”‚ β”‚ +β”‚ β”‚ β€’ Stream multiplexing for concurrent conversations β”‚ β”‚ +β”‚ β”‚ β€’ NAT traversal and hole punching β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ +β”‚ β”‚ Port Allocation: β”‚ β”‚ +β”‚ β”‚ β€’ P2P Listen: 9000-9100 (configurable range) β”‚ β”‚ +β”‚ β”‚ β€’ DHT Bootstrap: 9101-9103 (per node) β”‚ β”‚ +β”‚ β”‚ β€’ Content Routing: 9200-9300 (dynamic allocation) β”‚ β”‚ +β”‚ β”‚ β€’ mDNS Discovery: 5353 (standard multicast DNS) β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +### 4. Service Architecture + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ BZZZ v2 Service Stack ────────────────────────┐ +β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ External Layer ───────────────────────────────┐ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ Traefik │────▢│ OpenAI │────▢│ MCP β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚Load Balancerβ”‚ β”‚ Gateway β”‚ β”‚ Clients β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ (SSL Term) β”‚ β”‚(Rate Limit) β”‚ β”‚(External) β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ Application Layer ────────────────────────────┐ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ BZZZ Agent │────▢│ Conversation│────▢│ Content β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ Manager β”‚ β”‚ Threading β”‚ β”‚ Resolver β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ MCP β”‚ β”‚ OpenAI β”‚ β”‚ DHT β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ Server β”‚ β”‚ Client β”‚ β”‚ Manager β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ Storage Layer ─────────────────────────────────┐ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ CAS │────▢│ PostgreSQL │────▢│ Redis β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ Blob Store β”‚ β”‚(Metadata) β”‚ β”‚ (Cache) β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +## Migration Strategy + +### Phase 1: Parallel Deployment (Weeks 1-2) + +#### 1.1 Infrastructure Preparation +```bash +# Create v2 directory structure +/rust/bzzz-v2/ +β”œβ”€β”€ config/ +β”‚ β”œβ”€β”€ swarm/ +β”‚ β”œβ”€β”€ systemd/ +β”‚ └── secrets/ +β”œβ”€β”€ data/ +β”‚ β”œβ”€β”€ blobs/ +β”‚ β”œβ”€β”€ conversations/ +β”‚ └── dht/ +└── logs/ + β”œβ”€β”€ application/ + β”œβ”€β”€ p2p/ + └── monitoring/ +``` + +#### 1.2 Service Deployment Strategy +- Deploy v2 services on non-standard ports (9000+ range) +- Maintain v1 SystemD services during transition +- Use Docker Swarm stack for v2 components +- Implement health checks and readiness probes + +#### 1.3 Database Migration +- Create new PostgreSQL schema for v2 metadata +- Implement data migration scripts for conversation history +- Set up Redis cluster for DHT caching +- Configure backup and recovery procedures + +### Phase 2: Feature Migration (Weeks 3-4) + +#### 2.1 Content Store Migration +```bash +# Migration workflow +1. Export v1 conversation logs from Hypercore +2. Convert to BLAKE3-addressed blobs +3. Populate content store with historical data +4. Verify data integrity and accessibility +5. Update references in conversation threads +``` + +#### 2.2 P2P Protocol Upgrade +- Implement dual-protocol support (v1 + v2) +- Migrate peer discovery from mDNS to DHT +- Update message formats and routing +- Maintain backward compatibility during transition + +### Phase 3: Service Cutover (Weeks 5-6) + +#### 3.1 Traffic Migration +- Implement feature flags for v2 protocol +- Gradual migration of agents to v2 endpoints +- Monitor performance and error rates +- Implement automatic rollback triggers + +#### 3.2 Monitoring and Validation +- Deploy comprehensive monitoring stack +- Validate all v2 protocol operations +- Performance benchmarking vs v1 +- Load testing with conversation threading + +### Phase 4: Production Deployment (Weeks 7-8) + +#### 4.1 Full Cutover +- Disable v1 protocol endpoints +- Remove v1 SystemD services +- Update all client configurations +- Archive v1 data and configurations + +#### 4.2 Optimization and Tuning +- Performance optimization based on production load +- Resource allocation tuning +- Security hardening and audit +- Documentation and training completion + +## Container Orchestration + +### Docker Swarm Stack Configuration + +```yaml +# docker-compose.swarm.yml +version: '3.8' + +services: + bzzz-agent: + image: registry.home.deepblack.cloud/bzzz:v2.0.0 + networks: + - tengig + - bzzz-internal + ports: + - "9000-9100:9000-9100" + volumes: + - /rust/bzzz-v2/data:/app/data + - /rust/bzzz-v2/config:/app/config + environment: + - BZZZ_VERSION=2.0.0 + - BZZZ_PROTOCOL=bzzz:// + - DHT_BOOTSTRAP_NODES=walnut:9101,ironwood:9102,acacia:9103 + deploy: + replicas: 1 + placement: + constraints: + - node.hostname == walnut + labels: + - "traefik.enable=true" + - "traefik.http.routers.bzzz-agent.rule=Host(`bzzz.deepblack.cloud`)" + - "traefik.http.services.bzzz-agent.loadbalancer.server.port=9000" + + mcp-server: + image: registry.home.deepblack.cloud/bzzz-mcp:v2.0.0 + networks: + - tengig + ports: + - "3001:3001" + environment: + - MCP_VERSION=1.0.0 + - BZZZ_ENDPOINT=http://bzzz-agent:9000 + deploy: + replicas: 3 + labels: + - "traefik.enable=true" + - "traefik.http.routers.mcp-server.rule=Host(`mcp.deepblack.cloud`)" + + openai-proxy: + image: registry.home.deepblack.cloud/bzzz-openai-proxy:v2.0.0 + networks: + - tengig + - bzzz-internal + ports: + - "3002:3002" + environment: + - OPENAI_API_KEY_FILE=/run/secrets/openai_api_key + - RATE_LIMIT_RPM=1000 + - COST_TRACKING_ENABLED=true + secrets: + - openai_api_key + deploy: + replicas: 2 + + content-resolver: + image: registry.home.deepblack.cloud/bzzz-resolver:v2.0.0 + networks: + - bzzz-internal + ports: + - "3003:3003" + volumes: + - /rust/bzzz-v2/data/blobs:/app/blobs:ro + deploy: + replicas: 3 + + postgres: + image: postgres:15-alpine + networks: + - bzzz-internal + environment: + - POSTGRES_DB=bzzz_v2 + - POSTGRES_USER_FILE=/run/secrets/postgres_user + - POSTGRES_PASSWORD_FILE=/run/secrets/postgres_password + volumes: + - /rust/bzzz-v2/data/postgres:/var/lib/postgresql/data + secrets: + - postgres_user + - postgres_password + deploy: + replicas: 1 + placement: + constraints: + - node.hostname == walnut + + redis: + image: redis:7-alpine + networks: + - bzzz-internal + volumes: + - /rust/bzzz-v2/data/redis:/data + deploy: + replicas: 1 + placement: + constraints: + - node.hostname == ironwood + +networks: + tengig: + external: true + bzzz-internal: + driver: overlay + internal: true + +secrets: + openai_api_key: + external: true + postgres_user: + external: true + postgres_password: + external: true +``` + +## CI/CD Pipeline Configuration + +### GitLab CI Pipeline + +```yaml +# .gitlab-ci.yml +stages: + - build + - test + - deploy-staging + - deploy-production + +variables: + REGISTRY: registry.home.deepblack.cloud + IMAGE_TAG: ${CI_COMMIT_SHORT_SHA} + +build: + stage: build + script: + - docker build -t ${REGISTRY}/bzzz:${IMAGE_TAG} . + - docker build -t ${REGISTRY}/bzzz-mcp:${IMAGE_TAG} -f Dockerfile.mcp . + - docker build -t ${REGISTRY}/bzzz-openai-proxy:${IMAGE_TAG} -f Dockerfile.proxy . + - docker build -t ${REGISTRY}/bzzz-resolver:${IMAGE_TAG} -f Dockerfile.resolver . + - docker push ${REGISTRY}/bzzz:${IMAGE_TAG} + - docker push ${REGISTRY}/bzzz-mcp:${IMAGE_TAG} + - docker push ${REGISTRY}/bzzz-openai-proxy:${IMAGE_TAG} + - docker push ${REGISTRY}/bzzz-resolver:${IMAGE_TAG} + only: + - main + - develop + +test-protocol: + stage: test + script: + - go test ./... + - docker run --rm ${REGISTRY}/bzzz:${IMAGE_TAG} /app/test-suite + dependencies: + - build + +test-integration: + stage: test + script: + - docker-compose -f docker-compose.test.yml up -d + - ./scripts/integration-tests.sh + - docker-compose -f docker-compose.test.yml down + dependencies: + - build + +deploy-staging: + stage: deploy-staging + script: + - docker stack deploy -c docker-compose.staging.yml bzzz-v2-staging + environment: + name: staging + only: + - develop + +deploy-production: + stage: deploy-production + script: + - docker stack deploy -c docker-compose.swarm.yml bzzz-v2 + environment: + name: production + only: + - main + when: manual +``` + +## Monitoring and Operations + +### Monitoring Stack + +```yaml +# docker-compose.monitoring.yml +version: '3.8' + +services: + prometheus: + image: prom/prometheus:latest + networks: + - monitoring + volumes: + - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml + - /rust/bzzz-v2/data/prometheus:/prometheus + deploy: + replicas: 1 + + grafana: + image: grafana/grafana:latest + networks: + - monitoring + - tengig + volumes: + - /rust/bzzz-v2/data/grafana:/var/lib/grafana + deploy: + labels: + - "traefik.enable=true" + - "traefik.http.routers.bzzz-grafana.rule=Host(`bzzz-monitor.deepblack.cloud`)" + + alertmanager: + image: prom/alertmanager:latest + networks: + - monitoring + volumes: + - ./monitoring/alertmanager.yml:/etc/alertmanager/alertmanager.yml + deploy: + replicas: 1 + +networks: + monitoring: + driver: overlay + tengig: + external: true +``` + +### Key Metrics to Monitor + +1. **Protocol Metrics** + - DHT lookup latency and success rate + - Content resolution time + - Peer discovery and connection stability + - bzzz:// address resolution performance + +2. **Service Metrics** + - MCP server response times + - OpenAI API usage and costs + - Conversation threading performance + - Content store I/O operations + +3. **Infrastructure Metrics** + - Docker Swarm service health + - Network connectivity between nodes + - Storage utilization and performance + - Resource utilization (CPU, memory, disk) + +### Alerting Configuration + +```yaml +# monitoring/alertmanager.yml +global: + smtp_smarthost: 'localhost:587' + smtp_from: 'alerts@deepblack.cloud' + +route: + group_by: ['alertname'] + group_wait: 10s + group_interval: 10s + repeat_interval: 1h + receiver: 'web.hook' + +receivers: + - name: 'web.hook' + slack_configs: + - api_url: 'YOUR_SLACK_WEBHOOK_URL' + channel: '#bzzz-alerts' + title: 'BZZZ v2 Alert' + text: '{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}' + +inhibit_rules: + - source_match: + severity: 'critical' + target_match: + severity: 'warning' + equal: ['alertname', 'dev', 'instance'] +``` + +## Security and Networking + +### Security Architecture + +1. **Network Isolation** + - Internal overlay network for inter-service communication + - External network exposure only through Traefik + - Firewall rules restricting P2P ports to local network + +2. **Secret Management** + - Docker Swarm secrets for sensitive data + - Encrypted storage of API keys and credentials + - Regular secret rotation procedures + +3. **Access Control** + - mTLS for P2P communication + - API authentication and authorization + - Role-based access for MCP endpoints + +### Networking Configuration + +```bash +# UFW firewall rules for BZZZ v2 +sudo ufw allow from 192.168.1.0/24 to any port 9000:9300 proto tcp +sudo ufw allow from 192.168.1.0/24 to any port 5353 proto udp +sudo ufw allow from 192.168.1.0/24 to any port 2377 proto tcp # Docker Swarm +sudo ufw allow from 192.168.1.0/24 to any port 7946 proto tcp # Docker Swarm +sudo ufw allow from 192.168.1.0/24 to any port 4789 proto udp # Docker Swarm +``` + +## Rollback Procedures + +### Automatic Rollback Triggers + +1. **Health Check Failures** + - Service health checks failing for > 5 minutes + - DHT network partition detection + - Content store corruption detection + - Critical error rate > 5% + +2. **Performance Degradation** + - Response time increase > 200% from baseline + - Memory usage > 90% for > 10 minutes + - Storage I/O errors > 1% rate + +### Manual Rollback Process + +```bash +#!/bin/bash +# rollback-v2.sh - Emergency rollback to v1 + +echo "🚨 Initiating BZZZ v2 rollback procedure..." + +# Step 1: Stop v2 services +docker stack rm bzzz-v2 +sleep 30 + +# Step 2: Restart v1 SystemD services +sudo systemctl start bzzz@walnut +sudo systemctl start bzzz@ironwood +sudo systemctl start bzzz@acacia + +# Step 3: Verify v1 connectivity +./scripts/verify-v1-mesh.sh + +# Step 4: Update load balancer configuration +./scripts/update-traefik-v1.sh + +# Step 5: Notify operations team +curl -X POST $SLACK_WEBHOOK -d '{"text":"🚨 BZZZ rollback to v1 completed"}' + +echo "βœ… Rollback completed successfully" +``` + +## Resource Requirements + +### Node Specifications + +| Component | CPU | Memory | Storage | Network | +|-----------|-----|---------|---------|---------| +| BZZZ Agent | 2 cores | 4GB | 20GB | 1Gbps | +| MCP Server | 1 core | 2GB | 5GB | 100Mbps | +| OpenAI Proxy | 1 core | 2GB | 5GB | 100Mbps | +| Content Store | 2 cores | 8GB | 500GB | 1Gbps | +| DHT Manager | 1 core | 4GB | 50GB | 1Gbps | + +### Scaling Considerations + +1. **Horizontal Scaling** + - Add nodes to DHT for increased capacity + - Scale MCP servers based on external demand + - Replicate content store across availability zones + +2. **Vertical Scaling** + - Increase memory for larger conversation contexts + - Add storage for content addressing requirements + - Enhance network capacity for P2P traffic + +## Operational Procedures + +### Daily Operations + +1. **Health Monitoring** + - Review Grafana dashboards for anomalies + - Check DHT network connectivity + - Verify content store replication status + - Monitor OpenAI API usage and costs + +2. **Maintenance Tasks** + - Log rotation and archival + - Content store garbage collection + - DHT routing table optimization + - Security patch deployment + +### Weekly Operations + +1. **Performance Review** + - Analyze response time trends + - Review resource utilization patterns + - Assess scaling requirements + - Update capacity planning + +2. **Security Audit** + - Review access logs + - Validate secret rotation + - Check for security updates + - Test backup and recovery procedures + +### Incident Response + +1. **Incident Classification** + - P0: Complete service outage + - P1: Major feature degradation + - P2: Performance issues + - P3: Minor functionality problems + +2. **Response Procedures** + - Automated alerting and escalation + - Incident commander assignment + - Communication protocols + - Post-incident review process + +This comprehensive infrastructure architecture provides a robust foundation for BZZZ v2 deployment while maintaining operational excellence and enabling future growth. The design prioritizes reliability, security, and maintainability while introducing advanced protocol features required for the next generation of the BZZZ ecosystem. \ No newline at end of file diff --git a/infrastructure/ci-cd/.gitlab-ci.yml b/infrastructure/ci-cd/.gitlab-ci.yml new file mode 100644 index 00000000..e040030e --- /dev/null +++ b/infrastructure/ci-cd/.gitlab-ci.yml @@ -0,0 +1,643 @@ +# BZZZ v2 GitLab CI/CD Pipeline +# Comprehensive build, test, and deployment pipeline for BZZZ v2 + +variables: + REGISTRY: registry.home.deepblack.cloud + REGISTRY_NAMESPACE: bzzz + GO_VERSION: "1.21" + DOCKER_BUILDKIT: "1" + COMPOSE_DOCKER_CLI_BUILD: "1" + POSTGRES_VERSION: "15" + REDIS_VERSION: "7" + + # Semantic versioning + VERSION_PREFIX: "v2" + +stages: + - lint + - test + - build + - security-scan + - integration-test + - deploy-staging + - performance-test + - deploy-production + - post-deploy-validation + +# Cache configuration +cache: + key: "${CI_COMMIT_REF_SLUG}" + paths: + - .cache/go-mod/ + - .cache/docker/ + - vendor/ + +before_script: + - export GOPATH=$CI_PROJECT_DIR/.cache/go-mod + - export GOCACHE=$CI_PROJECT_DIR/.cache/go-build + - mkdir -p .cache/{go-mod,go-build,docker} + +# ================================ +# LINT STAGE +# ================================ + +golang-lint: + stage: lint + image: golangci/golangci-lint:v1.55-alpine + script: + - golangci-lint run ./... --timeout 10m + - go mod tidy + - git diff --exit-code go.mod go.sum + rules: + - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' + - if: '$CI_COMMIT_BRANCH == "main"' + - if: '$CI_COMMIT_BRANCH == "develop"' + +dockerfile-lint: + stage: lint + image: hadolint/hadolint:latest-debian + script: + - hadolint infrastructure/dockerfiles/Dockerfile.* + - hadolint Dockerfile + rules: + - changes: + - "infrastructure/dockerfiles/*" + - "Dockerfile*" + +yaml-lint: + stage: lint + image: cytopia/yamllint:latest + script: + - yamllint infrastructure/ + - yamllint .gitlab-ci.yml + rules: + - changes: + - "infrastructure/**/*.yml" + - "infrastructure/**/*.yaml" + - ".gitlab-ci.yml" + +# ================================ +# TEST STAGE +# ================================ + +unit-tests: + stage: test + image: golang:$GO_VERSION-alpine + services: + - name: postgres:$POSTGRES_VERSION-alpine + alias: postgres + - name: redis:$REDIS_VERSION-alpine + alias: redis + variables: + POSTGRES_DB: bzzz_test + POSTGRES_USER: test + POSTGRES_PASSWORD: testpass + POSTGRES_HOST: postgres + REDIS_HOST: redis + CGO_ENABLED: 0 + before_script: + - apk add --no-cache git make gcc musl-dev + - export GOPATH=$CI_PROJECT_DIR/.cache/go-mod + - export GOCACHE=$CI_PROJECT_DIR/.cache/go-build + script: + - go mod download + - go test -v -race -coverprofile=coverage.out ./... + - go tool cover -html=coverage.out -o coverage.html + - go tool cover -func=coverage.out | grep total | awk '{print "Coverage: " $3}' + coverage: '/Coverage: \d+\.\d+/' + artifacts: + reports: + coverage_report: + coverage_format: cobertura + path: coverage.xml + paths: + - coverage.html + - coverage.out + expire_in: 1 week + +p2p-protocol-tests: + stage: test + image: golang:$GO_VERSION-alpine + script: + - apk add --no-cache git make gcc musl-dev + - go test -v -tags=p2p ./p2p/... ./dht/... + - go test -v -tags=integration ./test/p2p/... + rules: + - changes: + - "p2p/**/*" + - "dht/**/*" + - "test/p2p/**/*" + +content-store-tests: + stage: test + image: golang:$GO_VERSION-alpine + script: + - apk add --no-cache git make gcc musl-dev + - go test -v -tags=storage ./storage/... ./blake3/... + - go test -v -benchmem -bench=. ./storage/... + artifacts: + paths: + - benchmark.out + expire_in: 1 week + rules: + - changes: + - "storage/**/*" + - "blake3/**/*" + +conversation-tests: + stage: test + image: golang:$GO_VERSION-alpine + services: + - name: postgres:$POSTGRES_VERSION-alpine + alias: postgres + variables: + POSTGRES_DB: bzzz_conversation_test + POSTGRES_USER: test + POSTGRES_PASSWORD: testpass + POSTGRES_HOST: postgres + script: + - apk add --no-cache git make gcc musl-dev postgresql-client + - until pg_isready -h postgres -p 5432 -U test; do sleep 1; done + - go test -v -tags=conversation ./conversation/... ./threading/... + rules: + - changes: + - "conversation/**/*" + - "threading/**/*" + +# ================================ +# BUILD STAGE +# ================================ + +build-binaries: + stage: build + image: golang:$GO_VERSION-alpine + before_script: + - apk add --no-cache git make gcc musl-dev upx + - export GOPATH=$CI_PROJECT_DIR/.cache/go-mod + - export GOCACHE=$CI_PROJECT_DIR/.cache/go-build + script: + - make build-all + - upx --best --lzma dist/bzzz-* + - ls -la dist/ + artifacts: + paths: + - dist/ + expire_in: 1 week + +build-docker-images: + stage: build + image: docker:24-dind + services: + - docker:24-dind + variables: + IMAGE_TAG: ${CI_COMMIT_SHORT_SHA} + DOCKER_DRIVER: overlay2 + before_script: + - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $REGISTRY + - docker buildx create --use --driver docker-container + script: + # Build all images in parallel + - | + docker buildx build \ + --platform linux/amd64,linux/arm64 \ + --build-arg VERSION=${VERSION_PREFIX}.${CI_PIPELINE_ID} \ + --build-arg COMMIT=${CI_COMMIT_SHORT_SHA} \ + --tag $REGISTRY/$REGISTRY_NAMESPACE/bzzz-agent:$IMAGE_TAG \ + --tag $REGISTRY/$REGISTRY_NAMESPACE/bzzz-agent:latest \ + --file infrastructure/dockerfiles/Dockerfile.agent \ + --push . + + - | + docker buildx build \ + --platform linux/amd64,linux/arm64 \ + --tag $REGISTRY/$REGISTRY_NAMESPACE/bzzz-mcp:$IMAGE_TAG \ + --tag $REGISTRY/$REGISTRY_NAMESPACE/bzzz-mcp:latest \ + --file infrastructure/dockerfiles/Dockerfile.mcp \ + --push . + + - | + docker buildx build \ + --platform linux/amd64,linux/arm64 \ + --tag $REGISTRY/$REGISTRY_NAMESPACE/bzzz-openai-proxy:$IMAGE_TAG \ + --tag $REGISTRY/$REGISTRY_NAMESPACE/bzzz-openai-proxy:latest \ + --file infrastructure/dockerfiles/Dockerfile.proxy \ + --push . + + - | + docker buildx build \ + --platform linux/amd64,linux/arm64 \ + --tag $REGISTRY/$REGISTRY_NAMESPACE/bzzz-resolver:$IMAGE_TAG \ + --tag $REGISTRY/$REGISTRY_NAMESPACE/bzzz-resolver:latest \ + --file infrastructure/dockerfiles/Dockerfile.resolver \ + --push . + + - | + docker buildx build \ + --platform linux/amd64,linux/arm64 \ + --tag $REGISTRY/$REGISTRY_NAMESPACE/bzzz-dht:$IMAGE_TAG \ + --tag $REGISTRY/$REGISTRY_NAMESPACE/bzzz-dht:latest \ + --file infrastructure/dockerfiles/Dockerfile.dht \ + --push . + + dependencies: + - build-binaries + +# ================================ +# SECURITY SCAN STAGE +# ================================ + +container-security-scan: + stage: security-scan + image: aquasec/trivy:latest + script: + - | + for component in agent mcp openai-proxy resolver dht; do + echo "Scanning bzzz-${component}..." + trivy image --exit-code 1 --severity HIGH,CRITICAL \ + --format json --output trivy-${component}.json \ + $REGISTRY/$REGISTRY_NAMESPACE/bzzz-${component}:${CI_COMMIT_SHORT_SHA} + done + artifacts: + reports: + container_scanning: trivy-*.json + expire_in: 1 week + dependencies: + - build-docker-images + allow_failure: true + +dependency-security-scan: + stage: security-scan + image: golang:$GO_VERSION-alpine + script: + - go install golang.org/x/vuln/cmd/govulncheck@latest + - govulncheck ./... + allow_failure: true + +secrets-scan: + stage: security-scan + image: trufflesecurity/trufflehog:latest + script: + - trufflehog filesystem --directory=. --fail --json + allow_failure: true + rules: + - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' + +# ================================ +# INTEGRATION TEST STAGE +# ================================ + +p2p-integration-test: + stage: integration-test + image: docker:24-dind + services: + - docker:24-dind + variables: + COMPOSE_PROJECT_NAME: bzzz-integration-${CI_PIPELINE_ID} + before_script: + - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $REGISTRY + - apk add --no-cache docker-compose curl jq + script: + - cd infrastructure/testing + - docker-compose -f docker-compose.integration.yml up -d + - sleep 60 # Wait for services to start + - ./scripts/test-p2p-mesh.sh + - ./scripts/test-dht-discovery.sh + - ./scripts/test-content-addressing.sh + - docker-compose -f docker-compose.integration.yml logs + after_script: + - cd infrastructure/testing + - docker-compose -f docker-compose.integration.yml down -v + artifacts: + paths: + - infrastructure/testing/test-results/ + expire_in: 1 week + when: always + dependencies: + - build-docker-images + +mcp-integration-test: + stage: integration-test + image: node:18-alpine + services: + - name: $REGISTRY/$REGISTRY_NAMESPACE/bzzz-mcp:${CI_COMMIT_SHORT_SHA} + alias: mcp-server + - name: $REGISTRY/$REGISTRY_NAMESPACE/bzzz-agent:${CI_COMMIT_SHORT_SHA} + alias: bzzz-agent + script: + - cd test/mcp + - npm install + - npm test + artifacts: + reports: + junit: test/mcp/junit.xml + dependencies: + - build-docker-images + +openai-proxy-test: + stage: integration-test + image: python:3.11-alpine + services: + - name: $REGISTRY/$REGISTRY_NAMESPACE/bzzz-openai-proxy:${CI_COMMIT_SHORT_SHA} + alias: openai-proxy + - name: redis:$REDIS_VERSION-alpine + alias: redis + variables: + OPENAI_API_KEY: "test-key-mock" + REDIS_HOST: redis + script: + - cd test/openai-proxy + - pip install -r requirements.txt + - python -m pytest -v --junitxml=junit.xml + artifacts: + reports: + junit: test/openai-proxy/junit.xml + dependencies: + - build-docker-images + +# ================================ +# STAGING DEPLOYMENT +# ================================ + +deploy-staging: + stage: deploy-staging + image: docker:24-dind + services: + - docker:24-dind + variables: + DEPLOY_ENV: staging + STACK_NAME: bzzz-v2-staging + environment: + name: staging + url: https://bzzz-staging.deepblack.cloud + before_script: + - apk add --no-cache openssh-client + - eval $(ssh-agent -s) + - echo "$SSH_PRIVATE_KEY" | tr -d '\r' | ssh-add - + - mkdir -p ~/.ssh + - chmod 700 ~/.ssh + - ssh-keyscan -H 192.168.1.27 >> ~/.ssh/known_hosts + script: + # Copy deployment files to staging environment + - scp infrastructure/docker-compose.staging.yml tony@192.168.1.27:/rust/bzzz-v2/ + - scp infrastructure/configs/staging/* tony@192.168.1.27:/rust/bzzz-v2/config/ + + # Deploy to staging swarm + - | + ssh tony@192.168.1.27 << 'EOF' + cd /rust/bzzz-v2 + export IMAGE_TAG=${CI_COMMIT_SHORT_SHA} + docker stack deploy -c docker-compose.staging.yml ${STACK_NAME} + + # Wait for deployment + timeout 300 bash -c 'until docker service ls --filter label=com.docker.stack.namespace=${STACK_NAME} --format "{{.Replicas}}" | grep -v "0/"; do sleep 10; done' + EOF + + # Health check staging deployment + - sleep 60 + - curl -f https://bzzz-staging.deepblack.cloud/health + dependencies: + - build-docker-images + - p2p-integration-test + rules: + - if: '$CI_COMMIT_BRANCH == "develop"' + - if: '$CI_COMMIT_BRANCH == "main"' + +# ================================ +# PERFORMANCE TESTING +# ================================ + +performance-test: + stage: performance-test + image: loadimpact/k6:latest + script: + - cd test/performance + - k6 run --out json=performance-results.json performance-test.js + - k6 run --out json=dht-performance.json dht-performance-test.js + artifacts: + paths: + - test/performance/performance-results.json + - test/performance/dht-performance.json + reports: + performance: test/performance/performance-results.json + expire_in: 1 week + environment: + name: staging + rules: + - if: '$CI_COMMIT_BRANCH == "main"' + - when: manual + if: '$CI_COMMIT_BRANCH == "develop"' + +load-test: + stage: performance-test + image: python:3.11-alpine + script: + - cd test/load + - pip install locust requests + - locust --headless --users 100 --spawn-rate 10 --run-time 5m --host https://bzzz-staging.deepblack.cloud + artifacts: + paths: + - test/load/locust_stats.html + expire_in: 1 week + environment: + name: staging + rules: + - when: manual + +# ================================ +# PRODUCTION DEPLOYMENT +# ================================ + +deploy-production: + stage: deploy-production + image: docker:24-dind + services: + - docker:24-dind + variables: + DEPLOY_ENV: production + STACK_NAME: bzzz-v2 + environment: + name: production + url: https://bzzz.deepblack.cloud + before_script: + - apk add --no-cache openssh-client + - eval $(ssh-agent -s) + - echo "$SSH_PRIVATE_KEY" | tr -d '\r' | ssh-add - + - mkdir -p ~/.ssh + - chmod 700 ~/.ssh + - ssh-keyscan -H 192.168.1.27 >> ~/.ssh/known_hosts + script: + # Backup current production state + - | + ssh tony@192.168.1.27 << 'EOF' + mkdir -p /rust/bzzz-v2/backup/$(date +%Y%m%d-%H%M%S) + docker service ls --filter label=com.docker.stack.namespace=bzzz-v2 --format "table {{.Name}}\t{{.Image}}" > /rust/bzzz-v2/backup/$(date +%Y%m%d-%H%M%S)/pre-deployment-services.txt + EOF + + # Copy production deployment files + - scp infrastructure/docker-compose.swarm.yml tony@192.168.1.27:/rust/bzzz-v2/ + - scp infrastructure/configs/production/* tony@192.168.1.27:/rust/bzzz-v2/config/ + + # Deploy to production with blue-green strategy + - | + ssh tony@192.168.1.27 << 'EOF' + cd /rust/bzzz-v2 + export IMAGE_TAG=${CI_COMMIT_SHORT_SHA} + + # Deploy new version + docker stack deploy -c docker-compose.swarm.yml ${STACK_NAME} + + # Wait for healthy deployment + timeout 600 bash -c 'until docker service ls --filter label=com.docker.stack.namespace=${STACK_NAME} --format "{{.Replicas}}" | grep -v "0/" | wc -l | grep -q 8; do sleep 15; done' + + echo "Production deployment completed successfully" + EOF + + # Verify production health + - sleep 120 + - curl -f https://bzzz.deepblack.cloud/health + - curl -f https://mcp.deepblack.cloud/health + dependencies: + - deploy-staging + - performance-test + rules: + - if: '$CI_COMMIT_BRANCH == "main"' + when: manual + +rollback-production: + stage: deploy-production + image: docker:24-dind + variables: + STACK_NAME: bzzz-v2 + environment: + name: production + action: rollback + before_script: + - apk add --no-cache openssh-client + - eval $(ssh-agent -s) + - echo "$SSH_PRIVATE_KEY" | tr -d '\r' | ssh-add - + - mkdir -p ~/.ssh + - chmod 700 ~/.ssh + - ssh-keyscan -H 192.168.1.27 >> ~/.ssh/known_hosts + script: + - | + ssh tony@192.168.1.27 << 'EOF' + cd /rust/bzzz-v2 + + # Get previous stable image tags + PREVIOUS_TAG=$(docker service inspect bzzz-v2_bzzz-agent --format '{{.Spec.TaskTemplate.ContainerSpec.Image}}' | cut -d: -f2) + + # Rollback by redeploying previous version + export IMAGE_TAG=$PREVIOUS_TAG + docker stack deploy -c docker-compose.swarm.yml ${STACK_NAME} + + echo "Production rollback completed" + EOF + rules: + - when: manual + if: '$CI_COMMIT_BRANCH == "main"' + +# ================================ +# POST-DEPLOYMENT VALIDATION +# ================================ + +post-deploy-validation: + stage: post-deploy-validation + image: curlimages/curl:latest + script: + - curl -f https://bzzz.deepblack.cloud/health + - curl -f https://mcp.deepblack.cloud/health + - curl -f https://resolve.deepblack.cloud/health + - curl -f https://openai.deepblack.cloud/health + + # Test basic functionality + - | + # Test bzzz:// address resolution + CONTENT_HASH=$(curl -s https://bzzz.deepblack.cloud/api/v2/test-content | jq -r '.hash') + curl -f "https://resolve.deepblack.cloud/bzzz://${CONTENT_HASH}" + + # Test MCP endpoint + curl -X POST https://mcp.deepblack.cloud/api/tools/list \ + -H "Content-Type: application/json" \ + -d '{"method": "tools/list"}' + environment: + name: production + rules: + - if: '$CI_COMMIT_BRANCH == "main"' + needs: + - deploy-production + +smoke-tests: + stage: post-deploy-validation + image: golang:$GO_VERSION-alpine + script: + - cd test/smoke + - go test -v ./... -base-url=https://bzzz.deepblack.cloud + environment: + name: production + rules: + - if: '$CI_COMMIT_BRANCH == "main"' + needs: + - deploy-production + +# ================================ +# NOTIFICATION STAGE (implicit) +# ================================ + +notify-success: + stage: .post + image: curlimages/curl:latest + script: + - | + curl -X POST $SLACK_WEBHOOK_URL \ + -H 'Content-type: application/json' \ + -d '{ + "text": "πŸš€ BZZZ v2 Pipeline Success", + "attachments": [{ + "color": "good", + "fields": [{ + "title": "Branch", + "value": "'$CI_COMMIT_BRANCH'", + "short": true + }, { + "title": "Commit", + "value": "'$CI_COMMIT_SHORT_SHA'", + "short": true + }, { + "title": "Pipeline", + "value": "'$CI_PIPELINE_URL'", + "short": false + }] + }] + }' + rules: + - if: '$CI_PIPELINE_STATUS == "success" && $CI_COMMIT_BRANCH == "main"' + when: on_success + +notify-failure: + stage: .post + image: curlimages/curl:latest + script: + - | + curl -X POST $SLACK_WEBHOOK_URL \ + -H 'Content-type: application/json' \ + -d '{ + "text": "❌ BZZZ v2 Pipeline Failed", + "attachments": [{ + "color": "danger", + "fields": [{ + "title": "Branch", + "value": "'$CI_COMMIT_BRANCH'", + "short": true + }, { + "title": "Commit", + "value": "'$CI_COMMIT_SHORT_SHA'", + "short": true + }, { + "title": "Pipeline", + "value": "'$CI_PIPELINE_URL'", + "short": false + }] + }] + }' + rules: + - when: on_failure \ No newline at end of file diff --git a/infrastructure/docker-compose.swarm.yml b/infrastructure/docker-compose.swarm.yml new file mode 100644 index 00000000..43e04ede --- /dev/null +++ b/infrastructure/docker-compose.swarm.yml @@ -0,0 +1,402 @@ +version: '3.8' + +services: + # BZZZ v2 Main Agent + bzzz-agent: + image: registry.home.deepblack.cloud/bzzz:v2.0.0 + networks: + - tengig + - bzzz-internal + ports: + - "9000-9100:9000-9100" + volumes: + - /rust/bzzz-v2/data:/app/data + - /rust/bzzz-v2/config:/app/config:ro + environment: + - BZZZ_VERSION=2.0.0 + - BZZZ_PROTOCOL=bzzz:// + - DHT_BOOTSTRAP_NODES=walnut:9101,ironwood:9102,acacia:9103 + - CONTENT_STORE_PATH=/app/data/blobs + - POSTGRES_HOST=postgres + - REDIS_HOST=redis + - LOG_LEVEL=info + secrets: + - postgres_password + - openai_api_key + configs: + - source: bzzz_config + target: /app/config/config.yaml + deploy: + replicas: 3 + placement: + max_replicas_per_node: 1 + constraints: + - node.labels.bzzz.role == agent + resources: + limits: + memory: 4G + cpus: '2.0' + reservations: + memory: 2G + cpus: '1.0' + restart_policy: + condition: on-failure + delay: 10s + max_attempts: 3 + update_config: + parallelism: 1 + delay: 30s + failure_action: rollback + order: stop-first + labels: + - "traefik.enable=true" + - "traefik.http.routers.bzzz-agent.rule=Host(`bzzz.deepblack.cloud`)" + - "traefik.http.services.bzzz-agent.loadbalancer.server.port=9000" + - "traefik.http.routers.bzzz-agent.tls=true" + - "traefik.http.routers.bzzz-agent.tls.certresolver=letsencrypt" + + # MCP Server for external tool integration + mcp-server: + image: registry.home.deepblack.cloud/bzzz-mcp:v2.0.0 + networks: + - tengig + - bzzz-internal + ports: + - "3001:3001" + environment: + - MCP_VERSION=1.0.0 + - BZZZ_ENDPOINT=http://bzzz-agent:9000 + - MAX_CONNECTIONS=1000 + - TIMEOUT_SECONDS=30 + configs: + - source: mcp_config + target: /app/config/mcp.yaml + deploy: + replicas: 3 + placement: + max_replicas_per_node: 1 + resources: + limits: + memory: 2G + cpus: '1.0' + reservations: + memory: 512M + cpus: '0.5' + restart_policy: + condition: on-failure + delay: 5s + labels: + - "traefik.enable=true" + - "traefik.http.routers.mcp-server.rule=Host(`mcp.deepblack.cloud`)" + - "traefik.http.services.mcp-server.loadbalancer.server.port=3001" + - "traefik.http.routers.mcp-server.tls=true" + + # OpenAI Proxy with rate limiting and cost tracking + openai-proxy: + image: registry.home.deepblack.cloud/bzzz-openai-proxy:v2.0.0 + networks: + - tengig + - bzzz-internal + ports: + - "3002:3002" + environment: + - RATE_LIMIT_RPM=1000 + - RATE_LIMIT_TPM=100000 + - COST_TRACKING_ENABLED=true + - REDIS_HOST=redis + - POSTGRES_HOST=postgres + - LOG_REQUESTS=true + secrets: + - openai_api_key + - postgres_password + configs: + - source: proxy_config + target: /app/config/proxy.yaml + deploy: + replicas: 2 + placement: + max_replicas_per_node: 1 + resources: + limits: + memory: 2G + cpus: '1.0' + reservations: + memory: 1G + cpus: '0.5' + restart_policy: + condition: on-failure + delay: 10s + labels: + - "traefik.enable=true" + - "traefik.http.routers.openai-proxy.rule=Host(`openai.deepblack.cloud`)" + - "traefik.http.services.openai-proxy.loadbalancer.server.port=3002" + - "traefik.http.routers.openai-proxy.tls=true" + + # Content Resolver for bzzz:// address resolution + content-resolver: + image: registry.home.deepblack.cloud/bzzz-resolver:v2.0.0 + networks: + - bzzz-internal + - tengig + ports: + - "3003:3003" + volumes: + - /rust/bzzz-v2/data/blobs:/app/blobs:ro + environment: + - BLAKE3_INDEX_PATH=/app/blobs/index + - DHT_BOOTSTRAP_NODES=walnut:9101,ironwood:9102,acacia:9103 + - CACHE_SIZE_MB=512 + deploy: + replicas: 3 + placement: + max_replicas_per_node: 1 + resources: + limits: + memory: 1G + cpus: '1.0' + reservations: + memory: 512M + cpus: '0.5' + restart_policy: + condition: on-failure + labels: + - "traefik.enable=true" + - "traefik.http.routers.content-resolver.rule=Host(`resolve.deepblack.cloud`)" + + # DHT Bootstrap Nodes (one per physical node) + dht-bootstrap-walnut: + image: registry.home.deepblack.cloud/bzzz-dht:v2.0.0 + networks: + - bzzz-internal + ports: + - "9101:9101" + volumes: + - /rust/bzzz-v2/data/dht/walnut:/app/data + environment: + - DHT_PORT=9101 + - NODE_NAME=walnut + - PEER_STORE_PATH=/app/data/peers + deploy: + replicas: 1 + placement: + constraints: + - node.hostname == walnut + resources: + limits: + memory: 1G + cpus: '1.0' + restart_policy: + condition: on-failure + + dht-bootstrap-ironwood: + image: registry.home.deepblack.cloud/bzzz-dht:v2.0.0 + networks: + - bzzz-internal + ports: + - "9102:9102" + volumes: + - /rust/bzzz-v2/data/dht/ironwood:/app/data + environment: + - DHT_PORT=9102 + - NODE_NAME=ironwood + - PEER_STORE_PATH=/app/data/peers + deploy: + replicas: 1 + placement: + constraints: + - node.hostname == ironwood + resources: + limits: + memory: 1G + cpus: '1.0' + restart_policy: + condition: on-failure + + dht-bootstrap-acacia: + image: registry.home.deepblack.cloud/bzzz-dht:v2.0.0 + networks: + - bzzz-internal + ports: + - "9103:9103" + volumes: + - /rust/bzzz-v2/data/dht/acacia:/app/data + environment: + - DHT_PORT=9103 + - NODE_NAME=acacia + - PEER_STORE_PATH=/app/data/peers + deploy: + replicas: 1 + placement: + constraints: + - node.hostname == acacia + resources: + limits: + memory: 1G + cpus: '1.0' + restart_policy: + condition: on-failure + + # PostgreSQL for metadata and conversation threading + postgres: + image: postgres:15-alpine + networks: + - bzzz-internal + environment: + - POSTGRES_DB=bzzz_v2 + - POSTGRES_USER=bzzz + - POSTGRES_PASSWORD_FILE=/run/secrets/postgres_password + - POSTGRES_INITDB_ARGS=--auth-host=scram-sha-256 + volumes: + - /rust/bzzz-v2/data/postgres:/var/lib/postgresql/data + - /rust/bzzz-v2/config/postgres/init:/docker-entrypoint-initdb.d:ro + secrets: + - postgres_password + deploy: + replicas: 1 + placement: + constraints: + - node.hostname == walnut + resources: + limits: + memory: 4G + cpus: '2.0' + reservations: + memory: 2G + cpus: '1.0' + restart_policy: + condition: on-failure + delay: 10s + healthcheck: + test: ["CMD-SHELL", "pg_isready -U bzzz -d bzzz_v2"] + interval: 30s + timeout: 10s + retries: 3 + + # Redis for caching and DHT coordination + redis: + image: redis:7-alpine + networks: + - bzzz-internal + volumes: + - /rust/bzzz-v2/data/redis:/data + configs: + - source: redis_config + target: /usr/local/etc/redis/redis.conf + command: redis-server /usr/local/etc/redis/redis.conf + deploy: + replicas: 1 + placement: + constraints: + - node.hostname == ironwood + resources: + limits: + memory: 2G + cpus: '1.0' + reservations: + memory: 512M + cpus: '0.5' + restart_policy: + condition: on-failure + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 30s + timeout: 10s + retries: 3 + + # Conversation Thread Manager + conversation-manager: + image: registry.home.deepblack.cloud/bzzz-conversation:v2.0.0 + networks: + - bzzz-internal + environment: + - POSTGRES_HOST=postgres + - REDIS_HOST=redis + - LAMPORT_CLOCK_PRECISION=microsecond + volumes: + - /rust/bzzz-v2/data/conversations:/app/conversations + secrets: + - postgres_password + deploy: + replicas: 2 + placement: + max_replicas_per_node: 1 + resources: + limits: + memory: 2G + cpus: '1.0' + restart_policy: + condition: on-failure + + # Content Store Manager + content-store: + image: registry.home.deepblack.cloud/bzzz-content-store:v2.0.0 + networks: + - bzzz-internal + volumes: + - /rust/bzzz-v2/data/blobs:/app/blobs + environment: + - BLAKE3_SHARD_DEPTH=2 + - REPLICATION_FACTOR=3 + - GARBAGE_COLLECTION_INTERVAL=24h + deploy: + replicas: 3 + placement: + max_replicas_per_node: 1 + resources: + limits: + memory: 8G + cpus: '2.0' + reservations: + memory: 4G + cpus: '1.0' + restart_policy: + condition: on-failure + +networks: + tengig: + external: true + bzzz-internal: + driver: overlay + internal: true + attachable: false + ipam: + driver: default + config: + - subnet: 10.200.0.0/16 + +volumes: + postgres_data: + driver: local + driver_opts: + type: nfs + o: addr=192.168.1.27,rw,sync + device: ":/rust/bzzz-v2/data/postgres" + + redis_data: + driver: local + driver_opts: + type: nfs + o: addr=192.168.1.27,rw,sync + device: ":/rust/bzzz-v2/data/redis" + +secrets: + openai_api_key: + external: true + name: bzzz_openai_api_key + postgres_password: + external: true + name: bzzz_postgres_password + +configs: + bzzz_config: + external: true + name: bzzz_v2_config + mcp_config: + external: true + name: bzzz_mcp_config + proxy_config: + external: true + name: bzzz_proxy_config + redis_config: + external: true + name: bzzz_redis_config \ No newline at end of file diff --git a/infrastructure/docs/DEPLOYMENT_RUNBOOK.md b/infrastructure/docs/DEPLOYMENT_RUNBOOK.md new file mode 100644 index 00000000..d37358e3 --- /dev/null +++ b/infrastructure/docs/DEPLOYMENT_RUNBOOK.md @@ -0,0 +1,581 @@ +# BZZZ v2 Deployment Runbook + +## Overview + +This runbook provides step-by-step procedures for deploying, operating, and maintaining BZZZ v2 infrastructure. It covers normal operations, emergency procedures, and troubleshooting guidelines. + +## Prerequisites + +### System Requirements + +- **Cluster**: 3 nodes (WALNUT, IRONWOOD, ACACIA) +- **OS**: Ubuntu 22.04 LTS or newer +- **Docker**: Version 24+ with Swarm mode enabled +- **Storage**: NFS mount at `/rust/` with 500GB+ available +- **Network**: Internal 192.168.1.0/24 with external internet access +- **Secrets**: OpenAI API key and database credentials + +### Access Requirements + +- SSH access to all cluster nodes +- Docker Swarm manager privileges +- Sudo access for system configuration +- GitLab access for CI/CD pipeline management + +## Pre-Deployment Checklist + +### Infrastructure Verification + +```bash +# Verify Docker Swarm status +docker node ls +docker network ls | grep tengig + +# Check available storage +df -h /rust/ + +# Verify network connectivity +ping -c 3 192.168.1.27 # WALNUT +ping -c 3 192.168.1.113 # IRONWOOD +ping -c 3 192.168.1.xxx # ACACIA + +# Test registry access +docker pull registry.home.deepblack.cloud/hello-world || echo "Registry access test" +``` + +### Security Hardening + +```bash +# Run security hardening script +cd /home/tony/chorus/project-queues/active/BZZZ/infrastructure/security +sudo ./security-hardening.sh + +# Verify firewall status +sudo ufw status verbose + +# Check fail2ban status +sudo fail2ban-client status +``` + +## Deployment Procedures + +### 1. Initial Deployment (Fresh Install) + +#### Step 1: Prepare Infrastructure + +```bash +# Create directory structure +mkdir -p /rust/bzzz-v2/{config,data,logs,backup} +mkdir -p /rust/bzzz-v2/data/{blobs,conversations,dht,postgres,redis} +mkdir -p /rust/bzzz-v2/config/{swarm,monitoring,security} + +# Set permissions +sudo chown -R tony:tony /rust/bzzz-v2 +chmod -R 755 /rust/bzzz-v2 +``` + +#### Step 2: Configure Secrets and Configs + +```bash +cd /home/tony/chorus/project-queues/active/BZZZ/infrastructure + +# Create Docker secrets +docker secret create bzzz_postgres_password config/secrets/postgres_password +docker secret create bzzz_openai_api_key ~/chorus/business/secrets/openai-api-key +docker secret create bzzz_grafana_admin_password config/secrets/grafana_admin_password + +# Create Docker configs +docker config create bzzz_v2_config config/bzzz-config.yaml +docker config create bzzz_prometheus_config monitoring/configs/prometheus.yml +docker config create bzzz_alertmanager_config monitoring/configs/alertmanager.yml +``` + +#### Step 3: Deploy Core Services + +```bash +# Deploy main BZZZ v2 stack +docker stack deploy -c docker-compose.swarm.yml bzzz-v2 + +# Wait for services to start (this may take 5-10 minutes) +watch docker stack ps bzzz-v2 +``` + +#### Step 4: Deploy Monitoring Stack + +```bash +# Deploy monitoring services +docker stack deploy -c monitoring/docker-compose.monitoring.yml bzzz-monitoring + +# Verify monitoring services +curl -f http://localhost:9090/-/healthy # Prometheus +curl -f http://localhost:3000/api/health # Grafana +``` + +#### Step 5: Verify Deployment + +```bash +# Check all services are running +docker service ls --filter label=com.docker.stack.namespace=bzzz-v2 + +# Test external endpoints +curl -f https://bzzz.deepblack.cloud/health +curl -f https://mcp.deepblack.cloud/health +curl -f https://resolve.deepblack.cloud/health + +# Check P2P mesh connectivity +docker exec $(docker ps -q -f label=com.docker.swarm.service.name=bzzz-v2_bzzz-agent | head -1) \ + curl -s http://localhost:9000/api/v2/peers | jq '.connected_peers | length' +``` + +### 2. Update Deployment (Rolling Update) + +#### Step 1: Pre-Update Checks + +```bash +# Check current deployment health +docker stack ps bzzz-v2 | grep -v "Shutdown\|Failed" + +# Backup current configuration +mkdir -p /rust/bzzz-v2/backup/$(date +%Y%m%d-%H%M%S) +docker config ls | grep bzzz_ > /rust/bzzz-v2/backup/$(date +%Y%m%d-%H%M%S)/configs.txt +docker secret ls | grep bzzz_ > /rust/bzzz-v2/backup/$(date +%Y%m%d-%H%M%S)/secrets.txt +``` + +#### Step 2: Update Images + +```bash +# Update to new image version +export NEW_IMAGE_TAG="v2.1.0" + +# Update Docker Compose file with new image tags +sed -i "s/registry.home.deepblack.cloud\/bzzz:.*$/registry.home.deepblack.cloud\/bzzz:${NEW_IMAGE_TAG}/g" \ + docker-compose.swarm.yml + +# Deploy updated stack (rolling update) +docker stack deploy -c docker-compose.swarm.yml bzzz-v2 +``` + +#### Step 3: Monitor Update Progress + +```bash +# Watch rolling update progress +watch "docker service ps bzzz-v2_bzzz-agent | head -20" + +# Check for any failed updates +docker service ps bzzz-v2_bzzz-agent --filter desired-state=running --filter current-state=failed +``` + +### 3. Migration from v1 to v2 + +```bash +# Use the automated migration script +cd /home/tony/chorus/project-queues/active/BZZZ/infrastructure/migration-scripts + +# Dry run first to preview changes +./migrate-v1-to-v2.sh --dry-run + +# Execute full migration +./migrate-v1-to-v2.sh + +# If rollback is needed +./migrate-v1-to-v2.sh --rollback +``` + +## Monitoring and Health Checks + +### Health Check Commands + +```bash +# Service health checks +docker service ls --filter label=com.docker.stack.namespace=bzzz-v2 +docker service ps bzzz-v2_bzzz-agent --filter desired-state=running + +# Application health checks +curl -f https://bzzz.deepblack.cloud/health +curl -f https://mcp.deepblack.cloud/health +curl -f https://resolve.deepblack.cloud/health +curl -f https://openai.deepblack.cloud/health + +# P2P network health +docker exec $(docker ps -q -f label=com.docker.swarm.service.name=bzzz-v2_bzzz-agent | head -1) \ + curl -s http://localhost:9000/api/v2/dht/stats | jq '.' + +# Database connectivity +docker exec $(docker ps -q -f label=com.docker.swarm.service.name=bzzz-v2_postgres) \ + pg_isready -U bzzz -d bzzz_v2 + +# Cache connectivity +docker exec $(docker ps -q -f label=com.docker.swarm.service.name=bzzz-v2_redis) \ + redis-cli ping +``` + +### Performance Monitoring + +```bash +# Check resource usage +docker stats --no-stream + +# Monitor disk usage +df -h /rust/bzzz-v2/data/ + +# Check network connections +netstat -tuln | grep -E ":(9000|3001|3002|3003|9101|9102|9103)" + +# Monitor OpenAI API usage +curl -s http://localhost:9203/metrics | grep openai_cost +``` + +## Troubleshooting Guide + +### Common Issues and Solutions + +#### 1. Service Won't Start + +**Symptoms:** Service stuck in `preparing` or constantly restarting + +**Diagnosis:** +```bash +# Check service logs +docker service logs bzzz-v2_bzzz-agent --tail 50 + +# Check node resources +docker node ls +docker system df + +# Verify secrets and configs +docker secret ls | grep bzzz_ +docker config ls | grep bzzz_ +``` + +**Solutions:** +- Check resource constraints and availability +- Verify secrets and configs are accessible +- Ensure image is available and correct +- Check node labels and placement constraints + +#### 2. P2P Network Issues + +**Symptoms:** Agents not discovering each other, DHT lookups failing + +**Diagnosis:** +```bash +# Check peer connections +docker exec $(docker ps -q -f label=com.docker.swarm.service.name=bzzz-v2_bzzz-agent | head -1) \ + curl -s http://localhost:9000/api/v2/peers + +# Check DHT bootstrap nodes +curl http://localhost:9101/health +curl http://localhost:9102/health +curl http://localhost:9103/health + +# Check network connectivity +docker network inspect bzzz-internal +``` + +**Solutions:** +- Restart DHT bootstrap services +- Check firewall rules for P2P ports +- Verify Docker Swarm overlay network +- Check for port conflicts + +#### 3. High OpenAI Costs + +**Symptoms:** Cost alerts triggering, rate limits being hit + +**Diagnosis:** +```bash +# Check current usage +curl -s http://localhost:9203/metrics | grep -E "openai_(cost|requests|tokens)" + +# Check rate limiting +docker service logs bzzz-v2_openai-proxy --tail 100 | grep "rate limit" +``` + +**Solutions:** +- Adjust rate limiting parameters +- Review conversation patterns for excessive API calls +- Implement request caching +- Consider model selection optimization + +#### 4. Database Connection Issues + +**Symptoms:** Service errors related to database connectivity + +**Diagnosis:** +```bash +# Check PostgreSQL status +docker service logs bzzz-v2_postgres --tail 50 + +# Test connection from agent +docker exec $(docker ps -q -f label=com.docker.swarm.service.name=bzzz-v2_bzzz-agent | head -1) \ + pg_isready -h postgres -U bzzz + +# Check connection limits +docker exec $(docker ps -q -f label=com.docker.swarm.service.name=bzzz-v2_postgres) \ + psql -U bzzz -d bzzz_v2 -c "SELECT count(*) FROM pg_stat_activity;" +``` + +**Solutions:** +- Restart PostgreSQL service +- Check connection pool settings +- Increase max_connections if needed +- Review long-running queries + +#### 5. Storage Issues + +**Symptoms:** Disk full alerts, content store errors + +**Diagnosis:** +```bash +# Check disk usage +df -h /rust/bzzz-v2/data/ +du -sh /rust/bzzz-v2/data/blobs/ + +# Check content store health +curl -s http://localhost:9202/metrics | grep content_store +``` + +**Solutions:** +- Run garbage collection on old blobs +- Clean up old conversation threads +- Increase storage capacity +- Adjust retention policies + +## Emergency Procedures + +### Service Outage Response + +#### Priority 1: Complete Service Outage + +```bash +# 1. Check cluster status +docker node ls +docker service ls --filter label=com.docker.stack.namespace=bzzz-v2 + +# 2. Emergency restart of critical services +docker service update --force bzzz-v2_bzzz-agent +docker service update --force bzzz-v2_postgres +docker service update --force bzzz-v2_redis + +# 3. If stack is corrupted, redeploy +docker stack rm bzzz-v2 +sleep 60 +docker stack deploy -c docker-compose.swarm.yml bzzz-v2 + +# 4. Monitor recovery +watch docker stack ps bzzz-v2 +``` + +#### Priority 2: Partial Service Degradation + +```bash +# 1. Identify problematic services +docker service ps bzzz-v2_bzzz-agent --filter desired-state=running --filter current-state=failed + +# 2. Scale up healthy replicas +docker service update --replicas 3 bzzz-v2_bzzz-agent + +# 3. Remove unhealthy tasks +docker service update --force bzzz-v2_bzzz-agent +``` + +### Security Incident Response + +#### Step 1: Immediate Containment + +```bash +# 1. Block suspicious IPs +sudo ufw insert 1 deny from SUSPICIOUS_IP + +# 2. Check for compromise indicators +sudo fail2ban-client status +sudo tail -100 /var/log/audit/audit.log | grep -i "denied\|failed\|error" + +# 3. Isolate affected services +docker service update --replicas 0 AFFECTED_SERVICE +``` + +#### Step 2: Investigation + +```bash +# 1. Check access logs +docker service logs bzzz-v2_bzzz-agent --since 1h | grep -i "error\|failed\|unauthorized" + +# 2. Review monitoring alerts +curl -s http://localhost:9093/api/v1/alerts | jq '.data[] | select(.state=="firing")' + +# 3. Examine network connections +netstat -tuln +ss -tulpn | grep -E ":(9000|3001|3002|3003)" +``` + +#### Step 3: Recovery + +```bash +# 1. Update security rules +./infrastructure/security/security-hardening.sh + +# 2. Rotate secrets if compromised +docker secret rm bzzz_postgres_password +openssl rand -base64 32 | docker secret create bzzz_postgres_password - + +# 3. Restart services with new secrets +docker stack deploy -c docker-compose.swarm.yml bzzz-v2 +``` + +### Data Recovery Procedures + +#### Backup Restoration + +```bash +# 1. Stop services +docker stack rm bzzz-v2 + +# 2. Restore from backup +BACKUP_DATE="20241201-120000" +rsync -av /rust/bzzz-v2/backup/$BACKUP_DATE/ /rust/bzzz-v2/data/ + +# 3. Restart services +docker stack deploy -c docker-compose.swarm.yml bzzz-v2 +``` + +#### Database Recovery + +```bash +# 1. Stop application services +docker service scale bzzz-v2_bzzz-agent=0 + +# 2. Create database backup +docker exec $(docker ps -q -f label=com.docker.swarm.service.name=bzzz-v2_postgres) \ + pg_dump -U bzzz bzzz_v2 > /rust/bzzz-v2/backup/database-$(date +%Y%m%d-%H%M%S).sql + +# 3. Restore database +docker exec -i $(docker ps -q -f label=com.docker.swarm.service.name=bzzz-v2_postgres) \ + psql -U bzzz -d bzzz_v2 < /rust/bzzz-v2/backup/database-backup.sql + +# 4. Restart application services +docker service scale bzzz-v2_bzzz-agent=3 +``` + +## Maintenance Procedures + +### Routine Maintenance (Weekly) + +```bash +#!/bin/bash +# Weekly maintenance script + +# 1. Check service health +docker service ls --filter label=com.docker.stack.namespace=bzzz-v2 +docker system df + +# 2. Clean up unused resources +docker system prune -f +docker volume prune -f + +# 3. Backup critical data +pg_dump -h localhost -U bzzz bzzz_v2 | gzip > \ + /rust/bzzz-v2/backup/weekly-db-$(date +%Y%m%d).sql.gz + +# 4. Rotate logs +find /rust/bzzz-v2/logs -name "*.log" -mtime +7 -delete + +# 5. Check certificate expiration +openssl x509 -in /rust/bzzz-v2/config/tls/server/walnut.pem -noout -dates + +# 6. Update security rules +fail2ban-client reload + +# 7. Generate maintenance report +echo "Maintenance completed on $(date)" >> /rust/bzzz-v2/logs/maintenance.log +``` + +### Scaling Procedures + +#### Scale Up + +```bash +# Increase replica count +docker service scale bzzz-v2_bzzz-agent=5 +docker service scale bzzz-v2_mcp-server=5 + +# Add new node to cluster (run on new node) +docker swarm join --token $WORKER_TOKEN $MANAGER_IP:2377 + +# Label new node +docker node update --label-add bzzz.role=agent NEW_NODE_HOSTNAME +``` + +#### Scale Down + +```bash +# Gracefully reduce replicas +docker service scale bzzz-v2_bzzz-agent=2 +docker service scale bzzz-v2_mcp-server=2 + +# Remove node from cluster +docker node update --availability drain NODE_HOSTNAME +docker node rm NODE_HOSTNAME +``` + +## Performance Tuning + +### Database Optimization + +```bash +# PostgreSQL tuning +docker exec $(docker ps -q -f label=com.docker.swarm.service.name=bzzz-v2_postgres) \ + psql -U bzzz -d bzzz_v2 -c " + ALTER SYSTEM SET shared_buffers = '1GB'; + ALTER SYSTEM SET max_connections = 200; + ALTER SYSTEM SET checkpoint_timeout = '15min'; + SELECT pg_reload_conf(); + " +``` + +### Storage Optimization + +```bash +# Content store optimization +find /rust/bzzz-v2/data/blobs -name "*.tmp" -mtime +1 -delete +find /rust/bzzz-v2/data/blobs -type f -size 0 -delete + +# Compress old logs +find /rust/bzzz-v2/logs -name "*.log" -mtime +3 -exec gzip {} \; +``` + +### Network Optimization + +```bash +# Optimize network buffer sizes +echo 'net.core.rmem_max = 134217728' | sudo tee -a /etc/sysctl.conf +echo 'net.core.wmem_max = 134217728' | sudo tee -a /etc/sysctl.conf +echo 'net.ipv4.tcp_rmem = 4096 87380 134217728' | sudo tee -a /etc/sysctl.conf +echo 'net.ipv4.tcp_wmem = 4096 65536 134217728' | sudo tee -a /etc/sysctl.conf +sudo sysctl -p +``` + +## Contact Information + +### On-Call Procedures + +- **Primary Contact**: DevOps Team Lead +- **Secondary Contact**: Senior Site Reliability Engineer +- **Escalation**: Platform Engineering Manager + +### Communication Channels + +- **Slack**: #bzzz-incidents +- **Email**: devops@deepblack.cloud +- **Phone**: Emergency On-Call Rotation + +### Documentation + +- **Runbooks**: This document +- **Architecture**: `/docs/BZZZ_V2_INFRASTRUCTURE_ARCHITECTURE.md` +- **API Documentation**: https://bzzz.deepblack.cloud/docs +- **Monitoring Dashboards**: https://grafana.deepblack.cloud + +--- + +*This runbook should be reviewed and updated monthly. Last updated: $(date)* \ No newline at end of file diff --git a/infrastructure/migration-scripts/migrate-v1-to-v2.sh b/infrastructure/migration-scripts/migrate-v1-to-v2.sh new file mode 100755 index 00000000..ca0f5b37 --- /dev/null +++ b/infrastructure/migration-scripts/migrate-v1-to-v2.sh @@ -0,0 +1,514 @@ +#!/bin/bash +set -euo pipefail + +# BZZZ v1 to v2 Migration Script +# This script handles the complete migration from BZZZ v1 (SystemD) to v2 (Docker Swarm) + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LOG_FILE="/var/log/bzzz-migration-$(date +%Y%m%d-%H%M%S).log" +BACKUP_DIR="/rust/bzzz-v2/backup/$(date +%Y%m%d-%H%M%S)" +DRY_RUN=${DRY_RUN:-false} + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +log() { + echo -e "${BLUE}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $1" | tee -a "$LOG_FILE" +} + +error() { + echo -e "${RED}[ERROR]${NC} $1" | tee -a "$LOG_FILE" + exit 1 +} + +warn() { + echo -e "${YELLOW}[WARN]${NC} $1" | tee -a "$LOG_FILE" +} + +success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" | tee -a "$LOG_FILE" +} + +check_prerequisites() { + log "Checking prerequisites..." + + # Check if running as root for some operations + if [[ $EUID -eq 0 ]]; then + error "This script should not be run as root. Run as tony user with sudo access." + fi + + # Check required commands + local commands=("docker" "systemctl" "pg_dump" "rsync" "curl") + for cmd in "${commands[@]}"; do + if ! command -v "$cmd" &> /dev/null; then + error "Required command '$cmd' not found" + fi + done + + # Check Docker Swarm status + if ! docker info | grep -q "Swarm: active"; then + error "Docker Swarm is not active. Please initialize swarm first." + fi + + # Check available disk space + local available=$(df /rust | awk 'NR==2 {print $4}') + local required=10485760 # 10GB in KB + if [[ $available -lt $required ]]; then + error "Insufficient disk space. Need at least 10GB available in /rust" + fi + + success "Prerequisites check passed" +} + +backup_v1_data() { + log "Creating backup of v1 data..." + + if [[ "$DRY_RUN" == "true" ]]; then + log "[DRY RUN] Would create backup at: $BACKUP_DIR" + return 0 + fi + + mkdir -p "$BACKUP_DIR" + + # Backup v1 configuration + if [[ -d "/home/tony/chorus/project-queues/active/BZZZ" ]]; then + rsync -av "/home/tony/chorus/project-queues/active/BZZZ/" "$BACKUP_DIR/v1-source/" + fi + + # Backup systemd service files + sudo cp /etc/systemd/system/bzzz.service "$BACKUP_DIR/" 2>/dev/null || true + + # Backup hypercore logs (if any) + if [[ -d "/home/tony/.config/bzzz" ]]; then + rsync -av "/home/tony/.config/bzzz/" "$BACKUP_DIR/config/" + fi + + # Backup any existing data directories + for node in walnut ironwood acacia; do + if [[ -d "/rust/bzzz/$node" ]]; then + rsync -av "/rust/bzzz/$node/" "$BACKUP_DIR/data/$node/" + fi + done + + success "Backup completed at: $BACKUP_DIR" +} + +stop_v1_services() { + log "Stopping BZZZ v1 services..." + + if [[ "$DRY_RUN" == "true" ]]; then + log "[DRY RUN] Would stop v1 systemd services" + return 0 + fi + + local nodes=("walnut" "ironwood" "acacia") + for node in "${nodes[@]}"; do + if sudo systemctl is-active --quiet "bzzz@$node" 2>/dev/null || sudo systemctl is-active --quiet bzzz 2>/dev/null; then + log "Stopping BZZZ service on $node..." + sudo systemctl stop "bzzz@$node" 2>/dev/null || sudo systemctl stop bzzz 2>/dev/null || true + sudo systemctl disable "bzzz@$node" 2>/dev/null || sudo systemctl disable bzzz 2>/dev/null || true + fi + done + + # Wait for services to fully stop + sleep 10 + + success "v1 services stopped" +} + +setup_v2_infrastructure() { + log "Setting up v2 infrastructure..." + + if [[ "$DRY_RUN" == "true" ]]; then + log "[DRY RUN] Would create v2 directory structure" + return 0 + fi + + # Create directory structure + mkdir -p /rust/bzzz-v2/{config,data,logs} + mkdir -p /rust/bzzz-v2/data/{blobs,conversations,dht,postgres,redis} + mkdir -p /rust/bzzz-v2/data/blobs/{data,index,temp} + mkdir -p /rust/bzzz-v2/data/dht/{walnut,ironwood,acacia} + mkdir -p /rust/bzzz-v2/config/{swarm,systemd,secrets} + mkdir -p /rust/bzzz-v2/logs/{application,p2p,monitoring} + + # Set permissions + sudo chown -R tony:tony /rust/bzzz-v2 + chmod -R 755 /rust/bzzz-v2 + + # Create placeholder configuration files + cat > /rust/bzzz-v2/config/bzzz-config.yaml << 'EOF' +agent: + id: "" + specialization: "advanced_reasoning" + capabilities: ["code_generation", "debugging", "analysis"] + models: ["llama3.2:70b", "qwen2.5:72b"] + max_tasks: 3 + +hive_api: + base_url: "http://hive.deepblack.cloud" + api_key: "" + +dht: + bootstrap_nodes: + - "walnut:9101" + - "ironwood:9102" + - "acacia:9103" + +content_store: + path: "/app/data/blobs" + replication_factor: 3 + shard_depth: 2 + +openai: + rate_limit_rpm: 1000 + rate_limit_tpm: 100000 + cost_tracking: true +EOF + + success "v2 infrastructure setup completed" +} + +migrate_conversation_data() { + log "Migrating conversation data..." + + if [[ "$DRY_RUN" == "true" ]]; then + log "[DRY RUN] Would migrate hypercore logs to content-addressed storage" + return 0 + fi + + # Check if there are any hypercore logs to migrate + local log_files=() + for node in walnut ironwood acacia; do + if [[ -f "/home/tony/.config/bzzz/hypercore-$node.log" ]]; then + log_files+=("/home/tony/.config/bzzz/hypercore-$node.log") + fi + done + + if [[ ${#log_files[@]} -eq 0 ]]; then + warn "No hypercore logs found for migration" + return 0 + fi + + # Process each log file and create content-addressed blobs + local migration_script="$SCRIPT_DIR/convert-hypercore-to-cas.py" + if [[ -f "$migration_script" ]]; then + python3 "$migration_script" "${log_files[@]}" --output-dir "/rust/bzzz-v2/data/blobs/data" + success "Conversation data migrated to content-addressed storage" + else + warn "Migration script not found, skipping conversation data migration" + fi +} + +setup_docker_secrets() { + log "Setting up Docker secrets..." + + if [[ "$DRY_RUN" == "true" ]]; then + log "[DRY RUN] Would create Docker secrets" + return 0 + fi + + # Create PostgreSQL password secret + if [[ -f "/home/tony/chorus/business/secrets/postgres-bzzz-password" ]]; then + docker secret create bzzz_postgres_password /home/tony/chorus/business/secrets/postgres-bzzz-password 2>/dev/null || true + else + # Generate random password + openssl rand -base64 32 | docker secret create bzzz_postgres_password - 2>/dev/null || true + fi + + # Create OpenAI API key secret + if [[ -f "/home/tony/chorus/business/secrets/openai-api-key" ]]; then + docker secret create bzzz_openai_api_key /home/tony/chorus/business/secrets/openai-api-key 2>/dev/null || true + else + warn "OpenAI API key not found in secrets directory" + fi + + success "Docker secrets configured" +} + +setup_docker_configs() { + log "Setting up Docker configs..." + + if [[ "$DRY_RUN" == "true" ]]; then + log "[DRY RUN] Would create Docker configs" + return 0 + fi + + # Create main BZZZ config + docker config create bzzz_v2_config /rust/bzzz-v2/config/bzzz-config.yaml 2>/dev/null || true + + # Create MCP server config + cat > /tmp/mcp-config.yaml << 'EOF' +server: + port: 3001 + max_connections: 1000 + timeout_seconds: 30 + +tools: + enabled: true + max_execution_time: 300 + +logging: + level: info + format: json +EOF + docker config create bzzz_mcp_config /tmp/mcp-config.yaml 2>/dev/null || true + rm /tmp/mcp-config.yaml + + # Create proxy config + cat > /tmp/proxy-config.yaml << 'EOF' +openai: + rate_limit: + requests_per_minute: 1000 + tokens_per_minute: 100000 + cost_tracking: + enabled: true + log_requests: true + models: + - "gpt-4" + - "gpt-4-turbo" + - "gpt-3.5-turbo" + +server: + port: 3002 + timeout: 30s +EOF + docker config create bzzz_proxy_config /tmp/proxy-config.yaml 2>/dev/null || true + rm /tmp/proxy-config.yaml + + # Create Redis config + cat > /tmp/redis.conf << 'EOF' +bind 0.0.0.0 +port 6379 +timeout 0 +keepalive 300 +maxclients 10000 +maxmemory 1gb +maxmemory-policy allkeys-lru +save 900 1 +save 300 10 +save 60 10000 +EOF + docker config create bzzz_redis_config /tmp/redis.conf 2>/dev/null || true + rm /tmp/redis.conf + + success "Docker configs created" +} + +deploy_v2_stack() { + log "Deploying BZZZ v2 Docker stack..." + + if [[ "$DRY_RUN" == "true" ]]; then + log "[DRY RUN] Would deploy Docker stack with: docker stack deploy -c docker-compose.swarm.yml bzzz-v2" + return 0 + fi + + cd "$SCRIPT_DIR/.." + + # Verify compose file + if ! docker-compose -f infrastructure/docker-compose.swarm.yml config > /dev/null; then + error "Docker compose file validation failed" + fi + + # Deploy the stack + docker stack deploy -c infrastructure/docker-compose.swarm.yml bzzz-v2 + + # Wait for services to start + log "Waiting for services to become ready..." + local max_wait=300 # 5 minutes + local wait_time=0 + + while [[ $wait_time -lt $max_wait ]]; do + local ready_services=$(docker service ls --filter label=com.docker.stack.namespace=bzzz-v2 --format "table {{.Name}}\t{{.Replicas}}" | grep -v "0/" | wc -l) + local total_services=$(docker service ls --filter label=com.docker.stack.namespace=bzzz-v2 --format "table {{.Name}}" | wc -l) + + if [[ $ready_services -eq $total_services ]]; then + success "All services are ready" + break + fi + + log "Waiting for services... ($ready_services/$total_services ready)" + sleep 10 + wait_time=$((wait_time + 10)) + done + + if [[ $wait_time -ge $max_wait ]]; then + error "Timeout waiting for services to become ready" + fi +} + +verify_v2_deployment() { + log "Verifying v2 deployment..." + + # Check service health + local services=("bzzz-v2_bzzz-agent" "bzzz-v2_postgres" "bzzz-v2_redis" "bzzz-v2_mcp-server") + for service in "${services[@]}"; do + if ! docker service ps "$service" | grep -q "Running"; then + error "Service $service is not running properly" + fi + done + + # Test DHT connectivity + log "Testing DHT connectivity..." + if ! timeout 30 docker exec "$(docker ps -q -f label=com.docker.swarm.service.name=bzzz-v2_dht-bootstrap-walnut)" \ + curl -f http://localhost:9101/health > /dev/null 2>&1; then + warn "DHT bootstrap node (walnut) health check failed" + fi + + # Test MCP server + log "Testing MCP server..." + if ! timeout 10 curl -f http://localhost:3001/health > /dev/null 2>&1; then + warn "MCP server health check failed" + fi + + # Test content resolver + log "Testing content resolver..." + if ! timeout 10 curl -f http://localhost:3003/health > /dev/null 2>&1; then + warn "Content resolver health check failed" + fi + + success "v2 deployment verification completed" +} + +update_node_labels() { + log "Updating Docker node labels for service placement..." + + if [[ "$DRY_RUN" == "true" ]]; then + log "[DRY RUN] Would update node labels" + return 0 + fi + + # Set node labels for service placement + docker node update --label-add bzzz.role=agent walnut 2>/dev/null || true + docker node update --label-add bzzz.role=agent ironwood 2>/dev/null || true + docker node update --label-add bzzz.role=agent acacia 2>/dev/null || true + + success "Node labels updated" +} + +cleanup_v1_artifacts() { + log "Cleaning up v1 artifacts..." + + if [[ "$DRY_RUN" == "true" ]]; then + log "[DRY RUN] Would clean up v1 systemd files and binaries" + return 0 + fi + + # Remove systemd service files (but keep backup) + sudo rm -f /etc/systemd/system/bzzz.service + sudo rm -f /etc/systemd/system/bzzz@.service + sudo systemctl daemon-reload + + # Move v1 binaries to backup location + if [[ -f "/home/tony/chorus/project-queues/active/BZZZ/bzzz" ]]; then + mv "/home/tony/chorus/project-queues/active/BZZZ/bzzz" "$BACKUP_DIR/bzzz-v1-binary" + fi + + success "v1 cleanup completed" +} + +print_migration_summary() { + log "Migration Summary:" + log "==================" + log "βœ… v1 services stopped and disabled" + log "βœ… v2 infrastructure deployed to Docker Swarm" + log "βœ… Data migrated to content-addressed storage" + log "βœ… DHT network established across 3 nodes" + log "βœ… MCP server and OpenAI proxy deployed" + log "βœ… Monitoring and health checks configured" + log "" + log "Access Points:" + log "- BZZZ Agent API: https://bzzz.deepblack.cloud" + log "- MCP Server: https://mcp.deepblack.cloud" + log "- Content Resolver: https://resolve.deepblack.cloud" + log "- OpenAI Proxy: https://openai.deepblack.cloud" + log "" + log "Monitoring:" + log "- docker service ls --filter label=com.docker.stack.namespace=bzzz-v2" + log "- docker stack ps bzzz-v2" + log "- docker service logs bzzz-v2_bzzz-agent" + log "" + log "Backup Location: $BACKUP_DIR" + log "Migration Log: $LOG_FILE" +} + +rollback_to_v1() { + log "Rolling back to v1..." + + # Stop v2 services + docker stack rm bzzz-v2 2>/dev/null || true + sleep 30 + + # Restore v1 systemd service + if [[ -f "$BACKUP_DIR/bzzz.service" ]]; then + sudo cp "$BACKUP_DIR/bzzz.service" /etc/systemd/system/ + sudo systemctl daemon-reload + sudo systemctl enable bzzz + sudo systemctl start bzzz + fi + + # Restore v1 binary + if [[ -f "$BACKUP_DIR/bzzz-v1-binary" ]]; then + cp "$BACKUP_DIR/bzzz-v1-binary" "/home/tony/chorus/project-queues/active/BZZZ/bzzz" + chmod +x "/home/tony/chorus/project-queues/active/BZZZ/bzzz" + fi + + success "Rollback to v1 completed" +} + +main() { + log "Starting BZZZ v1 to v2 migration..." + log "DRY_RUN mode: $DRY_RUN" + + # Handle rollback if requested + if [[ "${1:-}" == "--rollback" ]]; then + rollback_to_v1 + return 0 + fi + + # Trap to handle errors + trap 'error "Migration failed at line $LINENO"' ERR + + check_prerequisites + backup_v1_data + stop_v1_services + setup_v2_infrastructure + migrate_conversation_data + setup_docker_secrets + setup_docker_configs + update_node_labels + deploy_v2_stack + verify_v2_deployment + cleanup_v1_artifacts + print_migration_summary + + success "BZZZ v2 migration completed successfully!" + log "Run with --rollback to revert to v1 if needed" +} + +# Handle script arguments +case "${1:-}" in + --dry-run) + DRY_RUN=true + main + ;; + --rollback) + main --rollback + ;; + --help|-h) + echo "Usage: $0 [--dry-run|--rollback|--help]" + echo "" + echo "Options:" + echo " --dry-run Preview migration steps without making changes" + echo " --rollback Rollback to v1 (emergency use only)" + echo " --help Show this help message" + exit 0 + ;; + *) + main + ;; +esac \ No newline at end of file diff --git a/infrastructure/monitoring/configs/alert-rules.yml b/infrastructure/monitoring/configs/alert-rules.yml new file mode 100644 index 00000000..245b53ed --- /dev/null +++ b/infrastructure/monitoring/configs/alert-rules.yml @@ -0,0 +1,339 @@ +# BZZZ v2 Prometheus Alert Rules + +groups: + # P2P Network Health Rules + - name: p2p-network + rules: + - alert: P2PNetworkPartition + expr: bzzz_p2p_connected_peers < 2 + for: 5m + labels: + severity: critical + component: p2p + annotations: + summary: "P2P network partition detected" + description: "Node {{ $labels.instance }} has less than 2 peers connected for more than 5 minutes" + + - alert: P2PHighLatency + expr: histogram_quantile(0.95, bzzz_p2p_message_duration_seconds) > 5 + for: 2m + labels: + severity: warning + component: p2p + annotations: + summary: "High P2P message latency" + description: "95th percentile P2P message latency is {{ $value }}s on {{ $labels.instance }}" + + - alert: P2PMessageDropRate + expr: rate(bzzz_p2p_messages_dropped_total[5m]) > 0.1 + for: 2m + labels: + severity: warning + component: p2p + annotations: + summary: "High P2P message drop rate" + description: "P2P message drop rate is {{ $value | humanizePercentage }} on {{ $labels.instance }}" + + # DHT Network Rules + - name: dht-network + rules: + - alert: DHTBootstrapNodeDown + expr: up{job="dht-bootstrap"} == 0 + for: 1m + labels: + severity: critical + component: dht + annotations: + summary: "DHT bootstrap node is down" + description: "DHT bootstrap node {{ $labels.instance }} has been down for more than 1 minute" + + - alert: DHTRoutingTableSize + expr: bzzz_dht_routing_table_size < 10 + for: 5m + labels: + severity: warning + component: dht + annotations: + summary: "DHT routing table is small" + description: "DHT routing table size is {{ $value }} on {{ $labels.instance }}, indicating poor network connectivity" + + - alert: DHTLookupFailureRate + expr: rate(bzzz_dht_lookup_failures_total[5m]) / rate(bzzz_dht_lookups_total[5m]) > 0.2 + for: 2m + labels: + severity: warning + component: dht + annotations: + summary: "High DHT lookup failure rate" + description: "DHT lookup failure rate is {{ $value | humanizePercentage }} on {{ $labels.instance }}" + + # Content Store Rules + - name: content-store + rules: + - alert: ContentStoreDiskUsage + expr: (bzzz_content_store_disk_used_bytes / bzzz_content_store_disk_total_bytes) * 100 > 85 + for: 5m + labels: + severity: warning + component: content-store + disk_usage: "{{ $value | humanize }}" + annotations: + summary: "Content store disk usage is high" + description: "Content store disk usage is {{ $value | humanizePercentage }} on {{ $labels.instance }}" + + - alert: ContentStoreDiskFull + expr: (bzzz_content_store_disk_used_bytes / bzzz_content_store_disk_total_bytes) * 100 > 95 + for: 1m + labels: + severity: critical + component: content-store + disk_usage: "{{ $value | humanize }}" + annotations: + summary: "Content store disk is nearly full" + description: "Content store disk usage is {{ $value | humanizePercentage }} on {{ $labels.instance }}" + + - alert: ContentReplicationFailed + expr: increase(bzzz_content_replication_failures_total[10m]) > 5 + for: 5m + labels: + severity: warning + component: content-store + annotations: + summary: "Content replication failures detected" + description: "{{ $value }} content replication failures in the last 10 minutes on {{ $labels.instance }}" + + - alert: BLAKE3HashCollision + expr: increase(bzzz_blake3_hash_collisions_total[1h]) > 0 + for: 0m + labels: + severity: critical + component: content-store + annotations: + summary: "BLAKE3 hash collision detected" + description: "BLAKE3 hash collision detected on {{ $labels.instance }} - immediate investigation required" + + # OpenAI Integration Rules + - name: openai-integration + rules: + - alert: OpenAIHighCost + expr: bzzz_openai_cost_daily_usd > 100 + for: 0m + labels: + severity: warning + component: openai-cost + current_cost: "{{ $value }}" + cost_threshold: "100" + cost_period: "daily" + annotations: + summary: "OpenAI daily cost exceeds threshold" + description: "Daily OpenAI cost is ${{ $value }}, exceeding the $100 threshold" + + - alert: OpenAICriticalCost + expr: bzzz_openai_cost_daily_usd > 500 + for: 0m + labels: + severity: critical + component: openai-cost + current_cost: "{{ $value }}" + cost_threshold: "500" + cost_period: "daily" + annotations: + summary: "OpenAI daily cost critically high" + description: "Daily OpenAI cost is ${{ $value }}, which is critically high - consider rate limiting" + + - alert: OpenAIRateLimitHit + expr: increase(bzzz_openai_rate_limit_hits_total[5m]) > 10 + for: 1m + labels: + severity: warning + component: openai-cost + annotations: + summary: "OpenAI rate limit frequently hit" + description: "OpenAI rate limit hit {{ $value }} times in the last 5 minutes" + + - alert: OpenAIProxyDown + expr: up{job="openai-proxy"} == 0 + for: 2m + labels: + severity: critical + component: service-health + annotations: + summary: "OpenAI proxy is down" + description: "OpenAI proxy service is down on {{ $labels.instance }}" + + # MCP Server Rules + - name: mcp-server + rules: + - alert: MCPServerDown + expr: up{job="mcp-server"} == 0 + for: 2m + labels: + severity: critical + component: service-health + annotations: + summary: "MCP server is down" + description: "MCP server is down on {{ $labels.instance }}" + + - alert: MCPHighResponseTime + expr: histogram_quantile(0.95, bzzz_mcp_request_duration_seconds) > 10 + for: 5m + labels: + severity: warning + component: service-health + annotations: + summary: "MCP server high response time" + description: "95th percentile MCP response time is {{ $value }}s on {{ $labels.instance }}" + + - alert: MCPConnectionLimit + expr: bzzz_mcp_active_connections / bzzz_mcp_max_connections > 0.8 + for: 2m + labels: + severity: warning + component: service-health + annotations: + summary: "MCP server connection limit approaching" + description: "MCP server connection usage is {{ $value | humanizePercentage }} on {{ $labels.instance }}" + + # Conversation Threading Rules + - name: conversation-threading + rules: + - alert: ConversationThreadLag + expr: bzzz_conversation_lamport_clock_lag_seconds > 30 + for: 2m + labels: + severity: warning + component: conversation + annotations: + summary: "Conversation thread lag detected" + description: "Lamport clock lag is {{ $value }}s on {{ $labels.instance }}, indicating thread synchronization issues" + + - alert: ConversationStorageFailure + expr: increase(bzzz_conversation_storage_failures_total[5m]) > 3 + for: 1m + labels: + severity: critical + component: conversation + annotations: + summary: "Conversation storage failures" + description: "{{ $value }} conversation storage failures in the last 5 minutes on {{ $labels.instance }}" + + # System Resource Rules + - name: system-resources + rules: + - alert: NodeDown + expr: up{job="node-exporter"} == 0 + for: 1m + labels: + severity: critical + component: system + annotations: + summary: "Node is down" + description: "Node {{ $labels.instance }} has been down for more than 1 minute" + + - alert: HighCPUUsage + expr: 100 - (avg(rate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance) * 100) > 80 + for: 5m + labels: + severity: warning + component: resources + resource_type: "cpu" + usage_percent: "{{ $value | humanize }}" + threshold: "80" + annotations: + summary: "High CPU usage" + description: "CPU usage is {{ $value | humanizePercentage }} on {{ $labels.instance }}" + + - alert: HighMemoryUsage + expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100 > 85 + for: 5m + labels: + severity: warning + component: resources + resource_type: "memory" + usage_percent: "{{ $value | humanize }}" + threshold: "85" + annotations: + summary: "High memory usage" + description: "Memory usage is {{ $value | humanizePercentage }} on {{ $labels.instance }}" + + - alert: DiskSpaceLow + expr: (node_filesystem_avail_bytes{fstype!="tmpfs"} / node_filesystem_size_bytes{fstype!="tmpfs"}) * 100 < 15 + for: 5m + labels: + severity: warning + component: resources + resource_type: "disk" + usage_percent: "{{ 100 - $value | humanize }}" + threshold: "85" + annotations: + summary: "Low disk space" + description: "Disk space is {{ 100 - $value | humanizePercentage }} full on {{ $labels.instance }} ({{ $labels.mountpoint }})" + + # Database Rules + - name: database + rules: + - alert: PostgreSQLDown + expr: up{job="postgres"} == 0 + for: 1m + labels: + severity: critical + component: service-health + annotations: + summary: "PostgreSQL is down" + description: "PostgreSQL database is down on {{ $labels.instance }}" + + - alert: PostgreSQLHighConnections + expr: pg_stat_database_numbackends / pg_settings_max_connections > 0.8 + for: 2m + labels: + severity: warning + component: service-health + annotations: + summary: "PostgreSQL connection limit approaching" + description: "PostgreSQL connection usage is {{ $value | humanizePercentage }} on {{ $labels.instance }}" + + - alert: RedisDown + expr: up{job="redis"} == 0 + for: 1m + labels: + severity: critical + component: service-health + annotations: + summary: "Redis is down" + description: "Redis cache is down on {{ $labels.instance }}" + + # Security Rules + - name: security + rules: + - alert: UnauthorizedP2PConnection + expr: increase(bzzz_p2p_unauthorized_connections_total[5m]) > 5 + for: 1m + labels: + severity: warning + component: security + security_type: "unauthorized_connection" + annotations: + summary: "Unauthorized P2P connection attempts" + description: "{{ $value }} unauthorized P2P connection attempts in the last 5 minutes on {{ $labels.instance }}" + + - alert: SuspiciousContentRequest + expr: increase(bzzz_content_suspicious_requests_total[5m]) > 10 + for: 2m + labels: + severity: warning + component: security + security_type: "suspicious_content" + annotations: + summary: "Suspicious content requests detected" + description: "{{ $value }} suspicious content requests in the last 5 minutes on {{ $labels.instance }}" + + - alert: FailedAuthentication + expr: increase(bzzz_auth_failures_total[5m]) > 20 + for: 1m + labels: + severity: warning + component: security + security_type: "authentication_failure" + annotations: + summary: "High authentication failure rate" + description: "{{ $value }} authentication failures in the last 5 minutes on {{ $labels.instance }}" \ No newline at end of file diff --git a/infrastructure/monitoring/configs/alertmanager.yml b/infrastructure/monitoring/configs/alertmanager.yml new file mode 100644 index 00000000..c56bc1e0 --- /dev/null +++ b/infrastructure/monitoring/configs/alertmanager.yml @@ -0,0 +1,255 @@ +# AlertManager Configuration for BZZZ v2 + +global: + smtp_smarthost: 'localhost:587' + smtp_from: 'alerts@deepblack.cloud' + smtp_require_tls: true + resolve_timeout: 5m + +# Template files +templates: + - '/etc/alertmanager/templates/*.tmpl' + +# Route configuration +route: + group_by: ['cluster', 'alertname', 'service'] + group_wait: 30s + group_interval: 5m + repeat_interval: 12h + receiver: 'default' + routes: + # Critical P2P network issues + - match: + severity: critical + component: p2p + receiver: 'p2p-critical' + group_wait: 10s + repeat_interval: 5m + + # DHT network issues + - match: + component: dht + receiver: 'dht-alerts' + group_wait: 1m + repeat_interval: 30m + + # Content store issues + - match: + component: content-store + receiver: 'storage-alerts' + group_wait: 2m + repeat_interval: 1h + + # OpenAI cost alerts + - match: + component: openai-cost + receiver: 'cost-alerts' + group_wait: 5m + repeat_interval: 6h + + # Service health alerts + - match: + component: service-health + receiver: 'service-alerts' + group_wait: 1m + repeat_interval: 15m + + # Resource exhaustion + - match: + severity: warning + component: resources + receiver: 'resource-alerts' + group_wait: 5m + repeat_interval: 2h + + # Security alerts + - match: + component: security + receiver: 'security-alerts' + group_wait: 30s + repeat_interval: 1h + +# Inhibition rules +inhibit_rules: + # Silence warning if critical alert is firing + - source_match: + severity: 'critical' + target_match: + severity: 'warning' + equal: ['cluster', 'service', 'instance'] + + # Silence service alerts if node is down + - source_match: + alertname: 'NodeDown' + target_match: + component: 'service-health' + equal: ['instance'] + +# Receiver configurations +receivers: + # Default receiver + - name: 'default' + slack_configs: + - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK' + channel: '#bzzz-monitoring' + title: 'BZZZ v2 Alert' + text: | + {{ range .Alerts }} + *Alert:* {{ .Annotations.summary }} + *Description:* {{ .Annotations.description }} + *Severity:* {{ .Labels.severity }} + *Instance:* {{ .Labels.instance }} + *Service:* {{ .Labels.service }} + {{ end }} + send_resolved: true + + # Critical P2P network alerts + - name: 'p2p-critical' + slack_configs: + - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK' + channel: '#bzzz-critical' + title: '🚨 CRITICAL P2P Network Issue' + text: | + {{ range .Alerts }} + *CRITICAL P2P ALERT* + + *Summary:* {{ .Annotations.summary }} + *Description:* {{ .Annotations.description }} + *Node:* {{ .Labels.instance }} + *Time:* {{ .StartsAt.Format "2006-01-02 15:04:05" }} + + *Immediate Action Required* + {{ end }} + send_resolved: true + pagerduty_configs: + - service_key: 'YOUR_PAGERDUTY_SERVICE_KEY' + description: '{{ .GroupLabels.alertname }} - {{ .Annotations.summary }}' + + # DHT network alerts + - name: 'dht-alerts' + slack_configs: + - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK' + channel: '#bzzz-dht' + title: 'πŸ”— DHT Network Alert' + text: | + {{ range .Alerts }} + *DHT Network Issue* + + *Alert:* {{ .Annotations.summary }} + *Description:* {{ .Annotations.description }} + *Bootstrap Node:* {{ .Labels.instance }} + *Peers Connected:* {{ .Labels.peer_count | default "unknown" }} + {{ end }} + send_resolved: true + + # Storage alerts + - name: 'storage-alerts' + slack_configs: + - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK' + channel: '#bzzz-storage' + title: 'πŸ’Ύ Content Store Alert' + text: | + {{ range .Alerts }} + *Storage Alert* + + *Issue:* {{ .Annotations.summary }} + *Details:* {{ .Annotations.description }} + *Node:* {{ .Labels.instance }} + *Usage:* {{ .Labels.disk_usage | default "unknown" }}% + {{ end }} + send_resolved: true + + # OpenAI cost alerts + - name: 'cost-alerts' + slack_configs: + - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK' + channel: '#bzzz-costs' + title: 'πŸ’° OpenAI Cost Alert' + text: | + {{ range .Alerts }} + *Cost Alert* + + *Alert:* {{ .Annotations.summary }} + *Current Cost:* ${{ .Labels.current_cost | default "unknown" }} + *Threshold:* ${{ .Labels.cost_threshold | default "unknown" }} + *Period:* {{ .Labels.cost_period | default "daily" }} + *Action:* {{ .Annotations.description }} + {{ end }} + send_resolved: true + email_configs: + - to: 'finance@deepblack.cloud' + subject: 'BZZZ v2 OpenAI Cost Alert' + body: | + OpenAI usage has exceeded cost thresholds. + + {{ range .Alerts }} + Alert: {{ .Annotations.summary }} + Current Cost: ${{ .Labels.current_cost }} + Threshold: ${{ .Labels.cost_threshold }} + {{ end }} + + # Service health alerts + - name: 'service-alerts' + slack_configs: + - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK' + channel: '#bzzz-services' + title: 'πŸ”§ Service Health Alert' + text: | + {{ range .Alerts }} + *Service Health Issue* + + *Service:* {{ .Labels.service }} + *Alert:* {{ .Annotations.summary }} + *Node:* {{ .Labels.instance }} + *Status:* {{ .Labels.status | default "unknown" }} + *Description:* {{ .Annotations.description }} + {{ end }} + send_resolved: true + + # Resource alerts + - name: 'resource-alerts' + slack_configs: + - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK' + channel: '#bzzz-resources' + title: '⚑ Resource Alert' + text: | + {{ range .Alerts }} + *Resource Warning* + + *Resource:* {{ .Labels.resource_type | default "unknown" }} + *Node:* {{ .Labels.instance }} + *Alert:* {{ .Annotations.summary }} + *Current Usage:* {{ .Labels.usage_percent | default "unknown" }}% + *Threshold:* {{ .Labels.threshold | default "unknown" }}% + {{ end }} + send_resolved: true + + # Security alerts + - name: 'security-alerts' + slack_configs: + - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK' + channel: '#bzzz-security' + title: 'πŸ”’ Security Alert' + text: | + {{ range .Alerts }} + *SECURITY ALERT* + + *Type:* {{ .Labels.security_type | default "unknown" }} + *Alert:* {{ .Annotations.summary }} + *Source:* {{ .Labels.instance }} + *Details:* {{ .Annotations.description }} + *Severity:* {{ .Labels.severity }} + {{ end }} + send_resolved: true + email_configs: + - to: 'security@deepblack.cloud' + subject: 'BZZZ v2 Security Alert' + body: | + Security alert triggered in BZZZ v2 cluster. + + {{ range .Alerts }} + Alert: {{ .Annotations.summary }} + Severity: {{ .Labels.severity }} + Source: {{ .Labels.instance }} + Details: {{ .Annotations.description }} + {{ end }} \ No newline at end of file diff --git a/infrastructure/monitoring/configs/prometheus.yml b/infrastructure/monitoring/configs/prometheus.yml new file mode 100644 index 00000000..c18b307a --- /dev/null +++ b/infrastructure/monitoring/configs/prometheus.yml @@ -0,0 +1,216 @@ +# Prometheus Configuration for BZZZ v2 Monitoring + +global: + scrape_interval: 30s + scrape_timeout: 10s + evaluation_interval: 30s + external_labels: + cluster: 'deepblack-cloud' + environment: 'production' + +rule_files: + - "/etc/prometheus/rules.yml" + +alerting: + alertmanagers: + - static_configs: + - targets: + - alertmanager:9093 + +scrape_configs: + # Prometheus self-monitoring + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + metrics_path: /metrics + scrape_interval: 15s + + # System metrics from node exporters + - job_name: 'node-exporter' + static_configs: + - targets: + - 'walnut:9100' + - 'ironwood:9100' + - 'acacia:9100' + metrics_path: /metrics + scrape_interval: 15s + + # Container metrics from cAdvisor + - job_name: 'cadvisor' + static_configs: + - targets: + - 'walnut:8080' + - 'ironwood:8080' + - 'acacia:8080' + metrics_path: /metrics + scrape_interval: 30s + + # BZZZ v2 Application Services + - job_name: 'bzzz-agent' + docker_sd_configs: + - host: unix:///var/run/docker.sock + port: 9000 + relabel_configs: + - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name] + target_label: __tmp_service_name + - source_labels: [__tmp_service_name] + regex: bzzz-v2_bzzz-agent + action: keep + - source_labels: [__meta_docker_container_label_com_docker_swarm_node_id] + target_label: node_id + - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name] + target_label: service + metrics_path: /metrics + scrape_interval: 15s + + # MCP Server Metrics + - job_name: 'mcp-server' + docker_sd_configs: + - host: unix:///var/run/docker.sock + port: 3001 + relabel_configs: + - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name] + regex: bzzz-v2_mcp-server + action: keep + - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name] + target_label: service + metrics_path: /metrics + scrape_interval: 30s + + # OpenAI Proxy Metrics + - job_name: 'openai-proxy' + docker_sd_configs: + - host: unix:///var/run/docker.sock + port: 3002 + relabel_configs: + - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name] + regex: bzzz-v2_openai-proxy + action: keep + - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name] + target_label: service + metrics_path: /metrics + scrape_interval: 30s + + # Content Resolver Metrics + - job_name: 'content-resolver' + docker_sd_configs: + - host: unix:///var/run/docker.sock + port: 3003 + relabel_configs: + - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name] + regex: bzzz-v2_content-resolver + action: keep + - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name] + target_label: service + metrics_path: /metrics + scrape_interval: 30s + + # DHT Bootstrap Nodes + - job_name: 'dht-bootstrap' + static_configs: + - targets: + - 'walnut:9101' + - 'ironwood:9102' + - 'acacia:9103' + labels: + service: 'dht-bootstrap' + metrics_path: /metrics + scrape_interval: 15s + + # P2P Network Metrics + - job_name: 'bzzz-p2p-exporter' + static_configs: + - targets: ['bzzz-p2p-exporter:9200'] + metrics_path: /metrics + scrape_interval: 30s + + # DHT Network Monitoring + - job_name: 'dht-monitor' + static_configs: + - targets: ['dht-monitor:9201'] + metrics_path: /metrics + scrape_interval: 60s + + # Content Store Monitoring + - job_name: 'content-monitor' + static_configs: + - targets: ['content-monitor:9202'] + metrics_path: /metrics + scrape_interval: 300s # 5 minutes for storage checks + + # OpenAI Cost Monitoring + - job_name: 'openai-cost-monitor' + static_configs: + - targets: ['openai-cost-monitor:9203'] + metrics_path: /metrics + scrape_interval: 60s + + # Database Metrics (PostgreSQL) + - job_name: 'postgres' + docker_sd_configs: + - host: unix:///var/run/docker.sock + port: 5432 + relabel_configs: + - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name] + regex: bzzz-v2_postgres + action: keep + - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name] + target_label: service + metrics_path: /metrics + scrape_interval: 30s + params: + dbname: [bzzz_v2] + + # Cache Metrics (Redis) + - job_name: 'redis' + docker_sd_configs: + - host: unix:///var/run/docker.sock + port: 6379 + relabel_configs: + - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name] + regex: bzzz-v2_redis + action: keep + - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name] + target_label: service + metrics_path: /metrics + scrape_interval: 30s + + # Traefik Load Balancer Metrics + - job_name: 'traefik' + static_configs: + - targets: ['traefik:8080'] + metrics_path: /metrics + scrape_interval: 30s + + # Conversation Management Metrics + - job_name: 'conversation-manager' + docker_sd_configs: + - host: unix:///var/run/docker.sock + port: 8090 + relabel_configs: + - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name] + regex: bzzz-v2_conversation-manager + action: keep + - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name] + target_label: service + metrics_path: /metrics + scrape_interval: 30s + + # External Service Monitoring (Webhook endpoints) + - job_name: 'external-health' + static_configs: + - targets: + - 'bzzz.deepblack.cloud' + - 'mcp.deepblack.cloud' + - 'resolve.deepblack.cloud' + - 'openai.deepblack.cloud' + metrics_path: /health + scrape_interval: 60s + scrape_timeout: 10s + +# Remote write configuration for long-term storage (optional) +# remote_write: +# - url: "https://prometheus-remote-write.example.com/api/v1/write" +# basic_auth: +# username: "bzzz-cluster" +# password_file: "/etc/prometheus/remote-write-password" \ No newline at end of file diff --git a/infrastructure/monitoring/docker-compose.monitoring.yml b/infrastructure/monitoring/docker-compose.monitoring.yml new file mode 100644 index 00000000..d209a3c2 --- /dev/null +++ b/infrastructure/monitoring/docker-compose.monitoring.yml @@ -0,0 +1,372 @@ +version: '3.8' + +services: + # Prometheus for metrics collection + prometheus: + image: prom/prometheus:v2.48.0 + networks: + - tengig + - monitoring + ports: + - "9090:9090" + volumes: + - /rust/bzzz-v2/config/prometheus:/etc/prometheus:ro + - /rust/bzzz-v2/data/prometheus:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--storage.tsdb.retention.time=30d' + - '--storage.tsdb.retention.size=50GB' + - '--web.console.libraries=/etc/prometheus/console_libraries' + - '--web.console.templates=/etc/prometheus/consoles' + - '--web.enable-lifecycle' + - '--web.external-url=https://prometheus.deepblack.cloud' + configs: + - source: prometheus_config + target: /etc/prometheus/prometheus.yml + - source: prometheus_rules + target: /etc/prometheus/rules.yml + deploy: + replicas: 1 + placement: + constraints: + - node.hostname == walnut + resources: + limits: + memory: 4G + cpus: '2.0' + reservations: + memory: 2G + cpus: '1.0' + labels: + - "traefik.enable=true" + - "traefik.http.routers.prometheus.rule=Host(`prometheus.deepblack.cloud`)" + - "traefik.http.services.prometheus.loadbalancer.server.port=9090" + - "traefik.http.routers.prometheus.tls=true" + + # Grafana for visualization + grafana: + image: grafana/grafana:10.2.0 + networks: + - tengig + - monitoring + ports: + - "3000:3000" + environment: + - GF_SECURITY_ADMIN_PASSWORD__FILE=/run/secrets/grafana_admin_password + - GF_SERVER_ROOT_URL=https://grafana.deepblack.cloud + - GF_SERVER_DOMAIN=grafana.deepblack.cloud + - GF_ANALYTICS_REPORTING_ENABLED=false + - GF_ANALYTICS_CHECK_FOR_UPDATES=false + - GF_USERS_ALLOW_SIGN_UP=false + - GF_INSTALL_PLUGINS=grafana-piechart-panel,grafana-worldmap-panel + volumes: + - /rust/bzzz-v2/data/grafana:/var/lib/grafana + - /rust/bzzz-v2/config/grafana/provisioning:/etc/grafana/provisioning:ro + secrets: + - grafana_admin_password + deploy: + replicas: 1 + placement: + constraints: + - node.hostname == walnut + resources: + limits: + memory: 2G + cpus: '1.0' + reservations: + memory: 1G + cpus: '0.5' + labels: + - "traefik.enable=true" + - "traefik.http.routers.grafana.rule=Host(`grafana.deepblack.cloud`)" + - "traefik.http.services.grafana.loadbalancer.server.port=3000" + - "traefik.http.routers.grafana.tls=true" + + # AlertManager for alerting + alertmanager: + image: prom/alertmanager:v0.26.0 + networks: + - tengig + - monitoring + ports: + - "9093:9093" + volumes: + - /rust/bzzz-v2/data/alertmanager:/alertmanager + command: + - '--config.file=/etc/alertmanager/config.yml' + - '--storage.path=/alertmanager' + - '--web.external-url=https://alerts.deepblack.cloud' + configs: + - source: alertmanager_config + target: /etc/alertmanager/config.yml + deploy: + replicas: 1 + placement: + constraints: + - node.hostname == ironwood + resources: + limits: + memory: 1G + cpus: '0.5' + reservations: + memory: 512M + cpus: '0.25' + labels: + - "traefik.enable=true" + - "traefik.http.routers.alertmanager.rule=Host(`alerts.deepblack.cloud`)" + - "traefik.http.services.alertmanager.loadbalancer.server.port=9093" + - "traefik.http.routers.alertmanager.tls=true" + + # Node Exporter for system metrics + node-exporter: + image: prom/node-exporter:v1.6.1 + networks: + - monitoring + ports: + - "9100:9100" + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + - /etc/hostname:/etc/nodename:ro + command: + - '--path.procfs=/host/proc' + - '--path.rootfs=/rootfs' + - '--path.sysfs=/host/sys' + - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)' + - '--collector.textfile.directory=/var/lib/node_exporter/textfile_collector' + deploy: + mode: global + resources: + limits: + memory: 256M + cpus: '0.5' + reservations: + memory: 128M + cpus: '0.25' + + # cAdvisor for container metrics + cadvisor: + image: gcr.io/cadvisor/cadvisor:v0.47.0 + networks: + - monitoring + ports: + - "8080:8080" + volumes: + - /:/rootfs:ro + - /var/run:/var/run:rw + - /sys:/sys:ro + - /var/lib/docker:/var/lib/docker:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + command: + - '--housekeeping_interval=10s' + - '--docker_only=true' + - '--disable_metrics=percpu,process,sched,tcp,udp,disk,diskIO,accelerator,hugetlb,referenced_memory,cpu_topology,resctrl' + deploy: + mode: global + resources: + limits: + memory: 512M + cpus: '0.5' + reservations: + memory: 256M + cpus: '0.25' + + # BZZZ P2P Metrics Exporter + bzzz-p2p-exporter: + image: registry.home.deepblack.cloud/bzzz/p2p-exporter:v2.0.0 + networks: + - monitoring + - bzzz-internal + ports: + - "9200:9200" + environment: + - BZZZ_AGENT_ENDPOINTS=http://bzzz-v2_bzzz-agent:9000 + - DHT_BOOTSTRAP_NODES=walnut:9101,ironwood:9102,acacia:9103 + - METRICS_PORT=9200 + - SCRAPE_INTERVAL=30s + deploy: + replicas: 1 + placement: + constraints: + - node.hostname == acacia + resources: + limits: + memory: 512M + cpus: '0.5' + + # DHT Network Monitor + dht-monitor: + image: registry.home.deepblack.cloud/bzzz/dht-monitor:v2.0.0 + networks: + - monitoring + - bzzz-internal + ports: + - "9201:9201" + environment: + - DHT_BOOTSTRAP_NODES=walnut:9101,ironwood:9102,acacia:9103 + - MONITOR_PORT=9201 + - PEER_CHECK_INTERVAL=60s + deploy: + replicas: 1 + resources: + limits: + memory: 256M + cpus: '0.25' + + # Content Store Monitor + content-monitor: + image: registry.home.deepblack.cloud/bzzz/content-monitor:v2.0.0 + networks: + - monitoring + - bzzz-internal + ports: + - "9202:9202" + environment: + - CONTENT_STORE_PATH=/rust/bzzz-v2/data/blobs + - MONITOR_PORT=9202 + - CHECK_INTERVAL=300s + volumes: + - /rust/bzzz-v2/data/blobs:/data/blobs:ro + deploy: + replicas: 1 + resources: + limits: + memory: 256M + cpus: '0.25' + + # OpenAI Cost Monitor + openai-cost-monitor: + image: registry.home.deepblack.cloud/bzzz/openai-cost-monitor:v2.0.0 + networks: + - monitoring + - bzzz-internal + ports: + - "9203:9203" + environment: + - POSTGRES_HOST=bzzz-v2_postgres + - POSTGRES_DB=bzzz_v2 + - POSTGRES_USER=bzzz + - MONITOR_PORT=9203 + - COST_ALERT_THRESHOLD=100.00 + secrets: + - postgres_password + deploy: + replicas: 1 + resources: + limits: + memory: 256M + cpus: '0.25' + + # Log aggregation with Loki + loki: + image: grafana/loki:2.9.0 + networks: + - monitoring + ports: + - "3100:3100" + volumes: + - /rust/bzzz-v2/data/loki:/loki + command: -config.file=/etc/loki/local-config.yaml + configs: + - source: loki_config + target: /etc/loki/local-config.yaml + deploy: + replicas: 1 + placement: + constraints: + - node.hostname == acacia + resources: + limits: + memory: 2G + cpus: '1.0' + reservations: + memory: 1G + cpus: '0.5' + + # Promtail for log shipping + promtail: + image: grafana/promtail:2.9.0 + networks: + - monitoring + volumes: + - /var/log:/var/log:ro + - /var/lib/docker/containers:/var/lib/docker/containers:ro + - /rust/bzzz-v2/logs:/app/logs:ro + command: -config.file=/etc/promtail/config.yml + configs: + - source: promtail_config + target: /etc/promtail/config.yml + deploy: + mode: global + resources: + limits: + memory: 256M + cpus: '0.25' + + # Jaeger for distributed tracing + jaeger: + image: jaegertracing/all-in-one:1.49 + networks: + - tengig + - monitoring + ports: + - "16686:16686" + - "14268:14268" + environment: + - COLLECTOR_OTLP_ENABLED=true + - SPAN_STORAGE_TYPE=badger + - BADGER_EPHEMERAL=false + - BADGER_DIRECTORY_VALUE=/badger/data + - BADGER_DIRECTORY_KEY=/badger/key + volumes: + - /rust/bzzz-v2/data/jaeger:/badger + deploy: + replicas: 1 + placement: + constraints: + - node.hostname == ironwood + resources: + limits: + memory: 1G + cpus: '0.5' + labels: + - "traefik.enable=true" + - "traefik.http.routers.jaeger.rule=Host(`tracing.deepblack.cloud`)" + - "traefik.http.services.jaeger.loadbalancer.server.port=16686" + - "traefik.http.routers.jaeger.tls=true" + +networks: + tengig: + external: true + monitoring: + driver: overlay + attachable: true + bzzz-internal: + external: true + +secrets: + grafana_admin_password: + external: true + name: bzzz_grafana_admin_password + postgres_password: + external: true + name: bzzz_postgres_password + +configs: + prometheus_config: + external: true + name: bzzz_prometheus_config + prometheus_rules: + external: true + name: bzzz_prometheus_rules + alertmanager_config: + external: true + name: bzzz_alertmanager_config + loki_config: + external: true + name: bzzz_loki_config + promtail_config: + external: true + name: bzzz_promtail_config \ No newline at end of file diff --git a/infrastructure/security/network-policy.yaml b/infrastructure/security/network-policy.yaml new file mode 100644 index 00000000..8ea2e2e0 --- /dev/null +++ b/infrastructure/security/network-policy.yaml @@ -0,0 +1,335 @@ +# Kubernetes Network Policy for BZZZ v2 (if migrating to K8s later) +# Currently using Docker Swarm, but this provides a template for K8s migration + +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: bzzz-v2-network-policy + namespace: bzzz-v2 +spec: + podSelector: {} + policyTypes: + - Ingress + - Egress + + # Default deny all ingress and egress + ingress: [] + egress: [] + +--- +# Allow internal cluster communication +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: bzzz-internal-communication + namespace: bzzz-v2 +spec: + podSelector: + matchLabels: + app: bzzz-agent + policyTypes: + - Ingress + - Egress + ingress: + - from: + - namespaceSelector: + matchLabels: + name: bzzz-v2 + ports: + - protocol: TCP + port: 9000 + - protocol: UDP + port: 9000 + egress: + - to: + - namespaceSelector: + matchLabels: + name: bzzz-v2 + ports: + - protocol: TCP + port: 9000 + - protocol: UDP + port: 9000 + +--- +# DHT Bootstrap Network Policy +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: dht-bootstrap-policy + namespace: bzzz-v2 +spec: + podSelector: + matchLabels: + app: dht-bootstrap + policyTypes: + - Ingress + - Egress + ingress: + - from: + - namespaceSelector: + matchLabels: + name: bzzz-v2 + ports: + - protocol: TCP + port: 9101 + - protocol: TCP + port: 9102 + - protocol: TCP + port: 9103 + egress: + - to: + - namespaceSelector: + matchLabels: + name: bzzz-v2 + ports: + - protocol: TCP + port: 9101 + - protocol: TCP + port: 9102 + - protocol: TCP + port: 9103 + +--- +# MCP Server Network Policy +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: mcp-server-policy + namespace: bzzz-v2 +spec: + podSelector: + matchLabels: + app: mcp-server + policyTypes: + - Ingress + - Egress + ingress: + - from: + - namespaceSelector: + matchLabels: + name: traefik + ports: + - protocol: TCP + port: 3001 + - from: + - podSelector: + matchLabels: + app: bzzz-agent + ports: + - protocol: TCP + port: 3001 + egress: + - to: + - podSelector: + matchLabels: + app: bzzz-agent + ports: + - protocol: TCP + port: 9000 + +--- +# OpenAI Proxy Network Policy +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: openai-proxy-policy + namespace: bzzz-v2 +spec: + podSelector: + matchLabels: + app: openai-proxy + policyTypes: + - Ingress + - Egress + ingress: + - from: + - namespaceSelector: + matchLabels: + name: traefik + ports: + - protocol: TCP + port: 3002 + - from: + - podSelector: + matchLabels: + app: bzzz-agent + ports: + - protocol: TCP + port: 3002 + egress: + # Allow outbound to OpenAI API + - to: [] + ports: + - protocol: TCP + port: 443 + # Allow access to Redis and PostgreSQL + - to: + - podSelector: + matchLabels: + app: redis + ports: + - protocol: TCP + port: 6379 + - to: + - podSelector: + matchLabels: + app: postgres + ports: + - protocol: TCP + port: 5432 + +--- +# Content Resolver Network Policy +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: content-resolver-policy + namespace: bzzz-v2 +spec: + podSelector: + matchLabels: + app: content-resolver + policyTypes: + - Ingress + - Egress + ingress: + - from: + - namespaceSelector: + matchLabels: + name: traefik + ports: + - protocol: TCP + port: 3003 + - from: + - podSelector: + matchLabels: + app: bzzz-agent + ports: + - protocol: TCP + port: 3003 + egress: + - to: + - podSelector: + matchLabels: + app: dht-bootstrap + ports: + - protocol: TCP + port: 9101 + - protocol: TCP + port: 9102 + - protocol: TCP + port: 9103 + +--- +# Database Network Policy +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: postgres-policy + namespace: bzzz-v2 +spec: + podSelector: + matchLabels: + app: postgres + policyTypes: + - Ingress + ingress: + - from: + - podSelector: + matchLabels: + app: bzzz-agent + - podSelector: + matchLabels: + app: openai-proxy + - podSelector: + matchLabels: + app: conversation-manager + - podSelector: + matchLabels: + app: openai-cost-monitor + ports: + - protocol: TCP + port: 5432 + +--- +# Redis Network Policy +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: redis-policy + namespace: bzzz-v2 +spec: + podSelector: + matchLabels: + app: redis + policyTypes: + - Ingress + ingress: + - from: + - podSelector: + matchLabels: + app: bzzz-agent + - podSelector: + matchLabels: + app: openai-proxy + ports: + - protocol: TCP + port: 6379 + +--- +# Monitoring Network Policy +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: monitoring-policy + namespace: bzzz-v2 +spec: + podSelector: + matchLabels: + monitoring: "true" + policyTypes: + - Ingress + - Egress + ingress: + - from: + - namespaceSelector: + matchLabels: + name: monitoring + - namespaceSelector: + matchLabels: + name: traefik + ports: + - protocol: TCP + port: 9090 + - protocol: TCP + port: 3000 + - protocol: TCP + port: 9093 + egress: + # Allow monitoring to scrape all services + - to: + - namespaceSelector: + matchLabels: + name: bzzz-v2 + ports: + - protocol: TCP + port: 9000 + - protocol: TCP + port: 3001 + - protocol: TCP + port: 3002 + - protocol: TCP + port: 3003 + - protocol: TCP + port: 9100 + - protocol: TCP + port: 8080 + - protocol: TCP + port: 9200 + - protocol: TCP + port: 9201 + - protocol: TCP + port: 9202 + - protocol: TCP + port: 9203 \ No newline at end of file diff --git a/infrastructure/security/security-hardening.sh b/infrastructure/security/security-hardening.sh new file mode 100755 index 00000000..95685ee8 --- /dev/null +++ b/infrastructure/security/security-hardening.sh @@ -0,0 +1,675 @@ +#!/bin/bash +# BZZZ v2 Security Hardening Script +# Applies comprehensive security configurations for the cluster + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LOG_FILE="/var/log/bzzz-security-hardening-$(date +%Y%m%d-%H%M%S).log" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +log() { + echo -e "${BLUE}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $1" | tee -a "$LOG_FILE" +} + +error() { + echo -e "${RED}[ERROR]${NC} $1" | tee -a "$LOG_FILE" + exit 1 +} + +warn() { + echo -e "${YELLOW}[WARN]${NC} $1" | tee -a "$LOG_FILE" +} + +success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" | tee -a "$LOG_FILE" +} + +check_root() { + if [[ $EUID -eq 0 ]]; then + error "This script should not be run as root. Run as tony user with sudo access." + fi +} + +configure_firewall() { + log "Configuring UFW firewall for BZZZ v2..." + + # Enable UFW if not enabled + sudo ufw --force enable + + # Default policies + sudo ufw default deny incoming + sudo ufw default allow outgoing + + # SSH access + sudo ufw allow ssh + + # Docker Swarm ports (internal cluster only) + sudo ufw allow from 192.168.1.0/24 to any port 2376 proto tcp comment "Docker daemon TLS" + sudo ufw allow from 192.168.1.0/24 to any port 2377 proto tcp comment "Docker Swarm management" + sudo ufw allow from 192.168.1.0/24 to any port 7946 proto tcp comment "Docker Swarm node communication" + sudo ufw allow from 192.168.1.0/24 to any port 7946 proto udp comment "Docker Swarm node communication" + sudo ufw allow from 192.168.1.0/24 to any port 4789 proto udp comment "Docker Swarm overlay networks" + + # BZZZ v2 P2P ports (internal cluster only) + sudo ufw allow from 192.168.1.0/24 to any port 9000:9300 proto tcp comment "BZZZ v2 P2P" + sudo ufw allow from 192.168.1.0/24 to any port 9000:9300 proto udp comment "BZZZ v2 P2P" + + # DHT bootstrap ports + sudo ufw allow from 192.168.1.0/24 to any port 9101:9103 proto tcp comment "BZZZ DHT Bootstrap" + + # mDNS discovery (local network only) + sudo ufw allow from 192.168.1.0/24 to any port 5353 proto udp comment "mDNS discovery" + + # HTTP/HTTPS through Traefik (external access) + sudo ufw allow 80/tcp comment "HTTP" + sudo ufw allow 443/tcp comment "HTTPS" + + # Internal service ports (cluster only) + sudo ufw allow from 192.168.1.0/24 to any port 3000:3100 proto tcp comment "BZZZ v2 services" + sudo ufw allow from 192.168.1.0/24 to any port 5432 proto tcp comment "PostgreSQL" + sudo ufw allow from 192.168.1.0/24 to any port 6379 proto tcp comment "Redis" + + # Monitoring ports (cluster only) + sudo ufw allow from 192.168.1.0/24 to any port 9090:9203 proto tcp comment "Monitoring" + + # Rate limiting rules + sudo ufw limit ssh comment "Rate limit SSH" + + # Log denied connections + sudo ufw logging on + + success "Firewall configured successfully" +} + +configure_docker_security() { + log "Configuring Docker security..." + + # Create Docker daemon configuration + sudo mkdir -p /etc/docker + + cat << 'EOF' | sudo tee /etc/docker/daemon.json > /dev/null +{ + "log-driver": "json-file", + "log-opts": { + "max-size": "100m", + "max-file": "3" + }, + "live-restore": true, + "userland-proxy": false, + "icc": false, + "userns-remap": "default", + "no-new-privileges": true, + "seccomp-profile": "/etc/docker/seccomp-default.json", + "apparmor-profile": "docker-default", + "storage-driver": "overlay2", + "storage-opts": [ + "overlay2.override_kernel_check=true" + ], + "default-ulimits": { + "nofile": { + "Name": "nofile", + "Hard": 65536, + "Soft": 65536 + } + }, + "registry-mirrors": ["https://registry.home.deepblack.cloud"], + "insecure-registries": ["registry.home.deepblack.cloud:5000"], + "features": { + "buildkit": true + } +} +EOF + + # Create custom seccomp profile + cat << 'EOF' | sudo tee /etc/docker/seccomp-default.json > /dev/null +{ + "defaultAction": "SCMP_ACT_ERRNO", + "architectures": [ + "SCMP_ARCH_X86_64", + "SCMP_ARCH_X86", + "SCMP_ARCH_X32" + ], + "syscalls": [ + { + "names": [ + "accept", + "access", + "arch_prctl", + "bind", + "brk", + "chdir", + "chmod", + "chown", + "clone", + "close", + "connect", + "dup", + "dup2", + "epoll_create", + "epoll_ctl", + "epoll_wait", + "execve", + "exit", + "exit_group", + "fcntl", + "fstat", + "futex", + "getcwd", + "getdents", + "getgid", + "getpid", + "getppid", + "gettid", + "getuid", + "listen", + "lstat", + "mmap", + "mprotect", + "munmap", + "nanosleep", + "open", + "openat", + "pipe", + "poll", + "prctl", + "read", + "readlink", + "recv", + "recvfrom", + "rt_sigaction", + "rt_sigprocmask", + "rt_sigreturn", + "sched_yield", + "send", + "sendto", + "set_robust_list", + "setsockopt", + "socket", + "stat", + "write" + ], + "action": "SCMP_ACT_ALLOW" + } + ] +} +EOF + + # Restart Docker to apply changes + sudo systemctl daemon-reload + sudo systemctl restart docker + + success "Docker security configuration applied" +} + +setup_tls_certificates() { + log "Setting up TLS certificates..." + + # Create certificates directory + mkdir -p /rust/bzzz-v2/config/tls/{ca,server,client} + + # Generate CA key and certificate + if [[ ! -f /rust/bzzz-v2/config/tls/ca/ca-key.pem ]]; then + openssl genrsa -out /rust/bzzz-v2/config/tls/ca/ca-key.pem 4096 + openssl req -new -x509 -days 3650 -key /rust/bzzz-v2/config/tls/ca/ca-key.pem \ + -out /rust/bzzz-v2/config/tls/ca/ca.pem \ + -subj "/C=US/ST=Cloud/L=DeepBlack/O=BZZZ/CN=bzzz-ca" + + log "Generated new CA certificate" + fi + + # Generate server certificates for each node + local nodes=("walnut" "ironwood" "acacia") + for node in "${nodes[@]}"; do + if [[ ! -f "/rust/bzzz-v2/config/tls/server/${node}-key.pem" ]]; then + # Generate server key + openssl genrsa -out "/rust/bzzz-v2/config/tls/server/${node}-key.pem" 4096 + + # Generate server certificate request + openssl req -new -key "/rust/bzzz-v2/config/tls/server/${node}-key.pem" \ + -out "/rust/bzzz-v2/config/tls/server/${node}.csr" \ + -subj "/C=US/ST=Cloud/L=DeepBlack/O=BZZZ/CN=${node}.deepblack.cloud" + + # Create extensions file + cat > "/rust/bzzz-v2/config/tls/server/${node}-ext.cnf" << EOF +subjectAltName = DNS:${node}.deepblack.cloud,DNS:${node},DNS:localhost,IP:127.0.0.1,IP:192.168.1.27 +extendedKeyUsage = serverAuth,clientAuth +EOF + + # Generate server certificate + openssl x509 -req -days 365 -in "/rust/bzzz-v2/config/tls/server/${node}.csr" \ + -CA /rust/bzzz-v2/config/tls/ca/ca.pem \ + -CAkey /rust/bzzz-v2/config/tls/ca/ca-key.pem \ + -out "/rust/bzzz-v2/config/tls/server/${node}.pem" \ + -extensions v3_req -extfile "/rust/bzzz-v2/config/tls/server/${node}-ext.cnf" \ + -CAcreateserial + + # Clean up CSR and extensions file + rm "/rust/bzzz-v2/config/tls/server/${node}.csr" "/rust/bzzz-v2/config/tls/server/${node}-ext.cnf" + + log "Generated TLS certificate for $node" + fi + done + + # Generate client certificates for inter-service communication + if [[ ! -f /rust/bzzz-v2/config/tls/client/client-key.pem ]]; then + openssl genrsa -out /rust/bzzz-v2/config/tls/client/client-key.pem 4096 + openssl req -new -key /rust/bzzz-v2/config/tls/client/client-key.pem \ + -out /rust/bzzz-v2/config/tls/client/client.csr \ + -subj "/C=US/ST=Cloud/L=DeepBlack/O=BZZZ/CN=bzzz-client" + + openssl x509 -req -days 365 -in /rust/bzzz-v2/config/tls/client/client.csr \ + -CA /rust/bzzz-v2/config/tls/ca/ca.pem \ + -CAkey /rust/bzzz-v2/config/tls/ca/ca-key.pem \ + -out /rust/bzzz-v2/config/tls/client/client.pem \ + -CAcreateserial + + rm /rust/bzzz-v2/config/tls/client/client.csr + + log "Generated client certificate" + fi + + # Set appropriate permissions + chmod -R 600 /rust/bzzz-v2/config/tls + chmod 755 /rust/bzzz-v2/config/tls /rust/bzzz-v2/config/tls/{ca,server,client} + + success "TLS certificates configured" +} + +configure_secrets_management() { + log "Configuring secrets management..." + + # Create secrets directory with restricted permissions + mkdir -p /rust/bzzz-v2/config/secrets + chmod 700 /rust/bzzz-v2/config/secrets + + # Generate random secrets if they don't exist + local secrets=( + "postgres_password" + "redis_password" + "grafana_admin_password" + "prometheus_web_password" + "alertmanager_web_password" + ) + + for secret in "${secrets[@]}"; do + local secret_file="/rust/bzzz-v2/config/secrets/${secret}" + if [[ ! -f "$secret_file" ]]; then + openssl rand -base64 32 > "$secret_file" + chmod 600 "$secret_file" + log "Generated secret: $secret" + fi + done + + # Create Docker secrets + for secret in "${secrets[@]}"; do + local secret_file="/rust/bzzz-v2/config/secrets/${secret}" + if docker secret inspect "bzzz_${secret}" >/dev/null 2>&1; then + log "Docker secret bzzz_${secret} already exists" + else + docker secret create "bzzz_${secret}" "$secret_file" + log "Created Docker secret: bzzz_${secret}" + fi + done + + # Handle OpenAI API key if it exists + local openai_key_file="/home/tony/chorus/business/secrets/openai-api-key" + if [[ -f "$openai_key_file" ]]; then + if ! docker secret inspect bzzz_openai_api_key >/dev/null 2>&1; then + docker secret create bzzz_openai_api_key "$openai_key_file" + log "Created OpenAI API key secret" + fi + else + warn "OpenAI API key not found at $openai_key_file" + fi + + success "Secrets management configured" +} + +setup_network_security() { + log "Setting up network security..." + + # Configure iptables rules for container isolation + cat << 'EOF' | sudo tee /etc/iptables/rules.v4 > /dev/null +*filter +:INPUT ACCEPT [0:0] +:FORWARD DROP [0:0] +:OUTPUT ACCEPT [0:0] +:DOCKER-USER - [0:0] + +# Allow established connections +-A INPUT -m state --state ESTABLISHED,RELATED -j ACCEPT + +# Allow loopback +-A INPUT -i lo -j ACCEPT + +# Allow SSH (with rate limiting) +-A INPUT -p tcp --dport 22 -m state --state NEW -m recent --set +-A INPUT -p tcp --dport 22 -m state --state NEW -m recent --update --seconds 60 --hitcount 4 -j DROP +-A INPUT -p tcp --dport 22 -j ACCEPT + +# Allow HTTP/HTTPS +-A INPUT -p tcp --dport 80 -j ACCEPT +-A INPUT -p tcp --dport 443 -j ACCEPT + +# Allow Docker Swarm (internal network only) +-A INPUT -s 192.168.1.0/24 -p tcp --dport 2376 -j ACCEPT +-A INPUT -s 192.168.1.0/24 -p tcp --dport 2377 -j ACCEPT +-A INPUT -s 192.168.1.0/24 -p tcp --dport 7946 -j ACCEPT +-A INPUT -s 192.168.1.0/24 -p udp --dport 7946 -j ACCEPT +-A INPUT -s 192.168.1.0/24 -p udp --dport 4789 -j ACCEPT + +# Allow BZZZ P2P (internal network only) +-A INPUT -s 192.168.1.0/24 -p tcp --dport 9000:9300 -j ACCEPT +-A INPUT -s 192.168.1.0/24 -p udp --dport 9000:9300 -j ACCEPT + +# Block container-to-host access except for specific services +-A DOCKER-USER -i docker_gwbridge -j ACCEPT +-A DOCKER-USER -i docker0 -j ACCEPT +-A DOCKER-USER -j DROP + +# Drop everything else +-A INPUT -j DROP + +COMMIT +EOF + + # Apply iptables rules + sudo iptables-restore < /etc/iptables/rules.v4 + + # Enable IP forwarding for Docker + echo 'net.ipv4.ip_forward=1' | sudo tee -a /etc/sysctl.conf + echo 'net.ipv6.conf.all.forwarding=1' | sudo tee -a /etc/sysctl.conf + + # Kernel security parameters + cat << 'EOF' | sudo tee -a /etc/sysctl.conf > /dev/null + +# BZZZ v2 Security Parameters +net.ipv4.conf.all.rp_filter=1 +net.ipv4.conf.default.rp_filter=1 +net.ipv4.icmp_echo_ignore_broadcasts=1 +net.ipv4.icmp_ignore_bogus_error_responses=1 +net.ipv4.tcp_syncookies=1 +net.ipv4.conf.all.log_martians=1 +net.ipv4.conf.default.log_martians=1 +net.ipv4.conf.all.accept_source_route=0 +net.ipv4.conf.default.accept_source_route=0 +net.ipv6.conf.all.accept_source_route=0 +net.ipv6.conf.default.accept_source_route=0 +net.ipv4.conf.all.accept_redirects=0 +net.ipv4.conf.default.accept_redirects=0 +net.ipv6.conf.all.accept_redirects=0 +net.ipv6.conf.default.accept_redirects=0 +net.ipv4.conf.all.secure_redirects=0 +net.ipv4.conf.default.secure_redirects=0 +net.ipv4.conf.all.send_redirects=0 +net.ipv4.conf.default.send_redirects=0 + +# Kernel hardening +kernel.dmesg_restrict=1 +kernel.kptr_restrict=2 +kernel.yama.ptrace_scope=1 +fs.suid_dumpable=0 +kernel.core_uses_pid=1 +EOF + + # Apply sysctl settings + sudo sysctl -p + + success "Network security configured" +} + +configure_audit_logging() { + log "Configuring audit logging..." + + # Install auditd if not present + if ! command -v auditctl &> /dev/null; then + sudo apt-get update + sudo apt-get install -y auditd audispd-plugins + fi + + # Configure audit rules + cat << 'EOF' | sudo tee /etc/audit/rules.d/bzzz-v2.rules > /dev/null +# BZZZ v2 Audit Rules + +# Monitor file changes in sensitive directories +-w /etc/docker/ -p wa -k docker-config +-w /rust/bzzz-v2/config/secrets/ -p wa -k bzzz-secrets +-w /rust/bzzz-v2/config/tls/ -p wa -k bzzz-tls +-w /etc/ssl/ -p wa -k ssl-config + +# Monitor process execution +-a always,exit -F arch=b64 -S execve -k process-execution +-a always,exit -F arch=b32 -S execve -k process-execution + +# Monitor network connections +-a always,exit -F arch=b64 -S socket -k network-socket +-a always,exit -F arch=b32 -S socket -k network-socket + +# Monitor file permission changes +-a always,exit -F arch=b64 -S chmod,fchmod,fchmodat -k file-permissions +-a always,exit -F arch=b32 -S chmod,fchmod,fchmodat -k file-permissions + +# Monitor privilege escalation +-w /usr/bin/sudo -p x -k privilege-escalation +-w /bin/su -p x -k privilege-escalation + +# Monitor Docker daemon +-w /var/lib/docker/ -p wa -k docker-data +-w /usr/bin/docker -p x -k docker-exec +-w /usr/bin/dockerd -p x -k docker-daemon + +# Make rules immutable +-e 2 +EOF + + # Restart auditd to apply rules + sudo systemctl restart auditd + + # Configure log rotation for audit logs + cat << 'EOF' | sudo tee /etc/logrotate.d/bzzz-audit > /dev/null +/var/log/audit/*.log { + daily + rotate 30 + compress + delaycompress + missingok + notifempty + create 640 root adm + postrotate + /sbin/service auditd restart > /dev/null 2>&1 || true + endscript +} +EOF + + success "Audit logging configured" +} + +setup_intrusion_detection() { + log "Setting up intrusion detection..." + + # Install fail2ban if not present + if ! command -v fail2ban-server &> /dev/null; then + sudo apt-get update + sudo apt-get install -y fail2ban + fi + + # Configure fail2ban for BZZZ v2 + cat << 'EOF' | sudo tee /etc/fail2ban/jail.d/bzzz-v2.conf > /dev/null +[DEFAULT] +bantime = 3600 +findtime = 600 +maxretry = 5 +backend = systemd + +[sshd] +enabled = true +port = ssh +filter = sshd +logpath = /var/log/auth.log +maxretry = 3 +bantime = 7200 + +[docker-auth] +enabled = true +port = 2376 +filter = docker-auth +logpath = /var/log/audit/audit.log +maxretry = 3 +bantime = 3600 + +[bzzz-p2p] +enabled = true +port = 9000:9300 +filter = bzzz-p2p +logpath = /rust/bzzz-v2/logs/application/bzzz-agent.log +maxretry = 10 +bantime = 1800 + +[traefik-auth] +enabled = true +port = http,https +filter = traefik-auth +logpath = /var/log/traefik/access.log +maxretry = 5 +bantime = 3600 +EOF + + # Create custom fail2ban filters + cat << 'EOF' | sudo tee /etc/fail2ban/filter.d/docker-auth.conf > /dev/null +[Definition] +failregex = ^.*type=SYSCALL.*comm="dockerd".*res=failed.*$ +ignoreregex = +EOF + + cat << 'EOF' | sudo tee /etc/fail2ban/filter.d/bzzz-p2p.conf > /dev/null +[Definition] +failregex = ^.*level=error.*msg="unauthorized connection attempt".*peer=.*$ + ^.*level=warn.*msg="rate limit exceeded".*source=.*$ +ignoreregex = +EOF + + cat << 'EOF' | sudo tee /etc/fail2ban/filter.d/traefik-auth.conf > /dev/null +[Definition] +failregex = ^.*"(GET|POST|PUT|DELETE).*" (401|403) .*$ +ignoreregex = +EOF + + # Start and enable fail2ban + sudo systemctl enable fail2ban + sudo systemctl start fail2ban + + success "Intrusion detection configured" +} + +configure_container_security() { + log "Configuring container security policies..." + + # Create AppArmor profile for BZZZ containers + cat << 'EOF' | sudo tee /etc/apparmor.d/bzzz-container > /dev/null +#include + +profile bzzz-container flags=(attach_disconnected,mediate_deleted) { + #include + + capability, + file, + network, + + deny @{PROC}/* w, + deny @{PROC}/sys/fs/** w, + deny @{PROC}/sysrq-trigger rwklx, + deny @{PROC}/mem rwklx, + deny @{PROC}/kmem rwklx, + deny @{PROC}/sys/kernel/[^s][^h][^m]* w, + deny mount, + deny /sys/[^f]** wklx, + deny /sys/f[^s]** wklx, + deny /sys/fs/[^c]** wklx, + deny /sys/fs/c[^g]** wklx, + deny /sys/fs/cg[^r]** wklx, + deny /sys/firmware/** rwklx, + deny /sys/kernel/security/** rwklx, + + # Allow access to application directories + /app/** r, + /app/bzzz rix, + /data/** rw, + /config/** r, + + # Allow temporary files + /tmp/** rw, + + # Network access + network inet, + network inet6, + network unix, +} +EOF + + # Load AppArmor profile + sudo apparmor_parser -r /etc/apparmor.d/bzzz-container + + # Create seccomp profile for BZZZ containers + mkdir -p /rust/bzzz-v2/config/security + cat << 'EOF' > /rust/bzzz-v2/config/security/bzzz-seccomp.json +{ + "defaultAction": "SCMP_ACT_ERRNO", + "architectures": [ + "SCMP_ARCH_X86_64", + "SCMP_ARCH_X86", + "SCMP_ARCH_X32" + ], + "syscalls": [ + { + "names": [ + "accept", "access", "arch_prctl", "bind", "brk", + "chdir", "chmod", "chown", "clone", "close", + "connect", "dup", "dup2", "epoll_create", "epoll_ctl", + "epoll_wait", "execve", "exit", "exit_group", "fcntl", + "fstat", "futex", "getcwd", "getdents", "getgid", + "getpid", "getppid", "gettid", "getuid", "listen", + "lstat", "mmap", "mprotect", "munmap", "nanosleep", + "open", "openat", "pipe", "poll", "prctl", + "read", "readlink", "recv", "recvfrom", "rt_sigaction", + "rt_sigprocmask", "rt_sigreturn", "sched_yield", "send", + "sendto", "set_robust_list", "setsockopt", "socket", + "stat", "write" + ], + "action": "SCMP_ACT_ALLOW" + } + ] +} +EOF + + success "Container security policies configured" +} + +main() { + log "Starting BZZZ v2 security hardening..." + + check_root + configure_firewall + configure_docker_security + setup_tls_certificates + configure_secrets_management + setup_network_security + configure_audit_logging + setup_intrusion_detection + configure_container_security + + success "BZZZ v2 security hardening completed successfully!" + log "Security configuration saved to: $LOG_FILE" + log "Review firewall rules: sudo ufw status verbose" + log "Check fail2ban status: sudo fail2ban-client status" + log "Verify audit rules: sudo auditctl -l" +} + +# Execute main function +main "$@" \ No newline at end of file diff --git a/logging/hypercore.go b/logging/hypercore.go index b3f93e7b..17a86cfb 100644 --- a/logging/hypercore.go +++ b/logging/hypercore.go @@ -47,7 +47,7 @@ const ( TaskCompleted LogType = "task_completed" TaskFailed LogType = "task_failed" - // Antennae meta-discussion logs + // HMMM meta-discussion logs PlanProposed LogType = "plan_proposed" ObjectionRaised LogType = "objection_raised" Collaboration LogType = "collaboration" diff --git a/main.go b/main.go index 0712f7eb..c34e6200 100644 --- a/main.go +++ b/main.go @@ -59,7 +59,7 @@ func main() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - fmt.Println("πŸš€ Starting Bzzz + Antennae P2P Task Coordination System...") + fmt.Println("πŸš€ Starting Bzzz + HMMM P2P Task Coordination System...") // Load configuration cfg, err := config.LoadConfig("") @@ -129,7 +129,7 @@ func main() { defer mdnsDiscovery.Close() // Initialize PubSub with hypercore logging - ps, err := pubsub.NewPubSubWithLogger(ctx, node.Host(), "bzzz/coordination/v1", "antennae/meta-discussion/v1", hlog) + ps, err := pubsub.NewPubSubWithLogger(ctx, node.Host(), "bzzz/coordination/v1", "hmmm/meta-discussion/v1", hlog) if err != nil { log.Fatalf("Failed to create PubSub: %v", err) } @@ -198,7 +198,7 @@ func main() { fmt.Printf("πŸ” Listening for peers on local network...\n") fmt.Printf("πŸ“‘ Ready for task coordination and meta-discussion\n") - fmt.Printf("🎯 Antennae collaborative reasoning enabled\n") + fmt.Printf("🎯 HMMM collaborative reasoning enabled\n") // Handle graceful shutdown c := make(chan os.Signal, 1) diff --git a/mcp-server/package.json b/mcp-server/package.json new file mode 100644 index 00000000..ba74c318 --- /dev/null +++ b/mcp-server/package.json @@ -0,0 +1,54 @@ +{ + "name": "@bzzz/mcp-server", + "version": "1.0.0", + "description": "Model Context Protocol server for BZZZ v2 GPT-4 agent integration", + "main": "dist/index.js", + "types": "dist/index.d.ts", + "scripts": { + "build": "tsc", + "start": "node dist/index.js", + "dev": "ts-node src/index.ts", + "test": "jest", + "lint": "eslint src/**/*.ts", + "format": "prettier --write src/**/*.ts" + }, + "keywords": [ + "mcp", + "bzzz", + "gpt-4", + "p2p", + "distributed", + "ai-agents" + ], + "author": "BZZZ Development Team", + "license": "MIT", + "dependencies": { + "@modelcontextprotocol/sdk": "^0.5.0", + "@types/node": "^20.0.0", + "axios": "^1.6.0", + "express": "^4.18.0", + "openai": "^4.28.0", + "ws": "^8.16.0", + "zod": "^3.22.0", + "winston": "^3.11.0", + "crypto": "^1.0.1", + "uuid": "^9.0.0" + }, + "devDependencies": { + "@types/express": "^4.17.0", + "@types/jest": "^29.5.0", + "@types/ws": "^8.5.0", + "@types/uuid": "^9.0.0", + "@typescript-eslint/eslint-plugin": "^6.0.0", + "@typescript-eslint/parser": "^6.0.0", + "eslint": "^8.56.0", + "jest": "^29.7.0", + "prettier": "^3.1.0", + "ts-jest": "^29.1.0", + "ts-node": "^10.9.0", + "typescript": "^5.3.0" + }, + "engines": { + "node": ">=18.0.0" + } +} \ No newline at end of file diff --git a/mcp-server/src/config/config.ts b/mcp-server/src/config/config.ts new file mode 100644 index 00000000..17892838 --- /dev/null +++ b/mcp-server/src/config/config.ts @@ -0,0 +1,303 @@ +import { readFileSync } from 'fs'; +import path from 'path'; + +export interface BzzzMcpConfig { + openai: { + apiKey: string; + defaultModel: string; + maxTokens: number; + temperature: number; + }; + bzzz: { + nodeUrl: string; + networkId: string; + pubsubTopics: string[]; + }; + cost: { + dailyLimit: number; + monthlyLimit: number; + warningThreshold: number; + }; + conversation: { + maxActiveThreads: number; + defaultTimeout: number; + escalationRules: EscalationRule[]; + }; + agents: { + maxAgents: number; + defaultRoles: AgentRoleConfig[]; + }; + logging: { + level: string; + file?: string; + }; +} + +export interface EscalationRule { + name: string; + conditions: EscalationCondition[]; + actions: EscalationAction[]; + priority: number; +} + +export interface EscalationCondition { + type: 'thread_duration' | 'no_progress' | 'disagreement_count' | 'error_rate'; + threshold: number | boolean; + timeframe?: number; // seconds +} + +export interface EscalationAction { + type: 'notify_human' | 'request_expert' | 'escalate_to_architect' | 'create_decision_thread'; + target?: string; + priority?: string; + participants?: string[]; +} + +export interface AgentRoleConfig { + role: string; + capabilities: string[]; + systemPrompt: string; + interactionPatterns: Record; + specialization: string; +} + +export class Config { + private static instance: Config; + private config: BzzzMcpConfig; + + private constructor() { + this.config = this.loadConfig(); + } + + public static getInstance(): Config { + if (!Config.instance) { + Config.instance = new Config(); + } + return Config.instance; + } + + public get openai() { + return this.config.openai; + } + + public get bzzz() { + return this.config.bzzz; + } + + public get cost() { + return this.config.cost; + } + + public get conversation() { + return this.config.conversation; + } + + public get agents() { + return this.config.agents; + } + + public get logging() { + return this.config.logging; + } + + private loadConfig(): BzzzMcpConfig { + // Load OpenAI API key from BZZZ secrets + const openaiKeyPath = path.join( + process.env.HOME || '/home/tony', + 'chorus/business/secrets/openai-api-key-for-bzzz.txt' + ); + + let openaiKey = process.env.OPENAI_API_KEY || ''; + try { + openaiKey = readFileSync(openaiKeyPath, 'utf8').trim(); + } catch (error) { + console.warn(`Failed to load OpenAI key from ${openaiKeyPath}:`, error); + } + + const defaultConfig: BzzzMcpConfig = { + openai: { + apiKey: openaiKey, + defaultModel: process.env.OPENAI_MODEL || 'gpt-4', + maxTokens: parseInt(process.env.OPENAI_MAX_TOKENS || '4000'), + temperature: parseFloat(process.env.OPENAI_TEMPERATURE || '0.7'), + }, + bzzz: { + nodeUrl: process.env.BZZZ_NODE_URL || 'http://localhost:8080', + networkId: process.env.BZZZ_NETWORK_ID || 'bzzz-local', + pubsubTopics: [ + 'bzzz/coordination/v1', + 'hmmm/meta-discussion/v1', + 'bzzz/context-feedback/v1' + ], + }, + cost: { + dailyLimit: parseFloat(process.env.DAILY_COST_LIMIT || '100.0'), + monthlyLimit: parseFloat(process.env.MONTHLY_COST_LIMIT || '1000.0'), + warningThreshold: parseFloat(process.env.COST_WARNING_THRESHOLD || '0.8'), + }, + conversation: { + maxActiveThreads: parseInt(process.env.MAX_ACTIVE_THREADS || '10'), + defaultTimeout: parseInt(process.env.THREAD_TIMEOUT || '3600'), // 1 hour + escalationRules: this.getDefaultEscalationRules(), + }, + agents: { + maxAgents: parseInt(process.env.MAX_AGENTS || '5'), + defaultRoles: this.getDefaultAgentRoles(), + }, + logging: { + level: process.env.LOG_LEVEL || 'info', + file: process.env.LOG_FILE, + }, + }; + + return defaultConfig; + } + + private getDefaultEscalationRules(): EscalationRule[] { + return [ + { + name: 'Long Running Thread', + priority: 1, + conditions: [ + { + type: 'thread_duration', + threshold: 7200, // 2 hours + timeframe: 0, + }, + { + type: 'no_progress', + threshold: true, + timeframe: 1800, // 30 minutes + }, + ], + actions: [ + { + type: 'notify_human', + target: 'project_manager', + priority: 'medium', + }, + { + type: 'request_expert', + }, + ], + }, + { + name: 'Consensus Failure', + priority: 2, + conditions: [ + { + type: 'disagreement_count', + threshold: 3, + timeframe: 0, + }, + { + type: 'thread_duration', + threshold: 3600, // 1 hour + timeframe: 0, + }, + ], + actions: [ + { + type: 'escalate_to_architect', + priority: 'high', + }, + { + type: 'create_decision_thread', + participants: ['senior_architect'], + }, + ], + }, + ]; + } + + private getDefaultAgentRoles(): AgentRoleConfig[] { + return [ + { + role: 'architect', + specialization: 'system_design', + capabilities: [ + 'system_design', + 'architecture_review', + 'technology_selection', + 'scalability_analysis', + ], + systemPrompt: `You are a senior software architect specializing in distributed systems and P2P networks. +Your role is to provide technical guidance, review system designs, and ensure architectural consistency. +You work collaboratively with other agents and can coordinate multi-agent discussions. + +Available BZZZ tools allow you to: +- Announce your presence and capabilities +- Discover and communicate with other agents +- Participate in threaded conversations +- Post messages and updates to the P2P network +- Subscribe to relevant events and notifications + +Always consider: +- System scalability and performance +- Security implications +- Maintainability and code quality +- Integration with existing CHORUS infrastructure`, + interactionPatterns: { + 'peer_architects': 'collaborative_review', + 'developers': 'guidance_provision', + 'reviewers': 'design_validation', + }, + }, + { + role: 'reviewer', + specialization: 'code_quality', + capabilities: [ + 'code_review', + 'security_analysis', + 'performance_optimization', + 'best_practices_enforcement', + ], + systemPrompt: `You are a senior code reviewer focused on maintaining high code quality and security standards. +Your role is to review code changes, identify potential issues, and provide constructive feedback. +You collaborate with developers and architects to ensure code meets quality standards. + +When reviewing code, evaluate: +- Code correctness and logic +- Security vulnerabilities +- Performance implications +- Adherence to best practices +- Test coverage and quality +- Documentation completeness + +Provide specific, actionable feedback and suggest improvements where needed.`, + interactionPatterns: { + 'architects': 'design_consultation', + 'developers': 'feedback_provision', + 'other_reviewers': 'peer_review', + }, + }, + { + role: 'documentation', + specialization: 'technical_writing', + capabilities: [ + 'technical_writing', + 'api_documentation', + 'user_guides', + 'knowledge_synthesis', + ], + systemPrompt: `You specialize in creating clear, comprehensive technical documentation. +Your role is to analyze technical content, identify documentation needs, and create high-quality documentation. +You work with all team members to ensure knowledge is properly captured and shared. + +Focus on: +- Clarity and readability +- Completeness and accuracy +- Appropriate level of detail for the audience +- Proper structure and organization +- Integration with existing documentation + +Consider different audiences: developers, users, administrators, and stakeholders.`, + interactionPatterns: { + 'all_roles': 'information_gathering', + 'architects': 'technical_consultation', + 'developers': 'implementation_clarification', + }, + }, + ]; + } +} \ No newline at end of file diff --git a/mcp-server/src/index.ts b/mcp-server/src/index.ts new file mode 100644 index 00000000..275804b0 --- /dev/null +++ b/mcp-server/src/index.ts @@ -0,0 +1,361 @@ +#!/usr/bin/env node + +/** + * BZZZ MCP Server + * Model Context Protocol server enabling GPT-4 agents to participate in BZZZ P2P network + */ + +import { Server } from "@modelcontextprotocol/sdk/server/index.js"; +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; +import { CallToolRequestSchema, ListToolsRequestSchema } from "@modelcontextprotocol/sdk/types.js"; +import { BzzzProtocolTools } from "./tools/protocol-tools.js"; +import { AgentManager } from "./agents/agent-manager.js"; +import { ConversationManager } from "./conversations/conversation-manager.js"; +import { BzzzP2PConnector } from "./p2p/bzzz-connector.js"; +import { OpenAIIntegration } from "./ai/openai-integration.js"; +import { CostTracker } from "./utils/cost-tracker.js"; +import { Logger } from "./utils/logger.js"; +import { Config } from "./config/config.js"; + +class BzzzMcpServer { + private server: Server; + private protocolTools: BzzzProtocolTools; + private agentManager: AgentManager; + private conversationManager: ConversationManager; + private p2pConnector: BzzzP2PConnector; + private openaiIntegration: OpenAIIntegration; + private costTracker: CostTracker; + private logger: Logger; + + constructor() { + this.logger = new Logger("BzzzMcpServer"); + + // Initialize server + this.server = new Server( + { + name: "bzzz-mcp-server", + version: "1.0.0", + }, + { + capabilities: { + tools: {}, + resources: {}, + }, + } + ); + + // Initialize components + this.initializeComponents(); + this.setupToolHandlers(); + this.setupEventHandlers(); + } + + private initializeComponents(): void { + const config = Config.getInstance(); + + // Initialize OpenAI integration + this.openaiIntegration = new OpenAIIntegration({ + apiKey: config.openai.apiKey, + defaultModel: config.openai.defaultModel, + maxTokens: config.openai.maxTokens, + }); + + // Initialize cost tracking + this.costTracker = new CostTracker({ + dailyLimit: config.cost.dailyLimit, + monthlyLimit: config.cost.monthlyLimit, + warningThreshold: config.cost.warningThreshold, + }); + + // Initialize P2P connector + this.p2pConnector = new BzzzP2PConnector({ + bzzzNodeUrl: config.bzzz.nodeUrl, + networkId: config.bzzz.networkId, + }); + + // Initialize conversation manager + this.conversationManager = new ConversationManager({ + maxActiveThreads: config.conversation.maxActiveThreads, + defaultTimeout: config.conversation.defaultTimeout, + escalationRules: config.conversation.escalationRules, + }); + + // Initialize agent manager + this.agentManager = new AgentManager({ + openaiIntegration: this.openaiIntegration, + costTracker: this.costTracker, + conversationManager: this.conversationManager, + p2pConnector: this.p2pConnector, + }); + + // Initialize protocol tools + this.protocolTools = new BzzzProtocolTools({ + agentManager: this.agentManager, + p2pConnector: this.p2pConnector, + conversationManager: this.conversationManager, + }); + } + + private setupToolHandlers(): void { + // List available tools + this.server.setRequestHandler(ListToolsRequestSchema, async () => { + return { + tools: [ + // Protocol tools + { + name: "bzzz_announce", + description: "Announce agent presence and capabilities on the BZZZ network", + inputSchema: { + type: "object", + properties: { + agent_id: { type: "string", description: "Unique agent identifier" }, + role: { type: "string", description: "Agent role (architect, reviewer, etc.)" }, + capabilities: { + type: "array", + items: { type: "string" }, + description: "List of agent capabilities" + }, + specialization: { type: "string", description: "Agent specialization area" }, + max_tasks: { type: "number", default: 3, description: "Maximum concurrent tasks" }, + }, + required: ["agent_id", "role"], + }, + }, + { + name: "bzzz_lookup", + description: "Discover agents and resources using semantic addressing", + inputSchema: { + type: "object", + properties: { + semantic_address: { + type: "string", + description: "Format: bzzz://agent:role@project:task/path", + }, + filter_criteria: { + type: "object", + properties: { + expertise: { type: "array", items: { type: "string" } }, + availability: { type: "boolean" }, + performance_threshold: { type: "number" }, + }, + }, + }, + required: ["semantic_address"], + }, + }, + { + name: "bzzz_get", + description: "Retrieve content from BZZZ semantic addresses", + inputSchema: { + type: "object", + properties: { + address: { type: "string", description: "BZZZ semantic address" }, + include_metadata: { type: "boolean", default: true }, + max_history: { type: "number", default: 10 }, + }, + required: ["address"], + }, + }, + { + name: "bzzz_post", + description: "Post events or messages to BZZZ addresses", + inputSchema: { + type: "object", + properties: { + target_address: { type: "string", description: "Target BZZZ address" }, + message_type: { type: "string", description: "Type of message" }, + content: { type: "object", description: "Message content" }, + priority: { + type: "string", + enum: ["low", "medium", "high", "urgent"], + default: "medium" + }, + thread_id: { type: "string", description: "Optional conversation thread ID" }, + }, + required: ["target_address", "message_type", "content"], + }, + }, + { + name: "bzzz_thread", + description: "Manage threaded conversations between agents", + inputSchema: { + type: "object", + properties: { + action: { + type: "string", + enum: ["create", "join", "leave", "list", "summarize"], + description: "Thread action to perform" + }, + thread_id: { type: "string", description: "Thread identifier" }, + participants: { + type: "array", + items: { type: "string" }, + description: "List of participant agent IDs" + }, + topic: { type: "string", description: "Thread topic" }, + }, + required: ["action"], + }, + }, + { + name: "bzzz_subscribe", + description: "Subscribe to real-time events from BZZZ network", + inputSchema: { + type: "object", + properties: { + event_types: { + type: "array", + items: { type: "string" }, + description: "Types of events to subscribe to" + }, + filter_address: { type: "string", description: "Optional address filter" }, + callback_webhook: { type: "string", description: "Optional webhook URL" }, + }, + required: ["event_types"], + }, + }, + ], + }; + }); + + // Handle tool calls + this.server.setRequestHandler(CallToolRequestSchema, async (request) => { + const { name, arguments: args } = request.params; + + try { + let result; + + switch (name) { + case "bzzz_announce": + result = await this.protocolTools.handleAnnounce(args); + break; + case "bzzz_lookup": + result = await this.protocolTools.handleLookup(args); + break; + case "bzzz_get": + result = await this.protocolTools.handleGet(args); + break; + case "bzzz_post": + result = await this.protocolTools.handlePost(args); + break; + case "bzzz_thread": + result = await this.protocolTools.handleThread(args); + break; + case "bzzz_subscribe": + result = await this.protocolTools.handleSubscribe(args); + break; + default: + throw new Error(`Unknown tool: ${name}`); + } + + return { + content: [ + { + type: "text" as const, + text: JSON.stringify(result, null, 2), + }, + ], + }; + } catch (error) { + this.logger.error(`Tool execution failed for ${name}:`, error); + return { + content: [ + { + type: "text" as const, + text: `Error: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } + }); + } + + private setupEventHandlers(): void { + // Handle P2P events + this.p2pConnector.on("message", (message) => { + this.logger.debug("P2P message received:", message); + this.conversationManager.handleIncomingMessage(message); + }); + + // Handle conversation events + this.conversationManager.on("escalation", (thread, reason) => { + this.logger.warn(`Thread ${thread.id} escalated: ${reason}`); + this.handleEscalation(thread, reason); + }); + + // Handle cost warnings + this.costTracker.on("warning", (usage) => { + this.logger.warn("Cost warning:", usage); + }); + + this.costTracker.on("limit_exceeded", (usage) => { + this.logger.error("Cost limit exceeded:", usage); + // Implement emergency shutdown or throttling + }); + } + + private async handleEscalation(thread: any, reason: string): Promise { + // Implement human escalation logic + this.logger.info(`Escalating thread ${thread.id} to human: ${reason}`); + + // Could integrate with: + // - Slack notifications + // - Email alerts + // - WHOOSH orchestration system + // - N8N workflows + } + + public async start(): Promise { + // Connect to BZZZ P2P network + await this.p2pConnector.connect(); + this.logger.info("Connected to BZZZ P2P network"); + + // Start conversation manager + await this.conversationManager.start(); + this.logger.info("Conversation manager started"); + + // Start agent manager + await this.agentManager.start(); + this.logger.info("Agent manager started"); + + // Start MCP server + const transport = new StdioServerTransport(); + await this.server.connect(transport); + this.logger.info("BZZZ MCP Server started and listening"); + } + + public async stop(): Promise { + this.logger.info("Shutting down BZZZ MCP Server..."); + + await this.agentManager.stop(); + await this.conversationManager.stop(); + await this.p2pConnector.disconnect(); + + this.logger.info("BZZZ MCP Server stopped"); + } +} + +// Start server if run directly +if (require.main === module) { + const server = new BzzzMcpServer(); + + process.on("SIGINT", async () => { + console.log("Received SIGINT, shutting down gracefully..."); + await server.stop(); + process.exit(0); + }); + + process.on("SIGTERM", async () => { + console.log("Received SIGTERM, shutting down gracefully..."); + await server.stop(); + process.exit(0); + }); + + server.start().catch((error) => { + console.error("Failed to start BZZZ MCP Server:", error); + process.exit(1); + }); +} + +export { BzzzMcpServer }; \ No newline at end of file diff --git a/mcp-server/src/tools/protocol-tools.ts b/mcp-server/src/tools/protocol-tools.ts new file mode 100644 index 00000000..1491c97b --- /dev/null +++ b/mcp-server/src/tools/protocol-tools.ts @@ -0,0 +1,493 @@ +import { Logger } from '../utils/logger.js'; +import { AgentManager } from '../agents/agent-manager.js'; +import { ConversationManager } from '../conversations/conversation-manager.js'; +import { BzzzP2PConnector } from '../p2p/bzzz-connector.js'; + +export interface SemanticAddress { + agent?: string; + role?: string; + project?: string; + task?: string; + path?: string; + raw: string; +} + +export interface ProtocolToolsConfig { + agentManager: AgentManager; + p2pConnector: BzzzP2PConnector; + conversationManager: ConversationManager; +} + +/** + * BzzzProtocolTools implements the core BZZZ protocol operations as MCP tools + */ +export class BzzzProtocolTools { + private logger: Logger; + private agentManager: AgentManager; + private p2pConnector: BzzzP2PConnector; + private conversationManager: ConversationManager; + + constructor(config: ProtocolToolsConfig) { + this.logger = new Logger('BzzzProtocolTools'); + this.agentManager = config.agentManager; + this.p2pConnector = config.p2pConnector; + this.conversationManager = config.conversationManager; + } + + /** + * Handle bzzz_announce - Agent presence announcement + */ + async handleAnnounce(args: Record): Promise { + const { agent_id, role, capabilities, specialization, max_tasks = 3 } = args; + + if (!agent_id || !role) { + throw new Error('agent_id and role are required for announcement'); + } + + this.logger.info(`Announcing agent ${agent_id} with role ${role}`); + + try { + // Create or update agent + const agent = await this.agentManager.createAgent({ + id: agent_id, + role, + capabilities: capabilities || [], + specialization: specialization || role, + maxTasks: max_tasks, + }); + + // Announce to P2P network + const announcement = { + type: 'capability_broadcast', + agent_id, + role, + capabilities: capabilities || [], + specialization: specialization || role, + max_tasks, + timestamp: new Date().toISOString(), + network_address: this.p2pConnector.getNodeId(), + }; + + await this.p2pConnector.publishMessage('bzzz/coordination/v1', announcement); + + return { + success: true, + message: `Agent ${agent_id} (${role}) announced to BZZZ network`, + agent: { + id: agent.id, + role: agent.role, + capabilities: agent.capabilities, + specialization: agent.specialization, + status: agent.status, + }, + }; + } catch (error) { + this.logger.error('Failed to announce agent:', error); + throw new Error(`Announcement failed: ${error instanceof Error ? error.message : String(error)}`); + } + } + + /** + * Handle bzzz_lookup - Semantic address discovery + */ + async handleLookup(args: Record): Promise { + const { semantic_address, filter_criteria = {} } = args; + + if (!semantic_address) { + throw new Error('semantic_address is required for lookup'); + } + + this.logger.info(`Looking up semantic address: ${semantic_address}`); + + try { + // Parse semantic address + const address = this.parseSemanticAddress(semantic_address); + + // Discover matching agents + const agents = await this.discoverAgents(address, filter_criteria); + + // Query P2P network for additional matches + const networkResults = await this.queryP2PNetwork(address); + + // Combine and rank results + const allMatches = [...agents, ...networkResults]; + const rankedMatches = this.rankMatches(allMatches, address, filter_criteria); + + return { + success: true, + address: semantic_address, + parsed_address: address, + matches: rankedMatches, + count: rankedMatches.length, + query_time: new Date().toISOString(), + }; + } catch (error) { + this.logger.error('Failed to lookup address:', error); + throw new Error(`Lookup failed: ${error instanceof Error ? error.message : String(error)}`); + } + } + + /** + * Handle bzzz_get - Content retrieval from addresses + */ + async handleGet(args: Record): Promise { + const { address, include_metadata = true, max_history = 10 } = args; + + if (!address) { + throw new Error('address is required for get operation'); + } + + this.logger.info(`Getting content from address: ${address}`); + + try { + const parsedAddress = this.parseSemanticAddress(address); + + // Retrieve content based on address type + let content; + let metadata = {}; + + if (parsedAddress.agent) { + // Get agent-specific content + content = await this.getAgentContent(parsedAddress, max_history); + if (include_metadata) { + metadata = await this.getAgentMetadata(parsedAddress.agent); + } + } else if (parsedAddress.project) { + // Get project-specific content + content = await this.getProjectContent(parsedAddress, max_history); + if (include_metadata) { + metadata = await this.getProjectMetadata(parsedAddress.project); + } + } else { + // General network query + content = await this.getNetworkContent(parsedAddress, max_history); + } + + return { + success: true, + address, + content, + metadata: include_metadata ? metadata : undefined, + retrieved_at: new Date().toISOString(), + }; + } catch (error) { + this.logger.error('Failed to get content:', error); + throw new Error(`Get operation failed: ${error instanceof Error ? error.message : String(error)}`); + } + } + + /** + * Handle bzzz_post - Event/message posting + */ + async handlePost(args: Record): Promise { + const { target_address, message_type, content, priority = 'medium', thread_id } = args; + + if (!target_address || !message_type || !content) { + throw new Error('target_address, message_type, and content are required for post operation'); + } + + this.logger.info(`Posting ${message_type} to address: ${target_address}`); + + try { + const parsedAddress = this.parseSemanticAddress(target_address); + + // Create message payload + const message = { + type: message_type, + content, + priority, + thread_id, + target_address, + sender_id: this.p2pConnector.getNodeId(), + timestamp: new Date().toISOString(), + parsed_address: parsedAddress, + }; + + // Determine routing strategy + let deliveryResults; + + if (parsedAddress.agent) { + // Direct agent messaging + deliveryResults = await this.postToAgent(parsedAddress.agent, message); + } else if (parsedAddress.role) { + // Role-based broadcasting + deliveryResults = await this.postToRole(parsedAddress.role, message); + } else if (parsedAddress.project) { + // Project-specific messaging + deliveryResults = await this.postToProject(parsedAddress.project, message); + } else { + // General network broadcast + deliveryResults = await this.postToNetwork(message); + } + + return { + success: true, + message_id: this.generateMessageId(), + target_address, + message_type, + delivery_results: deliveryResults, + posted_at: new Date().toISOString(), + }; + } catch (error) { + this.logger.error('Failed to post message:', error); + throw new Error(`Post operation failed: ${error instanceof Error ? error.message : String(error)}`); + } + } + + /** + * Handle bzzz_thread - Conversation management + */ + async handleThread(args: Record): Promise { + const { action, thread_id, participants, topic } = args; + + if (!action) { + throw new Error('action is required for thread operation'); + } + + this.logger.info(`Thread action: ${action}, thread_id: ${thread_id}`); + + try { + let result; + + switch (action) { + case 'create': + if (!topic || !participants?.length) { + throw new Error('topic and participants are required for creating threads'); + } + result = await this.conversationManager.createThread({ + topic, + participants, + creator: this.p2pConnector.getNodeId(), + }); + break; + + case 'join': + if (!thread_id) { + throw new Error('thread_id is required for joining threads'); + } + result = await this.conversationManager.joinThread( + thread_id, + this.p2pConnector.getNodeId() + ); + break; + + case 'leave': + if (!thread_id) { + throw new Error('thread_id is required for leaving threads'); + } + result = await this.conversationManager.leaveThread( + thread_id, + this.p2pConnector.getNodeId() + ); + break; + + case 'list': + result = await this.conversationManager.listThreads( + this.p2pConnector.getNodeId() + ); + break; + + case 'summarize': + if (!thread_id) { + throw new Error('thread_id is required for summarizing threads'); + } + result = await this.conversationManager.summarizeThread(thread_id); + break; + + default: + throw new Error(`Unknown thread action: ${action}`); + } + + return { + success: true, + action, + thread_id, + result, + timestamp: new Date().toISOString(), + }; + } catch (error) { + this.logger.error('Thread operation failed:', error); + throw new Error(`Thread operation failed: ${error instanceof Error ? error.message : String(error)}`); + } + } + + /** + * Handle bzzz_subscribe - Real-time event subscription + */ + async handleSubscribe(args: Record): Promise { + const { event_types, filter_address, callback_webhook } = args; + + if (!event_types?.length) { + throw new Error('event_types is required for subscription'); + } + + this.logger.info(`Subscribing to events: ${event_types.join(', ')}`); + + try { + const subscription = await this.p2pConnector.subscribe({ + eventTypes: event_types, + filterAddress: filter_address, + callbackWebhook: callback_webhook, + subscriberId: this.p2pConnector.getNodeId(), + }); + + return { + success: true, + subscription_id: subscription.id, + event_types, + filter_address, + callback_webhook, + subscribed_at: new Date().toISOString(), + status: 'active', + }; + } catch (error) { + this.logger.error('Failed to create subscription:', error); + throw new Error(`Subscription failed: ${error instanceof Error ? error.message : String(error)}`); + } + } + + // Helper methods + + private parseSemanticAddress(address: string): SemanticAddress { + // Parse bzzz://agent:role@project:task/path + const bzzzMatch = address.match(/^bzzz:\/\/([^@\/]+)@([^\/]+)(?:\/(.+))?$/); + + if (bzzzMatch) { + const [, agentRole, projectTask, path] = bzzzMatch; + const [agent, role] = agentRole.split(':'); + const [project, task] = projectTask.split(':'); + + return { + agent: agent !== '*' ? agent : undefined, + role: role !== '*' ? role : undefined, + project: project !== '*' ? project : undefined, + task: task !== '*' ? task : undefined, + path: path || undefined, + raw: address, + }; + } + + // Simple address format + return { raw: address }; + } + + private async discoverAgents(address: SemanticAddress, criteria: any): Promise { + const agents = await this.agentManager.getAgents(); + + return agents.filter(agent => { + if (address.agent && agent.id !== address.agent) return false; + if (address.role && agent.role !== address.role) return false; + if (criteria.availability && !agent.available) return false; + if (criteria.performance_threshold && agent.performance < criteria.performance_threshold) return false; + if (criteria.expertise?.length && !criteria.expertise.some((exp: string) => + agent.capabilities.includes(exp))) return false; + + return true; + }); + } + + private async queryP2PNetwork(address: SemanticAddress): Promise { + // Query the P2P network for matching agents + const query = { + type: 'agent_discovery', + criteria: address, + timestamp: new Date().toISOString(), + }; + + const responses = await this.p2pConnector.queryNetwork(query, 5000); // 5 second timeout + return responses; + } + + private rankMatches(matches: any[], address: SemanticAddress, criteria: any): any[] { + return matches + .map(match => ({ + ...match, + score: this.calculateMatchScore(match, address, criteria), + })) + .sort((a, b) => b.score - a.score) + .slice(0, 20); // Limit to top 20 matches + } + + private calculateMatchScore(match: any, address: SemanticAddress, criteria: any): number { + let score = 0; + + // Exact matches get highest score + if (address.agent && match.id === address.agent) score += 100; + if (address.role && match.role === address.role) score += 50; + + // Capability matching + if (criteria.expertise?.length) { + const matchingExp = criteria.expertise.filter((exp: string) => + match.capabilities?.includes(exp) + ).length; + score += (matchingExp / criteria.expertise.length) * 30; + } + + // Availability bonus + if (match.available) score += 10; + + // Performance bonus + if (match.performance) score += match.performance * 10; + + return score; + } + + private async getAgentContent(address: SemanticAddress, maxHistory: number): Promise { + const agent = await this.agentManager.getAgent(address.agent!); + if (!agent) { + throw new Error(`Agent ${address.agent} not found`); + } + + const content = { + agent_info: agent, + recent_activity: await this.agentManager.getRecentActivity(address.agent!, maxHistory), + current_tasks: await this.agentManager.getCurrentTasks(address.agent!), + }; + + if (address.path) { + content[address.path] = await this.agentManager.getAgentData(address.agent!, address.path); + } + + return content; + } + + private async getProjectContent(address: SemanticAddress, maxHistory: number): Promise { + // Get project-related content from P2P network + return await this.p2pConnector.getProjectData(address.project!, maxHistory); + } + + private async getNetworkContent(address: SemanticAddress, maxHistory: number): Promise { + // Get general network content + return await this.p2pConnector.getNetworkData(address.raw, maxHistory); + } + + private async getAgentMetadata(agentId: string): Promise { + return await this.agentManager.getAgentMetadata(agentId); + } + + private async getProjectMetadata(projectId: string): Promise { + return await this.p2pConnector.getProjectMetadata(projectId); + } + + private async postToAgent(agentId: string, message: any): Promise { + return await this.p2pConnector.sendDirectMessage(agentId, message); + } + + private async postToRole(role: string, message: any): Promise { + const topic = `bzzz/roles/${role.toLowerCase().replace(/\s+/g, '_')}/v1`; + return await this.p2pConnector.publishMessage(topic, message); + } + + private async postToProject(projectId: string, message: any): Promise { + const topic = `bzzz/projects/${projectId}/coordination/v1`; + return await this.p2pConnector.publishMessage(topic, message); + } + + private async postToNetwork(message: any): Promise { + return await this.p2pConnector.publishMessage('bzzz/coordination/v1', message); + } + + private generateMessageId(): string { + return `msg_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`; + } +} \ No newline at end of file diff --git a/mcp-server/tsconfig.json b/mcp-server/tsconfig.json new file mode 100644 index 00000000..22a4c5a4 --- /dev/null +++ b/mcp-server/tsconfig.json @@ -0,0 +1,27 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "commonjs", + "lib": ["ES2022"], + "outDir": "./dist", + "rootDir": "./src", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "resolveJsonModule": true, + "experimentalDecorators": true, + "emitDecoratorMetadata": true + }, + "include": [ + "src/**/*" + ], + "exclude": [ + "node_modules", + "dist", + "**/*.test.ts" + ] +} \ No newline at end of file diff --git a/monitoring/antennae_monitor.go b/monitoring/hmmm_monitor.go similarity index 83% rename from monitoring/antennae_monitor.go rename to monitoring/hmmm_monitor.go index 4f34e21a..7068bcfe 100644 --- a/monitoring/antennae_monitor.go +++ b/monitoring/hmmm_monitor.go @@ -12,8 +12,8 @@ import ( "github.com/anthonyrawlins/bzzz/pubsub" ) -// AntennaeMonitor tracks and logs antennae coordination activity -type AntennaeMonitor struct { +// HmmmMonitor tracks and logs HMMM coordination activity +type HmmmMonitor struct { ctx context.Context pubsub *pubsub.PubSub logFile *os.File @@ -72,8 +72,8 @@ type CoordinationMetrics struct { LastUpdated time.Time `json:"last_updated"` } -// NewAntennaeMonitor creates a new antennae monitoring system -func NewAntennaeMonitor(ctx context.Context, ps *pubsub.PubSub, logDir string) (*AntennaeMonitor, error) { +// NewHmmmMonitor creates a new HMMM monitoring system +func NewHmmmMonitor(ctx context.Context, ps *pubsub.PubSub, logDir string) (*HmmmMonitor, error) { // Ensure log directory exists if err := os.MkdirAll(logDir, 0755); err != nil { return nil, fmt.Errorf("failed to create log directory: %w", err) @@ -81,8 +81,8 @@ func NewAntennaeMonitor(ctx context.Context, ps *pubsub.PubSub, logDir string) ( // Create log files timestamp := time.Now().Format("20060102_150405") - logPath := filepath.Join(logDir, fmt.Sprintf("antennae_activity_%s.jsonl", timestamp)) - metricsPath := filepath.Join(logDir, fmt.Sprintf("antennae_metrics_%s.json", timestamp)) + logPath := filepath.Join(logDir, fmt.Sprintf("hmmm_activity_%s.jsonl", timestamp)) + metricsPath := filepath.Join(logDir, fmt.Sprintf("hmmm_metrics_%s.json", timestamp)) logFile, err := os.Create(logPath) if err != nil { @@ -95,7 +95,7 @@ func NewAntennaeMonitor(ctx context.Context, ps *pubsub.PubSub, logDir string) ( return nil, fmt.Errorf("failed to create metrics file: %w", err) } - monitor := &AntennaeMonitor{ + monitor := &HmmmMonitor{ ctx: ctx, pubsub: ps, logFile: logFile, @@ -107,21 +107,21 @@ func NewAntennaeMonitor(ctx context.Context, ps *pubsub.PubSub, logDir string) ( }, } - fmt.Printf("πŸ“Š Antennae Monitor initialized\n") + fmt.Printf("πŸ“Š HMMM Monitor initialized\n") fmt.Printf(" Activity Log: %s\n", logPath) fmt.Printf(" Metrics File: %s\n", metricsPath) return monitor, nil } -// Start begins monitoring antennae coordination activity -func (am *AntennaeMonitor) Start() { +// Start begins monitoring HMMM coordination activity +func (am *HmmmMonitor) Start() { if am.isRunning { return } am.isRunning = true - fmt.Println("πŸ” Starting Antennae coordination monitoring...") + fmt.Println("πŸ” Starting HMMM coordination monitoring...") // Start monitoring routines go am.monitorCoordinationMessages() @@ -131,7 +131,7 @@ func (am *AntennaeMonitor) Start() { } // Stop stops the monitoring system -func (am *AntennaeMonitor) Stop() { +func (am *HmmmMonitor) Stop() { if !am.isRunning { return } @@ -148,12 +148,12 @@ func (am *AntennaeMonitor) Stop() { am.metricsFile.Close() } - fmt.Println("πŸ›‘ Antennae monitoring stopped") + fmt.Println("πŸ›‘ HMMM monitoring stopped") } -// monitorCoordinationMessages listens for antennae meta-discussion messages -func (am *AntennaeMonitor) monitorCoordinationMessages() { - // Subscribe to antennae topic +// monitorCoordinationMessages listens for HMMM meta-discussion messages +func (am *HmmmMonitor) monitorCoordinationMessages() { + // Subscribe to HMMM topic msgChan := make(chan pubsub.Message, 100) // This would be implemented with actual pubsub subscription @@ -172,7 +172,7 @@ func (am *AntennaeMonitor) monitorCoordinationMessages() { } // monitorTaskAnnouncements listens for task announcements -func (am *AntennaeMonitor) monitorTaskAnnouncements() { +func (am *HmmmMonitor) monitorTaskAnnouncements() { // Subscribe to bzzz coordination topic msgChan := make(chan pubsub.Message, 100) @@ -188,8 +188,8 @@ func (am *AntennaeMonitor) monitorTaskAnnouncements() { } } -// processCoordinationMessage processes an antennae coordination message -func (am *AntennaeMonitor) processCoordinationMessage(msg pubsub.Message) { +// processCoordinationMessage processes a HMMM coordination message +func (am *HmmmMonitor) processCoordinationMessage(msg pubsub.Message) { am.mu.Lock() defer am.mu.Unlock() @@ -198,7 +198,7 @@ func (am *AntennaeMonitor) processCoordinationMessage(msg pubsub.Message) { FromAgent: msg.From, MessageType: msg.Type, Content: msg.Data, - Topic: "antennae/meta-discussion", + Topic: "hmmm/meta-discussion", } // Log the message @@ -224,12 +224,12 @@ func (am *AntennaeMonitor) processCoordinationMessage(msg pubsub.Message) { // Update session status based on message type am.updateSessionStatus(session, msg) - fmt.Printf("🧠 Antennae message: %s from %s (Session: %s)\n", + fmt.Printf("🧠 HMMM message: %s from %s (Session: %s)\n", msg.Type, msg.From, sessionID) } // processTaskAnnouncement processes a task announcement -func (am *AntennaeMonitor) processTaskAnnouncement(msg pubsub.Message) { +func (am *HmmmMonitor) processTaskAnnouncement(msg pubsub.Message) { am.mu.Lock() defer am.mu.Unlock() @@ -259,7 +259,7 @@ func (am *AntennaeMonitor) processTaskAnnouncement(msg pubsub.Message) { } // getOrCreateSession gets an existing session or creates a new one -func (am *AntennaeMonitor) getOrCreateSession(sessionID string) *CoordinationSession { +func (am *HmmmMonitor) getOrCreateSession(sessionID string) *CoordinationSession { if session, exists := am.activeSessions[sessionID]; exists { return session } @@ -285,7 +285,7 @@ func (am *AntennaeMonitor) getOrCreateSession(sessionID string) *CoordinationSes } // updateSessionStatus updates session status based on message content -func (am *AntennaeMonitor) updateSessionStatus(session *CoordinationSession, msg pubsub.Message) { +func (am *HmmmMonitor) updateSessionStatus(session *CoordinationSession, msg pubsub.Message) { // Analyze message content to determine status changes if content, ok := msg.Data["type"].(string); ok { switch content { @@ -306,7 +306,7 @@ func (am *AntennaeMonitor) updateSessionStatus(session *CoordinationSession, msg } // periodicMetricsUpdate saves metrics periodically -func (am *AntennaeMonitor) periodicMetricsUpdate() { +func (am *HmmmMonitor) periodicMetricsUpdate() { ticker := time.NewTicker(30 * time.Second) defer ticker.Stop() @@ -322,7 +322,7 @@ func (am *AntennaeMonitor) periodicMetricsUpdate() { } // sessionCleanup removes old inactive sessions -func (am *AntennaeMonitor) sessionCleanup() { +func (am *HmmmMonitor) sessionCleanup() { ticker := time.NewTicker(5 * time.Minute) defer ticker.Stop() @@ -337,7 +337,7 @@ func (am *AntennaeMonitor) sessionCleanup() { } // cleanupOldSessions removes sessions inactive for more than 10 minutes -func (am *AntennaeMonitor) cleanupOldSessions() { +func (am *HmmmMonitor) cleanupOldSessions() { am.mu.Lock() defer am.mu.Unlock() @@ -360,7 +360,7 @@ func (am *AntennaeMonitor) cleanupOldSessions() { } // logActivity logs an activity to the activity log file -func (am *AntennaeMonitor) logActivity(activityType string, data interface{}) { +func (am *HmmmMonitor) logActivity(activityType string, data interface{}) { logEntry := map[string]interface{}{ "timestamp": time.Now().Unix(), "activity_type": activityType, @@ -374,7 +374,7 @@ func (am *AntennaeMonitor) logActivity(activityType string, data interface{}) { } // saveMetrics saves current metrics to file -func (am *AntennaeMonitor) saveMetrics() { +func (am *HmmmMonitor) saveMetrics() { am.mu.RLock() defer am.mu.RUnlock() @@ -406,11 +406,11 @@ func (am *AntennaeMonitor) saveMetrics() { } // printStatus prints current monitoring status -func (am *AntennaeMonitor) printStatus() { +func (am *HmmmMonitor) printStatus() { am.mu.RLock() defer am.mu.RUnlock() - fmt.Printf("πŸ“Š Antennae Monitor Status:\n") + fmt.Printf("πŸ“Š HMMM Monitor Status:\n") fmt.Printf(" Total Sessions: %d (Active: %d, Completed: %d)\n", am.metrics.TotalSessions, am.metrics.ActiveSessions, am.metrics.CompletedSessions) fmt.Printf(" Messages: %d, Announcements: %d\n", @@ -420,14 +420,14 @@ func (am *AntennaeMonitor) printStatus() { } // GetMetrics returns current metrics -func (am *AntennaeMonitor) GetMetrics() *CoordinationMetrics { +func (am *HmmmMonitor) GetMetrics() *CoordinationMetrics { am.mu.RLock() defer am.mu.RUnlock() return am.metrics } // Helper functions -func (am *AntennaeMonitor) extractSessionID(data map[string]interface{}) string { +func (am *HmmmMonitor) extractSessionID(data map[string]interface{}) string { if sessionID, ok := data["session_id"].(string); ok { return sessionID } @@ -444,4 +444,14 @@ func contains(slice []string, item string) bool { } } return false +} + +// Compatibility aliases for the old Antennae naming +// Deprecated: Use HmmmMonitor instead +type AntennaeMonitor = HmmmMonitor + +// NewAntennaeMonitor is a compatibility alias for NewHmmmMonitor +// Deprecated: Use NewHmmmMonitor instead +func NewAntennaeMonitor(ctx context.Context, ps *pubsub.PubSub, logDir string) (*HmmmMonitor, error) { + return NewHmmmMonitor(ctx, ps, logDir) } \ No newline at end of file diff --git a/p2p/config.go b/p2p/config.go index 62888053..c6132bec 100644 --- a/p2p/config.go +++ b/p2p/config.go @@ -25,7 +25,7 @@ type Config struct { // Pubsub configuration EnablePubsub bool BzzzTopic string // Task coordination topic - AntennaeTopic string // Meta-discussion topic + HmmmTopic string // Meta-discussion topic MessageValidationTime time.Duration } @@ -57,7 +57,7 @@ func DefaultConfig() *Config { // Pubsub for coordination and meta-discussion EnablePubsub: true, BzzzTopic: "bzzz/coordination/v1", - AntennaeTopic: "antennae/meta-discussion/v1", + HmmmTopic: "hmmm/meta-discussion/v1", MessageValidationTime: 10 * time.Second, } } @@ -118,10 +118,10 @@ func WithPubsub(enabled bool) Option { } } -// WithTopics sets the Bzzz and Antennae topic names -func WithTopics(bzzzTopic, antennaeTopic string) Option { +// WithTopics sets the Bzzz and HMMM topic names +func WithTopics(bzzzTopic, hmmmTopic string) Option { return func(c *Config) { c.BzzzTopic = bzzzTopic - c.AntennaeTopic = antennaeTopic + c.HmmmTopic = hmmmTopic } } \ No newline at end of file diff --git a/pkg/config/slurp_config.go b/pkg/config/slurp_config.go new file mode 100644 index 00000000..f1c8b421 --- /dev/null +++ b/pkg/config/slurp_config.go @@ -0,0 +1,222 @@ +package config + +import ( + "fmt" + "time" +) + +// SlurpConfig holds SLURP event system integration configuration +type SlurpConfig struct { + // Connection settings + Enabled bool `yaml:"enabled" json:"enabled"` + BaseURL string `yaml:"base_url" json:"base_url"` + APIKey string `yaml:"api_key" json:"api_key"` + Timeout time.Duration `yaml:"timeout" json:"timeout"` + RetryCount int `yaml:"retry_count" json:"retry_count"` + RetryDelay time.Duration `yaml:"retry_delay" json:"retry_delay"` + + // Event generation settings + EventGeneration EventGenerationConfig `yaml:"event_generation" json:"event_generation"` + + // Project-specific event mappings + ProjectMappings map[string]ProjectEventMapping `yaml:"project_mappings" json:"project_mappings"` + + // Default event settings + DefaultEventSettings DefaultEventConfig `yaml:"default_event_settings" json:"default_event_settings"` + + // Batch processing settings + BatchProcessing BatchConfig `yaml:"batch_processing" json:"batch_processing"` +} + +// EventGenerationConfig controls when and how SLURP events are generated +type EventGenerationConfig struct { + // Consensus requirements + MinConsensusStrength float64 `yaml:"min_consensus_strength" json:"min_consensus_strength"` + MinParticipants int `yaml:"min_participants" json:"min_participants"` + RequireUnanimity bool `yaml:"require_unanimity" json:"require_unanimity"` + + // Time-based triggers + MaxDiscussionDuration time.Duration `yaml:"max_discussion_duration" json:"max_discussion_duration"` + MinDiscussionDuration time.Duration `yaml:"min_discussion_duration" json:"min_discussion_duration"` + + // Event type generation rules + EnabledEventTypes []string `yaml:"enabled_event_types" json:"enabled_event_types"` + DisabledEventTypes []string `yaml:"disabled_event_types" json:"disabled_event_types"` + + // Severity calculation + SeverityRules SeverityConfig `yaml:"severity_rules" json:"severity_rules"` +} + +// SeverityConfig defines how to calculate event severity from HMMM discussions +type SeverityConfig struct { + // Base severity for each event type (1-10 scale) + BaseSeverity map[string]int `yaml:"base_severity" json:"base_severity"` + + // Modifiers based on discussion characteristics + ParticipantMultiplier float64 `yaml:"participant_multiplier" json:"participant_multiplier"` + DurationMultiplier float64 `yaml:"duration_multiplier" json:"duration_multiplier"` + UrgencyKeywords []string `yaml:"urgency_keywords" json:"urgency_keywords"` + UrgencyBoost int `yaml:"urgency_boost" json:"urgency_boost"` + + // Severity caps + MinSeverity int `yaml:"min_severity" json:"min_severity"` + MaxSeverity int `yaml:"max_severity" json:"max_severity"` +} + +// ProjectEventMapping defines project-specific event mapping rules +type ProjectEventMapping struct { + ProjectPath string `yaml:"project_path" json:"project_path"` + CustomEventTypes map[string]string `yaml:"custom_event_types" json:"custom_event_types"` + SeverityOverrides map[string]int `yaml:"severity_overrides" json:"severity_overrides"` + AdditionalMetadata map[string]interface{} `yaml:"additional_metadata" json:"additional_metadata"` + EventFilters []EventFilter `yaml:"event_filters" json:"event_filters"` +} + +// EventFilter defines conditions for filtering or modifying events +type EventFilter struct { + Name string `yaml:"name" json:"name"` + Conditions map[string]string `yaml:"conditions" json:"conditions"` + Action string `yaml:"action" json:"action"` // "allow", "deny", "modify" + Modifications map[string]string `yaml:"modifications" json:"modifications"` +} + +// DefaultEventConfig provides default settings for generated events +type DefaultEventConfig struct { + DefaultSeverity int `yaml:"default_severity" json:"default_severity"` + DefaultCreatedBy string `yaml:"default_created_by" json:"default_created_by"` + DefaultTags []string `yaml:"default_tags" json:"default_tags"` + MetadataTemplate map[string]string `yaml:"metadata_template" json:"metadata_template"` +} + +// BatchConfig controls batch processing of SLURP events +type BatchConfig struct { + Enabled bool `yaml:"enabled" json:"enabled"` + MaxBatchSize int `yaml:"max_batch_size" json:"max_batch_size"` + MaxBatchWait time.Duration `yaml:"max_batch_wait" json:"max_batch_wait"` + FlushOnShutdown bool `yaml:"flush_on_shutdown" json:"flush_on_shutdown"` +} + +// HmmmToSlurpMapping defines the mapping between HMMM discussion outcomes and SLURP event types +type HmmmToSlurpMapping struct { + // Consensus types to SLURP event types + ConsensusApproval string `yaml:"consensus_approval" json:"consensus_approval"` // -> "approval" + RiskIdentified string `yaml:"risk_identified" json:"risk_identified"` // -> "warning" + CriticalBlocker string `yaml:"critical_blocker" json:"critical_blocker"` // -> "blocker" + PriorityChange string `yaml:"priority_change" json:"priority_change"` // -> "priority_change" + AccessRequest string `yaml:"access_request" json:"access_request"` // -> "access_update" + ArchitectureDecision string `yaml:"architecture_decision" json:"architecture_decision"` // -> "structural_change" + InformationShare string `yaml:"information_share" json:"information_share"` // -> "announcement" + + // Keywords that trigger specific event types + ApprovalKeywords []string `yaml:"approval_keywords" json:"approval_keywords"` + WarningKeywords []string `yaml:"warning_keywords" json:"warning_keywords"` + BlockerKeywords []string `yaml:"blocker_keywords" json:"blocker_keywords"` + PriorityKeywords []string `yaml:"priority_keywords" json:"priority_keywords"` + AccessKeywords []string `yaml:"access_keywords" json:"access_keywords"` + StructuralKeywords []string `yaml:"structural_keywords" json:"structural_keywords"` + AnnouncementKeywords []string `yaml:"announcement_keywords" json:"announcement_keywords"` +} + +// GetDefaultSlurpConfig returns default SLURP configuration +func GetDefaultSlurpConfig() SlurpConfig { + return SlurpConfig{ + Enabled: false, // Disabled by default until configured + BaseURL: "http://localhost:8080", + Timeout: 30 * time.Second, + RetryCount: 3, + RetryDelay: 5 * time.Second, + + EventGeneration: EventGenerationConfig{ + MinConsensusStrength: 0.7, + MinParticipants: 2, + RequireUnanimity: false, + MaxDiscussionDuration: 30 * time.Minute, + MinDiscussionDuration: 1 * time.Minute, + EnabledEventTypes: []string{ + "announcement", "warning", "blocker", "approval", + "priority_change", "access_update", "structural_change", + }, + DisabledEventTypes: []string{}, + SeverityRules: SeverityConfig{ + BaseSeverity: map[string]int{ + "announcement": 3, + "warning": 5, + "blocker": 8, + "approval": 4, + "priority_change": 6, + "access_update": 5, + "structural_change": 7, + }, + ParticipantMultiplier: 0.2, + DurationMultiplier: 0.1, + UrgencyKeywords: []string{"urgent", "critical", "blocker", "emergency", "immediate"}, + UrgencyBoost: 2, + MinSeverity: 1, + MaxSeverity: 10, + }, + }, + + ProjectMappings: make(map[string]ProjectEventMapping), + + DefaultEventSettings: DefaultEventConfig{ + DefaultSeverity: 5, + DefaultCreatedBy: "hmmm-consensus", + DefaultTags: []string{"hmmm-generated", "automated"}, + MetadataTemplate: map[string]string{ + "source": "hmmm-discussion", + "generation_type": "consensus-based", + }, + }, + + BatchProcessing: BatchConfig{ + Enabled: true, + MaxBatchSize: 10, + MaxBatchWait: 5 * time.Second, + FlushOnShutdown: true, + }, + } +} + +// GetHmmmToSlurpMapping returns the default mapping configuration +func GetHmmmToSlurpMapping() HmmmToSlurpMapping { + return HmmmToSlurpMapping{ + ConsensusApproval: "approval", + RiskIdentified: "warning", + CriticalBlocker: "blocker", + PriorityChange: "priority_change", + AccessRequest: "access_update", + ArchitectureDecision: "structural_change", + InformationShare: "announcement", + + ApprovalKeywords: []string{"approve", "approved", "looks good", "lgtm", "accepted", "agree"}, + WarningKeywords: []string{"warning", "caution", "risk", "potential issue", "concern", "careful"}, + BlockerKeywords: []string{"blocker", "blocked", "critical", "urgent", "cannot proceed", "show stopper"}, + PriorityKeywords: []string{"priority", "urgent", "high priority", "low priority", "reprioritize"}, + AccessKeywords: []string{"access", "permission", "auth", "authorization", "credentials", "token"}, + StructuralKeywords: []string{"architecture", "structure", "design", "refactor", "framework", "pattern"}, + AnnouncementKeywords: []string{"announce", "fyi", "information", "update", "news", "notice"}, + } +} + +// ValidateSlurpConfig validates SLURP configuration +func ValidateSlurpConfig(config SlurpConfig) error { + if config.Enabled { + if config.BaseURL == "" { + return fmt.Errorf("slurp.base_url is required when SLURP is enabled") + } + + if config.EventGeneration.MinConsensusStrength < 0 || config.EventGeneration.MinConsensusStrength > 1 { + return fmt.Errorf("slurp.event_generation.min_consensus_strength must be between 0 and 1") + } + + if config.EventGeneration.MinParticipants < 1 { + return fmt.Errorf("slurp.event_generation.min_participants must be at least 1") + } + + if config.DefaultEventSettings.DefaultSeverity < 1 || config.DefaultEventSettings.DefaultSeverity > 10 { + return fmt.Errorf("slurp.default_event_settings.default_severity must be between 1 and 10") + } + } + + return nil +} \ No newline at end of file diff --git a/pkg/coordination/dependency_detector.go b/pkg/coordination/dependency_detector.go index d4b34170..0c130808 100644 --- a/pkg/coordination/dependency_detector.go +++ b/pkg/coordination/dependency_detector.go @@ -200,7 +200,7 @@ func (dd *DependencyDetector) announceDependency(dep *TaskDependency) { dep.Task2.Repository, dep.Task2.Title, dep.Task2.TaskID, dep.Relationship) - // Create coordination message for Antennae meta-discussion + // Create coordination message for HMMM meta-discussion coordMsg := map[string]interface{}{ "message_type": "dependency_detected", "dependency": dep, @@ -219,11 +219,11 @@ func (dd *DependencyDetector) announceDependency(dep *TaskDependency) { "detected_at": dep.DetectedAt.Unix(), } - // Publish to Antennae meta-discussion channel - if err := dd.pubsub.PublishAntennaeMessage(pubsub.MetaDiscussion, coordMsg); err != nil { + // Publish to HMMM meta-discussion channel + if err := dd.pubsub.PublishHmmmMessage(pubsub.MetaDiscussion, coordMsg); err != nil { fmt.Printf("❌ Failed to announce dependency: %v\n", err) } else { - fmt.Printf("πŸ“‘ Dependency coordination request sent to Antennae channel\n") + fmt.Printf("πŸ“‘ Dependency coordination request sent to HMMM channel\n") } } diff --git a/pkg/coordination/meta_coordinator.go b/pkg/coordination/meta_coordinator.go index dca18f0d..7f4df85a 100644 --- a/pkg/coordination/meta_coordinator.go +++ b/pkg/coordination/meta_coordinator.go @@ -8,6 +8,7 @@ import ( "sync" "time" + "github.com/anthonyrawlins/bzzz/pkg/integration" "github.com/anthonyrawlins/bzzz/pubsub" "github.com/anthonyrawlins/bzzz/reasoning" "github.com/libp2p/go-libp2p/core/peer" @@ -18,6 +19,7 @@ type MetaCoordinator struct { pubsub *pubsub.PubSub ctx context.Context dependencyDetector *DependencyDetector + slurpIntegrator *integration.SlurpEventIntegrator // Active coordination sessions activeSessions map[string]*CoordinationSession // sessionID -> session @@ -79,7 +81,7 @@ func NewMetaCoordinator(ctx context.Context, ps *pubsub.PubSub) *MetaCoordinator mc.dependencyDetector = NewDependencyDetector(ctx, ps) // Set up message handler for meta-discussions - ps.SetAntennaeMessageHandler(mc.handleMetaMessage) + ps.SetHmmmMessageHandler(mc.handleMetaMessage) // Start session management go mc.sessionCleanupLoop() @@ -88,7 +90,13 @@ func NewMetaCoordinator(ctx context.Context, ps *pubsub.PubSub) *MetaCoordinator return mc } -// handleMetaMessage processes incoming Antennae meta-discussion messages +// SetSlurpIntegrator sets the SLURP event integrator for the coordinator +func (mc *MetaCoordinator) SetSlurpIntegrator(integrator *integration.SlurpEventIntegrator) { + mc.slurpIntegrator = integrator + fmt.Printf("🎯 SLURP integrator attached to Meta Coordinator\n") +} + +// handleMetaMessage processes incoming HMMM meta-discussion messages func (mc *MetaCoordinator) handleMetaMessage(msg pubsub.Message, from peer.ID) { messageType, hasType := msg.Data[\"message_type\"].(string) if !hasType { @@ -227,7 +235,7 @@ Keep the plan practical and actionable. Focus on specific next steps.`, // broadcastToSession sends a message to all participants in a session func (mc *MetaCoordinator) broadcastToSession(session *CoordinationSession, data map[string]interface{}) { - if err := mc.pubsub.PublishAntennaeMessage(pubsub.MetaDiscussion, data); err != nil { + if err := mc.pubsub.PublishHmmmMessage(pubsub.MetaDiscussion, data); err != nil { fmt.Printf(\"❌ Failed to broadcast to session %s: %v\\n\", session.SessionID, err) } } @@ -320,6 +328,11 @@ func (mc *MetaCoordinator) escalateSession(session *CoordinationSession, reason fmt.Printf(\"🚨 Escalating coordination session %s: %s\\n\", session.SessionID, reason) + // Generate SLURP event if integrator is available + if mc.slurpIntegrator != nil { + mc.generateSlurpEventFromSession(session, \"escalated\") + } + // Create escalation message escalationData := map[string]interface{}{ \"message_type\": \"escalation\", @@ -341,6 +354,11 @@ func (mc *MetaCoordinator) resolveSession(session *CoordinationSession, resoluti fmt.Printf(\"βœ… Resolved coordination session %s: %s\\n\", session.SessionID, resolution) + // Generate SLURP event if integrator is available + if mc.slurpIntegrator != nil { + mc.generateSlurpEventFromSession(session, \"resolved\") + } + // Broadcast resolution resolutionData := map[string]interface{}{ \"message_type\": \"resolution\", @@ -437,4 +455,72 @@ func (mc *MetaCoordinator) handleCoordinationRequest(msg pubsub.Message, from pe func (mc *MetaCoordinator) handleEscalationRequest(msg pubsub.Message, from peer.ID) { fmt.Printf(\"🚨 Escalation request from %s\\n\", from.ShortString()) // Implementation for handling escalation requests +} + +// generateSlurpEventFromSession creates and sends a SLURP event based on session outcome +func (mc *MetaCoordinator) generateSlurpEventFromSession(session *CoordinationSession, outcome string) { + // Convert coordination session to HMMM discussion context + hmmmMessages := make([]integration.HmmmMessage, len(session.Messages)) + for i, msg := range session.Messages { + hmmmMessages[i] = integration.HmmmMessage{ + From: msg.FromAgentID, + Content: msg.Content, + Type: msg.MessageType, + Timestamp: msg.Timestamp, + Metadata: msg.Metadata, + } + } + + // Extract participant IDs + participants := make([]string, 0, len(session.Participants)) + for agentID := range session.Participants { + participants = append(participants, agentID) + } + + // Determine consensus strength based on outcome + var consensusStrength float64 + switch outcome { + case \"resolved\": + consensusStrength = 0.9 // High consensus for resolved sessions + case \"escalated\": + consensusStrength = 0.3 // Low consensus for escalated sessions + default: + consensusStrength = 0.5 // Medium consensus for other outcomes + } + + // Determine project path from tasks involved + projectPath := \"/unknown\" + if len(session.TasksInvolved) > 0 && session.TasksInvolved[0] != nil { + projectPath = session.TasksInvolved[0].Repository + } + + // Create HMMM discussion context + discussionContext := integration.HmmmDiscussionContext{ + DiscussionID: session.SessionID, + SessionID: session.SessionID, + Participants: participants, + StartTime: session.CreatedAt, + EndTime: session.LastActivity, + Messages: hmmmMessages, + ConsensusReached: outcome == \"resolved\", + ConsensusStrength: consensusStrength, + OutcomeType: outcome, + ProjectPath: projectPath, + RelatedTasks: []string{}, // Could be populated from TasksInvolved + Metadata: map[string]interface{}{ + \"session_type\": session.Type, + \"session_status\": session.Status, + \"resolution\": session.Resolution, + \"escalation_reason\": session.EscalationReason, + \"message_count\": len(session.Messages), + \"participant_count\": len(session.Participants), + }, + } + + // Process the discussion through SLURP integrator + if err := mc.slurpIntegrator.ProcessHmmmDiscussion(mc.ctx, discussionContext); err != nil { + fmt.Printf(\"❌ Failed to process HMMM discussion for SLURP: %v\\n\", err) + } else { + fmt.Printf(\"🎯 Generated SLURP event from session %s (outcome: %s)\\n\", session.SessionID, outcome) + } } \ No newline at end of file diff --git a/pkg/integration/slurp_client.go b/pkg/integration/slurp_client.go new file mode 100644 index 00000000..1132fa85 --- /dev/null +++ b/pkg/integration/slurp_client.go @@ -0,0 +1,327 @@ +package integration + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" + + "github.com/anthonyrawlins/bzzz/pkg/config" +) + +// SlurpClient handles HTTP communication with SLURP endpoints +type SlurpClient struct { + baseURL string + apiKey string + timeout time.Duration + retryCount int + retryDelay time.Duration + httpClient *http.Client +} + +// SlurpEvent represents a SLURP event structure +type SlurpEvent struct { + EventType string `json:"event_type"` + Path string `json:"path"` + Content string `json:"content"` + Severity int `json:"severity"` + CreatedBy string `json:"created_by"` + Metadata map[string]interface{} `json:"metadata"` + Tags []string `json:"tags,omitempty"` + Timestamp time.Time `json:"timestamp"` +} + +// EventResponse represents the response from SLURP API +type EventResponse struct { + Success bool `json:"success"` + EventID string `json:"event_id,omitempty"` + Message string `json:"message,omitempty"` + Error string `json:"error,omitempty"` + Timestamp time.Time `json:"timestamp"` +} + +// BatchEventRequest represents a batch of events to be sent to SLURP +type BatchEventRequest struct { + Events []SlurpEvent `json:"events"` + Source string `json:"source"` +} + +// BatchEventResponse represents the response for batch event creation +type BatchEventResponse struct { + Success bool `json:"success"` + ProcessedCount int `json:"processed_count"` + FailedCount int `json:"failed_count"` + EventIDs []string `json:"event_ids,omitempty"` + Errors []string `json:"errors,omitempty"` + Message string `json:"message,omitempty"` + Timestamp time.Time `json:"timestamp"` +} + +// HealthResponse represents SLURP service health status +type HealthResponse struct { + Status string `json:"status"` + Version string `json:"version,omitempty"` + Uptime string `json:"uptime,omitempty"` + Timestamp time.Time `json:"timestamp"` +} + +// NewSlurpClient creates a new SLURP API client +func NewSlurpClient(config config.SlurpConfig) *SlurpClient { + return &SlurpClient{ + baseURL: strings.TrimSuffix(config.BaseURL, "/"), + apiKey: config.APIKey, + timeout: config.Timeout, + retryCount: config.RetryCount, + retryDelay: config.RetryDelay, + httpClient: &http.Client{ + Timeout: config.Timeout, + }, + } +} + +// CreateEvent sends a single event to SLURP +func (c *SlurpClient) CreateEvent(ctx context.Context, event SlurpEvent) (*EventResponse, error) { + url := fmt.Sprintf("%s/api/events", c.baseURL) + + eventData, err := json.Marshal(event) + if err != nil { + return nil, fmt.Errorf("failed to marshal event: %w", err) + } + + var lastErr error + for attempt := 0; attempt <= c.retryCount; attempt++ { + if attempt > 0 { + select { + case <-ctx.Done(): + return nil, ctx.Err() + case <-time.After(c.retryDelay): + } + } + + req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(eventData)) + if err != nil { + lastErr = fmt.Errorf("failed to create request: %w", err) + continue + } + + c.setHeaders(req) + + resp, err := c.httpClient.Do(req) + if err != nil { + lastErr = fmt.Errorf("failed to send request: %w", err) + continue + } + + defer resp.Body.Close() + + if c.isRetryableStatus(resp.StatusCode) && attempt < c.retryCount { + lastErr = fmt.Errorf("retryable error: HTTP %d", resp.StatusCode) + continue + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + + var eventResp EventResponse + if err := json.Unmarshal(body, &eventResp); err != nil { + return nil, fmt.Errorf("failed to unmarshal response: %w", err) + } + + if resp.StatusCode >= 400 { + return &eventResp, fmt.Errorf("SLURP API error (HTTP %d): %s", resp.StatusCode, eventResp.Error) + } + + return &eventResp, nil + } + + return nil, fmt.Errorf("failed after %d attempts: %w", c.retryCount+1, lastErr) +} + +// CreateEventsBatch sends multiple events to SLURP in a single request +func (c *SlurpClient) CreateEventsBatch(ctx context.Context, events []SlurpEvent) (*BatchEventResponse, error) { + url := fmt.Sprintf("%s/api/events/batch", c.baseURL) + + batchRequest := BatchEventRequest{ + Events: events, + Source: "bzzz-hmmm-integration", + } + + batchData, err := json.Marshal(batchRequest) + if err != nil { + return nil, fmt.Errorf("failed to marshal batch request: %w", err) + } + + var lastErr error + for attempt := 0; attempt <= c.retryCount; attempt++ { + if attempt > 0 { + select { + case <-ctx.Done(): + return nil, ctx.Err() + case <-time.After(c.retryDelay): + } + } + + req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(batchData)) + if err != nil { + lastErr = fmt.Errorf("failed to create batch request: %w", err) + continue + } + + c.setHeaders(req) + + resp, err := c.httpClient.Do(req) + if err != nil { + lastErr = fmt.Errorf("failed to send batch request: %w", err) + continue + } + + defer resp.Body.Close() + + if c.isRetryableStatus(resp.StatusCode) && attempt < c.retryCount { + lastErr = fmt.Errorf("retryable error: HTTP %d", resp.StatusCode) + continue + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read batch response body: %w", err) + } + + var batchResp BatchEventResponse + if err := json.Unmarshal(body, &batchResp); err != nil { + return nil, fmt.Errorf("failed to unmarshal batch response: %w", err) + } + + if resp.StatusCode >= 400 { + return &batchResp, fmt.Errorf("SLURP batch API error (HTTP %d): %s", resp.StatusCode, batchResp.Message) + } + + return &batchResp, nil + } + + return nil, fmt.Errorf("batch failed after %d attempts: %w", c.retryCount+1, lastErr) +} + +// GetHealth checks SLURP service health +func (c *SlurpClient) GetHealth(ctx context.Context) (*HealthResponse, error) { + url := fmt.Sprintf("%s/api/health", c.baseURL) + + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) + if err != nil { + return nil, fmt.Errorf("failed to create health request: %w", err) + } + + c.setHeaders(req) + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to send health request: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read health response: %w", err) + } + + var healthResp HealthResponse + if err := json.Unmarshal(body, &healthResp); err != nil { + return nil, fmt.Errorf("failed to unmarshal health response: %w", err) + } + + if resp.StatusCode >= 400 { + return &healthResp, fmt.Errorf("SLURP health check failed (HTTP %d)", resp.StatusCode) + } + + return &healthResp, nil +} + +// QueryEvents retrieves events from SLURP based on filters +func (c *SlurpClient) QueryEvents(ctx context.Context, filters map[string]string) ([]SlurpEvent, error) { + baseURL := fmt.Sprintf("%s/api/events", c.baseURL) + + // Build query parameters + params := url.Values{} + for key, value := range filters { + params.Add(key, value) + } + + queryURL := baseURL + if len(params) > 0 { + queryURL = fmt.Sprintf("%s?%s", baseURL, params.Encode()) + } + + req, err := http.NewRequestWithContext(ctx, "GET", queryURL, nil) + if err != nil { + return nil, fmt.Errorf("failed to create query request: %w", err) + } + + c.setHeaders(req) + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to send query request: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read query response: %w", err) + } + + var events []SlurpEvent + if err := json.Unmarshal(body, &events); err != nil { + return nil, fmt.Errorf("failed to unmarshal events: %w", err) + } + + if resp.StatusCode >= 400 { + return nil, fmt.Errorf("SLURP query failed (HTTP %d)", resp.StatusCode) + } + + return events, nil +} + +// setHeaders sets common HTTP headers for SLURP API requests +func (c *SlurpClient) setHeaders(req *http.Request) { + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + req.Header.Set("User-Agent", "Bzzz-HMMM-Integration/1.0") + + if c.apiKey != "" { + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", c.apiKey)) + } +} + +// isRetryableStatus determines if an HTTP status code is retryable +func (c *SlurpClient) isRetryableStatus(statusCode int) bool { + switch statusCode { + case http.StatusTooManyRequests, // 429 + http.StatusInternalServerError, // 500 + http.StatusBadGateway, // 502 + http.StatusServiceUnavailable, // 503 + http.StatusGatewayTimeout: // 504 + return true + default: + return false + } +} + +// Close cleans up the client resources +func (c *SlurpClient) Close() error { + // HTTP client doesn't need explicit cleanup, but we can implement + // connection pooling cleanup if needed in the future + return nil +} + +// ValidateConnection tests the connection to SLURP +func (c *SlurpClient) ValidateConnection(ctx context.Context) error { + _, err := c.GetHealth(ctx) + return err +} \ No newline at end of file diff --git a/pkg/integration/slurp_events.go b/pkg/integration/slurp_events.go new file mode 100644 index 00000000..4aa8c4c9 --- /dev/null +++ b/pkg/integration/slurp_events.go @@ -0,0 +1,519 @@ +package integration + +import ( + "context" + "fmt" + "math" + "strings" + "sync" + "time" + + "github.com/anthonyrawlins/bzzz/pkg/config" + "github.com/anthonyrawlins/bzzz/pubsub" + "github.com/libp2p/go-libp2p/core/peer" +) + +// SlurpEventIntegrator manages the integration between HMMM discussions and SLURP events +type SlurpEventIntegrator struct { + config config.SlurpConfig + client *SlurpClient + pubsub *pubsub.PubSub + eventMapping config.HmmmToSlurpMapping + + // Batch processing + eventBatch []SlurpEvent + batchMutex sync.Mutex + batchTimer *time.Timer + + // Context and lifecycle + ctx context.Context + cancel context.CancelFunc + + // Statistics + stats SlurpIntegrationStats + statsMutex sync.RWMutex +} + +// SlurpIntegrationStats tracks integration performance metrics +type SlurpIntegrationStats struct { + EventsGenerated int64 `json:"events_generated"` + EventsSuccessful int64 `json:"events_successful"` + EventsFailed int64 `json:"events_failed"` + BatchesSent int64 `json:"batches_sent"` + LastEventTime time.Time `json:"last_event_time"` + LastSuccessTime time.Time `json:"last_success_time"` + LastFailureTime time.Time `json:"last_failure_time"` + LastFailureError string `json:"last_failure_error"` + AverageResponseTime float64 `json:"average_response_time_ms"` +} + +// HmmmDiscussionContext represents a HMMM discussion that can generate SLURP events +type HmmmDiscussionContext struct { + DiscussionID string `json:"discussion_id"` + SessionID string `json:"session_id,omitempty"` + Participants []string `json:"participants"` + StartTime time.Time `json:"start_time"` + EndTime time.Time `json:"end_time"` + Messages []HmmmMessage `json:"messages"` + ConsensusReached bool `json:"consensus_reached"` + ConsensusStrength float64 `json:"consensus_strength"` + OutcomeType string `json:"outcome_type"` + ProjectPath string `json:"project_path"` + RelatedTasks []string `json:"related_tasks,omitempty"` + Metadata map[string]interface{} `json:"metadata,omitempty"` +} + +// HmmmMessage represents a message in a HMMM discussion +type HmmmMessage struct { + From string `json:"from"` + Content string `json:"content"` + Type string `json:"type"` + Timestamp time.Time `json:"timestamp"` + Metadata map[string]interface{} `json:"metadata,omitempty"` +} + +// NewSlurpEventIntegrator creates a new SLURP event integrator +func NewSlurpEventIntegrator(ctx context.Context, slurpConfig config.SlurpConfig, ps *pubsub.PubSub) (*SlurpEventIntegrator, error) { + if !slurpConfig.Enabled { + return nil, fmt.Errorf("SLURP integration is disabled in configuration") + } + + client := NewSlurpClient(slurpConfig) + + // Test connection to SLURP + if err := client.ValidateConnection(ctx); err != nil { + return nil, fmt.Errorf("failed to connect to SLURP: %w", err) + } + + integrationCtx, cancel := context.WithCancel(ctx) + + integrator := &SlurpEventIntegrator{ + config: slurpConfig, + client: client, + pubsub: ps, + eventMapping: config.GetHmmmToSlurpMapping(), + eventBatch: make([]SlurpEvent, 0, slurpConfig.BatchProcessing.MaxBatchSize), + ctx: integrationCtx, + cancel: cancel, + stats: SlurpIntegrationStats{}, + } + + // Initialize batch processing if enabled + if slurpConfig.BatchProcessing.Enabled { + integrator.initBatchProcessing() + } + + fmt.Printf("🎯 SLURP Event Integrator initialized for %s\n", slurpConfig.BaseURL) + return integrator, nil +} + +// ProcessHmmmDiscussion analyzes a HMMM discussion and generates appropriate SLURP events +func (s *SlurpEventIntegrator) ProcessHmmmDiscussion(ctx context.Context, discussion HmmmDiscussionContext) error { + s.statsMutex.Lock() + s.stats.EventsGenerated++ + s.stats.LastEventTime = time.Now() + s.statsMutex.Unlock() + + // Validate discussion meets generation criteria + if !s.shouldGenerateEvent(discussion) { + fmt.Printf("πŸ“Š Discussion %s does not meet event generation criteria\n", discussion.DiscussionID) + return nil + } + + // Determine event type from discussion + eventType, confidence := s.determineEventType(discussion) + if eventType == "" { + fmt.Printf("πŸ“Š Could not determine event type for discussion %s\n", discussion.DiscussionID) + return nil + } + + // Calculate severity + severity := s.calculateSeverity(discussion, eventType) + + // Generate event content + content := s.generateEventContent(discussion) + + // Create SLURP event + slurpEvent := SlurpEvent{ + EventType: eventType, + Path: discussion.ProjectPath, + Content: content, + Severity: severity, + CreatedBy: s.config.DefaultEventSettings.DefaultCreatedBy, + Timestamp: time.Now(), + Tags: append(s.config.DefaultEventSettings.DefaultTags, fmt.Sprintf("confidence-%.2f", confidence)), + Metadata: map[string]interface{}{ + "discussion_id": discussion.DiscussionID, + "session_id": discussion.SessionID, + "participants": discussion.Participants, + "consensus_strength": discussion.ConsensusStrength, + "discussion_duration": discussion.EndTime.Sub(discussion.StartTime).String(), + "message_count": len(discussion.Messages), + "outcome_type": discussion.OutcomeType, + "generation_confidence": confidence, + }, + } + + // Add custom metadata from template + for key, value := range s.config.DefaultEventSettings.MetadataTemplate { + slurpEvent.Metadata[key] = value + } + + // Add discussion-specific metadata + for key, value := range discussion.Metadata { + slurpEvent.Metadata[key] = value + } + + // Send event (batch or immediate) + if s.config.BatchProcessing.Enabled { + return s.addToBatch(slurpEvent) + } else { + return s.sendImmediateEvent(ctx, slurpEvent, discussion.DiscussionID) + } +} + +// shouldGenerateEvent determines if a discussion meets the criteria for event generation +func (s *SlurpEventIntegrator) shouldGenerateEvent(discussion HmmmDiscussionContext) bool { + // Check minimum participants + if len(discussion.Participants) < s.config.EventGeneration.MinParticipants { + return false + } + + // Check consensus strength + if discussion.ConsensusStrength < s.config.EventGeneration.MinConsensusStrength { + return false + } + + // Check discussion duration + duration := discussion.EndTime.Sub(discussion.StartTime) + if duration < s.config.EventGeneration.MinDiscussionDuration { + return false + } + + if duration > s.config.EventGeneration.MaxDiscussionDuration { + return false // Too long, might indicate stalled discussion + } + + // Check if unanimity is required and achieved + if s.config.EventGeneration.RequireUnanimity && discussion.ConsensusStrength < 1.0 { + return false + } + + return true +} + +// determineEventType analyzes discussion content to determine SLURP event type +func (s *SlurpEventIntegrator) determineEventType(discussion HmmmDiscussionContext) (string, float64) { + // Combine all message content for analysis + var allContent strings.Builder + for _, msg := range discussion.Messages { + allContent.WriteString(strings.ToLower(msg.Content)) + allContent.WriteString(" ") + } + content := allContent.String() + + // Score each event type based on keyword matches + scores := make(map[string]float64) + + scores["approval"] = s.scoreKeywordMatch(content, s.eventMapping.ApprovalKeywords) + scores["warning"] = s.scoreKeywordMatch(content, s.eventMapping.WarningKeywords) + scores["blocker"] = s.scoreKeywordMatch(content, s.eventMapping.BlockerKeywords) + scores["priority_change"] = s.scoreKeywordMatch(content, s.eventMapping.PriorityKeywords) + scores["access_update"] = s.scoreKeywordMatch(content, s.eventMapping.AccessKeywords) + scores["structural_change"] = s.scoreKeywordMatch(content, s.eventMapping.StructuralKeywords) + scores["announcement"] = s.scoreKeywordMatch(content, s.eventMapping.AnnouncementKeywords) + + // Find highest scoring event type + var bestType string + var bestScore float64 + for eventType, score := range scores { + if score > bestScore { + bestType = eventType + bestScore = score + } + } + + // Require minimum confidence threshold + minConfidence := 0.3 + if bestScore < minConfidence { + return "", 0 + } + + // Check if event type is enabled + if s.isEventTypeDisabled(bestType) { + return "", 0 + } + + return bestType, bestScore +} + +// scoreKeywordMatch calculates a score based on keyword frequency +func (s *SlurpEventIntegrator) scoreKeywordMatch(content string, keywords []string) float64 { + if len(keywords) == 0 { + return 0 + } + + matches := 0 + for _, keyword := range keywords { + if strings.Contains(content, strings.ToLower(keyword)) { + matches++ + } + } + + return float64(matches) / float64(len(keywords)) +} + +// isEventTypeDisabled checks if an event type is disabled in configuration +func (s *SlurpEventIntegrator) isEventTypeDisabled(eventType string) bool { + for _, disabled := range s.config.EventGeneration.DisabledEventTypes { + if disabled == eventType { + return true + } + } + + // Check if it's in enabled list (if specified) + if len(s.config.EventGeneration.EnabledEventTypes) > 0 { + for _, enabled := range s.config.EventGeneration.EnabledEventTypes { + if enabled == eventType { + return false + } + } + return true // Not in enabled list + } + + return false +} + +// calculateSeverity determines event severity based on discussion characteristics +func (s *SlurpEventIntegrator) calculateSeverity(discussion HmmmDiscussionContext, eventType string) int { + // Start with base severity for event type + baseSeverity := s.config.EventGeneration.SeverityRules.BaseSeverity[eventType] + if baseSeverity == 0 { + baseSeverity = s.config.DefaultEventSettings.DefaultSeverity + } + + severity := float64(baseSeverity) + + // Apply participant multiplier + participantBoost := float64(len(discussion.Participants)-1) * s.config.EventGeneration.SeverityRules.ParticipantMultiplier + severity += participantBoost + + // Apply duration multiplier + durationHours := discussion.EndTime.Sub(discussion.StartTime).Hours() + durationBoost := durationHours * s.config.EventGeneration.SeverityRules.DurationMultiplier + severity += durationBoost + + // Check for urgency keywords + allContent := strings.ToLower(s.generateEventContent(discussion)) + for _, keyword := range s.config.EventGeneration.SeverityRules.UrgencyKeywords { + if strings.Contains(allContent, strings.ToLower(keyword)) { + severity += float64(s.config.EventGeneration.SeverityRules.UrgencyBoost) + break // Only apply once + } + } + + // Apply bounds + finalSeverity := int(math.Round(severity)) + if finalSeverity < s.config.EventGeneration.SeverityRules.MinSeverity { + finalSeverity = s.config.EventGeneration.SeverityRules.MinSeverity + } + if finalSeverity > s.config.EventGeneration.SeverityRules.MaxSeverity { + finalSeverity = s.config.EventGeneration.SeverityRules.MaxSeverity + } + + return finalSeverity +} + +// generateEventContent creates human-readable content for the SLURP event +func (s *SlurpEventIntegrator) generateEventContent(discussion HmmmDiscussionContext) string { + if discussion.OutcomeType != "" { + return fmt.Sprintf("HMMM discussion reached consensus: %s (%d participants, %.1f%% agreement)", + discussion.OutcomeType, + len(discussion.Participants), + discussion.ConsensusStrength*100) + } + + return fmt.Sprintf("HMMM discussion completed with %d participants over %v", + len(discussion.Participants), + discussion.EndTime.Sub(discussion.StartTime).Round(time.Minute)) +} + +// addToBatch adds an event to the batch for later processing +func (s *SlurpEventIntegrator) addToBatch(event SlurpEvent) error { + s.batchMutex.Lock() + defer s.batchMutex.Unlock() + + s.eventBatch = append(s.eventBatch, event) + + // Check if batch is full + if len(s.eventBatch) >= s.config.BatchProcessing.MaxBatchSize { + return s.flushBatch() + } + + // Reset batch timer + if s.batchTimer != nil { + s.batchTimer.Stop() + } + s.batchTimer = time.AfterFunc(s.config.BatchProcessing.MaxBatchWait, func() { + s.batchMutex.Lock() + defer s.batchMutex.Unlock() + s.flushBatch() + }) + + fmt.Printf("πŸ“¦ Added event to batch (%d/%d)\n", len(s.eventBatch), s.config.BatchProcessing.MaxBatchSize) + return nil +} + +// flushBatch sends all batched events to SLURP +func (s *SlurpEventIntegrator) flushBatch() error { + if len(s.eventBatch) == 0 { + return nil + } + + events := make([]SlurpEvent, len(s.eventBatch)) + copy(events, s.eventBatch) + s.eventBatch = s.eventBatch[:0] // Clear batch + + if s.batchTimer != nil { + s.batchTimer.Stop() + s.batchTimer = nil + } + + fmt.Printf("πŸš€ Flushing batch of %d events to SLURP\n", len(events)) + + start := time.Now() + resp, err := s.client.CreateEventsBatch(s.ctx, events) + duration := time.Since(start) + + s.statsMutex.Lock() + s.stats.BatchesSent++ + s.stats.AverageResponseTime = (s.stats.AverageResponseTime + duration.Seconds()*1000) / 2 + + if err != nil { + s.stats.EventsFailed += int64(len(events)) + s.stats.LastFailureTime = time.Now() + s.stats.LastFailureError = err.Error() + s.statsMutex.Unlock() + + // Publish failure notification + s.publishSlurpEvent("slurp_batch_failed", map[string]interface{}{ + "error": err.Error(), + "event_count": len(events), + "batch_id": fmt.Sprintf("batch_%d", time.Now().Unix()), + }) + + return fmt.Errorf("failed to send batch: %w", err) + } + + s.stats.EventsSuccessful += int64(resp.ProcessedCount) + s.stats.EventsFailed += int64(resp.FailedCount) + s.stats.LastSuccessTime = time.Now() + s.statsMutex.Unlock() + + // Publish success notification + s.publishSlurpEvent("slurp_batch_success", map[string]interface{}{ + "processed_count": resp.ProcessedCount, + "failed_count": resp.FailedCount, + "event_ids": resp.EventIDs, + "batch_id": fmt.Sprintf("batch_%d", time.Now().Unix()), + }) + + fmt.Printf("βœ… Batch processed: %d succeeded, %d failed\n", resp.ProcessedCount, resp.FailedCount) + return nil +} + +// sendImmediateEvent sends a single event immediately to SLURP +func (s *SlurpEventIntegrator) sendImmediateEvent(ctx context.Context, event SlurpEvent, discussionID string) error { + start := time.Now() + resp, err := s.client.CreateEvent(ctx, event) + duration := time.Since(start) + + s.statsMutex.Lock() + s.stats.AverageResponseTime = (s.stats.AverageResponseTime + duration.Seconds()*1000) / 2 + + if err != nil { + s.stats.EventsFailed++ + s.stats.LastFailureTime = time.Now() + s.stats.LastFailureError = err.Error() + s.statsMutex.Unlock() + + // Publish failure notification + s.publishSlurpEvent("slurp_event_failed", map[string]interface{}{ + "discussion_id": discussionID, + "event_type": event.EventType, + "error": err.Error(), + }) + + return fmt.Errorf("failed to send event: %w", err) + } + + s.stats.EventsSuccessful++ + s.stats.LastSuccessTime = time.Now() + s.statsMutex.Unlock() + + // Publish success notification + s.publishSlurpEvent("slurp_event_success", map[string]interface{}{ + "discussion_id": discussionID, + "event_type": event.EventType, + "event_id": resp.EventID, + "severity": event.Severity, + }) + + fmt.Printf("βœ… SLURP event created: %s (ID: %s)\n", event.EventType, resp.EventID) + return nil +} + +// publishSlurpEvent publishes a SLURP integration event to the pubsub system +func (s *SlurpEventIntegrator) publishSlurpEvent(eventType string, data map[string]interface{}) { + var msgType pubsub.MessageType + switch eventType { + case "slurp_event_success", "slurp_batch_success": + msgType = pubsub.SlurpEventGenerated + case "slurp_event_failed", "slurp_batch_failed": + msgType = pubsub.SlurpEventAck + default: + msgType = pubsub.SlurpContextUpdate + } + + data["timestamp"] = time.Now() + data["integration_source"] = "hmmm-slurp-integrator" + + if err := s.pubsub.PublishHmmmMessage(msgType, data); err != nil { + fmt.Printf("❌ Failed to publish SLURP integration event: %v\n", err) + } +} + +// initBatchProcessing initializes batch processing components +func (s *SlurpEventIntegrator) initBatchProcessing() { + fmt.Printf("πŸ“¦ Batch processing enabled: max_size=%d, max_wait=%v\n", + s.config.BatchProcessing.MaxBatchSize, + s.config.BatchProcessing.MaxBatchWait) +} + +// GetStats returns current integration statistics +func (s *SlurpEventIntegrator) GetStats() SlurpIntegrationStats { + s.statsMutex.RLock() + defer s.statsMutex.RUnlock() + return s.stats +} + +// Close shuts down the integrator and flushes any pending events +func (s *SlurpEventIntegrator) Close() error { + s.cancel() + + // Flush any remaining batched events + if s.config.BatchProcessing.Enabled && s.config.BatchProcessing.FlushOnShutdown { + s.batchMutex.Lock() + if len(s.eventBatch) > 0 { + fmt.Printf("🧹 Flushing %d remaining events on shutdown\n", len(s.eventBatch)) + s.flushBatch() + } + s.batchMutex.Unlock() + } + + if s.batchTimer != nil { + s.batchTimer.Stop() + } + + return s.client.Close() +} \ No newline at end of file diff --git a/pkg/mcp/server.go b/pkg/mcp/server.go new file mode 100644 index 00000000..41e4a5b9 --- /dev/null +++ b/pkg/mcp/server.go @@ -0,0 +1,628 @@ +package mcp + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "sync" + "time" + + "github.com/anthonyrawlins/bzzz/logging" + "github.com/anthonyrawlins/bzzz/p2p" + "github.com/anthonyrawlins/bzzz/pubsub" + "github.com/gorilla/websocket" + "github.com/sashabaranov/go-openai" +) + +// McpServer integrates BZZZ P2P network with MCP protocol for GPT-4 agents +type McpServer struct { + // Core components + p2pNode *p2p.Node + pubsub *pubsub.PubSub + hlog *logging.HypercoreLog + openaiClient *openai.Client + + // Agent management + agents map[string]*GPTAgent + agentsMutex sync.RWMutex + + // Server configuration + httpServer *http.Server + wsUpgrader websocket.Upgrader + + // Context and lifecycle + ctx context.Context + cancel context.CancelFunc + + // Statistics and monitoring + stats *ServerStats +} + +// ServerStats tracks MCP server performance metrics +type ServerStats struct { + StartTime time.Time + TotalRequests int64 + ActiveAgents int + MessagesProcessed int64 + TokensConsumed int64 + AverageCostPerTask float64 + ErrorRate float64 + mutex sync.RWMutex +} + +// GPTAgent represents a GPT-4 agent integrated with BZZZ network +type GPTAgent struct { + ID string + Role AgentRole + Model string + SystemPrompt string + Capabilities []string + Specialization string + MaxTasks int + + // State management + Status AgentStatus + CurrentTasks map[string]*AgentTask + Memory *AgentMemory + + // Cost tracking + TokenUsage *TokenUsage + CostLimits *CostLimits + + // P2P Integration + NodeID string + LastAnnouncement time.Time + + // Conversation participation + ActiveThreads map[string]*ConversationThread + + mutex sync.RWMutex +} + +// AgentRole defines the role and responsibilities of an agent +type AgentRole string + +const ( + RoleArchitect AgentRole = "architect" + RoleReviewer AgentRole = "reviewer" + RoleDocumentation AgentRole = "documentation" + RoleDeveloper AgentRole = "developer" + RoleTester AgentRole = "tester" + RoleSecurityExpert AgentRole = "security_expert" + RoleDevOps AgentRole = "devops" +) + +// AgentStatus represents the current state of an agent +type AgentStatus string + +const ( + StatusIdle AgentStatus = "idle" + StatusActive AgentStatus = "active" + StatusCollaborating AgentStatus = "collaborating" + StatusEscalating AgentStatus = "escalating" + StatusTerminating AgentStatus = "terminating" +) + +// AgentTask represents a task being worked on by an agent +type AgentTask struct { + ID string + Title string + Repository string + Number int + StartTime time.Time + Status string + ThreadID string + Context map[string]interface{} +} + +// AgentMemory manages agent memory and learning +type AgentMemory struct { + WorkingMemory map[string]interface{} + EpisodicMemory []ConversationEpisode + SemanticMemory *KnowledgeGraph + ThreadMemories map[string]*ThreadMemory + mutex sync.RWMutex +} + +// ConversationEpisode represents a past interaction +type ConversationEpisode struct { + Timestamp time.Time + Participants []string + Topic string + Summary string + Outcome string + Lessons []string + TokensUsed int +} + +// ConversationThread represents an active conversation +type ConversationThread struct { + ID string + Topic string + Participants []AgentParticipant + Messages []ThreadMessage + State ThreadState + SharedContext map[string]interface{} + DecisionLog []Decision + CreatedAt time.Time + LastActivity time.Time + mutex sync.RWMutex +} + +// AgentParticipant represents an agent participating in a conversation +type AgentParticipant struct { + AgentID string + Role AgentRole + Status ParticipantStatus +} + +// ParticipantStatus represents the status of a participant in a conversation +type ParticipantStatus string + +const ( + ParticipantStatusInvited ParticipantStatus = "invited" + ParticipantStatusActive ParticipantStatus = "active" + ParticipantStatusIdle ParticipantStatus = "idle" + ParticipantStatusLeft ParticipantStatus = "left" +) + +// ThreadMessage represents a message in a conversation thread +type ThreadMessage struct { + ID string + From string + Role AgentRole + Content string + MessageType pubsub.MessageType + Timestamp time.Time + ReplyTo string + TokenCount int + Model string +} + +// ThreadState represents the state of a conversation thread +type ThreadState string + +const ( + ThreadStateActive ThreadState = "active" + ThreadStateCompleted ThreadState = "completed" + ThreadStateEscalated ThreadState = "escalated" + ThreadStateClosed ThreadState = "closed" +) + +// Decision represents a decision made in a conversation +type Decision struct { + ID string + Description string + DecidedBy []string + Timestamp time.Time + Rationale string + Confidence float64 +} + +// NewMcpServer creates a new MCP server instance +func NewMcpServer( + ctx context.Context, + node *p2p.Node, + ps *pubsub.PubSub, + hlog *logging.HypercoreLog, + openaiAPIKey string, +) *McpServer { + serverCtx, cancel := context.WithCancel(ctx) + + server := &McpServer{ + p2pNode: node, + pubsub: ps, + hlog: hlog, + openaiClient: openai.NewClient(openaiAPIKey), + agents: make(map[string]*GPTAgent), + ctx: serverCtx, + cancel: cancel, + wsUpgrader: websocket.Upgrader{ + CheckOrigin: func(r *http.Request) bool { return true }, + }, + stats: &ServerStats{ + StartTime: time.Now(), + }, + } + + return server +} + +// Start initializes and starts the MCP server +func (s *McpServer) Start(port int) error { + // Set up HTTP handlers + mux := http.NewServeMux() + + // MCP WebSocket endpoint + mux.HandleFunc("/mcp", s.handleMCPWebSocket) + + // REST API endpoints + mux.HandleFunc("/api/agents", s.handleAgentsAPI) + mux.HandleFunc("/api/conversations", s.handleConversationsAPI) + mux.HandleFunc("/api/stats", s.handleStatsAPI) + mux.HandleFunc("/health", s.handleHealthCheck) + + // Start HTTP server + s.httpServer = &http.Server{ + Addr: fmt.Sprintf(":%d", port), + Handler: mux, + } + + go func() { + if err := s.httpServer.ListenAndServe(); err != nil && err != http.ErrServerClosed { + fmt.Printf("❌ MCP HTTP server error: %v\n", err) + } + }() + + // Start message handlers + go s.handleBzzzMessages() + go s.handleHmmmMessages() + + // Start periodic tasks + go s.periodicTasks() + + fmt.Printf("πŸš€ MCP Server started on port %d\n", port) + return nil +} + +// Stop gracefully shuts down the MCP server +func (s *McpServer) Stop() error { + s.cancel() + + // Stop all agents + s.agentsMutex.Lock() + for _, agent := range s.agents { + s.stopAgent(agent) + } + s.agentsMutex.Unlock() + + // Stop HTTP server + if s.httpServer != nil { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + return s.httpServer.Shutdown(ctx) + } + + return nil +} + +// CreateGPTAgent creates a new GPT-4 agent +func (s *McpServer) CreateGPTAgent(config *AgentConfig) (*GPTAgent, error) { + agent := &GPTAgent{ + ID: config.ID, + Role: config.Role, + Model: config.Model, + SystemPrompt: config.SystemPrompt, + Capabilities: config.Capabilities, + Specialization: config.Specialization, + MaxTasks: config.MaxTasks, + Status: StatusIdle, + CurrentTasks: make(map[string]*AgentTask), + Memory: NewAgentMemory(), + TokenUsage: NewTokenUsage(), + CostLimits: config.CostLimits, + NodeID: s.p2pNode.ID().ShortString(), + ActiveThreads: make(map[string]*ConversationThread), + } + + s.agentsMutex.Lock() + s.agents[agent.ID] = agent + s.agentsMutex.Unlock() + + // Announce agent to BZZZ network + if err := s.announceAgent(agent); err != nil { + return nil, fmt.Errorf("failed to announce agent: %w", err) + } + + s.hlog.Append(logging.PeerJoined, map[string]interface{}{ + "agent_id": agent.ID, + "role": string(agent.Role), + "capabilities": agent.Capabilities, + "specialization": agent.Specialization, + }) + + fmt.Printf("βœ… Created GPT-4 agent: %s (%s)\n", agent.ID, agent.Role) + return agent, nil +} + +// ProcessCollaborativeTask handles a task that requires multi-agent collaboration +func (s *McpServer) ProcessCollaborativeTask( + task *AgentTask, + requiredRoles []AgentRole, +) (*ConversationThread, error) { + + // Create conversation thread + thread := &ConversationThread{ + ID: fmt.Sprintf("task-%s-%d", task.Repository, task.Number), + Topic: fmt.Sprintf("Collaborative Task: %s", task.Title), + State: ThreadStateActive, + SharedContext: map[string]interface{}{ + "task": task, + "required_roles": requiredRoles, + }, + CreatedAt: time.Now(), + LastActivity: time.Now(), + } + + // Find and invite agents + for _, role := range requiredRoles { + agents := s.findAgentsByRole(role) + if len(agents) == 0 { + return nil, fmt.Errorf("no available agents for role: %s", role) + } + + // Select best agent for this role + selectedAgent := s.selectBestAgent(agents, task) + + thread.Participants = append(thread.Participants, AgentParticipant{ + AgentID: selectedAgent.ID, + Role: role, + Status: ParticipantStatusInvited, + }) + + // Add thread to agent + selectedAgent.mutex.Lock() + selectedAgent.ActiveThreads[thread.ID] = thread + selectedAgent.mutex.Unlock() + } + + // Send initial collaboration request + if err := s.initiateCollaboration(thread); err != nil { + return nil, fmt.Errorf("failed to initiate collaboration: %w", err) + } + + return thread, nil +} + +// handleMCPWebSocket handles WebSocket connections for MCP protocol +func (s *McpServer) handleMCPWebSocket(w http.ResponseWriter, r *http.Request) { + conn, err := s.wsUpgrader.Upgrade(w, r, nil) + if err != nil { + fmt.Printf("❌ WebSocket upgrade failed: %v\n", err) + return + } + defer conn.Close() + + fmt.Printf("πŸ“‘ MCP WebSocket connection established\n") + + // Handle MCP protocol messages + for { + var message map[string]interface{} + if err := conn.ReadJSON(&message); err != nil { + if websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseAbnormalClosure) { + fmt.Printf("❌ WebSocket error: %v\n", err) + } + break + } + + // Process MCP message + response, err := s.processMCPMessage(message) + if err != nil { + fmt.Printf("❌ MCP message processing error: %v\n", err) + response = map[string]interface{}{ + "error": err.Error(), + } + } + + if err := conn.WriteJSON(response); err != nil { + fmt.Printf("❌ WebSocket write error: %v\n", err) + break + } + } +} + +// processMCPMessage processes incoming MCP protocol messages +func (s *McpServer) processMCPMessage(message map[string]interface{}) (map[string]interface{}, error) { + method, ok := message["method"].(string) + if !ok { + return nil, fmt.Errorf("missing or invalid method") + } + + params, _ := message["params"].(map[string]interface{}) + + switch method { + case "tools/list": + return s.listTools(), nil + case "tools/call": + return s.callTool(params) + case "resources/list": + return s.listResources(), nil + case "resources/read": + return s.readResource(params) + default: + return nil, fmt.Errorf("unknown method: %s", method) + } +} + +// callTool handles tool execution requests +func (s *McpServer) callTool(params map[string]interface{}) (map[string]interface{}, error) { + toolName, ok := params["name"].(string) + if !ok { + return nil, fmt.Errorf("missing tool name") + } + + args, _ := params["arguments"].(map[string]interface{}) + + switch toolName { + case "bzzz_announce": + return s.handleBzzzAnnounce(args) + case "bzzz_lookup": + return s.handleBzzzLookup(args) + case "bzzz_get": + return s.handleBzzzGet(args) + case "bzzz_post": + return s.handleBzzzPost(args) + case "bzzz_thread": + return s.handleBzzzThread(args) + case "bzzz_subscribe": + return s.handleBzzzSubscribe(args) + default: + return nil, fmt.Errorf("unknown tool: %s", toolName) + } +} + +// handleBzzzAnnounce implements the bzzz_announce tool +func (s *McpServer) handleBzzzAnnounce(args map[string]interface{}) (map[string]interface{}, error) { + agentID, ok := args["agent_id"].(string) + if !ok { + return nil, fmt.Errorf("agent_id is required") + } + + role, ok := args["role"].(string) + if !ok { + return nil, fmt.Errorf("role is required") + } + + // Create announcement message + announcement := map[string]interface{}{ + "agent_id": agentID, + "role": role, + "capabilities": args["capabilities"], + "specialization": args["specialization"], + "max_tasks": args["max_tasks"], + "announced_at": time.Now(), + "node_id": s.p2pNode.ID().ShortString(), + } + + // Publish to BZZZ network + if err := s.pubsub.PublishBzzzMessage(pubsub.CapabilityBcast, announcement); err != nil { + return nil, fmt.Errorf("failed to announce: %w", err) + } + + return map[string]interface{}{ + "success": true, + "message": fmt.Sprintf("Agent %s (%s) announced to network", agentID, role), + }, nil +} + +// Additional tool handlers would be implemented here... + +// Helper methods + +// announceAgent announces an agent to the BZZZ network +func (s *McpServer) announceAgent(agent *GPTAgent) error { + announcement := map[string]interface{}{ + "type": "gpt_agent_announcement", + "agent_id": agent.ID, + "role": string(agent.Role), + "capabilities": agent.Capabilities, + "specialization": agent.Specialization, + "max_tasks": agent.MaxTasks, + "model": agent.Model, + "node_id": agent.NodeID, + "timestamp": time.Now(), + } + + return s.pubsub.PublishBzzzMessage(pubsub.CapabilityBcast, announcement) +} + +// findAgentsByRole finds all agents with a specific role +func (s *McpServer) findAgentsByRole(role AgentRole) []*GPTAgent { + s.agentsMutex.RLock() + defer s.agentsMutex.RUnlock() + + var agents []*GPTAgent + for _, agent := range s.agents { + if agent.Role == role && agent.Status == StatusIdle { + agents = append(agents, agent) + } + } + + return agents +} + +// selectBestAgent selects the best agent for a task +func (s *McpServer) selectBestAgent(agents []*GPTAgent, task *AgentTask) *GPTAgent { + if len(agents) == 0 { + return nil + } + + // Simple selection: least busy agent + bestAgent := agents[0] + for _, agent := range agents[1:] { + if len(agent.CurrentTasks) < len(bestAgent.CurrentTasks) { + bestAgent = agent + } + } + + return bestAgent +} + +// Additional helper methods would be implemented here... + +// AgentConfig holds configuration for creating a new agent +type AgentConfig struct { + ID string + Role AgentRole + Model string + SystemPrompt string + Capabilities []string + Specialization string + MaxTasks int + CostLimits *CostLimits +} + +// CostLimits defines spending limits for an agent +type CostLimits struct { + DailyLimit float64 + MonthlyLimit float64 + PerTaskLimit float64 +} + +// TokenUsage tracks token consumption +type TokenUsage struct { + TotalTokens int64 + PromptTokens int64 + CompletionTokens int64 + TotalCost float64 + mutex sync.RWMutex +} + +// NewTokenUsage creates a new token usage tracker +func NewTokenUsage() *TokenUsage { + return &TokenUsage{} +} + +// NewAgentMemory creates a new agent memory instance +func NewAgentMemory() *AgentMemory { + return &AgentMemory{ + WorkingMemory: make(map[string]interface{}), + EpisodicMemory: make([]ConversationEpisode, 0), + ThreadMemories: make(map[string]*ThreadMemory), + } +} + +// ThreadMemory represents memory for a specific conversation thread +type ThreadMemory struct { + ThreadID string + Summary string + KeyPoints []string + Decisions []Decision + LastUpdated time.Time +} + +// KnowledgeGraph represents semantic knowledge +type KnowledgeGraph struct { + Concepts map[string]*Concept + Relations map[string]*Relation + mutex sync.RWMutex +} + +// Concept represents a knowledge concept +type Concept struct { + ID string + Name string + Description string + Category string + Confidence float64 +} + +// Relation represents a relationship between concepts +type Relation struct { + From string + To string + Type string + Strength float64 + Evidence []string +} \ No newline at end of file diff --git a/pubsub/pubsub.go b/pubsub/pubsub.go index 41881088..b99b275e 100644 --- a/pubsub/pubsub.go +++ b/pubsub/pubsub.go @@ -13,7 +13,7 @@ import ( pubsub "github.com/libp2p/go-libp2p-pubsub" ) -// PubSub handles publish/subscribe messaging for Bzzz coordination and Antennae meta-discussion +// PubSub handles publish/subscribe messaging for Bzzz coordination and HMMM meta-discussion type PubSub struct { ps *pubsub.PubSub host host.Host @@ -22,12 +22,12 @@ type PubSub struct { // Topic subscriptions bzzzTopic *pubsub.Topic - antennaeTopic *pubsub.Topic + hmmmTopic *pubsub.Topic contextTopic *pubsub.Topic // Message subscriptions bzzzSub *pubsub.Subscription - antennaeSub *pubsub.Subscription + hmmmSub *pubsub.Subscription contextSub *pubsub.Subscription // Dynamic topic management @@ -38,11 +38,11 @@ type PubSub struct { // Configuration bzzzTopicName string - antennaeTopicName string + hmmmTopicName string contextTopicName string - // External message handler for Antennae messages - AntennaeMessageHandler func(msg Message, from peer.ID) + // External message handler for HMMM messages + HmmmMessageHandler func(msg Message, from peer.ID) // External message handler for Context Feedback messages ContextFeedbackHandler func(msg Message, from peer.ID) @@ -69,7 +69,7 @@ const ( CapabilityBcast MessageType = "capability_broadcast" // Only broadcast when capabilities change AvailabilityBcast MessageType = "availability_broadcast" // Regular availability status - // Antennae meta-discussion messages + // HMMM meta-discussion messages MetaDiscussion MessageType = "meta_discussion" // Generic type for all discussion TaskHelpRequest MessageType = "task_help_request" // Request for assistance TaskHelpResponse MessageType = "task_help_response" // Response to a help request @@ -96,6 +96,11 @@ const ( ContextResponse MessageType = "context_response" // Response with context data ContextUsage MessageType = "context_usage" // Report context usage patterns ContextRelevance MessageType = "context_relevance" // Report context relevance scoring + + // SLURP event integration messages + SlurpEventGenerated MessageType = "slurp_event_generated" // HMMM consensus generated SLURP event + SlurpEventAck MessageType = "slurp_event_ack" // Acknowledgment of SLURP event receipt + SlurpContextUpdate MessageType = "slurp_context_update" // Context update from SLURP system ) // Message represents a Bzzz/Antennae message @@ -115,18 +120,18 @@ type Message struct { ThreadID string `json:"thread_id,omitempty"` // Conversation thread ID } -// NewPubSub creates a new PubSub instance for Bzzz coordination and Antennae meta-discussion -func NewPubSub(ctx context.Context, h host.Host, bzzzTopic, antennaeTopic string) (*PubSub, error) { - return NewPubSubWithLogger(ctx, h, bzzzTopic, antennaeTopic, nil) +// NewPubSub creates a new PubSub instance for Bzzz coordination and HMMM meta-discussion +func NewPubSub(ctx context.Context, h host.Host, bzzzTopic, hmmmTopic string) (*PubSub, error) { + return NewPubSubWithLogger(ctx, h, bzzzTopic, hmmmTopic, nil) } // NewPubSubWithLogger creates a new PubSub instance with hypercore logging -func NewPubSubWithLogger(ctx context.Context, h host.Host, bzzzTopic, antennaeTopic string, logger HypercoreLogger) (*PubSub, error) { +func NewPubSubWithLogger(ctx context.Context, h host.Host, bzzzTopic, hmmmTopic string, logger HypercoreLogger) (*PubSub, error) { if bzzzTopic == "" { bzzzTopic = "bzzz/coordination/v1" } - if antennaeTopic == "" { - antennaeTopic = "antennae/meta-discussion/v1" + if hmmmTopic == "" { + hmmmTopic = "hmmm/meta-discussion/v1" } contextTopic := "bzzz/context-feedback/v1" @@ -149,9 +154,9 @@ func NewPubSubWithLogger(ctx context.Context, h host.Host, bzzzTopic, antennaeTo host: h, ctx: pubsubCtx, cancel: cancel, - bzzzTopicName: bzzzTopic, - antennaeTopicName: antennaeTopic, - contextTopicName: contextTopic, + bzzzTopicName: bzzzTopic, + hmmmTopicName: hmmmTopic, + contextTopicName: contextTopic, dynamicTopics: make(map[string]*pubsub.Topic), dynamicSubs: make(map[string]*pubsub.Subscription), hypercoreLog: logger, @@ -165,16 +170,16 @@ func NewPubSubWithLogger(ctx context.Context, h host.Host, bzzzTopic, antennaeTo // Start message handlers go p.handleBzzzMessages() - go p.handleAntennaeMessages() + go p.handleHmmmMessages() go p.handleContextFeedbackMessages() - fmt.Printf("πŸ“‘ PubSub initialized - Bzzz: %s, Antennae: %s, Context: %s\n", bzzzTopic, antennaeTopic, contextTopic) + fmt.Printf("πŸ“‘ PubSub initialized - Bzzz: %s, HMMM: %s, Context: %s\n", bzzzTopic, hmmmTopic, contextTopic) return p, nil } -// SetAntennaeMessageHandler sets the handler for incoming Antennae messages. -func (p *PubSub) SetAntennaeMessageHandler(handler func(msg Message, from peer.ID)) { - p.AntennaeMessageHandler = handler +// SetHmmmMessageHandler sets the handler for incoming HMMM messages. +func (p *PubSub) SetHmmmMessageHandler(handler func(msg Message, from peer.ID)) { + p.HmmmMessageHandler = handler } // SetContextFeedbackHandler sets the handler for incoming context feedback messages. @@ -182,7 +187,7 @@ func (p *PubSub) SetContextFeedbackHandler(handler func(msg Message, from peer.I p.ContextFeedbackHandler = handler } -// joinStaticTopics joins the main Bzzz, Antennae, and Context Feedback topics +// joinStaticTopics joins the main Bzzz, HMMM, and Context Feedback topics func (p *PubSub) joinStaticTopics() error { // Join Bzzz coordination topic bzzzTopic, err := p.ps.Join(p.bzzzTopicName) @@ -197,18 +202,18 @@ func (p *PubSub) joinStaticTopics() error { } p.bzzzSub = bzzzSub - // Join Antennae meta-discussion topic - antennaeTopic, err := p.ps.Join(p.antennaeTopicName) + // Join HMMM meta-discussion topic + hmmmTopic, err := p.ps.Join(p.hmmmTopicName) if err != nil { - return fmt.Errorf("failed to join Antennae topic: %w", err) + return fmt.Errorf("failed to join HMMM topic: %w", err) } - p.antennaeTopic = antennaeTopic + p.hmmmTopic = hmmmTopic - antennaeSub, err := antennaeTopic.Subscribe() + hmmmSub, err := hmmmTopic.Subscribe() if err != nil { - return fmt.Errorf("failed to subscribe to Antennae topic: %w", err) + return fmt.Errorf("failed to subscribe to HMMM topic: %w", err) } - p.antennaeSub = antennaeSub + p.hmmmSub = hmmmSub // Join Context Feedback topic contextTopic, err := p.ps.Join(p.contextTopicName) @@ -364,8 +369,8 @@ func (p *PubSub) PublishBzzzMessage(msgType MessageType, data map[string]interfa return p.bzzzTopic.Publish(p.ctx, msgBytes) } -// PublishAntennaeMessage publishes a message to the Antennae meta-discussion topic -func (p *PubSub) PublishAntennaeMessage(msgType MessageType, data map[string]interface{}) error { +// PublishHmmmMessage publishes a message to the HMMM meta-discussion topic +func (p *PubSub) PublishHmmmMessage(msgType MessageType, data map[string]interface{}) error { msg := Message{ Type: msgType, From: p.host.ID().String(), @@ -378,7 +383,19 @@ func (p *PubSub) PublishAntennaeMessage(msgType MessageType, data map[string]int return fmt.Errorf("failed to marshal message: %w", err) } - return p.antennaeTopic.Publish(p.ctx, msgBytes) + return p.hmmmTopic.Publish(p.ctx, msgBytes) +} + +// PublishAntennaeMessage is a compatibility alias for PublishHmmmMessage +// Deprecated: Use PublishHmmmMessage instead +func (p *PubSub) PublishAntennaeMessage(msgType MessageType, data map[string]interface{}) error { + return p.PublishHmmmMessage(msgType, data) +} + +// SetAntennaeMessageHandler is a compatibility alias for SetHmmmMessageHandler +// Deprecated: Use SetHmmmMessageHandler instead +func (p *PubSub) SetAntennaeMessageHandler(handler func(msg Message, from peer.ID)) { + p.SetHmmmMessageHandler(handler) } // PublishContextFeedbackMessage publishes a message to the Context Feedback topic @@ -424,7 +441,7 @@ func (p *PubSub) PublishRoleBasedMessage(msgType MessageType, data map[string]in case RoleAnnouncement, ExpertiseRequest, ExpertiseResponse, StatusUpdate, WorkAllocation, RoleCollaboration, MentorshipRequest, MentorshipResponse, ProjectUpdate, DeliverableReady: - topic = p.antennaeTopic // Use Antennae topic for role-based messages + topic = p.hmmmTopic // Use HMMM topic for role-based messages default: topic = p.bzzzTopic // Default to Bzzz topic } @@ -432,6 +449,35 @@ func (p *PubSub) PublishRoleBasedMessage(msgType MessageType, data map[string]in return topic.Publish(p.ctx, msgBytes) } +// PublishSlurpEventGenerated publishes a SLURP event generation notification +func (p *PubSub) PublishSlurpEventGenerated(data map[string]interface{}) error { + return p.PublishHmmmMessage(SlurpEventGenerated, data) +} + +// PublishSlurpEventAck publishes a SLURP event acknowledgment +func (p *PubSub) PublishSlurpEventAck(data map[string]interface{}) error { + return p.PublishHmmmMessage(SlurpEventAck, data) +} + +// PublishSlurpContextUpdate publishes a SLURP context update notification +func (p *PubSub) PublishSlurpContextUpdate(data map[string]interface{}) error { + return p.PublishHmmmMessage(SlurpContextUpdate, data) +} + +// PublishSlurpIntegrationEvent publishes a generic SLURP integration event +func (p *PubSub) PublishSlurpIntegrationEvent(eventType string, discussionID string, slurpEvent map[string]interface{}) error { + data := map[string]interface{}{ + "event_type": eventType, + "discussion_id": discussionID, + "slurp_event": slurpEvent, + "timestamp": time.Now(), + "source": "hmmm-slurp-integration", + "peer_id": p.host.ID().String(), + } + + return p.PublishSlurpEventGenerated(data) +} + // GetHypercoreLog returns the hypercore logger for external access func (p *PubSub) GetHypercoreLog() HypercoreLogger { return p.hypercoreLog @@ -473,15 +519,15 @@ func (p *PubSub) handleBzzzMessages() { } } -// handleAntennaeMessages processes incoming Antennae meta-discussion messages -func (p *PubSub) handleAntennaeMessages() { +// handleHmmmMessages processes incoming HMMM meta-discussion messages +func (p *PubSub) handleHmmmMessages() { for { - msg, err := p.antennaeSub.Next(p.ctx) + msg, err := p.hmmmSub.Next(p.ctx) if err != nil { if p.ctx.Err() != nil { return // Context cancelled } - fmt.Printf("❌ Error receiving Antennae message: %v\n", err) + fmt.Printf("❌ Error receiving HMMM message: %v\n", err) continue } @@ -489,16 +535,16 @@ func (p *PubSub) handleAntennaeMessages() { continue } - var antennaeMsg Message - if err := json.Unmarshal(msg.Data, &antennaeMsg); err != nil { - fmt.Printf("❌ Failed to unmarshal Antennae message: %v\n", err) + var hmmmMsg Message + if err := json.Unmarshal(msg.Data, &hmmmMsg); err != nil { + fmt.Printf("❌ Failed to unmarshal HMMM message: %v\n", err) continue } - if p.AntennaeMessageHandler != nil { - p.AntennaeMessageHandler(antennaeMsg, msg.ReceivedFrom) + if p.HmmmMessageHandler != nil { + p.HmmmMessageHandler(hmmmMsg, msg.ReceivedFrom) } else { - p.processAntennaeMessage(antennaeMsg, msg.ReceivedFrom) + p.processHmmmMessage(hmmmMsg, msg.ReceivedFrom) } } } @@ -555,9 +601,9 @@ func (p *PubSub) handleDynamicMessages(sub *pubsub.Subscription) { continue } - // Use the main Antennae handler for all dynamic messages - if p.AntennaeMessageHandler != nil { - p.AntennaeMessageHandler(dynamicMsg, msg.ReceivedFrom) + // Use the main HMMM handler for all dynamic messages + if p.HmmmMessageHandler != nil { + p.HmmmMessageHandler(dynamicMsg, msg.ReceivedFrom) } } } @@ -602,9 +648,9 @@ func (p *PubSub) processBzzzMessage(msg Message, from peer.ID) { } } -// processAntennaeMessage provides default handling for Antennae messages if no external handler is set -func (p *PubSub) processAntennaeMessage(msg Message, from peer.ID) { - fmt.Printf("🎯 Default Antennae Handler [%s] from %s: %v\n", +// processHmmmMessage provides default handling for HMMM messages if no external handler is set +func (p *PubSub) processHmmmMessage(msg Message, from peer.ID) { + fmt.Printf("🎯 Default HMMM Handler [%s] from %s: %v\n", msg.Type, from.ShortString(), msg.Data) // Log to hypercore if logger is available @@ -615,7 +661,7 @@ func (p *PubSub) processAntennaeMessage(msg Message, from peer.ID) { "from_short": from.ShortString(), "timestamp": msg.Timestamp, "data": msg.Data, - "topic": "antennae", + "topic": "hmmm", "from_role": msg.FromRole, "to_roles": msg.ToRoles, "required_expertise": msg.RequiredExpertise, @@ -648,7 +694,7 @@ func (p *PubSub) processAntennaeMessage(msg Message, from peer.ID) { } if err := p.hypercoreLog.AppendString(logType, logData); err != nil { - fmt.Printf("❌ Failed to log Antennae message to hypercore: %v\n", err) + fmt.Printf("❌ Failed to log HMMM message to hypercore: %v\n", err) } } } @@ -700,8 +746,8 @@ func (p *PubSub) Close() error { if p.bzzzSub != nil { p.bzzzSub.Cancel() } - if p.antennaeSub != nil { - p.antennaeSub.Cancel() + if p.hmmmSub != nil { + p.hmmmSub.Cancel() } if p.contextSub != nil { p.contextSub.Cancel() @@ -710,8 +756,8 @@ func (p *PubSub) Close() error { if p.bzzzTopic != nil { p.bzzzTopic.Close() } - if p.antennaeTopic != nil { - p.antennaeTopic.Close() + if p.hmmmTopic != nil { + p.hmmmTopic.Close() } if p.contextTopic != nil { p.contextTopic.Close() diff --git a/scripts/test_antennae_monitoring.sh b/scripts/test_hmmm_monitoring.sh similarity index 94% rename from scripts/test_antennae_monitoring.sh rename to scripts/test_hmmm_monitoring.sh index f5c85687..442fc7ea 100755 --- a/scripts/test_antennae_monitoring.sh +++ b/scripts/test_hmmm_monitoring.sh @@ -1,15 +1,15 @@ #!/bin/bash -# Test script to monitor antennae coordination activity +# Test script to monitor HMMM coordination activity # This script monitors the existing bzzz service logs for coordination patterns LOG_DIR="/tmp/bzzz_logs" -MONITOR_LOG="$LOG_DIR/antennae_monitor_$(date +%Y%m%d_%H%M%S).log" +MONITOR_LOG="$LOG_DIR/hmmm_monitor_$(date +%Y%m%d_%H%M%S).log" # Create log directory mkdir -p "$LOG_DIR" -echo "πŸ”¬ Starting Bzzz Antennae Monitoring Test" +echo "πŸ”¬ Starting Bzzz HMMM Monitoring Test" echo "========================================" echo "Monitor Log: $MONITOR_LOG" echo "" @@ -41,8 +41,8 @@ analyze_coordination_patterns() { local task_activity=$(journalctl -u bzzz.service --since "5 minutes ago" | grep -i "task\|github\|repository" | wc -l) log_event "TASK_ACTIVITY" "Task-related log entries: $task_activity" - # Look for coordination messages (antennae activity) - local coordination_msgs=$(journalctl -u bzzz.service --since "5 minutes ago" | grep -i "antennae\|coordination\|meta" | wc -l) + # Look for coordination messages (HMMM activity) + local coordination_msgs=$(journalctl -u bzzz.service --since "5 minutes ago" | grep -i "hmmm\|coordination\|meta" | wc -l) log_event "COORDINATION" "Coordination-related messages: $coordination_msgs" # Check for error patterns @@ -149,7 +149,7 @@ EOF # Main test execution main() { - echo "Starting antennae coordination monitoring test..." + echo "Starting HMMM coordination monitoring test..." echo "" # Initial analysis of current activity @@ -178,7 +178,7 @@ main() { # Wait for live monitoring to finish wait $MONITOR_PID 2>/dev/null || true - echo "πŸ“Š ANTENNAE MONITORING TEST COMPLETE" + echo "πŸ“Š HMMM MONITORING TEST COMPLETE" echo "====================================" echo "Results saved to: $LOG_DIR/" echo "Monitor Log: $MONITOR_LOG" diff --git a/test/antennae_test.go b/test/hmmm_test.go similarity index 90% rename from test/antennae_test.go rename to test/hmmm_test.go index 13da62e6..0c0d749a 100644 --- a/test/antennae_test.go +++ b/test/hmmm_test.go @@ -9,8 +9,8 @@ import ( "github.com/anthonyrawlins/bzzz/pkg/coordination" ) -// AntennaeTestSuite runs comprehensive tests for the antennae coordination system -type AntennaeTestSuite struct { +// HmmmTestSuite runs comprehensive tests for the HMMM coordination system +type HmmmTestSuite struct { ctx context.Context pubsub *pubsub.PubSub simulator *TaskSimulator @@ -41,15 +41,15 @@ type TestMetrics struct { SuccessfulCoordinations int `json:"successful_coordinations"` } -// NewAntennaeTestSuite creates a new test suite -func NewAntennaeTestSuite(ctx context.Context, ps *pubsub.PubSub) *AntennaeTestSuite { +// NewHmmmTestSuite creates a new test suite +func NewHmmmTestSuite(ctx context.Context, ps *pubsub.PubSub) *HmmmTestSuite { simulator := NewTaskSimulator(ps, ctx) // Initialize coordination components coordinator := coordination.NewMetaCoordinator(ctx, ps) detector := coordination.NewDependencyDetector() - return &AntennaeTestSuite{ + return &HmmmTestSuite{ ctx: ctx, pubsub: ps, simulator: simulator, @@ -59,9 +59,9 @@ func NewAntennaeTestSuite(ctx context.Context, ps *pubsub.PubSub) *AntennaeTestS } } -// RunFullTestSuite executes all antennae coordination tests -func (ats *AntennaeTestSuite) RunFullTestSuite() { - fmt.Println("πŸ§ͺ Starting Antennae Coordination Test Suite") +// RunFullTestSuite executes all HMMM coordination tests +func (ats *HmmmTestSuite) RunFullTestSuite() { + fmt.Println("πŸ§ͺ Starting HMMM Coordination Test Suite") fmt.Println("=" * 50) // Start the task simulator @@ -88,7 +88,7 @@ func (ats *AntennaeTestSuite) RunFullTestSuite() { } // testBasicTaskAnnouncement tests basic task announcement and response -func (ats *AntennaeTestSuite) testBasicTaskAnnouncement() { +func (ats *HmmmTestSuite) testBasicTaskAnnouncement() { testName := "Basic Task Announcement" fmt.Printf(" πŸ“‹ %s\n", testName) @@ -133,7 +133,7 @@ func (ats *AntennaeTestSuite) testBasicTaskAnnouncement() { } // testDependencyDetection tests cross-repository dependency detection -func (ats *AntennaeTestSuite) testDependencyDetection() { +func (ats *HmmmTestSuite) testDependencyDetection() { testName := "Dependency Detection" fmt.Printf(" πŸ”— %s\n", testName) @@ -172,7 +172,7 @@ func (ats *AntennaeTestSuite) testDependencyDetection() { } // testCrossRepositoryCoordination tests coordination across multiple repositories -func (ats *AntennaeTestSuite) testCrossRepositoryCoordination() { +func (ats *HmmmTestSuite) testCrossRepositoryCoordination() { testName := "Cross-Repository Coordination" fmt.Printf(" 🌐 %s\n", testName) @@ -221,7 +221,7 @@ func (ats *AntennaeTestSuite) testCrossRepositoryCoordination() { } // testConflictResolution tests handling of conflicting task assignments -func (ats *AntennaeTestSuite) testConflictResolution() { +func (ats *HmmmTestSuite) testConflictResolution() { testName := "Conflict Resolution" fmt.Printf(" βš”οΈ %s\n", testName) @@ -266,7 +266,7 @@ func (ats *AntennaeTestSuite) testConflictResolution() { } // testEscalationScenarios tests human escalation triggers -func (ats *AntennaeTestSuite) testEscalationScenarios() { +func (ats *HmmmTestSuite) testEscalationScenarios() { testName := "Escalation Scenarios" fmt.Printf(" 🚨 %s\n", testName) @@ -303,7 +303,7 @@ func (ats *AntennaeTestSuite) testEscalationScenarios() { } // testLoadHandling tests system behavior under load -func (ats *AntennaeTestSuite) testLoadHandling() { +func (ats *HmmmTestSuite) testLoadHandling() { testName := "Load Handling" fmt.Printf(" πŸ“ˆ %s\n", testName) @@ -341,7 +341,7 @@ func (ats *AntennaeTestSuite) testLoadHandling() { } // logTestResult logs the result of a test -func (ats *AntennaeTestSuite) logTestResult(result TestResult) { +func (ats *HmmmTestSuite) logTestResult(result TestResult) { status := "❌ FAILED" if result.Success { status = "βœ… PASSED" @@ -360,9 +360,9 @@ func (ats *AntennaeTestSuite) logTestResult(result TestResult) { } // printTestSummary prints a summary of all test results -func (ats *AntennaeTestSuite) printTestSummary() { +func (ats *HmmmTestSuite) printTestSummary() { fmt.Println("\n" + "=" * 50) - fmt.Println("πŸ§ͺ Antennae Test Suite Summary") + fmt.Println("πŸ§ͺ HMMM Test Suite Summary") fmt.Println("=" * 50) passed := 0 @@ -412,7 +412,7 @@ func (ats *AntennaeTestSuite) printTestSummary() { } // GetTestResults returns all test results -func (ats *AntennaeTestSuite) GetTestResults() []TestResult { +func (ats *HmmmTestSuite) GetTestResults() []TestResult { return ats.testResults } @@ -421,4 +421,14 @@ func max(a, b int) int { return a } return b +} + +// Compatibility aliases for the old Antennae naming +// Deprecated: Use HmmmTestSuite instead +type AntennaeTestSuite = HmmmTestSuite + +// NewAntennaeTestSuite is a compatibility alias for NewHmmmTestSuite +// Deprecated: Use NewHmmmTestSuite instead +func NewAntennaeTestSuite(ctx context.Context, ps *pubsub.PubSub) *HmmmTestSuite { + return NewHmmmTestSuite(ctx, ps) } \ No newline at end of file diff --git a/test/task_simulator.go b/test/task_simulator.go index a27f48c3..3dd6dc67 100644 --- a/test/task_simulator.go +++ b/test/task_simulator.go @@ -10,7 +10,7 @@ import ( "github.com/anthonyrawlins/bzzz/pubsub" ) -// TaskSimulator generates realistic task scenarios for testing antennae coordination +// TaskSimulator generates realistic task scenarios for testing HMMM coordination type TaskSimulator struct { pubsub *pubsub.PubSub ctx context.Context @@ -48,7 +48,7 @@ type TaskDependency struct { DependencyType string `json:"dependency_type"` // api_contract, database_schema, config, security } -// CoordinationScenario represents a test scenario for antennae coordination +// CoordinationScenario represents a test scenario for HMMM coordination type CoordinationScenario struct { Name string `json:"name"` Description string `json:"description"` @@ -83,7 +83,7 @@ func (ts *TaskSimulator) Start() { } ts.isRunning = true - fmt.Println("🎭 Starting Task Simulator for Antennae Testing") + fmt.Println("🎭 Starting Task Simulator for HMMM Testing") // Start different simulation routines go ts.simulateTaskAnnouncements() @@ -177,7 +177,7 @@ func (ts *TaskSimulator) runCoordinationScenario(scenario CoordinationScenario) "started_at": time.Now().Unix(), } - if err := ts.pubsub.PublishAntennaeMessage(pubsub.CoordinationRequest, scenarioStart); err != nil { + if err := ts.pubsub.PublishHmmmMessage(pubsub.CoordinationRequest, scenarioStart); err != nil { fmt.Printf("❌ Failed to announce scenario start: %v\n", err) return } @@ -245,7 +245,7 @@ func (ts *TaskSimulator) simulateAgentResponse(response string) { fmt.Printf("πŸ€– Simulated agent response: %s\n", response) - if err := ts.pubsub.PublishAntennaeMessage(pubsub.MetaDiscussion, agentResponse); err != nil { + if err := ts.pubsub.PublishHmmmMessage(pubsub.MetaDiscussion, agentResponse); err != nil { fmt.Printf("❌ Failed to publish agent response: %v\n", err) } }