Files
HCFS/hcfs-python/hcfs/sdk/models.py
2025-07-30 09:34:16 +10:00

335 lines
14 KiB
Python

"""
HCFS SDK Data Models
Pydantic models for SDK operations and configuration.
"""
from typing import Optional, List, Dict, Any, Union, Callable
from datetime import datetime
from enum import Enum
from pydantic import BaseModel, Field, validator
from dataclasses import dataclass
class ContextStatus(str, Enum):
    """String-valued enumeration of the statuses a context can carry.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (for example ``ContextStatus.ACTIVE == "active"``).
    """

    ACTIVE = "active"
    ARCHIVED = "archived"
    DELETED = "deleted"
    DRAFT = "draft"
class SearchType(str, Enum):
    """String-valued enumeration of the supported search types.

    Members also subclass ``str``, so each one compares equal to its plain
    string value.
    """

    SEMANTIC = "semantic"
    KEYWORD = "keyword"
    HYBRID = "hybrid"
    FUZZY = "fuzzy"
class CacheStrategy(str, Enum):
    """String-valued enumeration of the selectable cache strategies.

    Members also subclass ``str``, so each one compares equal to its plain
    string value.
    """

    LRU = "lru"
    LFU = "lfu"
    TTL = "ttl"
    FIFO = "fifo"
class RetryStrategy(str, Enum):
    """String-valued enumeration of the selectable retry strategies.

    Members also subclass ``str``, so each one compares equal to its plain
    string value.
    """

    EXPONENTIAL_BACKOFF = "exponential_backoff"
    LINEAR_BACKOFF = "linear_backoff"
    CONSTANT_DELAY = "constant_delay"
    FIBONACCI = "fibonacci"
class Context(BaseModel):
    """A context record as exchanged through the SDK.

    ``path`` and ``content`` are required; every other field has a default.
    ``path`` must begin with ``/`` and ``content`` must contain at least one
    non-whitespace character, otherwise validation fails.
    """

    id: Optional[int] = None
    path: str = Field(..., description="Unique context path")
    content: str = Field(..., description="Context content")
    summary: Optional[str] = Field(None, description="Brief summary")
    author: Optional[str] = Field(None, description="Context author")
    tags: List[str] = Field(default_factory=list, description="Context tags")
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional metadata",
    )
    status: ContextStatus = Field(
        default=ContextStatus.ACTIVE,
        description="Context status",
    )
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    version: int = Field(default=1, description="Context version")
    similarity_score: Optional[float] = Field(
        None,
        description="Similarity score (for search results)",
    )

    @validator('path')
    def validate_path(cls, v):
        """Accept only a non-empty path whose first character is ``/``."""
        if v and v.startswith('/'):
            return v
        raise ValueError('Path must start with /')

    @validator('content')
    def validate_content(cls, v):
        """Accept only content that is non-empty after stripping whitespace."""
        if v and v.strip():
            return v
        raise ValueError('Content cannot be empty')

    def to_create_dict(self) -> Dict[str, Any]:
        """Build the payload used to create this context.

        Returns:
            A dict with the keys ``path``, ``content``, ``summary``,
            ``author``, ``tags`` and ``metadata``. Unset optional values are
            included as ``None``.
        """
        return dict(
            path=self.path,
            content=self.content,
            summary=self.summary,
            author=self.author,
            tags=self.tags,
            metadata=self.metadata,
        )

    def to_update_dict(self) -> Dict[str, Any]:
        """Build the payload used to update this context.

        Returns:
            A dict drawn from ``content``, ``summary``, ``tags``,
            ``metadata`` and ``status`` (as its string value), leaving out
            any entry whose value is ``None``.
        """
        candidates = (
            ("content", self.content),
            ("summary", self.summary),
            ("tags", self.tags),
            ("metadata", self.metadata),
            ("status", self.status.value),
        )
        return {key: value for key, value in candidates if value is not None}
class SearchResult(BaseModel):
    """A single search hit: the matched context plus its relevance score.

    Instances order by ``score`` through ``<`` and ``>``, so a list of results
    can be passed to ``sorted()`` / ``list.sort()`` directly.
    """

    context: Context
    score: float = Field(..., description="Relevance score")
    explanation: Optional[str] = Field(None, description="Search result explanation")
    highlights: List[str] = Field(default_factory=list, description="Highlighted text snippets")

    def __lt__(self, other):
        """Return whether this result's score is lower than ``other``'s.

        Returns ``NotImplemented`` when ``other`` has no ``score`` attribute,
        so Python falls back to the reflected operation or raises its normal
        ``TypeError`` instead of leaking an ``AttributeError``.
        """
        try:
            other_score = other.score
        except AttributeError:
            return NotImplemented
        return self.score < other_score

    def __gt__(self, other):
        """Return whether this result's score is higher than ``other``'s.

        Returns ``NotImplemented`` when ``other`` has no ``score`` attribute,
        so Python falls back to the reflected operation or raises its normal
        ``TypeError`` instead of leaking an ``AttributeError``.
        """
        try:
            other_score = other.score
        except AttributeError:
            return NotImplemented
        return self.score > other_score
class ContextFilter(BaseModel):
    """Optional criteria for narrowing a context listing.

    Every field defaults to ``None``; only the criteria that are set are
    emitted by :meth:`to_query_params`.
    """

    path_prefix: Optional[str] = Field(None, description="Filter by path prefix")
    author: Optional[str] = Field(None, description="Filter by author")
    status: Optional[ContextStatus] = Field(None, description="Filter by status")
    tags: Optional[List[str]] = Field(None, description="Filter by tags")
    created_after: Optional[datetime] = Field(None, description="Filter by creation date")
    created_before: Optional[datetime] = Field(None, description="Filter by creation date")
    content_contains: Optional[str] = Field(None, description="Filter by content substring")
    min_content_length: Optional[int] = Field(None, description="Minimum content length")
    max_content_length: Optional[int] = Field(None, description="Maximum content length")

    def to_query_params(self) -> Dict[str, Any]:
        """Convert the populated filters to query parameters for API requests.

        Returns:
            A dict containing one entry per filter that is set. String and
            list filters are skipped when empty; the two length bounds are
            skipped only when ``None``, so a bound of ``0`` is still sent.
            ``status`` is sent as its string value and the date bounds as
            ISO-8601 strings.
        """
        params: Dict[str, Any] = {}
        if self.path_prefix:
            params["path_prefix"] = self.path_prefix
        if self.author:
            params["author"] = self.author
        if self.status:
            params["status"] = self.status.value
        if self.tags:
            # Previously this declared filter was never emitted, so a tag
            # filter was silently ignored. A copy is sent so the caller's
            # list is not shared with the request parameters.
            # NOTE(review): emitted as a list (HTTP clients commonly encode
            # that as a repeated query key) - confirm the server does not
            # expect a comma-separated string instead.
            params["tags"] = list(self.tags)
        if self.created_after:
            params["created_after"] = self.created_after.isoformat()
        if self.created_before:
            params["created_before"] = self.created_before.isoformat()
        if self.content_contains:
            params["content_contains"] = self.content_contains
        # Explicit None checks: 0 is a meaningful length bound.
        if self.min_content_length is not None:
            params["min_content_length"] = self.min_content_length
        if self.max_content_length is not None:
            params["max_content_length"] = self.max_content_length
        return params
class PaginationOptions(BaseModel):
    """Paging and sorting settings for list requests.

    ``page`` is 1-based and ``page_size`` is limited to 1..1000 by the field
    constraints; ``sort_order`` must be either ``"asc"`` or ``"desc"``.
    """

    page: int = Field(default=1, ge=1, description="Page number")
    page_size: int = Field(default=20, ge=1, le=1000, description="Items per page")
    sort_by: Optional[str] = Field(None, description="Sort field")
    sort_order: str = Field(default="desc", description="Sort order (asc/desc)")

    @validator('sort_order')
    def validate_sort_order(cls, v):
        """Accept only the two supported sort directions."""
        if v in ('asc', 'desc'):
            return v
        raise ValueError('Sort order must be "asc" or "desc"')

    @property
    def offset(self) -> int:
        """Number of items that precede the current page."""
        pages_before = self.page - 1
        return pages_before * self.page_size

    def to_query_params(self) -> Dict[str, Any]:
        """Build the query parameters for this pagination setting.

        Returns:
            A dict with ``page``, ``page_size`` and ``sort_order``, plus
            ``sort_by`` when it is set to a non-empty value.
        """
        query = dict(
            page=self.page,
            page_size=self.page_size,
            sort_order=self.sort_order,
        )
        if self.sort_by:
            query["sort_by"] = self.sort_by
        return query
class SearchOptions(BaseModel):
    """Tunable options for a search request.

    Numeric fields are range-checked by their ``Field`` constraints
    (``top_k`` 1..1000, thresholds and weights 0.0..1.0, ``max_highlights``
    0..10).
    """

    search_type: SearchType = Field(
        default=SearchType.SEMANTIC,
        description="Type of search",
    )
    top_k: int = Field(
        default=10, ge=1, le=1000,
        description="Maximum results to return",
    )
    similarity_threshold: float = Field(
        default=0.0, ge=0.0, le=1.0,
        description="Minimum similarity score",
    )
    path_prefix: Optional[str] = Field(None, description="Search within path prefix")
    semantic_weight: float = Field(
        default=0.7, ge=0.0, le=1.0,
        description="Weight for semantic search in hybrid mode",
    )
    include_content: bool = Field(
        default=True,
        description="Include full content in results",
    )
    include_highlights: bool = Field(
        default=True,
        description="Include text highlights",
    )
    max_highlights: int = Field(
        default=3, ge=0, le=10,
        description="Maximum highlight snippets",
    )

    def to_request_dict(self) -> Dict[str, Any]:
        """Build the request payload describing these options.

        Returns:
            A dict with ``search_type`` (as its string value), ``top_k``,
            ``similarity_threshold``, ``path_prefix`` (``None`` when unset),
            ``semantic_weight``, ``include_content`` and
            ``include_highlights``.
        """
        # NOTE(review): max_highlights is not part of the payload - confirm
        # whether that is intentional or whether it should be sent as well.
        return dict(
            search_type=self.search_type.value,
            top_k=self.top_k,
            similarity_threshold=self.similarity_threshold,
            path_prefix=self.path_prefix,
            semantic_weight=self.semantic_weight,
            include_content=self.include_content,
            include_highlights=self.include_highlights,
        )
class CacheConfig(BaseModel):
    """Settings for the client-side cache.

    ``strategy`` is declared before ``ttl_seconds`` so that the TTL validator
    can read the already-validated strategy from ``values``.
    """

    enabled: bool = Field(default=True, description="Enable caching")
    strategy: CacheStrategy = Field(
        default=CacheStrategy.LRU,
        description="Cache eviction strategy",
    )
    max_size: int = Field(default=1000, ge=1, description="Maximum cache entries")
    ttl_seconds: Optional[int] = Field(
        default=3600, ge=1,
        description="Time-to-live in seconds",
    )
    memory_limit_mb: Optional[int] = Field(
        default=100, ge=1,
        description="Memory limit in MB",
    )
    persist_to_disk: bool = Field(default=False, description="Persist cache to disk")
    disk_cache_path: Optional[str] = Field(None, description="Disk cache directory")

    @validator('ttl_seconds')
    def validate_ttl(cls, v, values):
        """Reject a ``None`` TTL when the selected strategy is ``TTL``."""
        uses_ttl_strategy = values.get('strategy') == CacheStrategy.TTL
        if uses_ttl_strategy and v is None:
            raise ValueError('TTL must be specified for TTL cache strategy')
        return v
class RetryConfig(BaseModel):
    """Settings that govern retrying of failed requests.

    All fields have defaults; numeric fields are bounded by their ``Field``
    constraints (for example ``max_attempts`` must be within 1..10).
    """

    enabled: bool = Field(default=True, description="Enable retry logic")
    max_attempts: int = Field(
        default=3, ge=1, le=10,
        description="Maximum retry attempts",
    )
    strategy: RetryStrategy = Field(
        default=RetryStrategy.EXPONENTIAL_BACKOFF,
        description="Retry strategy",
    )
    base_delay: float = Field(
        default=1.0, ge=0.1,
        description="Base delay in seconds",
    )
    max_delay: float = Field(
        default=60.0, ge=1.0,
        description="Maximum delay in seconds",
    )
    backoff_multiplier: float = Field(
        default=2.0, ge=1.0,
        description="Backoff multiplier",
    )
    jitter: bool = Field(default=True, description="Add random jitter to delays")
    # A factory is used so every instance receives its own list object.
    retry_on_status: List[int] = Field(
        default_factory=lambda: [429, 500, 502, 503, 504],
        description="HTTP status codes to retry on",
    )
    retry_on_timeout: bool = Field(default=True, description="Retry on timeout errors")
class WebSocketConfig(BaseModel):
    """Settings for the WebSocket connection.

    All fields have defaults; intervals and timeouts are expressed in seconds
    and must be at least 1.0, counts must be at least 1.
    """

    auto_reconnect: bool = Field(
        default=True,
        description="Automatically reconnect on disconnect",
    )
    reconnect_interval: float = Field(
        default=5.0, ge=1.0,
        description="Reconnect interval in seconds",
    )
    max_reconnect_attempts: int = Field(
        default=10, ge=1,
        description="Maximum reconnection attempts",
    )
    ping_interval: float = Field(
        default=30.0, ge=1.0,
        description="Ping interval in seconds",
    )
    ping_timeout: float = Field(
        default=10.0, ge=1.0,
        description="Ping timeout in seconds",
    )
    message_queue_size: int = Field(
        default=1000, ge=1,
        description="Maximum queued messages",
    )
class ClientConfig(BaseModel):
    """Top-level configuration for the client.

    Only ``base_url`` is required. It must start with ``http://`` or
    ``https://`` and is stored without trailing slashes.
    """

    base_url: str = Field(..., description="HCFS API base URL")
    api_key: Optional[str] = Field(None, description="API key for authentication")
    jwt_token: Optional[str] = Field(None, description="JWT token for authentication")
    timeout: float = Field(
        default=30.0, ge=1.0,
        description="Request timeout in seconds",
    )
    user_agent: str = Field(default="HCFS-SDK/2.0.0", description="User agent string")

    # Nested configuration sections, each built from its own defaults.
    cache: CacheConfig = Field(default_factory=CacheConfig)
    retry: RetryConfig = Field(default_factory=RetryConfig)
    websocket: WebSocketConfig = Field(default_factory=WebSocketConfig)

    # Connection pool limits.
    max_connections: int = Field(
        default=100, ge=1,
        description="Maximum connection pool size",
    )
    max_keepalive_connections: int = Field(
        default=20, ge=1,
        description="Maximum keep-alive connections",
    )

    @validator('base_url')
    def validate_base_url(cls, v):
        """Require an http(s) scheme and drop any trailing ``/`` characters."""
        if v.startswith(('http://', 'https://')):
            return v.rstrip('/')
        raise ValueError('Base URL must start with http:// or https://')
@dataclass
class BatchResult:
    """Outcome summary of a batch operation.

    Holds the success/error counters, the items in each group, and the
    execution time reported for the batch.
    """

    success_count: int
    error_count: int
    total_items: int
    successful_items: List[Any]
    failed_items: List[Dict[str, Any]]
    execution_time: float

    @property
    def success_rate(self) -> float:
        """Fraction of items that succeeded; ``0.0`` when there are no items."""
        if self.total_items > 0:
            return self.success_count / self.total_items
        return 0.0

    @property
    def has_errors(self) -> bool:
        """``True`` when the error counter is above zero."""
        return self.error_count > 0
class StreamEvent(BaseModel):
    """An event received over the WebSocket stream.

    ``timestamp`` defaults to the value of ``datetime.utcnow()`` taken when
    the instance is created.
    """

    event_type: str = Field(..., description="Event type (created/updated/deleted)")
    data: Dict[str, Any] = Field(..., description="Event data")
    timestamp: datetime = Field(
        default_factory=datetime.utcnow,
        description="Event timestamp",
    )
    context_id: Optional[int] = Field(None, description="Related context ID")
    path: Optional[str] = Field(None, description="Related context path")

    def is_context_event(self) -> bool:
        """Return ``True`` when ``event_type`` is one of the context events.

        NOTE(review): the names matched here carry a ``context_`` prefix,
        while the ``event_type`` description lists the bare words - confirm
        which spelling the server actually emits.
        """
        context_event_types = (
            'context_created',
            'context_updated',
            'context_deleted',
        )
        return self.event_type in context_event_types
class AnalyticsData(BaseModel):
    """Usage counters and statistics gathered for a session.

    Every mapping starts out empty and ``session_start`` defaults to the
    value of ``datetime.utcnow()`` taken when the instance is created.
    """

    operation_count: Dict[str, int] = Field(
        default_factory=dict,
        description="Operation counts",
    )
    cache_stats: Dict[str, Any] = Field(
        default_factory=dict,
        description="Cache statistics",
    )
    error_stats: Dict[str, int] = Field(
        default_factory=dict,
        description="Error statistics",
    )
    performance_stats: Dict[str, float] = Field(
        default_factory=dict,
        description="Performance metrics",
    )
    session_start: datetime = Field(
        default_factory=datetime.utcnow,
        description="Session start time",
    )

    def get_cache_hit_rate(self) -> float:
        """Return hits divided by hits plus misses from ``cache_stats``.

        Missing ``hits`` / ``misses`` entries count as 0, and the result is
        ``0.0`` when their sum is not positive.
        """
        hit_count = self.cache_stats.get('hits', 0)
        miss_count = self.cache_stats.get('misses', 0)
        lookups = hit_count + miss_count
        if lookups > 0:
            return hit_count / lookups
        return 0.0

    def get_error_rate(self) -> float:
        """Return the summed error counts divided by the summed operation counts.

        The result is ``0.0`` when the summed operation count is not positive.
        """
        operations = sum(self.operation_count.values())
        errors = sum(self.error_stats.values())
        if operations > 0:
            return errors / operations
        return 0.0