"""
HCFS SDK Data Models

Pydantic models for SDK operations and configuration.
"""

from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Optional, List, Dict, Any, Union, Callable

from pydantic import BaseModel, Field, validator


class ContextStatus(str, Enum):
    """Context status enumeration."""

    ACTIVE = "active"
    ARCHIVED = "archived"
    DELETED = "deleted"
    DRAFT = "draft"


class SearchType(str, Enum):
    """Search type enumeration."""

    SEMANTIC = "semantic"
    KEYWORD = "keyword"
    HYBRID = "hybrid"
    FUZZY = "fuzzy"


class CacheStrategy(str, Enum):
    """Cache eviction strategy enumeration."""

    LRU = "lru"
    LFU = "lfu"
    TTL = "ttl"
    FIFO = "fifo"


class RetryStrategy(str, Enum):
    """Retry strategy enumeration."""

    EXPONENTIAL_BACKOFF = "exponential_backoff"
    LINEAR_BACKOFF = "linear_backoff"
    CONSTANT_DELAY = "constant_delay"
    FIBONACCI = "fibonacci"


class Context(BaseModel):
    """Context data model for SDK operations."""

    id: Optional[int] = None
    path: str = Field(..., description="Unique context path")
    content: str = Field(..., description="Context content")
    summary: Optional[str] = Field(None, description="Brief summary")
    author: Optional[str] = Field(None, description="Context author")
    tags: List[str] = Field(default_factory=list, description="Context tags")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata")
    status: ContextStatus = Field(default=ContextStatus.ACTIVE, description="Context status")
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    version: int = Field(default=1, description="Context version")
    similarity_score: Optional[float] = Field(None, description="Similarity score (for search results)")

    @validator('path')
    def validate_path(cls, v):
        if not v or not v.startswith('/'):
            raise ValueError('Path must start with /')
        return v

    @validator('content')
    def validate_content(cls, v):
        if not v or len(v.strip()) == 0:
            raise ValueError('Content cannot be empty')
        return v

    def to_create_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for context creation."""
        return {
            "path": self.path,
            "content": self.content,
            "summary": self.summary,
            "author": self.author,
            "tags": self.tags,
            "metadata": self.metadata
        }

    def to_update_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for context updates (excluding read-only fields)."""
        return {
            k: v for k, v in {
                "content": self.content,
                "summary": self.summary,
                "tags": self.tags,
                "metadata": self.metadata,
                "status": self.status.value
            }.items() if v is not None
        }
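
# Illustrative usage (comments only, not executed on import; the values are
# made up, only the field and method names come from the Context model above):
#
#     ctx = Context(
#         path="/projects/alpha/notes",          # must start with "/" per validate_path
#         content="Initial design discussion",   # must be non-empty per validate_content
#         author="jdoe",
#         tags=["design", "draft"],
#     )
#     payload = ctx.to_create_dict()   # dict suitable for a create request
#     changes = ctx.to_update_dict()   # None-valued and read-only fields are omitted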


class SearchResult(BaseModel):
    """Search result model."""

    context: Context
    score: float = Field(..., description="Relevance score")
    explanation: Optional[str] = Field(None, description="Search result explanation")
    highlights: List[str] = Field(default_factory=list, description="Highlighted text snippets")

    def __lt__(self, other):
        """Enable sorting by score."""
        return self.score < other.score

    def __gt__(self, other):
        """Enable sorting by score."""
        return self.score > other.score
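
# Illustrative note: because __lt__/__gt__ compare on score, a list of
# SearchResult objects can be ordered directly, e.g.
#
#     results.sort(reverse=True)   # highest-scoring result first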


class ContextFilter(BaseModel):
    """Context filtering options."""

    path_prefix: Optional[str] = Field(None, description="Filter by path prefix")
    author: Optional[str] = Field(None, description="Filter by author")
    status: Optional[ContextStatus] = Field(None, description="Filter by status")
    tags: Optional[List[str]] = Field(None, description="Filter by tags")
    created_after: Optional[datetime] = Field(None, description="Only contexts created after this time")
    created_before: Optional[datetime] = Field(None, description="Only contexts created before this time")
    content_contains: Optional[str] = Field(None, description="Filter by content substring")
    min_content_length: Optional[int] = Field(None, description="Minimum content length")
    max_content_length: Optional[int] = Field(None, description="Maximum content length")

    def to_query_params(self) -> Dict[str, Any]:
        """Convert to query parameters for API requests."""
        params = {}

        if self.path_prefix:
            params["path_prefix"] = self.path_prefix
        if self.author:
            params["author"] = self.author
        if self.status:
            params["status"] = self.status.value
        if self.created_after:
            params["created_after"] = self.created_after.isoformat()
        if self.created_before:
            params["created_before"] = self.created_before.isoformat()
        if self.content_contains:
            params["content_contains"] = self.content_contains
        if self.min_content_length is not None:
            params["min_content_length"] = self.min_content_length
        if self.max_content_length is not None:
            params["max_content_length"] = self.max_content_length

        return params
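
# Illustrative usage (hypothetical values). Unset fields are simply omitted
# from the query parameters:
#
#     flt = ContextFilter(
#         path_prefix="/projects",
#         status=ContextStatus.ACTIVE,
#         created_after=datetime(2024, 1, 1),
#     )
#     flt.to_query_params()
#     # -> {"path_prefix": "/projects", "status": "active",
#     #     "created_after": "2024-01-01T00:00:00"}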


class PaginationOptions(BaseModel):
    """Pagination configuration."""

    page: int = Field(default=1, ge=1, description="Page number")
    page_size: int = Field(default=20, ge=1, le=1000, description="Items per page")
    sort_by: Optional[str] = Field(None, description="Sort field")
    sort_order: str = Field(default="desc", description="Sort order (asc/desc)")

    @validator('sort_order')
    def validate_sort_order(cls, v):
        if v not in ['asc', 'desc']:
            raise ValueError('Sort order must be "asc" or "desc"')
        return v

    @property
    def offset(self) -> int:
        """Calculate offset for database queries."""
        return (self.page - 1) * self.page_size

    def to_query_params(self) -> Dict[str, Any]:
        """Convert to query parameters."""
        params = {
            "page": self.page,
            "page_size": self.page_size,
            "sort_order": self.sort_order
        }
        if self.sort_by:
            params["sort_by"] = self.sort_by
        return params
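
# Illustrative usage: the offset property translates page/page_size into a
# row offset, e.g. page 3 with page_size 50 starts at row 100.
#
#     opts = PaginationOptions(page=3, page_size=50, sort_by="created_at")
#     opts.offset             # -> 100
#     opts.to_query_params()  # -> {"page": 3, "page_size": 50,
#                             #     "sort_order": "desc", "sort_by": "created_at"}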


class SearchOptions(BaseModel):
    """Search configuration options."""

    search_type: SearchType = Field(default=SearchType.SEMANTIC, description="Type of search")
    top_k: int = Field(default=10, ge=1, le=1000, description="Maximum results to return")
    similarity_threshold: float = Field(default=0.0, ge=0.0, le=1.0, description="Minimum similarity score")
    path_prefix: Optional[str] = Field(None, description="Search within path prefix")
    semantic_weight: float = Field(default=0.7, ge=0.0, le=1.0, description="Weight for semantic search in hybrid mode")
    include_content: bool = Field(default=True, description="Include full content in results")
    include_highlights: bool = Field(default=True, description="Include text highlights")
    max_highlights: int = Field(default=3, ge=0, le=10, description="Maximum highlight snippets")

    def to_request_dict(self) -> Dict[str, Any]:
        """Convert to API request dictionary."""
        return {
            "search_type": self.search_type.value,
            "top_k": self.top_k,
            "similarity_threshold": self.similarity_threshold,
            "path_prefix": self.path_prefix,
            "semantic_weight": self.semantic_weight,
            "include_content": self.include_content,
            "include_highlights": self.include_highlights
        }
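
# Illustrative usage (hypothetical query): a hybrid search weighted 60/40
# toward semantic similarity. Note that max_highlights is not included in
# to_request_dict() above.
#
#     opts = SearchOptions(
#         search_type=SearchType.HYBRID,
#         top_k=20,
#         semantic_weight=0.6,
#         path_prefix="/projects",
#     )
#     body = opts.to_request_dict()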


class CacheConfig(BaseModel):
    """Cache configuration."""

    enabled: bool = Field(default=True, description="Enable caching")
    strategy: CacheStrategy = Field(default=CacheStrategy.LRU, description="Cache eviction strategy")
    max_size: int = Field(default=1000, ge=1, description="Maximum cache entries")
    ttl_seconds: Optional[int] = Field(default=3600, ge=1, description="Time-to-live in seconds")
    memory_limit_mb: Optional[int] = Field(default=100, ge=1, description="Memory limit in MB")
    persist_to_disk: bool = Field(default=False, description="Persist cache to disk")
    disk_cache_path: Optional[str] = Field(None, description="Disk cache directory")

    @validator('ttl_seconds')
    def validate_ttl(cls, v, values):
        if values.get('strategy') == CacheStrategy.TTL and v is None:
            raise ValueError('TTL must be specified for TTL cache strategy')
        return v
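
# Illustrative usage: a TTL cache must carry an explicit ttl_seconds
# (validate_ttl rejects None when the strategy is CacheStrategy.TTL).
#
#     CacheConfig(strategy=CacheStrategy.TTL, ttl_seconds=600, max_size=500)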


class RetryConfig(BaseModel):
    """Retry configuration for failed requests."""

    enabled: bool = Field(default=True, description="Enable retry logic")
    max_attempts: int = Field(default=3, ge=1, le=10, description="Maximum retry attempts")
    strategy: RetryStrategy = Field(default=RetryStrategy.EXPONENTIAL_BACKOFF, description="Retry strategy")
    base_delay: float = Field(default=1.0, ge=0.1, description="Base delay in seconds")
    max_delay: float = Field(default=60.0, ge=1.0, description="Maximum delay in seconds")
    backoff_multiplier: float = Field(default=2.0, ge=1.0, description="Backoff multiplier")
    jitter: bool = Field(default=True, description="Add random jitter to delays")
    retry_on_status: List[int] = Field(
        default_factory=lambda: [429, 500, 502, 503, 504],
        description="HTTP status codes to retry on"
    )
    retry_on_timeout: bool = Field(default=True, description="Retry on timeout errors")
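
# Illustrative note (an assumption about how the client consumes these fields;
# the retry loop itself is not part of this module): with the defaults above,
# an exponential-backoff schedule would look roughly like
#
#     delay = min(base_delay * backoff_multiplier ** attempt, max_delay)
#     # attempt 0 -> 1s, attempt 1 -> 2s, attempt 2 -> 4s, ..., capped at 60s
#
# with random jitter added on top when jitter=True.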


class WebSocketConfig(BaseModel):
    """WebSocket connection configuration."""

    auto_reconnect: bool = Field(default=True, description="Automatically reconnect on disconnect")
    reconnect_interval: float = Field(default=5.0, ge=1.0, description="Reconnect interval in seconds")
    max_reconnect_attempts: int = Field(default=10, ge=1, description="Maximum reconnection attempts")
    ping_interval: float = Field(default=30.0, ge=1.0, description="Ping interval in seconds")
    ping_timeout: float = Field(default=10.0, ge=1.0, description="Ping timeout in seconds")
    message_queue_size: int = Field(default=1000, ge=1, description="Maximum queued messages")


class ClientConfig(BaseModel):
    """Main client configuration."""

    base_url: str = Field(..., description="HCFS API base URL")
    api_key: Optional[str] = Field(None, description="API key for authentication")
    jwt_token: Optional[str] = Field(None, description="JWT token for authentication")
    timeout: float = Field(default=30.0, ge=1.0, description="Request timeout in seconds")
    user_agent: str = Field(default="HCFS-SDK/2.0.0", description="User agent string")

    # Advanced configurations
    cache: CacheConfig = Field(default_factory=CacheConfig)
    retry: RetryConfig = Field(default_factory=RetryConfig)
    websocket: WebSocketConfig = Field(default_factory=WebSocketConfig)

    # Connection pooling
    max_connections: int = Field(default=100, ge=1, description="Maximum connection pool size")
    max_keepalive_connections: int = Field(default=20, ge=1, description="Maximum keep-alive connections")

    @validator('base_url')
    def validate_base_url(cls, v):
        if not v.startswith(('http://', 'https://')):
            raise ValueError('Base URL must start with http:// or https://')
        return v.rstrip('/')
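
# Illustrative usage (the URL and key below are placeholders, not real
# endpoints or credentials): validate_base_url strips any trailing slash,
# so both slash and no-slash forms normalize to the same value.
#
#     config = ClientConfig(
#         base_url="https://hcfs.example.com/",   # stored as "https://hcfs.example.com"
#         api_key="YOUR_API_KEY",
#         timeout=15.0,
#         retry=RetryConfig(max_attempts=5),
#     )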


@dataclass
class BatchResult:
    """Result of a batch operation."""

    success_count: int
    error_count: int
    total_items: int
    successful_items: List[Any]
    failed_items: List[Dict[str, Any]]
    execution_time: float

    @property
    def success_rate(self) -> float:
        """Calculate success rate."""
        return self.success_count / self.total_items if self.total_items > 0 else 0.0

    @property
    def has_errors(self) -> bool:
        """Check if there were any errors."""
        return self.error_count > 0
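
# Illustrative usage (made-up numbers); the derived properties avoid division
# by zero when total_items is 0:
#
#     result = BatchResult(
#         success_count=8, error_count=2, total_items=10,
#         successful_items=[], failed_items=[], execution_time=1.2,
#     )
#     result.success_rate  # -> 0.8
#     result.has_errors    # -> True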


class StreamEvent(BaseModel):
    """WebSocket stream event."""

    event_type: str = Field(..., description="Event type (e.g. context_created/context_updated/context_deleted)")
    data: Dict[str, Any] = Field(..., description="Event data")
    timestamp: datetime = Field(default_factory=datetime.utcnow, description="Event timestamp")
    context_id: Optional[int] = Field(None, description="Related context ID")
    path: Optional[str] = Field(None, description="Related context path")

    def is_context_event(self) -> bool:
        """Check if this is a context-related event."""
        return self.event_type in ['context_created', 'context_updated', 'context_deleted']
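
# Illustrative usage (hypothetical event payload):
#
#     event = StreamEvent(event_type="context_updated", data={"id": 42},
#                         context_id=42, path="/projects/alpha")
#     event.is_context_event()  # -> True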


class AnalyticsData(BaseModel):
    """Analytics and usage data."""

    operation_count: Dict[str, int] = Field(default_factory=dict, description="Operation counts")
    cache_stats: Dict[str, Any] = Field(default_factory=dict, description="Cache statistics")
    error_stats: Dict[str, int] = Field(default_factory=dict, description="Error statistics")
    performance_stats: Dict[str, float] = Field(default_factory=dict, description="Performance metrics")
    session_start: datetime = Field(default_factory=datetime.utcnow, description="Session start time")

    def get_cache_hit_rate(self) -> float:
        """Calculate cache hit rate."""
        hits = self.cache_stats.get('hits', 0)
        misses = self.cache_stats.get('misses', 0)
        total = hits + misses
        return hits / total if total > 0 else 0.0

    def get_error_rate(self) -> float:
        """Calculate overall error rate."""
        total_operations = sum(self.operation_count.values())
        total_errors = sum(self.error_stats.values())
        return total_errors / total_operations if total_operations > 0 else 0.0
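
# Illustrative usage (made-up counters; the stat key names "hits"/"misses"
# match those read by get_cache_hit_rate above):
#
#     stats = AnalyticsData(
#         operation_count={"create": 40, "search": 60},
#         cache_stats={"hits": 90, "misses": 10},
#         error_stats={"timeout": 5},
#     )
#     stats.get_cache_hit_rate()  # -> 0.9
#     stats.get_error_rate()      # -> 0.05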