630 lines
24 KiB
Python
630 lines
24 KiB
Python
"""
Integration test suite for HCFS components.

Tests covering:
- Full system integration
- End-to-end workflows
- Cross-component functionality
- Performance under load
- Real-world usage scenarios
"""
|
|
|
|
import pytest
|
|
import tempfile
|
|
import shutil
|
|
import time
|
|
import asyncio
|
|
from pathlib import Path
|
|
import concurrent.futures
|
|
import threading
|
|
|
|
import sys
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from hcfs.core.context_db import Context
|
|
from hcfs.core.context_db_optimized_fixed import OptimizedContextDatabase
|
|
from hcfs.core.embeddings_optimized import OptimizedEmbeddingManager
|
|
from hcfs.core.context_versioning import VersioningSystem
|
|
from hcfs.core.context_db_trio import TrioContextDatabase
|
|
|
|
|
|
class TestFullSystemIntegration:
    """Test full HCFS system integration.

    Each test drives the context database, the embedding manager and the
    versioning system together through one workflow.
    """

    @pytest.fixture
    def integrated_system(self):
        """Create fully integrated HCFS system.

        Yields:
            tuple: ``(context_db, embedding_manager, versioning_system)``.
            The context database and the versioning system are both opened
            on the same database file inside a fresh temporary directory.
        """
        temp_dir = Path(tempfile.mkdtemp())
        # try/finally so the directory is removed even when one of the
        # component constructors raises before the fixture reaches its yield.
        try:
            db_path = temp_dir / "integration_test.db"
            vector_db_path = temp_dir / "integration_vectors.db"

            # Initialize all components
            context_db = OptimizedContextDatabase(str(db_path))
            embedding_manager = OptimizedEmbeddingManager(
                context_db,
                model_name="mini",
                vector_db_path=str(vector_db_path),
                cache_size=200,
            )
            versioning_system = VersioningSystem(str(db_path))

            yield context_db, embedding_manager, versioning_system
        finally:
            shutil.rmtree(temp_dir)

    def test_complete_context_lifecycle(self, integrated_system):
        """Test complete context lifecycle with all features.

        Walks one context through store, embed, version, update, re-embed,
        re-version, semantic search, history inspection and rollback.
        """
        context_db, embedding_manager, versioning_system = integrated_system
        original_content = (
            "HCFS is a context-aware hierarchical filesystem for AI agents"
        )

        # 1. Create initial context
        context = Context(
            None, "/project/hcfs",
            original_content,
            "HCFS project description",
            "developer", 1,
        )

        context_id = context_db.store_context(context)
        assert context_id is not None

        # 2. Generate and store embedding
        embedding = embedding_manager.generate_embedding(context.content)
        embedding_manager.store_embedding(context_id, embedding)

        # 3. Create version
        initial_version = versioning_system.create_version(
            context_id, "developer", "Initial project description"
        )
        assert initial_version is not None

        # 4. Update context
        updated_content = "HCFS is an advanced context-aware hierarchical filesystem with ML-powered semantic search"
        context_db.update_context(context_id, content=updated_content)

        # 5. Update embedding
        new_embedding = embedding_manager.generate_embedding(updated_content)
        embedding_manager.store_embedding(context_id, new_embedding)

        # 6. Create new version
        versioning_system.create_version(
            context_id, "developer", "Added ML and semantic search details"
        )

        # 7. Test search functionality
        search_results = embedding_manager.semantic_search_optimized(
            "machine learning filesystem", top_k=5, include_contexts=True
        )

        assert len(search_results) > 0
        found_context = any(
            result.context_id == context_id for result in search_results
        )
        assert found_context, "Should find the updated context in search results"

        # 8. Test version history (the assertions expect newest first)
        history = versioning_system.get_version_history(context_id)
        assert len(history) == 2
        assert history[0].message == "Added ML and semantic search details"
        assert history[1].message == "Initial project description"

        # 9. Test rollback
        versioning_system.rollback_to_version(
            context_id, initial_version.version_number, "developer", "Testing rollback"
        )

        # Verify rollback worked
        current_context = context_db.get_context(context_id)
        assert original_content in current_context.content

    def test_hierarchical_context_inheritance(self, integrated_system):
        """Test hierarchical context relationships.

        Stores a chain of nested paths and checks that a search restricted
        to ``/projects`` returns the HCFS contexts below it.
        """
        context_db, embedding_manager, _ = integrated_system

        # Create hierarchical contexts
        contexts = [
            Context(None, "/", "Root directory context", "Root summary", "user", 1),
            Context(None, "/projects", "Projects directory", "Projects summary", "user", 1),
            Context(None, "/projects/hcfs", "HCFS project", "HCFS summary", "user", 1),
            Context(None, "/projects/hcfs/core", "HCFS core modules", "Core summary", "user", 1),
        ]

        for context in contexts:
            context_db.store_context(context)

        # Build embeddings for all contexts
        embedding_manager.build_embeddings_index()

        # Test hierarchical search
        results = embedding_manager.semantic_search_optimized(
            "HCFS development", path_prefix="/projects", top_k=10, include_contexts=True
        )

        # Should find HCFS-related contexts under /projects
        assert len(results) >= 2
        hcfs_results = [r for r in results if "hcfs" in r.context.path.lower()]
        assert len(hcfs_results) >= 2

    def test_multi_user_collaboration(self, integrated_system):
        """Test multi-user collaboration features.

        Three authors edit one shared context in turn; the history must list
        all of them and a rollback must restore the second author's text.
        """
        context_db, _, versioning_system = integrated_system

        # Create shared context
        shared_context = Context(
            None, "/shared/document",
            "Shared collaborative document",
            "Team collaboration",
            "user1", 1,
        )

        context_id = context_db.store_context(shared_context)

        # User 1 creates initial version
        versioning_system.create_version(context_id, "user1", "Initial draft")

        # User 2 makes changes
        context_db.update_context(
            context_id,
            content="Shared collaborative document with user2 contributions",
            author="user2",
        )
        v2 = versioning_system.create_version(context_id, "user2", "Added contributions")

        # User 3 makes changes
        context_db.update_context(
            context_id,
            content="Shared collaborative document with user2 and user3 contributions",
            author="user3",
        )
        versioning_system.create_version(context_id, "user3", "Final review")

        # Test version history shows all contributors
        history = versioning_system.get_version_history(context_id)
        authors = {version.author for version in history}
        assert authors == {"user1", "user2", "user3"}

        # Test rollback to previous version
        versioning_system.rollback_to_version(
            context_id, v2.version_number, "user1", "Reverting to user2 version"
        )

        current = context_db.get_context(context_id)
        assert "user2 contributions" in current.content
        assert "user3 contributions" not in current.content
|
|
|
|
|
|
class TestPerformanceIntegration:
    """Test system performance under integrated load.

    Durations are measured with ``time.perf_counter()`` because they are
    used as divisors and compared against thresholds; a wall clock can
    step backwards or report a zero interval.
    """

    @pytest.fixture
    def performance_system(self):
        """Create system for performance testing.

        Yields:
            tuple: ``(context_db, embedding_manager, versioning_system)``
            opened on files inside a fresh temporary directory.
        """
        temp_dir = Path(tempfile.mkdtemp())
        # try/finally so the directory is removed even when one of the
        # component constructors raises before the fixture reaches its yield.
        try:
            db_path = temp_dir / "performance_test.db"
            vector_db_path = temp_dir / "performance_vectors.db"

            context_db = OptimizedContextDatabase(str(db_path), cache_size=500)
            embedding_manager = OptimizedEmbeddingManager(
                context_db,
                model_name="mini",
                vector_db_path=str(vector_db_path),
                cache_size=300,
                batch_size=16,
            )
            versioning_system = VersioningSystem(str(db_path))

            yield context_db, embedding_manager, versioning_system
        finally:
            shutil.rmtree(temp_dir)

    def test_large_scale_context_management(self, performance_system):
        """Test managing large numbers of contexts.

        Batch-stores 100 contexts, builds the embedding index, then requires
        the average latency over five searches to stay under 100 ms.
        """
        context_db, embedding_manager, _ = performance_system

        # Create large number of contexts
        num_contexts = 100

        # The timer deliberately includes building the Context objects.
        start_time = time.perf_counter()
        contexts = [
            Context(
                None, f"/large_scale/context_{i}",
                f"Large scale test context {i} with detailed content about topic {i % 10}",
                f"Summary for context {i}",
                f"user_{i % 5}", 1,
            )
            for i in range(num_contexts)
        ]

        # Batch store contexts
        context_ids = context_db.store_contexts_batch(contexts)
        storage_time = time.perf_counter() - start_time

        assert len(context_ids) == num_contexts
        print(f"Stored {num_contexts} contexts in {storage_time:.3f}s ({num_contexts/storage_time:.1f} contexts/sec)")

        # Build embeddings index
        start_time = time.perf_counter()
        index_stats = embedding_manager.build_embeddings_index(batch_size=20)
        index_time = time.perf_counter() - start_time

        assert index_stats["total_processed"] == num_contexts
        print(f"Built embeddings for {num_contexts} contexts in {index_time:.3f}s")

        # Test search performance
        search_queries = [
            "detailed content about topic",
            "large scale test",
            "context management",
            "topic 5 information",
            "user collaboration",
        ]

        total_search_time = 0
        for query in search_queries:
            start_time = time.perf_counter()
            results = embedding_manager.semantic_search_optimized(query, top_k=10)
            total_search_time += time.perf_counter() - start_time

            assert len(results) > 0

        avg_search_time = total_search_time / len(search_queries)
        print(f"Average search time: {avg_search_time:.4f}s")
        assert avg_search_time < 0.1  # Should be under 100ms

    def test_concurrent_system_load(self, performance_system):
        """Test system under concurrent load.

        Five threads each run a fixed mix of create / search / update /
        hybrid-search operations against the shared components; the test
        requires every worker to finish and an overall rate above 10 ops/s.
        """
        context_db, embedding_manager, _ = performance_system

        # Pre-populate with some data
        base_contexts = [
            Context(None, f"/concurrent/{i}", f"Base context {i}", f"Summary {i}", "base_user", 1)
            for i in range(20)
        ]

        for context in base_contexts:
            context_db.store_context(context)

        embedding_manager.build_embeddings_index()

        def concurrent_worker(worker_id):
            """Run five mixed operations and return ``(operation, value)`` records."""
            results = []

            # Each worker performs mixed operations
            for i in range(5):
                operation_type = i % 4

                if operation_type == 0:  # Create context
                    context = Context(
                        None, f"/worker{worker_id}/context_{i}",
                        f"Worker {worker_id} context {i} with specific content",
                        f"Worker {worker_id} summary {i}",
                        f"worker{worker_id}", 1,
                    )
                    context_id = context_db.store_context(context)
                    results.append(("create", context_id))

                elif operation_type == 1:  # Search
                    search_results = embedding_manager.semantic_search_optimized(
                        f"worker {worker_id} content", top_k=5
                    )
                    results.append(("search", len(search_results)))

                elif operation_type == 2:  # Update context
                    # Only possible once this worker has created something.
                    created_contexts = [r for r in results if r[0] == "create"]
                    if created_contexts:
                        context_id = created_contexts[-1][1]
                        try:
                            context_db.update_context(
                                context_id,
                                content=f"Updated by worker {worker_id} iteration {i}",
                            )
                            results.append(("update", context_id))
                        except Exception:
                            # Best effort: context might not exist due to
                            # concurrency. Exception (not a bare except) so
                            # KeyboardInterrupt/SystemExit still propagate.
                            pass

                elif operation_type == 3:  # Hybrid search
                    hybrid_results = embedding_manager.hybrid_search_optimized(
                        f"context {worker_id}", top_k=3
                    )
                    results.append(("hybrid_search", len(hybrid_results)))

            return results

        # Run concurrent workers
        num_workers = 5
        start_time = time.perf_counter()

        with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
            futures = [executor.submit(concurrent_worker, i) for i in range(num_workers)]
            # future.result() re-raises any exception a worker did not handle.
            all_results = [future.result() for future in futures]

        total_time = time.perf_counter() - start_time

        # Verify all workers completed successfully
        assert len(all_results) == num_workers
        for worker_results in all_results:
            assert len(worker_results) >= 3  # Should have completed most operations

        # Calculate operation statistics
        total_operations = sum(len(worker_results) for worker_results in all_results)
        operations_per_second = total_operations / total_time

        print(f"Completed {total_operations} operations in {total_time:.3f}s ({operations_per_second:.1f} ops/sec)")
        assert operations_per_second > 10  # Should handle at least 10 operations per second

    def test_memory_usage_under_load(self, performance_system):
        """Test memory usage under sustained load.

        Stores and embeds five batches of 50 contexts while sampling the
        process RSS, and fails if RSS grows by 200 MB or more. Skipped when
        ``psutil`` is not installed.
        """
        context_db, embedding_manager, _ = performance_system

        # Skip (rather than error) when the optional dependency is missing.
        psutil = pytest.importorskip("psutil")
        import os

        process = psutil.Process(os.getpid())

        def rss_mb():
            """Return the current resident set size of this process in MB."""
            return process.memory_info().rss / 1024 / 1024

        initial_memory = rss_mb()

        # Create contexts in batches and monitor memory
        batch_size = 50
        num_batches = 5

        for batch_num in range(num_batches):
            # Create batch of contexts
            contexts = [
                Context(
                    None, f"/memory_test/batch_{batch_num}/context_{i}",
                    f"Memory test context {batch_num}-{i} " + "x" * 100,  # Larger content
                    f"Memory summary {batch_num}-{i}",
                    f"memory_user_{batch_num}", 1,
                )
                for i in range(batch_size)
            ]

            # Store contexts and build embeddings
            context_ids = context_db.store_contexts_batch(contexts)

            # Generate embeddings in batch
            contents = [context.content for context in contexts]
            embeddings = embedding_manager.generate_embeddings_batch(contents)

            # Store embeddings
            embedding_data = list(zip(context_ids, embeddings))
            embedding_manager.store_embeddings_batch(embedding_data)

            # Check memory usage
            current_memory = rss_mb()
            memory_increase = current_memory - initial_memory

            print(f"Batch {batch_num + 1}: Memory usage: {current_memory:.1f} MB (+{memory_increase:.1f} MB)")

            # Perform some searches to exercise the system
            for query in [f"memory test batch {batch_num}", "context content"]:
                results = embedding_manager.semantic_search_optimized(query, top_k=5)
                # Smoke check only: this is true for any sized result, so it
                # verifies that the call returned something len() accepts.
                assert len(results) >= 0

        final_memory = rss_mb()
        total_increase = final_memory - initial_memory

        # Memory increase should be reasonable (less than 200MB for this test)
        print(f"Total memory increase: {total_increase:.1f} MB")
        assert total_increase < 200, f"Memory usage increased by {total_increase:.1f} MB, which is too much"
|
|
|
|
|
|
class TestAsyncIntegration:
    """Test async/Trio integration."""

    @pytest.fixture
    def async_system(self):
        """Create system for async testing.

        Yields:
            TrioContextDatabase: wrapper built around an
            ``OptimizedContextDatabase`` stored in a fresh temporary
            directory.
        """
        temp_dir = Path(tempfile.mkdtemp())
        # try/finally so the directory is removed even when a constructor
        # raises before the fixture reaches its yield.
        try:
            db_path = temp_dir / "async_test.db"

            # Create async-compatible system
            context_db = OptimizedContextDatabase(str(db_path))
            trio_db = TrioContextDatabase(context_db)

            yield trio_db
        finally:
            shutil.rmtree(temp_dir)

    def test_trio_database_operations(self, async_system):
        """Test Trio async database operations.

        Runs store, get, search and update in sequence inside ``trio.run``.
        """
        import trio

        async def async_test():
            trio_db = async_system

            # Test async context storage
            context = Context(
                None, "/async/test",
                "Async test content",
                "Async summary",
                "async_user", 1,
            )

            context_id = await trio_db.store_context(context)
            assert context_id is not None

            # Test async retrieval
            retrieved = await trio_db.get_context(context_id)
            assert retrieved is not None
            assert retrieved.content == context.content

            # Test async search
            results = await trio_db.search_contexts("async test")
            assert len(results) > 0

            # Test async update
            await trio_db.update_context(context_id, content="Updated async content")

            updated = await trio_db.get_context(context_id)
            assert updated.content == "Updated async content"

            return "Success"

        # Run async test
        result = trio.run(async_test)
        assert result == "Success"

    def test_concurrent_async_operations(self, async_system):
        """Test concurrent async operations.

        Starts several Trio tasks that each store a few contexts, then
        checks that every store call produced an ID. ``start_soon`` discards
        task return values, so the workers append to a shared list instead.
        """
        import trio

        num_workers = 3
        contexts_per_worker = 3

        async def async_concurrent_test():
            trio_db = async_system
            # Shared by all workers; Trio tasks run on a single thread and
            # only switch at awaits, so plain list.append needs no lock.
            stored_ids = []

            async def async_worker(worker_id):
                for i in range(contexts_per_worker):
                    context = Context(
                        None, f"/async_concurrent/{worker_id}/{i}",
                        f"Async worker {worker_id} content {i}",
                        f"Async summary {worker_id}-{i}",
                        f"async_worker_{worker_id}", 1,
                    )

                    context_id = await trio_db.store_context(context)
                    stored_ids.append(context_id)

            # Run multiple async workers concurrently; leaving the nursery
            # block waits for all of them and re-raises their exceptions.
            async with trio.open_nursery() as nursery:
                for worker_id in range(num_workers):
                    nursery.start_soon(async_worker, worker_id)

            return stored_ids

        stored_ids = trio.run(async_concurrent_test)
        assert len(stored_ids) == num_workers * contexts_per_worker
        assert all(context_id is not None for context_id in stored_ids)
|
|
|
|
|
|
class TestErrorHandlingIntegration:
    """Test error handling across integrated components."""

    @pytest.fixture
    def error_test_system(self):
        """Create system for error testing.

        Yields:
            tuple: ``(context_db, embedding_manager, versioning_system)``
            opened on files inside a fresh temporary directory.
        """
        temp_dir = Path(tempfile.mkdtemp())
        # try/finally so the directory is removed even when one of the
        # component constructors raises before the fixture reaches its yield.
        try:
            db_path = temp_dir / "error_test.db"
            vector_db_path = temp_dir / "error_vectors.db"

            context_db = OptimizedContextDatabase(str(db_path))
            embedding_manager = OptimizedEmbeddingManager(
                context_db,
                model_name="mini",
                vector_db_path=str(vector_db_path),
            )
            versioning_system = VersioningSystem(str(db_path))

            yield context_db, embedding_manager, versioning_system
        finally:
            shutil.rmtree(temp_dir)

    def test_database_corruption_recovery(self, error_test_system):
        """Test recovery from database issues.

        A rejected ``store_context(None)`` call must raise and must leave
        previously stored data readable.
        """
        context_db, _, _ = error_test_system

        # Create some valid data first
        context = Context(
            None, "/error_test/valid",
            "Valid test content",
            "Valid summary",
            "test_user", 1,
        )

        context_id = context_db.store_context(context)
        assert context_id is not None

        # Test handling of invalid operations: try to store invalid context.
        # Only the call expected to raise lives inside the raises block.
        invalid_context = None
        with pytest.raises((ValueError, AttributeError, TypeError)):
            context_db.store_context(invalid_context)

        # Verify original data is still intact
        retrieved = context_db.get_context(context_id)
        assert retrieved is not None
        assert retrieved.content == "Valid test content"

    def test_embedding_generation_errors(self, error_test_system):
        """Test embedding generation error handling.

        Empty input may either yield an embedding or be rejected with
        ``ValueError``/``RuntimeError``; any other exception fails the test.
        Very long input must yield a ``(384,)``-shaped embedding.
        """
        _, embedding_manager, _ = error_test_system

        # Test with empty content
        try:
            embedding = embedding_manager.generate_embedding("")
        except (ValueError, RuntimeError):
            # If it fails, it should fail gracefully: these two types are
            # accepted, anything else propagates and fails the test.
            pass
        else:
            # Kept out of the try body so a failing assertion is reported
            # as itself instead of being caught by the handler above.
            # Empty string should still generate an embedding
            assert embedding is not None

        # Test with very long content
        very_long_text = "x" * 10000
        embedding = embedding_manager.generate_embedding(very_long_text)
        assert embedding is not None
        assert embedding.shape == (384,)

    def test_concurrent_error_isolation(self, error_test_system):
        """Test that errors in one thread don't affect others.

        Worker 1 performs an invalid lookup while the others store valid
        contexts; at least one success and one error result are required.
        """
        context_db, _, _ = error_test_system

        def worker_with_error(worker_id):
            """Return ``success_<id>`` or an ``error_``-prefixed status string."""
            try:
                if worker_id == 1:  # One worker will fail
                    # Try invalid operation
                    context_db.get_context(-1)  # Invalid ID
                    # Reached only when the lookup did not raise; the
                    # "error_" prefix still counts this worker as failed.
                    return "error_worker_failed"

                # Other workers do valid operations
                context = Context(
                    None, f"/error_isolation/{worker_id}",
                    f"Valid content {worker_id}",
                    f"Summary {worker_id}",
                    f"user{worker_id}", 1,
                )
                context_id = context_db.store_context(context)
                return f"success_{context_id}"
            except Exception as e:
                return f"error_{type(e).__name__}"

        # Run workers concurrently
        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
            futures = [executor.submit(worker_with_error, i) for i in range(3)]
            results = [future.result() for future in futures]

        # Check that some workers succeeded despite one failing
        success_count = sum(1 for r in results if r.startswith("success_"))
        error_count = sum(1 for r in results if r.startswith("error_"))

        assert success_count >= 1, "At least one worker should have succeeded"
        assert error_count >= 1, "At least one worker should have failed"
|
|
|
|
|
|
def run_integration_tests():
    """Run all integration tests.

    Launches ``pytest`` on this file in a child process (verbose, short
    tracebacks, stop at first failure), prints the captured output and
    reports the outcome.

    Returns:
        bool: ``True`` when pytest exits with status 0; ``False`` when it
        exits non-zero or when launching/reporting raises.
    """
    import subprocess

    try:
        # Resolve first: the child runs with a different working directory,
        # so a relative __file__ would no longer point at this module.
        test_file = Path(__file__).resolve()

        # Run pytest on this module
        result = subprocess.run(
            [sys.executable, "-m", "pytest", str(test_file), "-v", "--tb=short", "-x"],
            capture_output=True,
            text=True,
            cwd=test_file.parent.parent,
        )

        print("INTEGRATION TEST RESULTS")
        print("=" * 50)
        print(result.stdout)

        if result.stderr:
            print("ERRORS:")
            print(result.stderr)

        return result.returncode == 0

    except Exception as e:
        # Top-level boundary: callers only get a boolean, so any failure to
        # launch or report is printed and mapped to False.
        print(f"Failed to run tests: {e}")
        return False
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: exit status 0 when the suite passed, 1 otherwise.
    # sys.exit is used because the bare exit() helper is injected by the
    # site module and is missing under `python -S` or in frozen apps.
    success = run_integration_tests()
    sys.exit(0 if success else 1)