Phase 2: initial build
8
hcfs-python/tests/__init__.py
Normal file
@@ -0,0 +1,8 @@
"""
HCFS Test Suite

Comprehensive testing framework for Context-Aware Hierarchical Context File System.
"""

__version__ = "1.0.0"
__author__ = "HCFS Development Team"
57
hcfs-python/tests/conftest.py
Normal file
@@ -0,0 +1,57 @@
"""
Pytest configuration and shared fixtures for HCFS test suite.
"""

import pytest
import tempfile
import shutil
from pathlib import Path
import sys

# Add the project root to Python path
sys.path.insert(0, str(Path(__file__).parent.parent))


@pytest.fixture(scope="session")
def temp_test_dir():
    """Create a temporary directory for all tests in the session."""
    temp_dir = Path(tempfile.mkdtemp(prefix="hcfs_test_"))
    yield temp_dir
    shutil.rmtree(temp_dir, ignore_errors=True)


@pytest.fixture
def temp_db_path(temp_test_dir, request):
    """Create a temporary database path unique to the requesting test."""
    return str(temp_test_dir / f"test_{request.node.name}.db")


@pytest.fixture
def temp_vector_path(temp_test_dir, request):
    """Create a temporary vector database path unique to the requesting test."""
    return str(temp_test_dir / f"test_vectors_{request.node.name}.db")


# Configure pytest markers
def pytest_configure(config):
    """Configure custom pytest markers."""
    config.addinivalue_line(
        "markers", "slow: marks tests as slow (deselect with '-m \"not slow\"')"
    )
    config.addinivalue_line(
        "markers", "integration: marks tests as integration tests"
    )
    config.addinivalue_line(
        "markers", "unit: marks tests as unit tests"
    )


# Custom pytest collection hook
def pytest_collection_modifyitems(config, items):
    """Modify test collection to add markers based on test file names."""
    for item in items:
        # Mark integration tests
        if "test_integration" in item.fspath.basename:
            item.add_marker(pytest.mark.integration)
        # Mark unit tests
        elif any(name in item.fspath.basename for name in ["test_context_db", "test_embeddings"]):
            item.add_marker(pytest.mark.unit)

        # Mark slow tests based on test name patterns
        if any(pattern in item.name for pattern in ["large_scale", "performance", "concurrent", "load"]):
            item.add_marker(pytest.mark.slow)
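
The fixtures above give each test an isolated on-disk path derived from the requesting test's node name, and the registered markers let whole groups be selected from the command line, e.g. pytest -m "not slow" or pytest -m integration. A minimal consumer of these fixtures might look like the following sketch (test_example is a hypothetical test, not part of this commit):

    def test_example(temp_db_path, temp_vector_path):
        # Both paths embed the requesting test's node name, so no two
        # tests ever contend for the same file.
        assert temp_db_path.endswith("test_test_example.db")
        assert temp_vector_path.endswith("test_vectors_test_example.db")
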
464
hcfs-python/tests/test_context_db.py
Normal file
@@ -0,0 +1,464 @@
"""
Test suite for Context Database functionality.

Tests covering:
- Basic CRUD operations
- Context versioning
- Database integrity
- Performance characteristics
- Error handling
"""

import pytest
import tempfile
import shutil
from pathlib import Path
from datetime import datetime
import sqlite3

import sys
sys.path.insert(0, str(Path(__file__).parent.parent))

from hcfs.core.context_db import Context, ContextDatabase
from hcfs.core.context_db_optimized_fixed import OptimizedContextDatabase
from hcfs.core.context_versioning import VersioningSystem


class TestContextDatabase:
    """Test basic context database operations."""

    @pytest.fixture
    def temp_db(self):
        """Create temporary database for testing."""
        temp_dir = Path(tempfile.mkdtemp())
        db_path = temp_dir / "test.db"
        db = ContextDatabase(str(db_path))
        yield db
        shutil.rmtree(temp_dir)

    @pytest.fixture
    def sample_context(self):
        """Create sample context for testing."""
        return Context(
            id=None,
            path="/test/path",
            content="Test content for context",
            summary="Test summary",
            author="test_user",
            version=1
        )

    def test_store_context(self, temp_db, sample_context):
        """Test storing a context."""
        context_id = temp_db.store_context(sample_context)
        assert context_id is not None
        assert isinstance(context_id, int)
        assert context_id > 0

    def test_get_context(self, temp_db, sample_context):
        """Test retrieving a context."""
        context_id = temp_db.store_context(sample_context)
        retrieved = temp_db.get_context(context_id)

        assert retrieved is not None
        assert retrieved.path == sample_context.path
        assert retrieved.content == sample_context.content
        assert retrieved.summary == sample_context.summary
        assert retrieved.author == sample_context.author

    def test_get_contexts_by_path(self, temp_db):
        """Test path-based context retrieval."""
        contexts = [
            Context(None, "/test/path1", "Content 1", "Summary 1", "user1", 1),
            Context(None, "/test/path2", "Content 2", "Summary 2", "user2", 1),
            Context(None, "/other/path", "Content 3", "Summary 3", "user3", 1)
        ]

        for context in contexts:
            temp_db.store_context(context)

        test_contexts = temp_db.get_contexts_by_path("/test")
        assert len(test_contexts) == 2

        exact_context = temp_db.get_contexts_by_path("/test/path1", exact_match=True)
        assert len(exact_context) == 1

    def test_update_context(self, temp_db, sample_context):
        """Test updating a context."""
        context_id = temp_db.store_context(sample_context)

        # Update the context
        updated_content = "Updated content"
        temp_db.update_context(context_id, content=updated_content)

        retrieved = temp_db.get_context(context_id)
        assert retrieved.content == updated_content

    def test_delete_context(self, temp_db, sample_context):
        """Test deleting a context."""
        context_id = temp_db.store_context(sample_context)

        # Verify it exists
        assert temp_db.get_context(context_id) is not None

        # Delete it
        success = temp_db.delete_context(context_id)
        assert success

        # Verify it's gone
        assert temp_db.get_context(context_id) is None

    def test_search_contexts(self, temp_db):
        """Test context search functionality."""
        contexts = [
            Context(None, "/ml/algorithms", "Machine learning algorithms", "ML summary", "user1", 1),
            Context(None, "/web/api", "RESTful API development", "API summary", "user2", 1),
            Context(None, "/db/optimization", "Database query optimization", "DB summary", "user3", 1)
        ]

        for context in contexts:
            temp_db.store_context(context)

        # Search by content
        results = temp_db.search_contexts("machine learning")
        assert len(results) == 1
        assert "algorithms" in results[0].path

        # Search by path
        results = temp_db.search_contexts("api")
        assert len(results) == 1
        assert "web" in results[0].path
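
# The tests above build Context both by keyword and positionally as
# (id, path, content, summary, author, version), and the schema test further
# down expects created_at/updated_at columns as well. ContextShape below is an
# illustrative sketch of that assumed shape, not the real class imported from
# hcfs.core.context_db.
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class ContextShape:
    # Field order mirrors the positional construction used throughout this suite.
    id: Optional[int]
    path: str
    content: str
    summary: str
    author: str
    version: int = 1
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)
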
class TestOptimizedContextDatabase:
    """Test optimized context database operations."""

    @pytest.fixture
    def temp_optimized_db(self):
        """Create temporary optimized database."""
        temp_dir = Path(tempfile.mkdtemp())
        db_path = temp_dir / "optimized_test.db"
        db = OptimizedContextDatabase(str(db_path))
        yield db
        shutil.rmtree(temp_dir)

    def test_batch_operations(self, temp_optimized_db):
        """Test batch context operations."""
        contexts = [
            Context(None, f"/batch/test{i}", f"Content {i}", f"Summary {i}", f"user{i}", 1)
            for i in range(10)
        ]

        # Batch store
        context_ids = temp_optimized_db.store_contexts_batch(contexts)
        assert len(context_ids) == 10
        assert all(isinstance(cid, int) for cid in context_ids)

        # Batch retrieve
        retrieved = temp_optimized_db.get_contexts_batch(context_ids)
        assert len(retrieved) == 10

        for i, context in enumerate(retrieved):
            assert context.path == f"/batch/test{i}"
            assert context.content == f"Content {i}"

    def test_caching_performance(self, temp_optimized_db):
        """Test caching functionality."""
        context = Context(None, "/cache/test", "Cached content", "Cache summary", "user", 1)
        context_id = temp_optimized_db.store_context(context)

        # First access (cache miss)
        import time
        start = time.time()
        result1 = temp_optimized_db.get_context(context_id)
        first_time = time.time() - start

        # Second access (cache hit)
        start = time.time()
        result2 = temp_optimized_db.get_context(context_id)
        second_time = time.time() - start

        assert result1.content == result2.content
        assert second_time < first_time  # Should be faster due to caching

    def test_connection_pooling(self, temp_optimized_db):
        """Test database connection pooling."""
        import threading
        import concurrent.futures

        def worker(worker_id):
            context = Context(
                None, f"/worker/{worker_id}",
                f"Worker {worker_id} content",
                f"Summary {worker_id}",
                f"worker{worker_id}", 1
            )
            return temp_optimized_db.store_context(context)

        # Test concurrent operations
        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(worker, i) for i in range(10)]
            results = [future.result() for future in futures]

        assert len(results) == 10
        assert all(isinstance(result, int) for result in results)
        assert len(set(results)) == 10  # All IDs should be unique
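
# store_contexts_batch presumably owes its speed to running every insert in a
# single transaction instead of committing row by row. A minimal sketch of
# that pattern with the standard sqlite3 module -- an illustration of the
# technique, not the actual OptimizedContextDatabase internals:
def _batched_insert_sketch(db_path, rows):
    """rows: iterable of (path, content, summary, author, version) tuples."""
    with sqlite3.connect(db_path) as conn:  # commits once on success
        cursor = conn.executemany(
            "INSERT INTO context_blobs (path, content, summary, author, version) "
            "VALUES (?, ?, ?, ?, ?)",
            rows,
        )
        return cursor.rowcount  # number of rows inserted
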
class TestVersioningSystem:
    """Test context versioning functionality."""

    @pytest.fixture
    def temp_versioning_db(self):
        """Create temporary database with versioning."""
        temp_dir = Path(tempfile.mkdtemp())
        db_path = temp_dir / "versioning_test.db"
        context_db = OptimizedContextDatabase(str(db_path))
        versioning = VersioningSystem(str(db_path))
        yield context_db, versioning
        shutil.rmtree(temp_dir)

    def test_create_version(self, temp_versioning_db):
        """Test creating context versions."""
        context_db, versioning = temp_versioning_db

        # Create initial context
        context = Context(None, "/version/test", "Original content", "Original summary", "user", 1)
        context_id = context_db.store_context(context)

        # Create version
        version = versioning.create_version(
            context_id, "user", "Initial version", {"tag": "v1.0"}
        )

        assert version is not None
        assert version.context_id == context_id
        assert version.author == "user"
        assert version.message == "Initial version"

    def test_version_history(self, temp_versioning_db):
        """Test version history retrieval."""
        context_db, versioning = temp_versioning_db

        # Create context with multiple versions
        context = Context(None, "/history/test", "Content v1", "Summary v1", "user", 1)
        context_id = context_db.store_context(context)

        # Create multiple versions
        for i in range(3):
            versioning.create_version(
                context_id, f"user{i}", f"Version {i+1}", {"iteration": i+1}
            )

            # Update context
            context_db.update_context(context_id, content=f"Content v{i+2}")

        # Get history
        history = versioning.get_version_history(context_id)
        assert len(history) == 3

        # Verify order (newest first)
        for i, version in enumerate(history):
            assert version.message == f"Version {3-i}"

    def test_rollback_version(self, temp_versioning_db):
        """Test version rollback functionality."""
        context_db, versioning = temp_versioning_db

        # Create context
        original_content = "Original content"
        context = Context(None, "/rollback/test", original_content, "Summary", "user", 1)
        context_id = context_db.store_context(context)

        # Create version before modification
        version1 = versioning.create_version(context_id, "user", "Before changes")

        # Modify context
        modified_content = "Modified content"
        context_db.update_context(context_id, content=modified_content)

        # Verify modification
        current = context_db.get_context(context_id)
        assert current.content == modified_content

        # Rollback
        rollback_version = versioning.rollback_to_version(
            context_id, version1.version_number, "user", "Rolling back changes"
        )

        assert rollback_version is not None

        # Verify rollback (content should be back to original)
        rolled_back = context_db.get_context(context_id)
        assert rolled_back.content == original_content

    def test_version_comparison(self, temp_versioning_db):
        """Test version comparison."""
        context_db, versioning = temp_versioning_db

        # Create context with versions
        context = Context(None, "/compare/test", "Content A", "Summary A", "user", 1)
        context_id = context_db.store_context(context)

        version1 = versioning.create_version(context_id, "user", "Version A")

        context_db.update_context(context_id, content="Content B", summary="Summary B")
        version2 = versioning.create_version(context_id, "user", "Version B")

        # Compare versions
        diff = versioning.compare_versions(context_id, version1.version_number, version2.version_number)

        assert diff is not None
        assert "Content A" in str(diff)
        assert "Content B" in str(diff)
        assert "Summary A" in str(diff)
        assert "Summary B" in str(diff)
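
# compare_versions returns an object whose string form carries both versions'
# content and summaries; the standard library's difflib is the classic way to
# build such a diff. A sketch, assuming a plain unified diff over stored text
# (the real VersioningSystem may format its diff differently):
import difflib


def _unified_diff_sketch(old: str, new: str) -> str:
    """Unified diff between two versions' text, e.g. "Content A" vs "Content B"."""
    return "\n".join(difflib.unified_diff(
        old.splitlines(), new.splitlines(),
        fromfile="old_version", tofile="new_version", lineterm="",
    ))
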
class TestDatabaseIntegrity:
    """Test database integrity and error handling."""

    @pytest.fixture
    def temp_db(self):
        """Create temporary database."""
        temp_dir = Path(tempfile.mkdtemp())
        db_path = temp_dir / "integrity_test.db"
        db = OptimizedContextDatabase(str(db_path))
        yield db, db_path
        shutil.rmtree(temp_dir)

    def test_database_schema(self, temp_db):
        """Test database schema integrity."""
        db, db_path = temp_db

        # Connect directly to check schema
        conn = sqlite3.connect(str(db_path))
        cursor = conn.cursor()

        # Check tables exist
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
        tables = [row[0] for row in cursor.fetchall()]

        assert "context_blobs" in tables

        # Check context_blobs schema
        cursor.execute("PRAGMA table_info(context_blobs)")
        columns = {row[1]: row[2] for row in cursor.fetchall()}  # name: type

        expected_columns = {
            "id": "INTEGER",
            "path": "TEXT",
            "content": "TEXT",
            "summary": "TEXT",
            "author": "TEXT",
            "created_at": "TIMESTAMP",
            "updated_at": "TIMESTAMP",
            "version": "INTEGER"
        }

        for col_name, col_type in expected_columns.items():
            assert col_name in columns

        conn.close()

    def test_constraint_violations(self, temp_db):
        """Test handling of constraint violations."""
        db, _ = temp_db

        # Test invalid context (missing required fields)
        with pytest.raises((ValueError, TypeError, AttributeError)):
            invalid_context = Context(None, "", "", None, None, 0)  # Empty required fields
            db.store_context(invalid_context)

    def test_transaction_rollback(self, temp_db):
        """Test transaction rollback on errors."""
        db, db_path = temp_db

        # Create a valid context first
        context = Context(None, "/transaction/test", "Content", "Summary", "user", 1)
        context_id = db.store_context(context)

        # Verify it exists
        assert db.get_context(context_id) is not None

        # Now test that failed operations don't affect existing data
        try:
            # This should fail but not corrupt the database
            db.update_context(999999, content="Should fail")  # Non-existent ID
        except Exception:
            pass  # Expected to fail

        # Verify original context still exists and is unchanged
        retrieved = db.get_context(context_id)
        assert retrieved is not None
        assert retrieved.content == "Content"

    def test_concurrent_access(self, temp_db):
        """Test concurrent database access."""
        db, _ = temp_db

        import threading
        import time

        results = []
        errors = []

        def worker(worker_id):
            try:
                for i in range(5):
                    context = Context(
                        None, f"/concurrent/{worker_id}/{i}",
                        f"Content {worker_id}-{i}",
                        f"Summary {worker_id}-{i}",
                        f"worker{worker_id}", 1
                    )
                    context_id = db.store_context(context)
                    results.append(context_id)
                    time.sleep(0.001)  # Small delay to increase contention
            except Exception as e:
                errors.append(e)

        # Run multiple workers concurrently
        threads = [threading.Thread(target=worker, args=(i,)) for i in range(3)]

        for thread in threads:
            thread.start()

        for thread in threads:
            thread.join()

        # Check results
        assert len(errors) == 0, f"Concurrent access errors: {errors}"
        assert len(results) == 15  # 3 workers * 5 contexts each
        assert len(set(results)) == 15  # All IDs should be unique
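
# The expected_columns mapping in test_database_schema pins down the
# context_blobs layout. The equivalent DDL would look roughly like the sketch
# below -- reconstructed from the test's expectations (constraints and defaults
# are assumptions), not copied from the HCFS source:
_CONTEXT_BLOBS_DDL_SKETCH = """
CREATE TABLE IF NOT EXISTS context_blobs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    path TEXT NOT NULL,
    content TEXT NOT NULL,
    summary TEXT,
    author TEXT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    version INTEGER DEFAULT 1
)
"""


def _create_schema_sketch():
    """Apply the sketched DDL to an in-memory database."""
    conn = sqlite3.connect(":memory:")
    conn.execute(_CONTEXT_BLOBS_DDL_SKETCH)
    return conn
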
def run_context_db_tests():
    """Run all context database tests."""
    import subprocess

    try:
        # Run pytest on this module
        result = subprocess.run([
            sys.executable, "-m", "pytest", __file__, "-v", "--tb=short"
        ], capture_output=True, text=True, cwd=Path(__file__).parent.parent)

        print("CONTEXT DATABASE TEST RESULTS")
        print("=" * 50)
        print(result.stdout)

        if result.stderr:
            print("ERRORS:")
            print(result.stderr)

        return result.returncode == 0

    except Exception as e:
        print(f"Failed to run tests: {e}")
        return False


if __name__ == "__main__":
    success = run_context_db_tests()
    sys.exit(0 if success else 1)
605
hcfs-python/tests/test_embeddings.py
Normal file
@@ -0,0 +1,605 @@
"""
Test suite for Embedding System functionality.

Tests covering:
- Embedding generation and caching
- Vector database operations
- Semantic and hybrid search
- Performance characteristics
- Async compatibility
"""

import pytest
import tempfile
import shutil
import numpy as np
from pathlib import Path
import time
import threading
import concurrent.futures

import sys
sys.path.insert(0, str(Path(__file__).parent.parent))

from hcfs.core.context_db import Context
from hcfs.core.context_db_optimized_fixed import OptimizedContextDatabase
from hcfs.core.embeddings_optimized import OptimizedEmbeddingManager, VectorSearchResult


class TestEmbeddingGeneration:
    """Test embedding generation functionality."""

    @pytest.fixture
    def temp_embedding_system(self):
        """Create temporary embedding system."""
        temp_dir = Path(tempfile.mkdtemp())
        db_path = temp_dir / "test_context.db"
        vector_db_path = temp_dir / "test_vectors.db"

        context_db = OptimizedContextDatabase(str(db_path))
        embedding_manager = OptimizedEmbeddingManager(
            context_db,
            model_name="mini",
            vector_db_path=str(vector_db_path),
            cache_size=100,
            batch_size=4
        )

        yield context_db, embedding_manager
        shutil.rmtree(temp_dir)

    def test_single_embedding_generation(self, temp_embedding_system):
        """Test generating a single embedding."""
        _, embedding_manager = temp_embedding_system

        text = "Test embedding generation"
        embedding = embedding_manager.generate_embedding(text)

        assert isinstance(embedding, np.ndarray)
        assert embedding.shape == (384,)  # MiniLM dimension
        assert not np.isnan(embedding).any()
        assert not np.isinf(embedding).any()

    def test_embedding_caching(self, temp_embedding_system):
        """Test embedding caching functionality."""
        _, embedding_manager = temp_embedding_system

        text = "Test caching functionality"

        # First generation (cache miss)
        start_time = time.time()
        embedding1 = embedding_manager.generate_embedding(text)
        first_time = time.time() - start_time

        # Second generation (cache hit)
        start_time = time.time()
        embedding2 = embedding_manager.generate_embedding(text)
        second_time = time.time() - start_time

        # Verify embeddings are identical
        assert np.allclose(embedding1, embedding2)

        # Cache should be significantly faster
        assert second_time < first_time * 0.1  # At least 10x faster

    def test_batch_embedding_generation(self, temp_embedding_system):
        """Test batch embedding generation."""
        _, embedding_manager = temp_embedding_system

        texts = [
            "First test text",
            "Second test text",
            "Third test text",
            "Fourth test text"
        ]

        embeddings = embedding_manager.generate_embeddings_batch(texts)

        assert len(embeddings) == len(texts)
        assert all(isinstance(emb, np.ndarray) for emb in embeddings)
        assert all(emb.shape == (384,) for emb in embeddings)

        # Verify embeddings are different for different texts
        assert not np.allclose(embeddings[0], embeddings[1])

    def test_batch_vs_individual_performance(self, temp_embedding_system):
        """Test batch processing performance."""
        _, embedding_manager = temp_embedding_system

        texts = [f"Performance test text {i}" for i in range(8)]

        # Individual processing
        start_time = time.time()
        individual_embeddings = [
            embedding_manager.generate_embedding(text, use_cache=False)
            for text in texts
        ]
        individual_time = time.time() - start_time

        # Clear cache to ensure fair comparison
        embedding_manager.vector_cache.clear()

        # Batch processing
        start_time = time.time()
        batch_embeddings = embedding_manager.generate_embeddings_batch(texts, use_cache=False)
        batch_time = time.time() - start_time

        # Verify results are equivalent
        assert len(individual_embeddings) == len(batch_embeddings)
        for ind, batch in zip(individual_embeddings, batch_embeddings):
            assert np.allclose(ind, batch, rtol=1e-5)

        # Batch should be faster
        speedup = individual_time / batch_time
        assert speedup > 2.0  # At least 2x speedup expected
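
# The cache assertions above (and vector_cache.stats() further down, with its
# size/max_size/hit_rate keys) suggest a bounded LRU cache in front of the
# model. A minimal sketch of such a cache -- an illustration, not the actual
# HCFS implementation:
from collections import OrderedDict


class _LRUCacheSketch:
    def __init__(self, max_size: int):
        self.max_size = max_size
        self._data = OrderedDict()
        self._hits = self._misses = 0

    def get(self, key):
        if key in self._data:
            self._data.move_to_end(key)  # mark as most recently used
            self._hits += 1
            return self._data[key]
        self._misses += 1
        return None

    def put(self, key, value):
        self._data[key] = value
        self._data.move_to_end(key)
        if len(self._data) > self.max_size:
            self._data.popitem(last=False)  # evict the least recently used

    def clear(self):
        self._data.clear()

    def stats(self):
        total = self._hits + self._misses
        return {
            "size": len(self._data),
            "max_size": self.max_size,
            "hit_rate": self._hits / total if total else 0.0,
        }
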
class TestVectorDatabase:
    """Test vector database operations."""

    @pytest.fixture
    def temp_vector_system(self):
        """Create temporary vector database system."""
        temp_dir = Path(tempfile.mkdtemp())
        db_path = temp_dir / "test_context.db"
        vector_db_path = temp_dir / "test_vectors.db"

        context_db = OptimizedContextDatabase(str(db_path))
        embedding_manager = OptimizedEmbeddingManager(
            context_db,
            model_name="mini",
            vector_db_path=str(vector_db_path),
            cache_size=50
        )

        yield context_db, embedding_manager
        shutil.rmtree(temp_dir)

    def test_embedding_storage_retrieval(self, temp_vector_system):
        """Test storing and retrieving embeddings."""
        _, embedding_manager = temp_vector_system

        # Create test embedding
        test_embedding = np.random.rand(384).astype(np.float32)
        context_id = 123

        # Store embedding
        embedding_manager.store_embedding(context_id, test_embedding)

        # Retrieve embedding
        retrieved = embedding_manager.get_embedding(context_id)

        assert retrieved is not None
        assert np.allclose(test_embedding, retrieved, rtol=1e-6)

    def test_batch_embedding_storage(self, temp_vector_system):
        """Test batch embedding storage."""
        _, embedding_manager = temp_vector_system

        # Create test embeddings
        embeddings_data = [
            (i, np.random.rand(384).astype(np.float32))
            for i in range(10, 20)
        ]

        # Store batch
        embedding_manager.store_embeddings_batch(embeddings_data)

        # Verify all were stored
        for context_id, original_embedding in embeddings_data:
            retrieved = embedding_manager.get_embedding(context_id)
            assert retrieved is not None
            assert np.allclose(original_embedding, retrieved, rtol=1e-6)

    def test_vector_similarity_search(self, temp_vector_system):
        """Test vector similarity search."""
        _, embedding_manager = temp_vector_system

        # Create and store reference embeddings
        reference_embedding = np.random.rand(384).astype(np.float32)
        similar_embedding = reference_embedding + np.random.rand(384).astype(np.float32) * 0.1
        different_embedding = np.random.rand(384).astype(np.float32)

        embedding_manager.store_embedding(1, reference_embedding)
        embedding_manager.store_embedding(2, similar_embedding)
        embedding_manager.store_embedding(3, different_embedding)

        # Search for similar embeddings
        query_embedding = reference_embedding + np.random.rand(384).astype(np.float32) * 0.05
        results = embedding_manager.vector_similarity_search(query_embedding, top_k=3)

        assert len(results) <= 3
        assert all(isinstance(result, VectorSearchResult) for result in results)

        # Results should be sorted by similarity (highest first)
        scores = [result.score for result in results]
        assert scores == sorted(scores, reverse=True)

        # Reference embedding should be most similar
        assert results[0].context_id == 1

    def test_embeddings_index_building(self, temp_vector_system):
        """Test building embeddings index."""
        context_db, embedding_manager = temp_vector_system

        # Create test contexts
        contexts = [
            Context(None, f"/test/{i}", f"Test content {i}", f"Summary {i}", "user", 1)
            for i in range(5)
        ]

        context_ids = []
        for context in contexts:
            context_id = context_db.store_context(context)
            context_ids.append(context_id)

        # Build embeddings index
        index_stats = embedding_manager.build_embeddings_index(batch_size=2)

        assert index_stats["total_processed"] == 5
        assert index_stats["embeddings_per_second"] > 0

        # Verify embeddings were created
        for context_id in context_ids:
            embedding = embedding_manager.get_embedding(context_id)
            assert embedding is not None
            assert embedding.shape == (384,)
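
# The ordering assertions in test_vector_similarity_search presume a
# similarity score such as cosine similarity between the query and each stored
# vector. A worked NumPy example of that metric (an assumption about the
# scoring; the manager may normalize or score differently):
def _cosine_similarity_sketch(a: np.ndarray, b: np.ndarray) -> float:
    # cos(theta) = (a . b) / (|a| * |b|); 1.0 means identical direction.
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))


def _cosine_demo():
    rng = np.random.default_rng(0)
    ref = rng.random(384).astype(np.float32)
    near = ref + rng.random(384).astype(np.float32) * 0.1
    far = rng.random(384).astype(np.float32)
    # A lightly perturbed copy stays far closer to the reference than an
    # independent random vector, which is what the test above relies on.
    assert _cosine_similarity_sketch(ref, near) > _cosine_similarity_sketch(ref, far)
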
class TestSemanticSearch:
    """Test semantic search functionality."""

    @pytest.fixture
    def temp_search_system(self):
        """Create search system with test data."""
        temp_dir = Path(tempfile.mkdtemp())
        db_path = temp_dir / "search_test.db"
        vector_db_path = temp_dir / "search_vectors.db"

        context_db = OptimizedContextDatabase(str(db_path))
        embedding_manager = OptimizedEmbeddingManager(
            context_db,
            model_name="mini",
            vector_db_path=str(vector_db_path)
        )

        # Create test contexts
        test_contexts = [
            Context(None, "/ml/algorithms", "Machine learning algorithms and models", "ML summary", "user1", 1),
            Context(None, "/ml/neural", "Neural networks and deep learning", "NN summary", "user1", 1),
            Context(None, "/web/api", "RESTful API development", "API summary", "user2", 1),
            Context(None, "/web/frontend", "Frontend web development", "Frontend summary", "user2", 1),
            Context(None, "/db/sql", "SQL database queries", "SQL summary", "user3", 1)
        ]

        # Store contexts and build embeddings
        for context in test_contexts:
            context_db.store_context(context)

        embedding_manager.build_embeddings_index()

        yield context_db, embedding_manager
        shutil.rmtree(temp_dir)

    def test_semantic_search_accuracy(self, temp_search_system):
        """Test semantic search accuracy."""
        _, embedding_manager = temp_search_system

        # Search for ML-related content
        results = embedding_manager.semantic_search_optimized(
            "machine learning models", top_k=3, include_contexts=True
        )

        assert len(results) > 0
        assert all(isinstance(result, VectorSearchResult) for result in results)
        assert all(result.context is not None for result in results)

        # Top results should be ML-related
        top_result = results[0]
        assert "/ml/" in top_result.context.path
        assert top_result.score > 0.3  # Reasonable similarity threshold

    def test_semantic_search_with_path_filter(self, temp_search_system):
        """Test semantic search with path filtering."""
        _, embedding_manager = temp_search_system

        # Search only in web-related paths
        results = embedding_manager.semantic_search_optimized(
            "development", path_prefix="/web", top_k=5, include_contexts=True
        )

        assert len(results) > 0
        # All results should be from /web paths
        for result in results:
            assert result.context.path.startswith("/web")

    def test_hybrid_search_functionality(self, temp_search_system):
        """Test hybrid search combining semantic and BM25."""
        _, embedding_manager = temp_search_system

        results = embedding_manager.hybrid_search_optimized(
            "neural network algorithms",
            top_k=3,
            semantic_weight=0.7
        )

        assert len(results) > 0
        assert all(isinstance(result, VectorSearchResult) for result in results)

        # Check that metadata includes both scores
        for result in results:
            if result.metadata:
                assert "semantic_score" in result.metadata
                assert "bm25_score" in result.metadata
                assert "semantic_weight" in result.metadata

    def test_search_performance(self, temp_search_system):
        """Test search performance characteristics."""
        _, embedding_manager = temp_search_system

        query = "database optimization"

        # Time semantic search
        start_time = time.time()
        semantic_results = embedding_manager.semantic_search_optimized(query, top_k=5)
        semantic_time = time.time() - start_time

        # Time hybrid search
        start_time = time.time()
        hybrid_results = embedding_manager.hybrid_search_optimized(query, top_k=5)
        hybrid_time = time.time() - start_time

        assert semantic_time < 1.0  # Should be under 1 second
        assert hybrid_time < 2.0  # Hybrid search can be slightly slower

        assert len(semantic_results) > 0
        assert len(hybrid_results) > 0
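
# Given a semantic_weight of 0.7 and per-result semantic_score/bm25_score
# metadata, the natural reading of hybrid_search_optimized is a convex blend
# of the two (normalized) scores. A sketch of that combination -- an
# assumption about the scoring, not verified internals:
def _hybrid_score_sketch(semantic_score: float, bm25_score: float,
                         semantic_weight: float = 0.7) -> float:
    # Convex combination: semantic similarity weighted against lexical BM25.
    # Both inputs are assumed normalized to [0, 1] before blending.
    return semantic_weight * semantic_score + (1.0 - semantic_weight) * bm25_score


# Example: a strong lexical match can still outrank a middling semantic one.
# _hybrid_score_sketch(semantic_score=0.42, bm25_score=0.90) -> 0.564
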
class TestConcurrentOperations:
    """Test concurrent embedding operations."""

    @pytest.fixture
    def temp_concurrent_system(self):
        """Create system for concurrent testing."""
        temp_dir = Path(tempfile.mkdtemp())
        db_path = temp_dir / "concurrent_test.db"
        vector_db_path = temp_dir / "concurrent_vectors.db"

        context_db = OptimizedContextDatabase(str(db_path))
        embedding_manager = OptimizedEmbeddingManager(
            context_db,
            model_name="mini",
            vector_db_path=str(vector_db_path),
            cache_size=100
        )

        yield context_db, embedding_manager
        shutil.rmtree(temp_dir)

    def test_concurrent_embedding_generation(self, temp_concurrent_system):
        """Test concurrent embedding generation."""
        _, embedding_manager = temp_concurrent_system

        def generate_embeddings(worker_id):
            results = []
            for i in range(3):
                text = f"Worker {worker_id} text {i}"
                embedding = embedding_manager.generate_embedding(text)
                results.append((text, embedding))
            return results

        # Run concurrent workers
        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
            futures = [executor.submit(generate_embeddings, i) for i in range(3)]
            all_results = [future.result() for future in futures]

        assert len(all_results) == 3
        assert all(len(worker_results) == 3 for worker_results in all_results)

        # Verify all embeddings are valid
        for worker_results in all_results:
            for text, embedding in worker_results:
                assert isinstance(embedding, np.ndarray)
                assert embedding.shape == (384,)

    def test_concurrent_vector_operations(self, temp_concurrent_system):
        """Test concurrent vector database operations."""
        _, embedding_manager = temp_concurrent_system

        def vector_operations(worker_id):
            results = []
            base_id = worker_id * 100

            # Store embeddings
            for i in range(5):
                context_id = base_id + i
                embedding = np.random.rand(384).astype(np.float32)
                embedding_manager.store_embedding(context_id, embedding)
                results.append((context_id, embedding))

            # Retrieve embeddings
            retrieved = []
            for context_id, original in results:
                retrieved_embedding = embedding_manager.get_embedding(context_id)
                retrieved.append((context_id, retrieved_embedding))

            return results, retrieved

        # Run concurrent operations
        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
            futures = [executor.submit(vector_operations, i) for i in range(3)]
            all_results = [future.result() for future in futures]

        # Verify all operations completed successfully
        for stored, retrieved in all_results:
            assert len(stored) == 5
            assert len(retrieved) == 5

            for (stored_id, stored_emb), (retrieved_id, retrieved_emb) in zip(stored, retrieved):
                assert stored_id == retrieved_id
                assert np.allclose(stored_emb, retrieved_emb, rtol=1e-6)

    def test_concurrent_search_operations(self, temp_concurrent_system):
        """Test concurrent search operations."""
        context_db, embedding_manager = temp_concurrent_system

        # Set up test data
        contexts = [
            Context(None, f"/concurrent/{i}", f"Concurrent test content {i}", f"Summary {i}", "user", 1)
            for i in range(10)
        ]

        for context in contexts:
            context_db.store_context(context)

        embedding_manager.build_embeddings_index()

        def search_worker(worker_id):
            results = []
            queries = [f"concurrent test {worker_id}", f"content {worker_id}", f"summary {worker_id}"]

            for query in queries:
                search_results = embedding_manager.semantic_search_optimized(query, top_k=3)
                results.append((query, len(search_results)))

            return results

        # Run concurrent searches
        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            futures = [executor.submit(search_worker, i) for i in range(4)]
            all_results = [future.result() for future in futures]

        # Verify all searches completed
        for worker_results in all_results:
            assert len(worker_results) == 3
            for query, result_count in worker_results:
                assert result_count >= 0  # Search completed; an empty result set is acceptable


class TestEmbeddingStatistics:
    """Test embedding system statistics and monitoring."""

    @pytest.fixture
    def temp_stats_system(self):
        """Create system for statistics testing."""
        temp_dir = Path(tempfile.mkdtemp())
        db_path = temp_dir / "stats_test.db"
        vector_db_path = temp_dir / "stats_vectors.db"

        context_db = OptimizedContextDatabase(str(db_path))
        embedding_manager = OptimizedEmbeddingManager(
            context_db,
            model_name="mini",
            vector_db_path=str(vector_db_path)
        )

        yield context_db, embedding_manager
        shutil.rmtree(temp_dir)

    def test_statistics_collection(self, temp_stats_system):
        """Test statistics collection."""
        context_db, embedding_manager = temp_stats_system

        # Create some test data
        contexts = [
            Context(None, f"/stats/{i}", f"Stats test {i}", f"Summary {i}", "user", 1)
            for i in range(5)
        ]

        for context in contexts:
            context_db.store_context(context)

        embedding_manager.build_embeddings_index()

        # Get statistics
        stats = embedding_manager.get_statistics()

        assert "database_stats" in stats
        assert "cache_stats" in stats
        assert "current_model" in stats

        db_stats = stats["database_stats"]
        assert db_stats["total_embeddings"] == 5
        assert db_stats["unique_models"] >= 1
        assert db_stats["average_dimension"] == 384

        cache_stats = stats["cache_stats"]
        assert "size" in cache_stats
        assert "max_size" in cache_stats
        assert "hit_rate" in cache_stats

    def test_cache_statistics(self, temp_stats_system):
        """Test cache statistics tracking."""
        _, embedding_manager = temp_stats_system

        # Generate some embeddings to populate cache
        texts = [f"Cache test {i}" for i in range(10)]

        for text in texts:
            embedding_manager.generate_embedding(text)

        # Access some cached embeddings
        for text in texts[:5]:
            embedding_manager.generate_embedding(text)  # Cache hits

        cache_stats = embedding_manager.vector_cache.stats()

        assert cache_stats["size"] <= cache_stats["max_size"]
        assert cache_stats["size"] > 0

    def test_cleanup_operations(self, temp_stats_system):
        """Test cleanup operations."""
        _, embedding_manager = temp_stats_system

        # Store some test embeddings
        for i in range(10):
            embedding = np.random.rand(384).astype(np.float32)
            embedding_manager.store_embedding(i, embedding)

        # Get initial count
        initial_stats = embedding_manager.get_statistics()
        initial_count = initial_stats["database_stats"]["total_embeddings"]

        # Clear cache
        embedding_manager.vector_cache.clear()

        # Cache should be empty
        cache_stats = embedding_manager.vector_cache.stats()
        assert cache_stats["size"] == 0

        # But embeddings should still be in database
        final_stats = embedding_manager.get_statistics()
        final_count = final_stats["database_stats"]["total_embeddings"]
        assert final_count == initial_count


def run_embedding_tests():
    """Run all embedding tests."""
    import subprocess

    try:
        # Run pytest on this module
        result = subprocess.run([
            sys.executable, "-m", "pytest", __file__, "-v", "--tb=short"
        ], capture_output=True, text=True, cwd=Path(__file__).parent.parent)

        print("EMBEDDING SYSTEM TEST RESULTS")
        print("=" * 50)
        print(result.stdout)

        if result.stderr:
            print("ERRORS:")
            print(result.stderr)

        return result.returncode == 0

    except Exception as e:
        print(f"Failed to run tests: {e}")
        return False


if __name__ == "__main__":
    success = run_embedding_tests()
    sys.exit(0 if success else 1)
630
hcfs-python/tests/test_integration.py
Normal file
@@ -0,0 +1,630 @@
"""
Integration test suite for HCFS components.

Tests covering:
- Full system integration
- End-to-end workflows
- Cross-component functionality
- Performance under load
- Real-world usage scenarios
"""

import pytest
import tempfile
import shutil
import time
import asyncio
from pathlib import Path
import concurrent.futures
import threading

import sys
sys.path.insert(0, str(Path(__file__).parent.parent))

from hcfs.core.context_db import Context
from hcfs.core.context_db_optimized_fixed import OptimizedContextDatabase
from hcfs.core.embeddings_optimized import OptimizedEmbeddingManager
from hcfs.core.context_versioning import VersioningSystem
from hcfs.core.context_db_trio import TrioContextDatabase


class TestFullSystemIntegration:
    """Test full HCFS system integration."""

    @pytest.fixture
    def integrated_system(self):
        """Create fully integrated HCFS system."""
        temp_dir = Path(tempfile.mkdtemp())
        db_path = temp_dir / "integration_test.db"
        vector_db_path = temp_dir / "integration_vectors.db"

        # Initialize all components
        context_db = OptimizedContextDatabase(str(db_path))
        embedding_manager = OptimizedEmbeddingManager(
            context_db,
            model_name="mini",
            vector_db_path=str(vector_db_path),
            cache_size=200
        )
        versioning_system = VersioningSystem(str(db_path))

        yield context_db, embedding_manager, versioning_system
        shutil.rmtree(temp_dir)

    def test_complete_context_lifecycle(self, integrated_system):
        """Test complete context lifecycle with all features."""
        context_db, embedding_manager, versioning_system = integrated_system

        # 1. Create initial context
        context = Context(
            None, "/project/hcfs",
            "HCFS is a context-aware hierarchical filesystem for AI agents",
            "HCFS project description",
            "developer", 1
        )

        context_id = context_db.store_context(context)
        assert context_id is not None

        # 2. Generate and store embedding
        embedding = embedding_manager.generate_embedding(context.content)
        embedding_manager.store_embedding(context_id, embedding)

        # 3. Create version
        initial_version = versioning_system.create_version(
            context_id, "developer", "Initial project description"
        )
        assert initial_version is not None

        # 4. Update context
        updated_content = "HCFS is an advanced context-aware hierarchical filesystem with ML-powered semantic search"
        context_db.update_context(context_id, content=updated_content)

        # 5. Update embedding
        new_embedding = embedding_manager.generate_embedding(updated_content)
        embedding_manager.store_embedding(context_id, new_embedding)

        # 6. Create new version
        updated_version = versioning_system.create_version(
            context_id, "developer", "Added ML and semantic search details"
        )

        # 7. Test search functionality
        search_results = embedding_manager.semantic_search_optimized(
            "machine learning filesystem", top_k=5, include_contexts=True
        )

        assert len(search_results) > 0
        found_context = any(result.context_id == context_id for result in search_results)
        assert found_context, "Should find the updated context in search results"

        # 8. Test version history
        history = versioning_system.get_version_history(context_id)
        assert len(history) == 2
        assert history[0].message == "Added ML and semantic search details"
        assert history[1].message == "Initial project description"

        # 9. Test rollback
        rollback_version = versioning_system.rollback_to_version(
            context_id, initial_version.version_number, "developer", "Testing rollback"
        )

        # Verify rollback worked
        current_context = context_db.get_context(context_id)
        assert "HCFS is a context-aware hierarchical filesystem for AI agents" in current_context.content

    def test_hierarchical_context_inheritance(self, integrated_system):
        """Test hierarchical context relationships."""
        context_db, embedding_manager, _ = integrated_system

        # Create hierarchical contexts
        contexts = [
            Context(None, "/", "Root directory context", "Root summary", "user", 1),
            Context(None, "/projects", "Projects directory", "Projects summary", "user", 1),
            Context(None, "/projects/hcfs", "HCFS project", "HCFS summary", "user", 1),
            Context(None, "/projects/hcfs/core", "HCFS core modules", "Core summary", "user", 1),
        ]

        context_ids = []
        for context in contexts:
            context_id = context_db.store_context(context)
            context_ids.append(context_id)

        # Build embeddings for all contexts
        embedding_manager.build_embeddings_index()

        # Test hierarchical search
        results = embedding_manager.semantic_search_optimized(
            "HCFS development", path_prefix="/projects", top_k=10, include_contexts=True
        )

        # Should find HCFS-related contexts under /projects
        assert len(results) >= 2
        hcfs_results = [r for r in results if "hcfs" in r.context.path.lower()]
        assert len(hcfs_results) >= 2

    def test_multi_user_collaboration(self, integrated_system):
        """Test multi-user collaboration features."""
        context_db, embedding_manager, versioning_system = integrated_system

        # Create shared context
        shared_context = Context(
            None, "/shared/document",
            "Shared collaborative document",
            "Team collaboration",
            "user1", 1
        )

        context_id = context_db.store_context(shared_context)

        # User 1 creates initial version
        v1 = versioning_system.create_version(context_id, "user1", "Initial draft")

        # User 2 makes changes
        context_db.update_context(
            context_id,
            content="Shared collaborative document with user2 contributions",
            author="user2"
        )
        v2 = versioning_system.create_version(context_id, "user2", "Added contributions")

        # User 3 makes changes
        context_db.update_context(
            context_id,
            content="Shared collaborative document with user2 and user3 contributions",
            author="user3"
        )
        v3 = versioning_system.create_version(context_id, "user3", "Final review")

        # Test version history shows all contributors
        history = versioning_system.get_version_history(context_id)
        authors = {version.author for version in history}
        assert authors == {"user1", "user2", "user3"}

        # Test rollback to previous version
        rollback = versioning_system.rollback_to_version(
            context_id, v2.version_number, "user1", "Reverting to user2 version"
        )

        current = context_db.get_context(context_id)
        assert "user2 contributions" in current.content
        assert "user3 contributions" not in current.content
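
# Both rollback tests above expect old content to come back while the full
# version history stays intact. The usual way to get that behavior is to
# append a new version whose content is copied from the rollback target,
# rather than deleting rows. A sketch of that idea -- an assumed design, not
# the actual VersioningSystem internals:
def _rollback_as_new_version_sketch(history, target_number, author, message):
    """history: list of dicts with 'number' and 'content' keys, oldest first."""
    target = next(v for v in history if v["number"] == target_number)
    new_version = {
        "number": history[-1]["number"] + 1,
        "content": target["content"],  # content restored from the target
        "author": author,
        "message": message,
    }
    history.append(new_version)  # every prior version remains queryable
    return new_version
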
class TestPerformanceIntegration:
    """Test system performance under integrated load."""

    @pytest.fixture
    def performance_system(self):
        """Create system for performance testing."""
        temp_dir = Path(tempfile.mkdtemp())
        db_path = temp_dir / "performance_test.db"
        vector_db_path = temp_dir / "performance_vectors.db"

        context_db = OptimizedContextDatabase(str(db_path), cache_size=500)
        embedding_manager = OptimizedEmbeddingManager(
            context_db,
            model_name="mini",
            vector_db_path=str(vector_db_path),
            cache_size=300,
            batch_size=16
        )
        versioning_system = VersioningSystem(str(db_path))

        yield context_db, embedding_manager, versioning_system
        shutil.rmtree(temp_dir)

    def test_large_scale_context_management(self, performance_system):
        """Test managing large numbers of contexts."""
        context_db, embedding_manager, versioning_system = performance_system

        # Create large number of contexts
        num_contexts = 100
        contexts = []

        start_time = time.time()
        for i in range(num_contexts):
            context = Context(
                None, f"/large_scale/context_{i}",
                f"Large scale test context {i} with detailed content about topic {i % 10}",
                f"Summary for context {i}",
                f"user_{i % 5}", 1
            )
            contexts.append(context)

        # Batch store contexts
        context_ids = context_db.store_contexts_batch(contexts)
        storage_time = time.time() - start_time

        assert len(context_ids) == num_contexts
        print(f"Stored {num_contexts} contexts in {storage_time:.3f}s ({num_contexts/storage_time:.1f} contexts/sec)")

        # Build embeddings index
        start_time = time.time()
        index_stats = embedding_manager.build_embeddings_index(batch_size=20)
        index_time = time.time() - start_time

        assert index_stats["total_processed"] == num_contexts
        print(f"Built embeddings for {num_contexts} contexts in {index_time:.3f}s")

        # Test search performance
        search_queries = [
            "detailed content about topic",
            "large scale test",
            "context management",
            "topic 5 information",
            "user collaboration"
        ]

        total_search_time = 0
        for query in search_queries:
            start_time = time.time()
            results = embedding_manager.semantic_search_optimized(query, top_k=10)
            search_time = time.time() - start_time
            total_search_time += search_time

            assert len(results) > 0

        avg_search_time = total_search_time / len(search_queries)
        print(f"Average search time: {avg_search_time:.4f}s")
        assert avg_search_time < 0.1  # Should be under 100ms

    def test_concurrent_system_load(self, performance_system):
        """Test system under concurrent load."""
        context_db, embedding_manager, versioning_system = performance_system

        # Pre-populate with some data
        base_contexts = [
            Context(None, f"/concurrent/{i}", f"Base context {i}", f"Summary {i}", "base_user", 1)
            for i in range(20)
        ]

        for context in base_contexts:
            context_db.store_context(context)

        embedding_manager.build_embeddings_index()

        def concurrent_worker(worker_id):
            results = []

            # Each worker performs mixed operations
            for i in range(5):
                operation_type = i % 4

                if operation_type == 0:  # Create context
                    context = Context(
                        None, f"/worker{worker_id}/context_{i}",
                        f"Worker {worker_id} context {i} with specific content",
                        f"Worker {worker_id} summary {i}",
                        f"worker{worker_id}", 1
                    )
                    context_id = context_db.store_context(context)
                    results.append(("create", context_id))

                elif operation_type == 1:  # Search
                    search_results = embedding_manager.semantic_search_optimized(
                        f"worker {worker_id} content", top_k=5
                    )
                    results.append(("search", len(search_results)))

                elif operation_type == 2:  # Update context
                    if results:  # Only if we have created contexts
                        created_contexts = [r for r in results if r[0] == "create"]
                        if created_contexts:
                            context_id = created_contexts[-1][1]
                            try:
                                context_db.update_context(
                                    context_id,
                                    content=f"Updated by worker {worker_id} iteration {i}"
                                )
                                results.append(("update", context_id))
                            except Exception:
                                pass  # Context might not exist due to concurrency

                elif operation_type == 3:  # Hybrid search
                    hybrid_results = embedding_manager.hybrid_search_optimized(
                        f"context {worker_id}", top_k=3
                    )
                    results.append(("hybrid_search", len(hybrid_results)))

            return results

        # Run concurrent workers
        num_workers = 5
        start_time = time.time()

        with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
            futures = [executor.submit(concurrent_worker, i) for i in range(num_workers)]
            all_results = [future.result() for future in futures]

        total_time = time.time() - start_time

        # Verify all workers completed successfully
        assert len(all_results) == num_workers
        for worker_results in all_results:
            assert len(worker_results) >= 3  # Should have completed most operations

        # Calculate operation statistics
        total_operations = sum(len(worker_results) for worker_results in all_results)
        operations_per_second = total_operations / total_time

        print(f"Completed {total_operations} operations in {total_time:.3f}s ({operations_per_second:.1f} ops/sec)")
        assert operations_per_second > 10  # Should handle at least 10 operations per second

    def test_memory_usage_under_load(self, performance_system):
        """Test memory usage under sustained load."""
        context_db, embedding_manager, _ = performance_system

        import psutil
        import os

        process = psutil.Process(os.getpid())
        initial_memory = process.memory_info().rss / 1024 / 1024  # MB

        # Create contexts in batches and monitor memory
        batch_size = 50
        num_batches = 5

        for batch_num in range(num_batches):
            # Create batch of contexts
            contexts = [
                Context(
                    None, f"/memory_test/batch_{batch_num}/context_{i}",
                    f"Memory test context {batch_num}-{i} " + "x" * 100,  # Larger content
                    f"Memory summary {batch_num}-{i}",
                    f"memory_user_{batch_num}", 1
                )
                for i in range(batch_size)
            ]

            # Store contexts and build embeddings
            context_ids = context_db.store_contexts_batch(contexts)

            # Generate embeddings in batch
            contents = [context.content for context in contexts]
            embeddings = embedding_manager.generate_embeddings_batch(contents)

            # Store embeddings
            embedding_data = list(zip(context_ids, embeddings))
            embedding_manager.store_embeddings_batch(embedding_data)

            # Check memory usage
            current_memory = process.memory_info().rss / 1024 / 1024
            memory_increase = current_memory - initial_memory

            print(f"Batch {batch_num + 1}: Memory usage: {current_memory:.1f} MB (+{memory_increase:.1f} MB)")

            # Perform some searches to exercise the system
            for query in [f"memory test batch {batch_num}", "context content"]:
                results = embedding_manager.semantic_search_optimized(query, top_k=5)
                assert len(results) >= 0

        final_memory = process.memory_info().rss / 1024 / 1024
        total_increase = final_memory - initial_memory

        # Memory increase should be reasonable (less than 200MB for this test)
        print(f"Total memory increase: {total_increase:.1f} MB")
        assert total_increase < 200, f"Memory usage increased by {total_increase:.1f} MB, which is too much"


class TestAsyncIntegration:
    """Test async/Trio integration."""

    @pytest.fixture
    def async_system(self):
        """Create system for async testing."""
        temp_dir = Path(tempfile.mkdtemp())
        db_path = temp_dir / "async_test.db"

        # Create async-compatible system
        context_db = OptimizedContextDatabase(str(db_path))
        trio_db = TrioContextDatabase(context_db)

        yield trio_db
        shutil.rmtree(temp_dir)

    def test_trio_database_operations(self, async_system):
        """Test Trio async database operations."""
        import trio

        async def async_test():
            trio_db = async_system

            # Test async context storage
            context = Context(
                None, "/async/test",
                "Async test content",
                "Async summary",
                "async_user", 1
            )

            context_id = await trio_db.store_context(context)
            assert context_id is not None

            # Test async retrieval
            retrieved = await trio_db.get_context(context_id)
            assert retrieved is not None
            assert retrieved.content == context.content

            # Test async search
            results = await trio_db.search_contexts("async test")
            assert len(results) > 0

            # Test async update
            await trio_db.update_context(context_id, content="Updated async content")

            updated = await trio_db.get_context(context_id)
            assert updated.content == "Updated async content"

            return "Success"

        # Run async test
        result = trio.run(async_test)
        assert result == "Success"

    def test_concurrent_async_operations(self, async_system):
        """Test concurrent async operations."""
        import trio

        async def async_concurrent_test():
            trio_db = async_system

            async def async_worker(worker_id):
                results = []
                for i in range(3):
                    context = Context(
                        None, f"/async_concurrent/{worker_id}/{i}",
                        f"Async worker {worker_id} content {i}",
                        f"Async summary {worker_id}-{i}",
                        f"async_worker_{worker_id}", 1
                    )

                    context_id = await trio_db.store_context(context)
                    results.append(context_id)

                return results

            # Run multiple async workers concurrently; the nursery waits for
            # every task to finish before the block exits. Note that
            # start_soon discards each worker's return value.
            async with trio.open_nursery() as nursery:
                for worker_id in range(3):
                    nursery.start_soon(async_worker, worker_id)

            return "Concurrent async operations completed"

        result = trio.run(async_concurrent_test)
        assert "completed" in result
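
# TrioContextDatabase exposes the synchronous database through awaitable
# methods, and nursery.start_soon discards worker return values. Both patterns
# are sketched below as assumptions about common Trio practice, not the
# verified internals of hcfs.core.context_db_trio:
import trio


class _TrioWrapperSketch:
    """Push blocking database calls onto Trio's worker-thread pool."""

    def __init__(self, sync_db):
        self._db = sync_db

    async def store_context(self, context):
        # to_thread.run_sync keeps the event loop responsive while the
        # underlying sqlite call blocks in a worker thread.
        return await trio.to_thread.run_sync(self._db.store_context, context)


async def _gather_results_sketch(workers):
    """Collect return values that start_soon would otherwise discard."""
    collected = []

    async def run_and_collect(worker):
        collected.extend(await worker())

    async with trio.open_nursery() as nursery:
        for worker in workers:
            nursery.start_soon(run_and_collect, worker)
    # The nursery waits for every task before this line runs.
    return collected
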
class TestErrorHandlingIntegration:
    """Test error handling across integrated components."""

    @pytest.fixture
    def error_test_system(self):
        """Create system for error testing."""
        temp_dir = Path(tempfile.mkdtemp())
        db_path = temp_dir / "error_test.db"
        vector_db_path = temp_dir / "error_vectors.db"

        context_db = OptimizedContextDatabase(str(db_path))
        embedding_manager = OptimizedEmbeddingManager(
            context_db,
            model_name="mini",
            vector_db_path=str(vector_db_path)
        )
        versioning_system = VersioningSystem(str(db_path))

        yield context_db, embedding_manager, versioning_system
        shutil.rmtree(temp_dir)

    def test_database_corruption_recovery(self, error_test_system):
        """Test recovery from database issues."""
        context_db, embedding_manager, versioning_system = error_test_system

        # Create some valid data first
        context = Context(
            None, "/error_test/valid",
            "Valid test content",
            "Valid summary",
            "test_user", 1
        )

        context_id = context_db.store_context(context)
        assert context_id is not None

        # Test handling of invalid operations
        with pytest.raises((ValueError, AttributeError, TypeError)):
            # Try to store invalid context
            invalid_context = None
            context_db.store_context(invalid_context)

        # Verify original data is still intact
        retrieved = context_db.get_context(context_id)
        assert retrieved is not None
        assert retrieved.content == "Valid test content"

    def test_embedding_generation_errors(self, error_test_system):
        """Test embedding generation error handling."""
        _, embedding_manager, _ = error_test_system

        # Test with empty content
        try:
            embedding = embedding_manager.generate_embedding("")
            # Empty string should still generate an embedding
            assert embedding is not None
        except Exception as e:
            # If it fails, it should fail gracefully
            assert isinstance(e, (ValueError, RuntimeError))

        # Test with very long content
        very_long_text = "x" * 10000
        embedding = embedding_manager.generate_embedding(very_long_text)
        assert embedding is not None
        assert embedding.shape == (384,)

    def test_concurrent_error_isolation(self, error_test_system):
        """Test that errors in one thread don't affect others."""
        context_db, embedding_manager, _ = error_test_system

        def worker_with_error(worker_id):
            try:
                if worker_id == 1:  # One worker will fail
                    # Try invalid operation
                    context_db.get_context(-1)  # Invalid ID
                    return "error_worker_failed"
                else:
                    # Other workers do valid operations
                    context = Context(
                        None, f"/error_isolation/{worker_id}",
                        f"Valid content {worker_id}",
                        f"Summary {worker_id}",
                        f"user{worker_id}", 1
                    )
                    context_id = context_db.store_context(context)
                    return f"success_{context_id}"
            except Exception as e:
                return f"error_{type(e).__name__}"

        # Run workers concurrently
        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
            futures = [executor.submit(worker_with_error, i) for i in range(3)]
            results = [future.result() for future in futures]

        # Check that some workers succeeded despite one failing
        success_count = sum(1 for r in results if r.startswith("success_"))
        error_count = sum(1 for r in results if r.startswith("error_"))

        assert success_count >= 1, "At least one worker should have succeeded"
        assert error_count >= 1, "At least one worker should have failed"


def run_integration_tests():
    """Run all integration tests."""
    import subprocess

    try:
        # Run pytest on this module
        result = subprocess.run([
            sys.executable, "-m", "pytest", __file__, "-v", "--tb=short", "-x"
        ], capture_output=True, text=True, cwd=Path(__file__).parent.parent)

        print("INTEGRATION TEST RESULTS")
        print("=" * 50)
        print(result.stdout)

        if result.stderr:
            print("ERRORS:")
            print(result.stderr)

        return result.returncode == 0

    except Exception as e:
        print(f"Failed to run tests: {e}")
        return False


if __name__ == "__main__":
    success = run_integration_tests()
    sys.exit(0 if success else 1)