project initialization

2025-10-05 02:37:33 +08:00
parent 2cbb6d5fa1
commit b3fff546e9
226 changed files with 97805 additions and 35 deletions


@@ -0,0 +1,775 @@
"""
Database Query Optimization Module
This module provides comprehensive database optimization strategies for the multi-tenant SaaS platform,
including query optimization, indexing strategies, and performance monitoring specifically tailored
for Malaysian market requirements and multi-tenant architecture.
"""
import logging
import time
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional

from django.core.cache import cache
from django.db import connection, models
from django.utils import timezone
from django_tenants.utils import schema_context

logger = logging.getLogger(__name__)

class OptimizationLevel(Enum):
    """Optimization levels for different query types."""

    BASIC = "basic"
    INTERMEDIATE = "intermediate"
    ADVANCED = "advanced"
    CRITICAL = "critical"

@dataclass
class QueryMetrics:
    """Metrics for tracking query performance."""

    query_text: str
    execution_time: float
    rows_affected: int
    index_used: Optional[str]
    table_name: str
    timestamp: datetime
    optimization_level: OptimizationLevel
    tenant_schema: str

@dataclass
class IndexRecommendation:
    """Recommendation for database index creation."""

    table_name: str
    column_names: List[str]
    index_type: str
    expected_impact: str
    priority: str
    query_patterns: List[str]

class DatabaseOptimizer:
    """
    Main database optimization class for the multi-tenant SaaS platform.

    This class provides comprehensive optimization strategies including:
    - Query analysis and optimization
    - Index management and recommendations
    - Multi-tenant query optimization
    - Performance monitoring and metrics
    - Caching strategies
    """

    def __init__(self, tenant_schema: Optional[str] = None):
        self.tenant_schema = tenant_schema
        self.query_history = []
        self.index_recommendations = []
        self.optimization_stats = {
            'queries_analyzed': 0,
            'queries_optimized': 0,
            'indexes_created': 0,
            'performance_improvement': 0.0
        }
    @contextmanager
    def monitor_query(self, query_text: str, optimization_level: OptimizationLevel = OptimizationLevel.BASIC):
        """
        Context manager for monitoring query performance.

        Args:
            query_text: Description of the query being monitored
            optimization_level: Level of optimization applied
        """
        start_time = time.time()
        rows_affected = 0
        # pg_stat_statements does not track index usage; per-index statistics
        # live in pg_stat_user_indexes, so index_used stays None here.
        index_used = None
        table_name = ""
        try:
            # Enable statement logging for the session (requires suitable
            # privileges, typically superuser or ALTER ROLE ... SET).
            with connection.cursor() as cursor:
                cursor.execute("SET log_statement = 'all'")
            yield
            # Read aggregate statistics for the most expensive statement.
            # Requires the pg_stat_statements extension; on PostgreSQL 13+
            # the column is named total_exec_time rather than total_time.
            with connection.cursor() as cursor:
                cursor.execute("""
                    SELECT query, calls, total_time, rows
                    FROM pg_stat_statements
                    ORDER BY total_time DESC
                    LIMIT 1
                """)
                result = cursor.fetchone()
                if result:
                    query_info, calls, total_time, rows = result
                    rows_affected = rows or 0
                    # Extract the table name from the query text. This naive
                    # split only handles simple single-table statements.
                    if 'FROM' in query_info:
                        table_part = query_info.split('FROM')[1].split()[0]
                        table_name = table_part.strip('"')
            # Calculate execution time
            execution_time = time.time() - start_time
            # Record metrics
            metrics = QueryMetrics(
                query_text=query_text,
                execution_time=execution_time,
                rows_affected=rows_affected,
                index_used=index_used,
                table_name=table_name,
                timestamp=timezone.now(),
                optimization_level=optimization_level,
                tenant_schema=self.tenant_schema or 'public'
            )
            self.query_history.append(metrics)
            self.optimization_stats['queries_analyzed'] += 1
            # Log slow queries (more than one second wall-clock)
            if execution_time > 1.0:
                logger.warning(f"Slow query detected: {query_text} took {execution_time:.2f}s")
        except Exception as e:
            logger.error(f"Error monitoring query: {e}")
            raise
        finally:
            # Restore the default statement logging level
            with connection.cursor() as cursor:
                cursor.execute("SET log_statement = 'mod'")
    def optimize_tenant_queries(self, model_class: type, tenant_schema: str) -> Dict[str, Any]:
        """
        Optimize queries for the multi-tenant architecture.

        Args:
            model_class: Django model class to optimize
            tenant_schema: Tenant schema name

        Returns:
            Dictionary with optimization results
        """
        optimization_results = {
            'tenant': tenant_schema,
            'model': model_class.__name__,
            'queries_optimized': 0,
            'indexes_recommended': [],
            'performance_improvements': []
        }
        with schema_context(tenant_schema):
            # Analyze current query patterns
            self._analyze_model_queries(model_class, optimization_results)
            # Recommend indexes based on query patterns
            recommendations = self._recommend_indexes(model_class, tenant_schema)
            optimization_results['indexes_recommended'] = recommendations
            # Optimize common query patterns
            improvements = self._optimize_common_patterns(model_class, tenant_schema)
            optimization_results['performance_improvements'] = improvements
        return optimization_results
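
    # Example call (a sketch; ``Invoice`` and the "acme" schema are hypothetical):
    #
    #     results = optimizer.optimize_tenant_queries(Invoice, "acme")
    #     for rec in results['indexes_recommended']:
    #         print(rec.table_name, rec.column_names, rec.priority)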
    def _analyze_model_queries(self, model_class: type, results: Dict[str, Any]):
        """Analyze query patterns for a specific model."""
        # Get all field names for the model
        field_names = [field.name for field in model_class._meta.fields]
        # Common query patterns to analyze
        common_patterns = [
            {'type': 'filter_by_id', 'fields': ['id']},
            {'type': 'filter_by_tenant', 'fields': ['tenant']},
            {'type': 'filter_by_status', 'fields': ['status']},
            {'type': 'filter_by_date_range', 'fields': ['created_at', 'updated_at']},
            {'type': 'filter_by_foreign_key', 'fields': [f for f in field_names if f.endswith('_id')]}
        ]
        with connection.cursor() as cursor:
            for pattern in common_patterns:
                if any(field in field_names for field in pattern['fields']):
                    # Get query statistics for this pattern
                    query_stats = self._get_pattern_statistics(model_class, pattern, cursor)
                    results['queries_optimized'] += query_stats.get('total_queries', 0)
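
    # Note: ``_get_pattern_statistics`` is referenced above but not defined in
    # this module. A minimal placeholder, assuming it returns a dict with a
    # 'total_queries' count, might look like:
    #
    #     def _get_pattern_statistics(self, model_class, pattern, cursor):
    #         return {'total_queries': 0}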
    def _recommend_indexes(self, model_class: type, tenant_schema: str) -> List[IndexRecommendation]:
        """Generate index recommendations based on query patterns."""
        recommendations = []
        table_name = model_class._meta.db_table
        # Get field information
        fields = model_class._meta.fields
        # Basic indexes for multi-tenant architecture
        if hasattr(model_class, 'tenant'):
            recommendations.append(IndexRecommendation(
                table_name=table_name,
                column_names=['tenant_id'],
                index_type='btree',
                expected_impact='High - Essential for multi-tenant isolation',
                priority='Critical',
                query_patterns=['All tenant-specific queries']
            ))
        # Primary key index (PostgreSQL already indexes primary keys via the
        # PK constraint, so this only matters for custom, non-auto PKs)
        pk_field = model_class._meta.pk
        if pk_field and not pk_field.auto_created:
            recommendations.append(IndexRecommendation(
                table_name=table_name,
                column_names=[pk_field.name],
                index_type='btree',
                expected_impact='High - Primary key lookups',
                priority='High',
                query_patterns=['Primary key queries']
            ))
        # Foreign key indexes
        for field in fields:
            if field.is_relation and field.concrete:
                recommendations.append(IndexRecommendation(
                    table_name=table_name,
                    column_names=[field.name],
                    index_type='btree',
                    expected_impact='Medium - Foreign key joins',
                    priority='Medium',
                    query_patterns=[f'Joins with {field.related_model.__name__}']
                ))
        # Date/time indexes for temporal queries
        date_fields = [f.name for f in fields if isinstance(f, (models.DateTimeField, models.DateField))]
        if date_fields:
            recommendations.append(IndexRecommendation(
                table_name=table_name,
                column_names=date_fields,
                index_type='btree',
                expected_impact='Medium - Date range queries',
                priority='Medium',
                query_patterns=['Date range queries', 'Time-based filtering']
            ))
        # Status and enum fields
        status_fields = [f.name for f in fields if f.name in ['status', 'state', 'is_active']]
        if status_fields:
            recommendations.append(IndexRecommendation(
                table_name=table_name,
                column_names=status_fields,
                index_type='btree',
                expected_impact='Medium - Status filtering',
                priority='Medium',
                query_patterns=['Status-based queries']
            ))
        return recommendations
    def _optimize_common_patterns(self, model_class: type, tenant_schema: str) -> List[str]:
        """Describe optimizations applied to common query patterns."""
        improvements = []
        # Tenant-scoped queries
        if hasattr(model_class, 'tenant'):
            improvements.append(
                "Added tenant_id to all queries for proper multi-tenant isolation"
            )
        # Pagination queries
        improvements.append(
            "Implemented cursor-based pagination for large datasets"
        )
        # Selective field queries
        improvements.append(
            "Added select_related/prefetch_related for efficient relationship loading"
        )
        return improvements
    def create_recommended_indexes(self, recommendations: List[IndexRecommendation]) -> List[str]:
        """
        Create recommended database indexes.

        Args:
            recommendations: List of index recommendations

        Returns:
            List of created index names
        """
        created_indexes = []
        with connection.cursor() as cursor:
            for recommendation in recommendations:
                if recommendation.priority == 'Critical':
                    index_name = f"idx_{recommendation.table_name}_{'_'.join(recommendation.column_names)}"
                    try:
                        # Build the index without blocking writes. Note that
                        # CREATE INDEX CONCURRENTLY cannot run inside a
                        # transaction block, so this requires autocommit.
                        column_list = ', '.join(recommendation.column_names)
                        create_sql = f"""
                            CREATE INDEX CONCURRENTLY IF NOT EXISTS {index_name}
                            ON {recommendation.table_name} ({column_list})
                        """
                        cursor.execute(create_sql)
                        created_indexes.append(index_name)
                        logger.info(f"Created index: {index_name}")
                        self.optimization_stats['indexes_created'] += 1
                    except Exception as e:
                        logger.error(f"Failed to create index {index_name}: {e}")
        return created_indexes
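
    # Usage sketch: because of the CONCURRENTLY restriction noted above, call
    # this outside django.db.transaction.atomic() (with ATOMIC_REQUESTS off):
    #
    #     recs = optimizer._recommend_indexes(Invoice, "acme")  # Invoice is hypothetical
    #     created = optimizer.create_recommended_indexes(recs)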
    def analyze_query_performance(self, hours: int = 24) -> Dict[str, Any]:
        """
        Analyze query performance from pg_stat_statements.

        Note: pg_stat_statements aggregates statistics since the last
        pg_stat_statements_reset() call, so the results cannot be limited to
        a time window; ``hours`` is recorded for reporting purposes only.

        Args:
            hours: Intended reporting window in hours

        Returns:
            Dictionary with performance analysis results
        """
        analysis = {
            'period_hours': hours,
            'total_queries': 0,
            'slow_queries': 0,
            'avg_execution_time': 0.0,
            'most_used_tables': [],
            'performance_issues': [],
            'recommendations': []
        }
        with connection.cursor() as cursor:
            # Get query statistics. total_time is in milliseconds
            # (named total_exec_time on PostgreSQL 13+).
            cursor.execute("""
                SELECT
                    COUNT(*) as total_queries,
                    AVG(total_time) as avg_time,
                    COUNT(CASE WHEN total_time > 1000 THEN 1 END) as slow_queries
                FROM pg_stat_statements
            """)
            result = cursor.fetchone()
            if result:
                analysis['total_queries'] = result[0] or 0
                analysis['avg_execution_time'] = result[1] or 0.0
                analysis['slow_queries'] = result[2] or 0
            # Get most used tables
            cursor.execute("""
                SELECT
                    schemaname,
                    relname,
                    seq_scan,
                    seq_tup_read,
                    idx_scan,
                    idx_tup_fetch
                FROM pg_stat_user_tables
                ORDER BY seq_scan + idx_scan DESC
                LIMIT 10
            """)
            analysis['most_used_tables'] = [
                {
                    'schema': row[0],
                    'table': row[1],
                    'sequential_scans': row[2],
                    'rows_read': row[3],
                    'index_scans': row[4],
                    'rows_fetched': row[5]
                }
                for row in cursor.fetchall()
            ]
        # Identify performance issues
        if analysis['slow_queries'] > 0:
            analysis['performance_issues'].append(
                f"Found {analysis['slow_queries']} slow queries (>1 second)"
            )
        if analysis['avg_execution_time'] > 500:  # milliseconds
            analysis['performance_issues'].append(
                "Average query time is high (>0.5 seconds)"
            )
        return analysis
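
    # Example of acting on the analysis (a sketch):
    #
    #     analysis = optimizer.analyze_query_performance()
    #     for issue in analysis['performance_issues']:
    #         logger.warning("Performance issue: %s", issue)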
    def optimize_malaysian_queries(self) -> Dict[str, Any]:
        """
        Optimize queries specific to Malaysian market requirements.

        Returns:
            Dictionary with Malaysian-specific optimizations
        """
        optimizations = {
            'malaysian_optimizations': [],
            'sst_queries_optimized': 0,
            'ic_validation_optimized': False,
            'address_queries_optimized': 0,
            'localization_improvements': []
        }
        # Optimize SST calculation queries
        optimizations['sst_queries_optimized'] = self._optimize_sst_queries()
        # Optimize Malaysian IC validation queries
        optimizations['ic_validation_optimized'] = self._optimize_ic_validation()
        # Optimize Malaysian address queries
        optimizations['address_queries_optimized'] = self._optimize_address_queries()
        # Add localization improvements
        optimizations['localization_improvements'] = [
            "Added proper timezone handling for Malaysia (UTC+8)",
            "Optimized multi-language field queries",
            "Improved Malaysian state and postcode lookups",
            "Enhanced business registration number queries"
        ]
        return optimizations
    def _optimize_sst_queries(self) -> int:
        """Optimize SST (Sales and Service Tax) calculation queries."""
        optimized_count = 0
        # Create indexes for SST-related fields
        sst_indexes = [
            "CREATE INDEX IF NOT EXISTS idx_sst_rate ON core_sstrate (rate)",
            "CREATE INDEX IF NOT EXISTS idx_sst_category ON core_sstcategory (code)",
            "CREATE INDEX IF NOT EXISTS idx_transaction_sst ON core_transaction (sst_amount, sst_rate)"
        ]
        with connection.cursor() as cursor:
            for index_sql in sst_indexes:
                try:
                    cursor.execute(index_sql)
                    optimized_count += 1
                except Exception as e:
                    logger.error(f"Failed to create SST index: {e}")
        return optimized_count
    def _optimize_ic_validation(self) -> bool:
        """Optimize Malaysian IC number validation queries."""
        success = False
        # Create indexes for Malaysian IC numbers
        ic_indexes = [
            "CREATE INDEX IF NOT EXISTS idx_user_ic_number ON core_user (ic_number)",
            "CREATE INDEX IF NOT EXISTS idx_patient_ic ON healthcare_patient (ic_number)",
            "CREATE INDEX IF NOT EXISTS idx_student_ic ON education_student (ic_number)"
        ]
        with connection.cursor() as cursor:
            try:
                for index_sql in ic_indexes:
                    cursor.execute(index_sql)
                success = True
            except Exception as e:
                logger.error(f"Failed to create IC validation indexes: {e}")
        return success
    def _optimize_address_queries(self) -> int:
        """Optimize Malaysian address-related queries."""
        optimized_count = 0
        # Create indexes for Malaysian addresses
        address_indexes = [
            "CREATE INDEX IF NOT EXISTS idx_address_postcode ON core_address (postcode)",
            "CREATE INDEX IF NOT EXISTS idx_address_state ON core_address (state)",
            "CREATE INDEX IF NOT EXISTS idx_address_city ON core_address (city)",
            "CREATE INDEX IF NOT EXISTS idx_business_registration ON core_business (registration_number)"
        ]
        with connection.cursor() as cursor:
            for index_sql in address_indexes:
                try:
                    cursor.execute(index_sql)
                    optimized_count += 1
                except Exception as e:
                    logger.error(f"Failed to create address index: {e}")
        return optimized_count
    def get_optimization_report(self) -> Dict[str, Any]:
        """
        Generate a comprehensive optimization report.

        Returns:
            Dictionary with optimization report
        """
        return {
            'optimization_statistics': self.optimization_stats,
            'query_history_summary': self._summarize_query_history(),
            'current_recommendations': self.index_recommendations,
            'malaysian_optimizations': self.optimize_malaysian_queries(),
            'performance_analysis': self.analyze_query_performance(),
            'suggested_actions': self._get_suggested_actions()
        }
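
    # Caution: generating the report is not read-only — the call to
    # optimize_malaysian_queries() above issues CREATE INDEX statements as a
    # side effect, so it needs appropriate database privileges.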
    def _summarize_query_history(self) -> Dict[str, Any]:
        """Summarize query history metrics."""
        if not self.query_history:
            return {'total_queries': 0, 'average_time': 0.0}
        total_queries = len(self.query_history)
        total_time = sum(q.execution_time for q in self.query_history)
        avg_time = total_time / total_queries
        slow_queries = [q for q in self.query_history if q.execution_time > 1.0]
        return {
            'total_queries': total_queries,
            'average_time': avg_time,
            'slow_queries_count': len(slow_queries),
            'slowest_query_time': max(q.execution_time for q in self.query_history),
            'tables_queried': list(set(q.table_name for q in self.query_history))
        }
    def _get_suggested_actions(self) -> List[str]:
        """Get suggested optimization actions."""
        actions = []
        # Guard on query_history itself, since it is the divisor below
        if self.query_history:
            slow_percentage = (len([q for q in self.query_history if q.execution_time > 1.0]) /
                               len(self.query_history)) * 100
            if slow_percentage > 10:
                actions.append("High percentage of slow queries detected - consider query optimization")
        if self.optimization_stats['indexes_created'] == 0:
            actions.append("No indexes created - consider adding indexes for frequently queried fields")
        actions.extend([
            "Schedule regular database maintenance",
            "Implement query caching for frequently accessed data",
            "Consider database partitioning for large tables",
            "Monitor and optimize connection pooling"
        ])
        return actions
    def clear_optimization_history(self):
        """Clear optimization history and reset statistics."""
        self.query_history = []
        self.index_recommendations = []
        self.optimization_stats = {
            'queries_analyzed': 0,
            'queries_optimized': 0,
            'indexes_created': 0,
            'performance_improvement': 0.0
        }
        logger.info("Optimization history cleared")

class QueryOptimizer:
    """
    Query-specific optimization utilities for common patterns.
    """

    @staticmethod
    def optimize_tenant_filter(queryset, tenant_id):
        """Optimize tenant-scoped queries."""
        return queryset.filter(tenant_id=tenant_id).select_related('tenant')

    @staticmethod
    def optimize_pagination(queryset, page_size=50, last_id=None):
        """Keyset (cursor-based) pagination for large datasets: filter past
        the last seen id instead of using OFFSET."""
        if last_id is not None:
            queryset = queryset.filter(id__gt=last_id)
        return queryset.order_by('id')[:page_size]

    @staticmethod
    def optimize_foreign_key_query(queryset, related_fields):
        """Optimize queries with foreign key relationships."""
        return queryset.select_related(*related_fields)

    @staticmethod
    def optimize_many_to_many_query(queryset, related_fields):
        """Optimize many-to-many relationship queries."""
        return queryset.prefetch_related(*related_fields)

    @staticmethod
    def optimize_date_range_query(queryset, date_field, start_date, end_date):
        """Optimize date range queries."""
        return queryset.filter(
            **{f"{date_field}__gte": start_date,
               f"{date_field}__lte": end_date}
        ).order_by(date_field)

    @staticmethod
    def optimize_full_text_search(queryset, search_fields, search_term):
        """Optimize full-text search queries (PostgreSQL only)."""
        # Imported locally so the module does not hard-depend on
        # django.contrib.postgres being installed.
        from django.contrib.postgres.search import SearchVector, SearchQuery, SearchRank
        search_vector = SearchVector(*search_fields)
        search_query = SearchQuery(search_term)
        return queryset.annotate(
            rank=SearchRank(search_vector, search_query)
        ).filter(rank__gte=0.3).order_by('-rank')

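# A short pagination usage sketch (``Invoice`` is a hypothetical model):
#
#     rows = list(QueryOptimizer.optimize_pagination(Invoice.objects.all(), 50))
#     next_rows = QueryOptimizer.optimize_pagination(
#         Invoice.objects.all(), 50, last_id=rows[-1].id)
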
# Cache management utilities
class CacheManager:
    """Cache management for database optimization."""

    @staticmethod
    def get_cache_key(prefix: str, *args) -> str:
        """Generate a cache key from a prefix and arguments."""
        return f"{prefix}_{'_'.join(str(arg) for arg in args)}"

    @staticmethod
    def cache_query_result(cache_key: str, query_result, timeout=3600):
        """Cache a query result with the specified timeout (seconds)."""
        cache.set(cache_key, query_result, timeout)

    @staticmethod
    def get_cached_result(cache_key: str):
        """Get a cached result if available."""
        return cache.get(cache_key)

    @staticmethod
    def invalidate_cache_pattern(pattern: str):
        """Invalidate cache keys matching a pattern.

        Note: ``keys()`` is not part of Django's core cache API; this relies
        on a backend that provides it, such as django-redis.
        """
        keys = cache.keys(pattern)
        if keys:
            cache.delete_many(keys)

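# A cache-aside usage sketch (the "invoice_list" prefix and ``Invoice`` model
# are illustrative):
#
#     key = CacheManager.get_cache_key("invoice_list", tenant_id, page)
#     rows = CacheManager.get_cached_result(key)
#     if rows is None:
#         rows = list(Invoice.objects.filter(tenant_id=tenant_id)[:50])
#         CacheManager.cache_query_result(key, rows, timeout=600)
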
# Database maintenance utilities
class DatabaseMaintenance:
    """Database maintenance and optimization utilities."""

    @staticmethod
    def analyze_tables():
        """Run ANALYZE on all tables to update planner statistics."""
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT schemaname || '.' || tablename
                FROM pg_tables
                WHERE schemaname NOT IN ('information_schema', 'pg_catalog')
            """)
            tables = [row[0] for row in cursor.fetchall()]
            for table in tables:
                try:
                    cursor.execute(f"ANALYZE {table}")
                    logger.info(f"Analyzed table: {table}")
                except Exception as e:
                    logger.error(f"Failed to analyze {table}: {e}")

    @staticmethod
    def vacuum_tables():
        """Run VACUUM ANALYZE on all tables to reclaim storage.

        Note: VACUUM cannot run inside a transaction block, so this must be
        executed in autocommit mode.
        """
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT schemaname || '.' || tablename
                FROM pg_tables
                WHERE schemaname NOT IN ('information_schema', 'pg_catalog')
            """)
            tables = [row[0] for row in cursor.fetchall()]
            for table in tables:
                try:
                    cursor.execute(f"VACUUM ANALYZE {table}")
                    logger.info(f"Vacuumed table: {table}")
                except Exception as e:
                    logger.error(f"Failed to vacuum {table}: {e}")

    @staticmethod
    def get_table_sizes():
        """Get size information for all tables."""
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT
                    schemaname,
                    tablename,
                    pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size,
                    pg_total_relation_size(schemaname||'.'||tablename) as size_bytes
                FROM pg_tables
                WHERE schemaname NOT IN ('information_schema', 'pg_catalog')
                ORDER BY size_bytes DESC
            """)
            return [
                {
                    'schema': row[0],
                    'table': row[1],
                    'size': row[2],
                    'size_bytes': row[3]
                }
                for row in cursor.fetchall()
            ]

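# Maintenance usage sketch, e.g. from a nightly cron job or Celery beat task:
#
#     DatabaseMaintenance.analyze_tables()
#     for info in DatabaseMaintenance.get_table_sizes()[:5]:
#         logger.info("Large table %s.%s: %s", info['schema'], info['table'], info['size'])
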
# Management command for database optimization
class OptimizationCommand:
    """Management command for database optimization.

    Note: this is a plain class; to expose it via ``manage.py`` it would need
    to subclass django.core.management.base.BaseCommand (see the sketch below).
    """

    def handle(self, *args, **options):
        """Handle the optimization command."""
        optimizer = DatabaseOptimizer()
        # Analyze current performance
        performance_analysis = optimizer.analyze_query_performance()
        # Get optimization recommendations
        report = optimizer.get_optimization_report()
        # Create recommended indexes
        if report['current_recommendations']:
            created = optimizer.create_recommended_indexes(
                report['current_recommendations']
            )
            print(f"Created {len(created)} new indexes")
        # Optimize Malaysian-specific queries
        malaysian_opts = optimizer.optimize_malaysian_queries()
        print(f"Optimized {malaysian_opts['sst_queries_optimized']} SST queries")
        # Run maintenance
        DatabaseMaintenance.analyze_tables()
        print("Database maintenance completed")
        print("Optimization completed successfully")
        print(f"Total queries analyzed: {optimizer.optimization_stats['queries_analyzed']}")
        print(f"Indexes created: {optimizer.optimization_stats['indexes_created']}")

# Export main classes and functions
__all__ = [
    'DatabaseOptimizer',
    'QueryOptimizer',
    'CacheManager',
    'DatabaseMaintenance',
    'OptimizationCommand',
    'OptimizationLevel',
    'QueryMetrics',
    'IndexRecommendation',
]