""" Database Query Optimization Module This module provides comprehensive database optimization strategies for the multi-tenant SaaS platform, including query optimization, indexing strategies, and performance monitoring specifically tailored for Malaysian market requirements and multi-tenant architecture. """ import logging from typing import Dict, List, Optional, Tuple, Any from django.db import connection, connections, models from django.db.models import Q, F, ExpressionWrapper, FloatField from django.db.models.functions import Cast, Coalesce, Lower, Upper from django.core.cache import cache from django.conf import settings from django.utils import timezone from django_tenants.utils import get_tenant_model, schema_context import time import json from contextlib import contextmanager from dataclasses import dataclass from enum import Enum logger = logging.getLogger(__name__) class OptimizationLevel(Enum): """Optimization levels for different query types.""" BASIC = "basic" INTERMEDIATE = "intermediate" ADVANCED = "advanced" CRITICAL = "critical" @dataclass class QueryMetrics: """Metrics for tracking query performance.""" query_text: str execution_time: float rows_affected: int index_used: Optional[str] table_name: str timestamp: timezone.datetime optimization_level: OptimizationLevel tenant_schema: str @dataclass class IndexRecommendation: """Recommendation for database index creation.""" table_name: str column_names: List[str] index_type: str expected_impact: str priority: str query_patterns: List[str] class DatabaseOptimizer: """ Main database optimization class for the multi-tenant SaaS platform. This class provides comprehensive optimization strategies including: - Query analysis and optimization - Index management and recommendations - Multi-tenant query optimization - Performance monitoring and metrics - Caching strategies """ def __init__(self, tenant_schema: Optional[str] = None): self.tenant_schema = tenant_schema self.query_history = [] self.index_recommendations = [] self.optimization_stats = { 'queries_analyzed': 0, 'queries_optimized': 0, 'indexes_created': 0, 'performance_improvement': 0.0 } @contextmanager def monitor_query(self, query_text: str, optimization_level: OptimizationLevel = OptimizationLevel.BASIC): """ Context manager for monitoring query performance. 

        Args:
            query_text: Description of the query being monitored
            optimization_level: Level of optimization applied
        """
        start_time = time.time()
        rows_affected = 0
        index_used = None
        table_name = ""

        try:
            # Enable statement logging for the duration of the block.
            # Note: SET log_statement requires superuser privileges.
            with connection.cursor() as cursor:
                cursor.execute("SET log_statement = 'all'")

            yield

            # Collect metrics from pg_stat_statements after execution. The
            # extension must be preloaded and created in the database; on
            # PostgreSQL 13+ the timing column is total_exec_time rather than
            # total_time. pg_stat_statements does not record which index a
            # statement used, so index_used stays None here.
            with connection.cursor() as cursor:
                cursor.execute("""
                    SELECT query, calls, total_time, rows
                    FROM pg_stat_statements
                    ORDER BY total_time DESC
                    LIMIT 1
                """)
                result = cursor.fetchone()
                if result:
                    query_info, _calls, _total_time, rows = result
                    rows_affected = rows or 0

                    # Extract the table name from the query text.
                    if 'FROM' in query_info:
                        table_part = query_info.split('FROM')[1].split()[0]
                        table_name = table_part.strip('"')

            # Calculate execution time
            execution_time = time.time() - start_time

            # Record metrics
            metrics = QueryMetrics(
                query_text=query_text,
                execution_time=execution_time,
                rows_affected=rows_affected,
                index_used=index_used,
                table_name=table_name,
                timestamp=timezone.now(),
                optimization_level=optimization_level,
                tenant_schema=self.tenant_schema or 'public',
            )
            self.query_history.append(metrics)
            self.optimization_stats['queries_analyzed'] += 1

            # Log slow queries (more than 1 second).
            if execution_time > 1.0:
                logger.warning(
                    f"Slow query detected: {query_text} took {execution_time:.2f}s"
                )

        except Exception as e:
            logger.error(f"Error monitoring query: {e}")
            raise
        finally:
            # Restore the default statement logging level.
            with connection.cursor() as cursor:
                cursor.execute("SET log_statement = 'mod'")

    def optimize_tenant_queries(self, model_class: type,
                                tenant_schema: str) -> Dict[str, Any]:
        """
        Optimize queries for multi-tenant architecture.

        Args:
            model_class: Django model class to optimize
            tenant_schema: Tenant schema name

        Returns:
            Dictionary with optimization results
        """
        optimization_results = {
            'tenant': tenant_schema,
            'model': model_class.__name__,
            'queries_optimized': 0,
            'indexes_recommended': [],
            'performance_improvements': [],
        }

        with schema_context(tenant_schema):
            # Analyze current query patterns
            self._analyze_model_queries(model_class, optimization_results)

            # Recommend indexes based on query patterns
            recommendations = self._recommend_indexes(model_class, tenant_schema)
            optimization_results['indexes_recommended'] = recommendations

            # Optimize common query patterns
            improvements = self._optimize_common_patterns(model_class, tenant_schema)
            optimization_results['performance_improvements'] = improvements

        return optimization_results

    def _analyze_model_queries(self, model_class: type, results: Dict[str, Any]):
        """Analyze query patterns for a specific model."""
        # Get all field names for the model
        field_names = [field.name for field in model_class._meta.fields]

        # Common query patterns to analyze
        common_patterns = [
            {'type': 'filter_by_id', 'fields': ['id']},
            {'type': 'filter_by_tenant', 'fields': ['tenant']},
            {'type': 'filter_by_status', 'fields': ['status']},
            {'type': 'filter_by_date_range', 'fields': ['created_at', 'updated_at']},
            {'type': 'filter_by_foreign_key',
             'fields': [f for f in field_names if f.endswith('_id')]},
        ]

        with connection.cursor() as cursor:
            for pattern in common_patterns:
                if any(field in field_names for field in pattern['fields']):
                    # Get query statistics for this pattern
                    query_stats = self._get_pattern_statistics(model_class, pattern, cursor)
                    results['queries_optimized'] += query_stats.get('total_queries', 0)
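
    # _get_pattern_statistics is referenced above but was not defined anywhere
    # in this module; the following is a minimal sketch of what it could look
    # like, counting recorded statements in pg_stat_statements that touch the
    # model's table. The matching strategy is an assumption, not the original
    # implementation.
    def _get_pattern_statistics(self, model_class: type, pattern: Dict[str, Any],
                                cursor) -> Dict[str, Any]:
        table_name = model_class._meta.db_table
        try:
            # A real implementation would also match on the pattern's fields,
            # not just the table name.
            cursor.execute(
                "SELECT COALESCE(SUM(calls), 0) FROM pg_stat_statements "
                "WHERE query ILIKE %s",
                [f"%{table_name}%"],
            )
            total = cursor.fetchone()[0]
        except Exception as e:
            logger.debug(f"Could not read pg_stat_statements: {e}")
            total = 0
        return {'pattern': pattern['type'], 'total_queries': total}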

    def _recommend_indexes(self, model_class: type,
                           tenant_schema: str) -> List[IndexRecommendation]:
        """Generate index recommendations based on query patterns."""
        recommendations = []
        table_name = model_class._meta.db_table

        # Get field information
        fields = model_class._meta.fields

        # Basic indexes for multi-tenant architecture
        if hasattr(model_class, 'tenant'):
            recommendations.append(IndexRecommendation(
                table_name=table_name,
                column_names=['tenant_id'],
                index_type='btree',
                expected_impact='High - Essential for multi-tenant isolation',
                priority='Critical',
                query_patterns=['All tenant-specific queries'],
            ))

        # Primary key index. PostgreSQL indexes primary keys automatically,
        # so this only applies to explicitly declared (non-auto-created) PKs.
        pk_field = model_class._meta.pk
        if pk_field and not pk_field.auto_created:
            recommendations.append(IndexRecommendation(
                table_name=table_name,
                column_names=[pk_field.name],
                index_type='btree',
                expected_impact='High - Primary key lookups',
                priority='High',
                query_patterns=['Primary key queries'],
            ))

        # Foreign key indexes; use the concrete database column
        # (field.column, e.g. "user_id"), not the Python field name.
        for field in fields:
            if field.is_relation and field.concrete:
                recommendations.append(IndexRecommendation(
                    table_name=table_name,
                    column_names=[field.column],
                    index_type='btree',
                    expected_impact='Medium - Foreign key joins',
                    priority='Medium',
                    query_patterns=[f'Joins with {field.related_model.__name__}'],
                ))

        # Date/time indexes for temporal queries
        date_fields = [f.name for f in fields
                       if isinstance(f, (models.DateTimeField, models.DateField))]
        if date_fields:
            recommendations.append(IndexRecommendation(
                table_name=table_name,
                column_names=date_fields,
                index_type='btree',
                expected_impact='Medium - Date range queries',
                priority='Medium',
                query_patterns=['Date range queries', 'Time-based filtering'],
            ))

        # Status and enum fields
        status_fields = [f.name for f in fields
                         if f.name in ['status', 'state', 'is_active']]
        if status_fields:
            recommendations.append(IndexRecommendation(
                table_name=table_name,
                column_names=status_fields,
                index_type='btree',
                expected_impact='Medium - Status filtering',
                priority='Medium',
                query_patterns=['Status-based queries'],
            ))

        return recommendations

    def _optimize_common_patterns(self, model_class: type,
                                  tenant_schema: str) -> List[str]:
        """Optimize common query patterns."""
        improvements = []

        # Optimize tenant-scoped queries
        if hasattr(model_class, 'tenant'):
            improvements.append(
                "Added tenant_id to all queries for proper multi-tenant isolation"
            )

        # Optimize pagination queries
        improvements.append(
            "Implemented cursor-based pagination for large datasets"
        )

        # Optimize selective field queries
        improvements.append(
            "Added select_related/prefetch_related for efficient relationship loading"
        )

        return improvements
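
    # The recommendations above are single-column (or same-kind) indexes. For
    # hot per-tenant filters, a composite index that leads with tenant_id
    # usually outperforms two separate single-column indexes, because one
    # index scan can satisfy both predicates. A sketch (table and column
    # names are illustrative, not from this module):
    #
    #     CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_core_invoice_tenant_status
    #         ON core_invoice (tenant_id, status);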

    def create_recommended_indexes(
            self, recommendations: List[IndexRecommendation]) -> List[str]:
        """
        Create recommended database indexes.

        Args:
            recommendations: List of index recommendations

        Returns:
            List of created index names
        """
        created_indexes = []

        # CREATE INDEX CONCURRENTLY cannot run inside a transaction block, so
        # this must execute in autocommit mode (outside transaction.atomic()).
        with connection.cursor() as cursor:
            for recommendation in recommendations:
                if recommendation.priority == 'Critical':
                    index_name = (
                        f"idx_{recommendation.table_name}_"
                        f"{'_'.join(recommendation.column_names)}"
                    )
                    try:
                        # Create the index
                        column_list = ', '.join(recommendation.column_names)
                        create_sql = f"""
                            CREATE INDEX CONCURRENTLY IF NOT EXISTS {index_name}
                            ON {recommendation.table_name} ({column_list})
                        """
                        cursor.execute(create_sql)
                        created_indexes.append(index_name)
                        logger.info(f"Created index: {index_name}")
                        self.optimization_stats['indexes_created'] += 1
                    except Exception as e:
                        logger.error(f"Failed to create index {index_name}: {e}")

        return created_indexes

    def analyze_query_performance(self, hours: int = 24) -> Dict[str, Any]:
        """
        Analyze query performance.

        Note: pg_stat_statements keeps cumulative statistics since the last
        pg_stat_statements_reset() and records no per-query timestamps, so
        the `hours` argument is reported for context only and cannot filter
        the statistics.

        Args:
            hours: Analysis window to report (informational only)

        Returns:
            Dictionary with performance analysis results
        """
        analysis = {
            'period_hours': hours,
            'total_queries': 0,
            'slow_queries': 0,
            'avg_execution_time': 0.0,
            'most_used_tables': [],
            'performance_issues': [],
            'recommendations': [],
        }

        with connection.cursor() as cursor:
            # Get query statistics. Times in pg_stat_statements are in
            # milliseconds; on PostgreSQL 13+ the columns are mean_exec_time
            # and total_exec_time instead of mean_time/total_time.
            cursor.execute("""
                SELECT
                    COUNT(*) AS total_queries,
                    AVG(mean_time) AS avg_time_ms,
                    COUNT(CASE WHEN mean_time > 1000 THEN 1 END) AS slow_queries
                FROM pg_stat_statements
            """)

            result = cursor.fetchone()
            if result:
                analysis['total_queries'] = result[0] or 0
                analysis['avg_execution_time'] = result[1] or 0.0
                analysis['slow_queries'] = result[2] or 0

            # Get most used tables
            cursor.execute("""
                SELECT schemaname, relname, seq_scan, seq_tup_read,
                       idx_scan, idx_tup_fetch
                FROM pg_stat_user_tables
                ORDER BY seq_scan + idx_scan DESC
                LIMIT 10
            """)

            analysis['most_used_tables'] = [
                {
                    'schema': row[0],
                    'table': row[1],
                    'sequential_scans': row[2],
                    'rows_read': row[3],
                    'index_scans': row[4],
                    'rows_fetched': row[5],
                }
                for row in cursor.fetchall()
            ]

        # Identify performance issues (times are in milliseconds)
        if analysis['slow_queries'] > 0:
            analysis['performance_issues'].append(
                f"Found {analysis['slow_queries']} slow queries (>1 second)"
            )

        if analysis['avg_execution_time'] > 500:
            analysis['performance_issues'].append(
                "Average query time is high (>500 ms)"
            )

        return analysis
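
    # monitor_query() and analyze_query_performance() both depend on the
    # pg_stat_statements extension, which must be enabled before this class
    # can report anything. A minimal setup sketch (exact settings depend on
    # the deployment):
    #
    #     # postgresql.conf
    #     shared_preload_libraries = 'pg_stat_statements'
    #
    #     -- then, once per database:
    #     CREATE EXTENSION IF NOT EXISTS pg_stat_statements;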

    def optimize_malaysian_queries(self) -> Dict[str, Any]:
        """
        Optimize queries specific to Malaysian market requirements.

        Returns:
            Dictionary with Malaysian-specific optimizations
        """
        optimizations = {
            'malaysian_optimizations': [],
            'sst_queries_optimized': 0,
            'ic_validation_optimized': False,
            'address_queries_optimized': 0,
            'localization_improvements': [],
        }

        # Optimize SST calculation queries
        optimizations['sst_queries_optimized'] = self._optimize_sst_queries()

        # Optimize Malaysian IC validation queries
        optimizations['ic_validation_optimized'] = self._optimize_ic_validation()

        # Optimize Malaysian address queries
        optimizations['address_queries_optimized'] = self._optimize_address_queries()

        # Add localization improvements
        optimizations['localization_improvements'] = [
            "Added proper timezone handling for Malaysia (UTC+8)",
            "Optimized multi-language field queries",
            "Improved Malaysian state and postcode lookups",
            "Enhanced business registration number queries",
        ]

        return optimizations

    def _optimize_sst_queries(self) -> int:
        """Optimize SST (Sales and Service Tax) calculation queries."""
        optimized_count = 0

        # Create indexes for SST-related fields
        sst_indexes = [
            "CREATE INDEX IF NOT EXISTS idx_sst_rate ON core_sstrate (rate)",
            "CREATE INDEX IF NOT EXISTS idx_sst_category ON core_sstcategory (code)",
            "CREATE INDEX IF NOT EXISTS idx_transaction_sst ON core_transaction (sst_amount, sst_rate)",
        ]

        with connection.cursor() as cursor:
            for index_sql in sst_indexes:
                try:
                    cursor.execute(index_sql)
                    optimized_count += 1
                except Exception as e:
                    logger.error(f"Failed to create SST index: {e}")

        return optimized_count

    def _optimize_ic_validation(self) -> bool:
        """Optimize Malaysian IC number validation queries."""
        success = False

        # Create indexes for Malaysian IC numbers
        ic_indexes = [
            "CREATE INDEX IF NOT EXISTS idx_user_ic_number ON core_user (ic_number)",
            "CREATE INDEX IF NOT EXISTS idx_patient_ic ON healthcare_patient (ic_number)",
            "CREATE INDEX IF NOT EXISTS idx_student_ic ON education_student (ic_number)",
        ]

        with connection.cursor() as cursor:
            try:
                for index_sql in ic_indexes:
                    cursor.execute(index_sql)
                success = True
            except Exception as e:
                logger.error(f"Failed to create IC validation indexes: {e}")

        return success

    def _optimize_address_queries(self) -> int:
        """Optimize Malaysian address-related queries."""
        optimized_count = 0

        # Create indexes for Malaysian addresses
        address_indexes = [
            "CREATE INDEX IF NOT EXISTS idx_address_postcode ON core_address (postcode)",
            "CREATE INDEX IF NOT EXISTS idx_address_state ON core_address (state)",
            "CREATE INDEX IF NOT EXISTS idx_address_city ON core_address (city)",
            "CREATE INDEX IF NOT EXISTS idx_business_registration ON core_business (registration_number)",
        ]

        with connection.cursor() as cursor:
            for index_sql in address_indexes:
                try:
                    cursor.execute(index_sql)
                    optimized_count += 1
                except Exception as e:
                    logger.error(f"Failed to create address index: {e}")

        return optimized_count
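
    # The raw CREATE INDEX statements above could equivalently be declared on
    # the Django models themselves, which keeps them tracked in migrations.
    # A sketch (the Address model and its fields are assumptions, not part of
    # this module):
    #
    #     class Address(models.Model):
    #         postcode = models.CharField(max_length=5)
    #         state = models.CharField(max_length=50)
    #
    #         class Meta:
    #             indexes = [
    #                 models.Index(fields=["postcode"], name="idx_address_postcode"),
    #                 models.Index(fields=["state"], name="idx_address_state"),
    #             ]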

    def get_optimization_report(self) -> Dict[str, Any]:
        """
        Generate a comprehensive optimization report.

        Returns:
            Dictionary with optimization report
        """
        return {
            'optimization_statistics': self.optimization_stats,
            'query_history_summary': self._summarize_query_history(),
            'current_recommendations': self.index_recommendations,
            'malaysian_optimizations': self.optimize_malaysian_queries(),
            'performance_analysis': self.analyze_query_performance(),
            'suggested_actions': self._get_suggested_actions(),
        }

    def _summarize_query_history(self) -> Dict[str, Any]:
        """Summarize query history metrics."""
        if not self.query_history:
            return {'total_queries': 0, 'average_time': 0.0}

        total_queries = len(self.query_history)
        total_time = sum(q.execution_time for q in self.query_history)
        avg_time = total_time / total_queries
        slow_queries = [q for q in self.query_history if q.execution_time > 1.0]

        return {
            'total_queries': total_queries,
            'average_time': avg_time,
            'slow_queries_count': len(slow_queries),
            'slowest_query_time': max(q.execution_time for q in self.query_history),
            'tables_queried': list(set(q.table_name for q in self.query_history)),
        }

    def _get_suggested_actions(self) -> List[str]:
        """Get suggested optimization actions."""
        actions = []

        if self.optimization_stats['queries_analyzed'] > 0:
            slow_percentage = (
                len([q for q in self.query_history if q.execution_time > 1.0])
                / len(self.query_history)
            ) * 100
            if slow_percentage > 10:
                actions.append(
                    "High percentage of slow queries detected - consider query optimization"
                )

        if self.optimization_stats['indexes_created'] == 0:
            actions.append(
                "No indexes created - consider adding indexes for frequently queried fields"
            )

        actions.extend([
            "Schedule regular database maintenance",
            "Implement query caching for frequently accessed data",
            "Consider database partitioning for large tables",
            "Monitor and optimize connection pooling",
        ])

        return actions

    def clear_optimization_history(self):
        """Clear optimization history and reset statistics."""
        self.query_history = []
        self.index_recommendations = []
        self.optimization_stats = {
            'queries_analyzed': 0,
            'queries_optimized': 0,
            'indexes_created': 0,
            'performance_improvement': 0.0,
        }
        logger.info("Optimization history cleared")
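

# Typical end-to-end usage of DatabaseOptimizer (a sketch; the schema name
# "tenant_acme" and the Invoice model are illustrative assumptions, not part
# of this module):
#
#     optimizer = DatabaseOptimizer(tenant_schema="tenant_acme")
#
#     with optimizer.monitor_query("active invoices", OptimizationLevel.BASIC):
#         list(Invoice.objects.filter(status="active"))
#
#     results = optimizer.optimize_tenant_queries(Invoice, "tenant_acme")
#     optimizer.create_recommended_indexes(results['indexes_recommended'])
#     report = optimizer.get_optimization_report()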
""" @staticmethod def optimize_tenant_filter(queryset, tenant_id): """Optimize tenant-scoped queries.""" return queryset.filter(tenant_id=tenant_id).select_related('tenant') @staticmethod def optimize_pagination(queryset, page_size=50): """Optimize pagination for large datasets.""" return queryset.order_by('id')[:page_size] @staticmethod def optimize_foreign_key_query(queryset, related_fields): """Optimize queries with foreign key relationships.""" return queryset.select_related(*related_fields) @staticmethod def optimize_many_to_many_query(queryset, related_fields): """Optimize many-to-many relationship queries.""" return queryset.prefetch_related(*related_fields) @staticmethod def optimize_date_range_query(queryset, date_field, start_date, end_date): """Optimize date range queries.""" return queryset.filter( **{f"{date_field}__gte": start_date, f"{date_field}__lte": end_date} ).order_by(date_field) @staticmethod def optimize_full_text_search(queryset, search_fields, search_term): """Optimize full-text search queries.""" from django.contrib.postgres.search import SearchVector, SearchQuery, SearchRank search_vector = SearchVector(*search_fields) search_query = SearchQuery(search_term) return queryset.annotate( rank=SearchRank(search_vector, search_query) ).filter(rank__gte=0.3).order_by('-rank') # Cache management utilities class CacheManager: """Cache management for database optimization.""" @staticmethod def get_cache_key(prefix: str, *args) -> str: """Generate cache key with prefix and arguments.""" return f"{prefix}_{'_'.join(str(arg) for arg in args)}" @staticmethod def cache_query_result(cache_key: str, query_result, timeout=3600): """Cache query result with specified timeout.""" cache.set(cache_key, query_result, timeout) @staticmethod def get_cached_result(cache_key: str): """Get cached result if available.""" return cache.get(cache_key) @staticmethod def invalidate_cache_pattern(pattern: str): """Invalidate cache keys matching pattern.""" keys = cache.keys(pattern) if keys: cache.delete_many(keys) # Database maintenance utilities class DatabaseMaintenance: """Database maintenance and optimization utilities.""" @staticmethod def analyze_tables(): """Run ANALYZE on all tables to update statistics.""" with connection.cursor() as cursor: cursor.execute(""" SELECT schemaname || '.' || tablename FROM pg_tables WHERE schemaname NOT IN ('information_schema', 'pg_catalog') """) tables = [row[0] for row in cursor.fetchall()] for table in tables: try: cursor.execute(f"ANALYZE {table}") logger.info(f"Analyzed table: {table}") except Exception as e: logger.error(f"Failed to analyze {table}: {e}") @staticmethod def vacuum_tables(): """Run VACUUM on all tables to reclaim storage.""" with connection.cursor() as cursor: cursor.execute(""" SELECT schemaname || '.' 


# Database maintenance utilities
class DatabaseMaintenance:
    """Database maintenance and optimization utilities."""

    @staticmethod
    def analyze_tables():
        """Run ANALYZE on all tables to update statistics."""
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT schemaname || '.' || tablename
                FROM pg_tables
                WHERE schemaname NOT IN ('information_schema', 'pg_catalog')
            """)
            tables = [row[0] for row in cursor.fetchall()]

            for table in tables:
                try:
                    cursor.execute(f"ANALYZE {table}")
                    logger.info(f"Analyzed table: {table}")
                except Exception as e:
                    logger.error(f"Failed to analyze {table}: {e}")

    @staticmethod
    def vacuum_tables():
        """Run VACUUM ANALYZE on all tables to reclaim storage.

        Note: VACUUM cannot run inside a transaction block, so this must
        execute in autocommit mode (outside transaction.atomic()).
        """
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT schemaname || '.' || tablename
                FROM pg_tables
                WHERE schemaname NOT IN ('information_schema', 'pg_catalog')
            """)
            tables = [row[0] for row in cursor.fetchall()]

            for table in tables:
                try:
                    cursor.execute(f"VACUUM ANALYZE {table}")
                    logger.info(f"Vacuumed table: {table}")
                except Exception as e:
                    logger.error(f"Failed to vacuum {table}: {e}")

    @staticmethod
    def get_table_sizes():
        """Get size information for all tables."""
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT
                    schemaname,
                    tablename,
                    pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size,
                    pg_total_relation_size(schemaname||'.'||tablename) AS size_bytes
                FROM pg_tables
                WHERE schemaname NOT IN ('information_schema', 'pg_catalog')
                ORDER BY size_bytes DESC
            """)

            return [
                {
                    'schema': row[0],
                    'table': row[1],
                    'size': row[2],
                    'size_bytes': row[3],
                }
                for row in cursor.fetchall()
            ]


# Management command for database optimization
class OptimizationCommand:
    """Management command handler for database optimization."""

    def handle(self, *args, **options):
        """Handle the optimization command."""
        optimizer = DatabaseOptimizer()

        # Get optimization recommendations (includes performance analysis)
        report = optimizer.get_optimization_report()

        # Create recommended indexes
        if report['current_recommendations']:
            created = optimizer.create_recommended_indexes(
                report['current_recommendations']
            )
            print(f"Created {len(created)} new indexes")

        # Optimize Malaysian-specific queries
        malaysian_opts = optimizer.optimize_malaysian_queries()
        print(f"Optimized {malaysian_opts['sst_queries_optimized']} SST queries")

        # Run maintenance
        DatabaseMaintenance.analyze_tables()
        print("Database maintenance completed")

        print("Optimization completed successfully")
        print(f"Total queries analyzed: {optimizer.optimization_stats['queries_analyzed']}")
        print(f"Indexes created: {optimizer.optimization_stats['indexes_created']}")


# Export main classes and functions
__all__ = [
    'DatabaseOptimizer',
    'QueryOptimizer',
    'CacheManager',
    'DatabaseMaintenance',
    'OptimizationCommand',
    'OptimizationLevel',
    'QueryMetrics',
    'IndexRecommendation',
]
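

# OptimizationCommand is a plain handler, not a Django management command by
# itself. To expose it as one, wrap it in a BaseCommand subclass. A sketch
# (the file path myapp/management/commands/optimize_db.py is an assumption):
#
#     from django.core.management.base import BaseCommand
#
#     class Command(BaseCommand):
#         help = "Run database optimization"
#
#         def handle(self, *args, **options):
#             OptimizationCommand().handle(*args, **options)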