project initialization

commit b3fff546e9 (parent 2cbb6d5fa1)
2025-10-05 02:37:33 +08:00
226 changed files with 97,805 additions and 35 deletions


@@ -0,0 +1,627 @@
"""
Database Configuration Optimization
This module provides optimized database configuration settings for the multi-tenant SaaS platform,
including connection pooling, query optimization, caching strategies, and performance tuning
specifically designed for Malaysian deployment scenarios.
"""
import os
from typing import Dict, Any, List, Optional
from dataclasses import asdict, dataclass
from enum import Enum
class DatabaseEngine(Enum):
"""Supported database engines."""
POSTGRESQL = "postgresql"
MYSQL = "mysql"
SQLITE = "sqlite3"
class CacheBackend(Enum):
"""Supported cache backends."""
REDIS = "redis"
MEMCACHED = "memcached"
DATABASE = "database"
DUMMY = "dummy"
@dataclass
class ConnectionPoolConfig:
"""Configuration for database connection pooling."""
max_connections: int = 100
min_connections: int = 2
connect_timeout: int = 10
idle_timeout: int = 300
max_lifetime: int = 3600
reuse_timeout: int = 30
health_check_interval: int = 60
health_check_timeout: int = 5
@dataclass
class QueryOptimizationConfig:
"""Configuration for query optimization."""
slow_query_threshold: float = 1.0 # seconds
query_cache_timeout: int = 3600 # seconds
enable_query_logging: bool = True
max_query_length: int = 10000
force_index_hints: bool = False
optimize_joins: bool = True
batch_size: int = 1000
@dataclass
class CacheConfig:
"""Configuration for caching."""
backend: CacheBackend = CacheBackend.REDIS
location: str = "redis://127.0.0.1:6379/1"
timeout: int = 300
key_prefix: str = "saas_"
version: int = 1
options: Optional[Dict[str, Any]] = None
def __post_init__(self):
if self.options is None:
self.options = {}
@dataclass
class MultiTenantConfig:
"""Configuration for multi-tenant database optimization."""
shared_tables: Optional[List[str]] = None
tenant_table_prefix: str = "tenant_"
enable_tenant_caching: bool = True
tenant_cache_timeout: int = 1800
enable_cross_tenant_queries: bool = False
tenant_isolation_level: str = "strict"
def __post_init__(self):
if self.shared_tables is None:
self.shared_tables = [
"public.tenant",
"public.django_migrations",
"public.django_content_type",
"public.django_admin_log"
]
@dataclass
class MalaysianConfig:
"""Configuration specific to Malaysian deployment."""
timezone: str = "Asia/Kuala_Lumpur"
locale: str = "ms_MY"
currency: str = "MYR"
enable_local_caching: bool = True
local_cache_timeout: int = 900
malaysian_indexes_enabled: bool = True
sst_calculation_cache: bool = True
ic_validation_cache: bool = True
address_optimization: bool = True
@dataclass
class PerformanceConfig:
"""General performance configuration."""
enable_connection_pooling: bool = True
enable_query_optimization: bool = True
enable_caching: bool = True
enable_monitoring: bool = True
log_slow_queries: bool = True
enable_query_profiling: bool = False
enable_database_maintenance: bool = True
class DatabaseConfig:
"""
Centralized database configuration management for the multi-tenant SaaS platform.
This class provides optimized configuration settings for different deployment scenarios
with specific optimizations for Malaysian market requirements.
"""
def __init__(self, environment: str = "production"):
self.environment = environment
self.connection_pool = self._get_connection_pool_config()
self.query_optimization = self._get_query_optimization_config()
self.cache = self._get_cache_config()
self.multi_tenant = self._get_multi_tenant_config()
self.malaysian = self._get_malaysian_config()
self.performance = self._get_performance_config()
def _get_connection_pool_config(self) -> ConnectionPoolConfig:
"""Get connection pool configuration based on environment."""
if self.environment == "production":
return ConnectionPoolConfig(
max_connections=200,
min_connections=10,
connect_timeout=10,
idle_timeout=600,
max_lifetime=7200,
reuse_timeout=60,
health_check_interval=120,
health_check_timeout=10
)
elif self.environment == "staging":
return ConnectionPoolConfig(
max_connections=100,
min_connections=5,
connect_timeout=15,
idle_timeout=300,
max_lifetime=3600,
reuse_timeout=30,
health_check_interval=60,
health_check_timeout=5
)
else: # development
return ConnectionPoolConfig(
max_connections=50,
min_connections=2,
connect_timeout=5,
idle_timeout=60,
max_lifetime=1800,
reuse_timeout=15,
health_check_interval=30,
health_check_timeout=3
)
def _get_query_optimization_config(self) -> QueryOptimizationConfig:
"""Get query optimization configuration based on environment."""
if self.environment == "production":
return QueryOptimizationConfig(
slow_query_threshold=0.5,
query_cache_timeout=7200,
enable_query_logging=True,
max_query_length=50000,
force_index_hints=True,
optimize_joins=True,
batch_size=2000
)
elif self.environment == "staging":
return QueryOptimizationConfig(
slow_query_threshold=1.0,
query_cache_timeout=3600,
enable_query_logging=True,
max_query_length=10000,
force_index_hints=False,
optimize_joins=True,
batch_size=1000
)
else: # development
return QueryOptimizationConfig(
slow_query_threshold=2.0,
query_cache_timeout=1800,
enable_query_logging=False,
max_query_length=10000,
force_index_hints=False,
optimize_joins=False,
batch_size=500
)
def _get_cache_config(self) -> CacheConfig:
"""Get cache configuration based on environment."""
if self.environment == "production":
return CacheConfig(
backend=CacheBackend.REDIS,
location=os.getenv("REDIS_URL", "redis://127.0.0.1:6379/1"),
timeout=3600,
key_prefix="saas_prod_",
version=1,
options={
"CLIENT_KWARGS": {
"socket_connect_timeout": 5,
"socket_timeout": 5,
"retry_on_timeout": True
}
}
)
elif self.environment == "staging":
return CacheConfig(
backend=CacheBackend.REDIS,
location=os.getenv("REDIS_URL", "redis://127.0.0.1:6379/2"),
timeout=1800,
key_prefix="saas_staging_",
version=1,
options={
"CLIENT_KWARGS": {
"socket_connect_timeout": 10,
"socket_timeout": 10
}
}
)
else: # development
return CacheConfig(
backend=CacheBackend.DUMMY,
location="",
timeout=300,
key_prefix="saas_dev_",
version=1,
options={}
)
def _get_multi_tenant_config(self) -> MultiTenantConfig:
"""Get multi-tenant configuration based on environment."""
shared_tables = [
"public.tenant",
"public.django_migrations",
"public.django_content_type",
"public.django_admin_log",
"public.django_session"
]
if self.environment == "production":
return MultiTenantConfig(
shared_tables=shared_tables,
tenant_table_prefix="tenant_",
enable_tenant_caching=True,
tenant_cache_timeout=1800,
enable_cross_tenant_queries=False,
tenant_isolation_level="strict"
)
else:
return MultiTenantConfig(
shared_tables=shared_tables,
tenant_table_prefix="tenant_",
enable_tenant_caching=True,
tenant_cache_timeout=900,
enable_cross_tenant_queries=True,
tenant_isolation_level="moderate"
)
def _get_malaysian_config(self) -> MalaysianConfig:
"""Get Malaysian-specific configuration."""
return MalaysianConfig(
timezone="Asia/Kuala_Lumpur",
locale="ms_MY",
currency="MYR",
enable_local_caching=True,
local_cache_timeout=900,
malaysian_indexes_enabled=True,
sst_calculation_cache=True,
ic_validation_cache=True,
address_optimization=True
)
def _get_performance_config(self) -> PerformanceConfig:
"""Get general performance configuration."""
if self.environment == "production":
return PerformanceConfig(
enable_connection_pooling=True,
enable_query_optimization=True,
enable_caching=True,
enable_monitoring=True,
log_slow_queries=True,
enable_query_profiling=True,
enable_database_maintenance=True
)
elif self.environment == "staging":
return PerformanceConfig(
enable_connection_pooling=True,
enable_query_optimization=True,
enable_caching=True,
enable_monitoring=True,
log_slow_queries=True,
enable_query_profiling=False,
enable_database_maintenance=True
)
else: # development
return PerformanceConfig(
enable_connection_pooling=False,
enable_query_optimization=False,
enable_caching=False,
enable_monitoring=False,
log_slow_queries=False,
enable_query_profiling=False,
enable_database_maintenance=False
)
def get_django_database_config(self) -> Dict[str, Any]:
"""
Get Django database configuration dictionary.
Returns:
Dictionary suitable for Django DATABASES setting
"""
base_config = {
"ENGINE": "django_tenants.postgresql_backend",
"NAME": os.getenv("DB_NAME", "saas_platform"),
"USER": os.getenv("DB_USER", "postgres"),
"PASSWORD": os.getenv("DB_PASSWORD", ""),
"HOST": os.getenv("DB_HOST", "localhost"),
"PORT": os.getenv("DB_PORT", "5432"),
"CONN_MAX_AGE": self.connection_pool.max_lifetime,
"OPTIONS": {
"connect_timeout": self.connection_pool.connect_timeout,
"application_name": f"saas_platform_{self.environment}",
"tcp_user_timeout": 10000,
"statement_timeout": 30000,
"idle_in_transaction_session_timeout": 60000,
}
}
# Add connection pooling options if enabled.
# Note: MAX_CONNS, MIN_CONNS, REUSE_CONNS and IDLE_TIMEOUT are not standard
# psycopg2 connection options; they are intended for a pooling-aware database
# backend and will be rejected by the stock PostgreSQL backend.
if self.performance.enable_connection_pooling:
base_config["OPTIONS"].update({
"MAX_CONNS": self.connection_pool.max_connections,
"MIN_CONNS": self.connection_pool.min_connections,
"REUSE_CONNS": self.connection_pool.reuse_timeout,
"IDLE_TIMEOUT": self.connection_pool.idle_timeout,
})
return {
"default": base_config
}
def get_django_cache_config(self) -> Dict[str, Any]:
"""
Get Django cache configuration dictionary.
Returns:
Dictionary suitable for Django CACHES setting
"""
if not self.performance.enable_caching:
return {
"default": {
"BACKEND": "django.core.cache.backends.dummy.DummyCache"
}
}
if self.cache.backend == CacheBackend.REDIS:
return {
"default": {
"BACKEND": "django_redis.cache.RedisCache",
"LOCATION": self.cache.location,
"TIMEOUT": self.cache.timeout,
"KEY_PREFIX": self.cache.key_prefix,
"VERSION": self.cache.version,
"OPTIONS": self.cache.options
},
"tenant_cache": {
"BACKEND": "django_redis.cache.RedisCache",
"LOCATION": self.cache.location.replace("/1", "/2"),
"TIMEOUT": self.multi_tenant.tenant_cache_timeout,
"KEY_PREFIX": "tenant_",
"VERSION": 1,
"OPTIONS": self.cache.options
},
"malaysian_cache": {
"BACKEND": "django_redis.cache.RedisCache",
"LOCATION": self.cache.location.replace("/1", "/3"),
"TIMEOUT": self.malaysian.local_cache_timeout,
"KEY_PREFIX": "malaysian_",
"VERSION": 1,
"OPTIONS": self.cache.options
}
}
elif self.cache.backend == CacheBackend.MEMCACHED:
return {
"default": {
"BACKEND": "django.core.cache.backends.memcached.PyMemcacheCache",
"LOCATION": self.cache.location,
"TIMEOUT": self.cache.timeout,
"KEY_PREFIX": self.cache.key_prefix,
"VERSION": self.cache.version
}
}
else:
return {
"default": {
"BACKEND": "django.core.cache.backends.db.DatabaseCache",
"LOCATION": "cache_table",
"TIMEOUT": self.cache.timeout,
"KEY_PREFIX": self.cache.key_prefix,
"VERSION": self.cache.version
}
}
def get_database_optimization_settings(self) -> Dict[str, Any]:
"""
Get database optimization settings.
Returns:
Dictionary with optimization settings
"""
return {
"connection_pool": asdict(self.connection_pool),
"query_optimization": asdict(self.query_optimization),
"cache": asdict(self.cache),
"multi_tenant": asdict(self.multi_tenant),
"malaysian": asdict(self.malaysian),
"performance": asdict(self.performance)
}
def get_postgresql_settings(self) -> List[str]:
"""
Get PostgreSQL configuration settings.
Returns:
List of PostgreSQL configuration commands
"""
settings = []
if self.environment == "production":
settings.extend([
"ALTER SYSTEM SET shared_buffers = '256MB'",
"ALTER SYSTEM SET effective_cache_size = '1GB'",
"ALTER SYSTEM SET maintenance_work_mem = '64MB'",
"ALTER SYSTEM SET checkpoint_completion_target = 0.9",
"ALTER SYSTEM SET wal_buffers = '16MB'",
"ALTER SYSTEM SET default_statistics_target = 100",
"ALTER SYSTEM SET random_page_cost = 1.1",
"ALTER SYSTEM SET effective_io_concurrency = 200",
"ALTER SYSTEM SET work_mem = '4MB'",
"ALTER SYSTEM SET min_wal_size = '1GB'",
"ALTER SYSTEM SET max_wal_size = '4GB'",
"ALTER SYSTEM SET max_worker_processes = 8",
"ALTER SYSTEM SET max_parallel_workers_per_gather = 4",
"ALTER SYSTEM SET max_parallel_workers = 8",
"ALTER SYSTEM SET max_parallel_maintenance_workers = 4",
"ALTER SYSTEM SET log_statement = 'mod'",
"ALTER SYSTEM SET log_min_duration_statement = '500'",
"ALTER SYSTEM SET log_checkpoints = 'on'",
"ALTER SYSTEM SET log_connections = 'on'",
"ALTER SYSTEM SET log_disconnections = 'on'",
"ALTER SYSTEM SET log_lock_waits = 'on'",
"ALTER SYSTEM SET log_temp_files = '0'",
"ALTER SYSTEM SET log_timezone = 'Asia/Kuala_Lumpur'",
"ALTER SYSTEM SET timezone = 'Asia/Kuala_Lumpur'",
])
elif self.environment == "staging":
settings.extend([
"ALTER SYSTEM SET shared_buffers = '128MB'",
"ALTER SYSTEM SET effective_cache_size = '512MB'",
"ALTER SYSTEM SET maintenance_work_mem = '32MB'",
"ALTER SYSTEM SET checkpoint_completion_target = 0.7",
"ALTER SYSTEM SET default_statistics_target = 50",
"ALTER SYSTEM SET work_mem = '2MB'",
"ALTER SYSTEM SET log_min_duration_statement = '1000'",
"ALTER SYSTEM SET log_timezone = 'Asia/Kuala_Lumpur'",
"ALTER SYSTEM SET timezone = 'Asia/Kuala_Lumpur'",
])
return settings
def get_environment_overrides(self) -> Dict[str, Any]:
"""
Get environment-specific overrides.
Returns:
Dictionary with environment overrides
"""
env_overrides = os.getenv("DB_CONFIG_OVERRIDES")
if env_overrides:
try:
import json
return json.loads(env_overrides)
except json.JSONDecodeError:
pass
return {}
def validate_configuration(self) -> List[str]:
"""
Validate the current configuration.
Returns:
List of validation warnings or errors
"""
warnings = []
# Check connection pool settings
if self.performance.enable_connection_pooling:
if self.connection_pool.max_connections < 10:
warnings.append("Max connections might be too low for production")
if self.connection_pool.min_connections > self.connection_pool.max_connections // 2:
warnings.append("Min connections should not exceed half of max connections")
# Check cache settings
if self.performance.enable_caching:
if self.cache.backend == CacheBackend.REDIS:
if not self.cache.location.startswith("redis://"):
warnings.append("Redis URL format is incorrect")
# Check query optimization settings
if self.query_optimization.slow_query_threshold < 0.1:
warnings.append("Slow query threshold might be too aggressive")
# Check multi-tenant settings
if not self.multi_tenant.shared_tables:
warnings.append("No shared tables configured for multi-tenant setup")
return warnings
def get_performance_recommendations(self) -> List[str]:
"""
Get performance recommendations based on current configuration.
Returns:
List of performance recommendations
"""
recommendations = []
if self.environment == "production":
if self.connection_pool.max_connections < 100:
recommendations.append("Consider increasing max_connections for better concurrency")
if self.query_optimization.slow_query_threshold > 1.0:
recommendations.append("Consider reducing slow_query_threshold for better monitoring")
if not self.performance.enable_query_profiling:
recommendations.append("Consider enabling query profiling for production optimization")
# Malaysian-specific recommendations
if self.malaysian.enable_local_caching:
recommendations.append("Malaysian local caching enabled - monitor cache hit rates")
if self.malaysian.malaysian_indexes_enabled:
recommendations.append("Ensure Malaysian-specific indexes are created and maintained")
# Multi-tenant recommendations
if self.multi_tenant.enable_tenant_caching:
recommendations.append("Monitor tenant cache hit rates and memory usage")
return recommendations
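# Illustrative sketch (not part of the original module): applying the ALTER SYSTEM
# statements returned by get_postgresql_settings(). This requires a superuser
# connection, some settings also need a server restart, and it is normally run once
# during provisioning rather than at request time.
#
#     from django.db import connection
#
#     config = DatabaseConfig("production")
#     with connection.cursor() as cursor:
#         for statement in config.get_postgresql_settings():
#             cursor.execute(statement)
#         cursor.execute("SELECT pg_reload_conf()")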
# Configuration factory functions
def get_config(environment: str = None) -> DatabaseConfig:
"""
Get database configuration for specified environment.
Args:
environment: Environment name (production, staging, development)
Returns:
DatabaseConfig instance
"""
if environment is None:
environment = os.getenv("DJANGO_ENV", "development")
return DatabaseConfig(environment)
def get_production_config() -> DatabaseConfig:
"""Get production database configuration."""
return DatabaseConfig("production")
def get_staging_config() -> DatabaseConfig:
"""Get staging database configuration."""
return DatabaseConfig("staging")
def get_development_config() -> DatabaseConfig:
"""Get development database configuration."""
return DatabaseConfig("development")
# Configuration validation
def validate_environment_config(environment: str) -> bool:
"""
Validate configuration for specified environment.
Args:
environment: Environment name
Returns:
True if configuration is valid
"""
config = get_config(environment)
warnings = config.validate_configuration()
return len(warnings) == 0
# Export classes and functions
__all__ = [
'DatabaseConfig',
'ConnectionPoolConfig',
'QueryOptimizationConfig',
'CacheConfig',
'MultiTenantConfig',
'MalaysianConfig',
'PerformanceConfig',
'DatabaseEngine',
'CacheBackend',
'get_config',
'get_production_config',
'get_staging_config',
'get_development_config',
'validate_environment_config',
]
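# Minimal self-check sketch (illustrative, not part of the original module): build a
# production configuration, print validation warnings, and dump the generated Django
# DATABASES/CACHES dictionaries. This module has no Django dependency, so it can be
# run directly.
if __name__ == "__main__":
    import json

    cfg = get_production_config()
    for warning in cfg.validate_configuration():
        print(f"warning: {warning}")
    print(json.dumps(cfg.get_django_database_config(), indent=2, default=str))
    print(json.dumps(cfg.get_django_cache_config(), indent=2, default=str))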


@@ -0,0 +1,865 @@
"""
Database Index Management Module
This module provides comprehensive index management utilities for the multi-tenant SaaS platform,
including index creation, monitoring, optimization, and maintenance specifically designed for
PostgreSQL with multi-tenant architecture and Malaysian market requirements.
"""
import logging
from typing import Dict, List, Optional, Tuple, Any, Set
from django.db import connection, connections
from django.core.cache import cache
from django.utils import timezone
from django_tenants.utils import schema_context
import time
import json
from dataclasses import dataclass, asdict
from enum import Enum
import re
logger = logging.getLogger(__name__)
class IndexType(Enum):
"""Types of database indexes."""
BTREE = "btree"
HASH = "hash"
GIST = "gist"
GIN = "gin"
BRIN = "brin"
SPGIST = "spgist"
PARTIAL = "partial"
EXPRESSION = "expression"
UNIQUE = "unique"
COMPOSITE = "composite"
class IndexStatus(Enum):
"""Status of database indexes."""
ACTIVE = "active"
INACTIVE = "inactive"
INVALID = "invalid"
CREATING = "creating"
DROPPING = "dropping"
REBUILDING = "rebuilding"
@dataclass
class IndexInfo:
"""Information about a database index."""
name: str
table_name: str
column_names: List[str]
index_type: IndexType
status: IndexStatus
is_unique: bool
is_primary: bool
size_bytes: int
usage_count: int
last_used: Optional[timezone.datetime]
create_statement: str
tenant_schema: str
@dataclass
class IndexRecommendation:
"""Recommendation for index management."""
action: str # 'create', 'drop', 'rebuild', 'modify'
index_name: Optional[str]
table_name: str
columns: List[str]
index_type: IndexType
reason: str
impact: str
priority: str # 'low', 'medium', 'high', 'critical'
estimated_benefit: str
class IndexManager:
"""
Comprehensive index management system for the multi-tenant SaaS platform.
Features:
- Automatic index creation and management
- Performance monitoring and analysis
- Multi-tenant index optimization
- Malaysian market-specific indexing
- Index maintenance and cleanup
"""
def __init__(self, tenant_schema: Optional[str] = None):
self.tenant_schema = tenant_schema
self.index_cache = {}
self.last_analysis = None
self.stats = {
'indexes_managed': 0,
'indexes_created': 0,
'indexes_dropped': 0,
'indexes_rebuilt': 0,
'performance_improvement': 0.0
}
def get_all_indexes(self, refresh: bool = False) -> List[IndexInfo]:
"""
Get all indexes in the database.
Args:
refresh: Force refresh from database
Returns:
List of IndexInfo objects
"""
cache_key = f"all_indexes_{self.tenant_schema or 'public'}"
if not refresh and cache_key in self.index_cache:
return self.index_cache[cache_key]
indexes = []
with connection.cursor() as cursor:
# Get basic index information
cursor.execute("""
SELECT
i.relname as index_name,
t.relname as table_name,
am.amname as index_type,
idx.indisunique as is_unique,
idx.indisprimary as is_primary,
pg_get_indexdef(idx.indexrelid) as definition,
pg_relation_size(i.relid) as size_bytes,
schemaname
FROM pg_index idx
JOIN pg_class i ON i.oid = idx.indexrelid
JOIN pg_class t ON t.oid = idx.indrelid
JOIN pg_namespace n ON n.oid = t.relnamespace
JOIN pg_am am ON am.oid = i.relam
WHERE schemaname = %s
ORDER BY t.relname, i.relname
""", [self.tenant_schema or 'public'])
results = cursor.fetchall()
for row in results:
index_name, table_name, index_type_str, is_unique, is_primary, definition, size_bytes, schema = row
# Extract column names from definition
column_names = self._extract_column_names(definition)
# Get usage statistics
usage_info = self._get_index_usage(cursor, index_name, schema)
# Determine index type
index_type = self._determine_index_type(definition, index_type_str)
# Get index status
status = self._get_index_status(cursor, index_name, schema)
index_info = IndexInfo(
name=index_name,
table_name=table_name,
column_names=column_names,
index_type=index_type,
status=status,
is_unique=is_unique,
is_primary=is_primary,
size_bytes=size_bytes or 0,
usage_count=usage_info.get('usage_count', 0),
last_used=usage_info.get('last_used'),
create_statement=definition,
tenant_schema=schema
)
indexes.append(index_info)
self.index_cache[cache_key] = indexes
self.last_analysis = timezone.now()
return indexes
def _extract_column_names(self, definition: str) -> List[str]:
"""Extract column names from index definition."""
# Extract column names from CREATE INDEX statement
match = re.search(r'ON\s+\w+\s*\(([^)]+)\)', definition)
if match:
columns_part = match.group(1)
# Split by commas and clean up
columns = [col.strip().strip('"') for col in columns_part.split(',')]
return columns
return []
def _get_index_usage(self, cursor, index_name: str, schema: str) -> Dict[str, Any]:
"""Get index usage statistics."""
try:
cursor.execute("""
SELECT
idx_scan as usage_count,
idx_tup_read as tuples_read,
idx_tup_fetch as tuples_fetched
FROM pg_stat_user_indexes
WHERE schemaname = %s AND indexrelname = %s
""", [schema, index_name])
result = cursor.fetchone()
if result:
return {
'usage_count': result[0] or 0,
'tuples_read': result[1] or 0,
'tuples_fetched': result[2] or 0,
'last_used': timezone.now() if (result[0] or 0) > 0 else None
}
except Exception as e:
logger.error(f"Error getting index usage for {index_name}: {e}")
return {'usage_count': 0, 'tuples_read': 0, 'tuples_fetched': 0}
def _determine_index_type(self, definition: str, am_name: str) -> IndexType:
"""Determine index type from definition and access method."""
if am_name == "btree":
# Check for special cases
if "UNIQUE" in definition.upper():
return IndexType.UNIQUE
elif "WHERE" in definition.upper():
return IndexType.PARTIAL
elif "(" in definition and ")" in definition:
# Check if it's an expression index
content_between_parens = re.search(r'\(([^)]+)\)', definition)
if content_between_parens:
content = content_between_parens.group(1)
if not all(col.strip().isalnum() for col in content.split(',')):
return IndexType.EXPRESSION
return IndexType.BTREE
elif am_name == "hash":
return IndexType.HASH
elif am_name == "gist":
return IndexType.GIST
elif am_name == "gin":
return IndexType.GIN
elif am_name == "brin":
return IndexType.BRIN
elif am_name == "spgist":
return IndexType.SPGIST
return IndexType.BTREE
def _get_index_status(self, cursor, index_name: str, schema: str) -> IndexStatus:
"""Get current status of an index."""
try:
cursor.execute("""
SELECT indisvalid
FROM pg_index
WHERE indexrelid = (
SELECT oid FROM pg_class
WHERE relname = %s AND relnamespace = (
SELECT oid FROM pg_namespace WHERE nspname = %s
)
)
""", [index_name, schema])
result = cursor.fetchone()
if result:
return IndexStatus.ACTIVE if result[0] else IndexStatus.INVALID
except Exception as e:
logger.error(f"Error getting index status for {index_name}: {e}")
return IndexStatus.ACTIVE
def analyze_index_performance(self) -> Dict[str, Any]:
"""
Analyze index performance and generate recommendations.
Returns:
Dictionary with performance analysis and recommendations
"""
indexes = self.get_all_indexes(refresh=True)
recommendations = []
# Analyze unused indexes
unused_indexes = [
idx for idx in indexes
if idx.usage_count == 0 and not idx.is_primary
]
for idx in unused_indexes:
recommendations.append(IndexRecommendation(
action="drop",
index_name=idx.name,
table_name=idx.table_name,
columns=idx.column_names,
index_type=idx.index_type,
reason=f"Index {idx.name} has never been used",
impact="Reduces storage and maintenance overhead",
priority="medium",
estimated_benefit=f"Save {self._format_bytes(idx.size_bytes)}"
))
# Analyze duplicate indexes
recommendations.extend(self._find_duplicate_indexes(indexes))
# Analyze missing indexes
recommendations.extend(self._find_missing_indexes())
# Analyze fragmented indexes
recommendations.extend(self._analyze_fragmentation(indexes))
return {
'total_indexes': len(indexes),
'unused_indexes': len(unused_indexes),
'total_index_size': sum(idx.size_bytes for idx in indexes),
'recommendations': recommendations,
'high_priority_count': len([r for r in recommendations if r.priority == 'critical']),
'analysis_timestamp': timezone.now()
}
def _find_duplicate_indexes(self, indexes: List[IndexInfo]) -> List[IndexRecommendation]:
"""Find duplicate or redundant indexes."""
recommendations = []
index_groups = {}
# Group indexes by table and columns
for idx in indexes:
key = (idx.table_name, tuple(sorted(idx.column_names)))
if key not in index_groups:
index_groups[key] = []
index_groups[key].append(idx)
for (table, columns), group in index_groups.items():
if len(group) > 1:
# Sort by usage and keep the most used
group.sort(key=lambda x: x.usage_count, reverse=True)
keep_idx = group[0]
for drop_idx in group[1:]:
recommendations.append(IndexRecommendation(
action="drop",
index_name=drop_idx.name,
table_name=table,
columns=list(columns),
index_type=drop_idx.index_type,
reason=f"Duplicate index (redundant with {keep_idx.name})",
impact="Reduces storage and write overhead",
priority="low",
estimated_benefit=f"Save {self._format_bytes(drop_idx.size_bytes)}"
))
return recommendations
def _find_missing_indexes(self) -> List[IndexRecommendation]:
"""Find potentially missing indexes based on query patterns."""
recommendations = []
with connection.cursor() as cursor:
# Analyze sequential scans on large tables
cursor.execute("""
SELECT
schemaname,
tablename,
seq_scan,
seq_tup_read,
pg_total_relation_size(schemaname||'.'||tablename) as table_size
FROM pg_stat_user_tables
WHERE seq_scan > 1000
AND pg_total_relation_size(schemaname||'.'||tablename) > 100 * 1024 * 1024
ORDER BY seq_scan DESC
LIMIT 10
""")
for row in cursor.fetchall():
schema, table, seq_scan, seq_tup_read, table_size = row
recommendations.append(IndexRecommendation(
action="create",
index_name=None,
table_name=table,
columns=["id"], # Default recommendation
index_type=IndexType.BTREE,
reason=f"Table {table} has {seq_scan} sequential scans",
impact="Improve query performance for large table",
priority="high",
estimated_benefit=f"Reduce sequential scans by ~{int(seq_scan * 0.8)}"
))
return recommendations
def _analyze_fragmentation(self, indexes: List[IndexInfo]) -> List[IndexRecommendation]:
"""Analyze index fragmentation and recommend rebuilding."""
recommendations = []
with connection.cursor() as cursor:
for idx in indexes:
# Check index bloat (simplified check)
if idx.size_bytes > 10 * 1024 * 1024: # > 10MB
# Large indexes might benefit from rebuilding
if idx.usage_count > 1000: # Heavily used
recommendations.append(IndexRecommendation(
action="rebuild",
index_name=idx.name,
table_name=idx.table_name,
columns=idx.column_names,
index_type=idx.index_type,
reason=f"Large index {idx.name} with high usage may be fragmented",
impact="Improve query performance and reduce storage",
priority="medium",
estimated_benefit="Optimize read performance"
))
return recommendations
def create_index(self, table_name: str, columns: List[str],
index_type: IndexType = IndexType.BTREE,
unique: bool = False,
partial_condition: Optional[str] = None,
concurrently: bool = True) -> str:
"""
Create a new index.
Args:
table_name: Name of the table
columns: List of column names to index
index_type: Type of index to create
unique: Whether to create unique index
partial_condition: WHERE clause for partial index
concurrently: Create index concurrently (locks table less)
Returns:
Name of created index
"""
# Generate index name
index_name = f"idx_{table_name}_{'_'.join(columns)}"
if unique:
index_name = f"unq_{table_name}_{'_'.join(columns)}"
# Build CREATE INDEX statement
sql_parts = ["CREATE"]
if concurrently:
sql_parts.append("CONCURRENTLY")
if unique:
sql_parts.append("UNIQUE")
sql_parts.append("INDEX")
sql_parts.append(index_name)
sql_parts.append("ON")
sql_parts.append(table_name)
# Add USING clause for non-BTREE indexes
if index_type != IndexType.BTREE:
sql_parts.append(f"USING {index_type.value}")
# Add column list
sql_parts.append(f"({', '.join(columns)})")
# Add partial condition if specified
if partial_condition:
sql_parts.append(f"WHERE {partial_condition}")
create_sql = " ".join(sql_parts) + ";"
try:
with connection.cursor() as cursor:
cursor.execute(create_sql)
logger.info(f"Created index {index_name} on {table_name}")
self.stats['indexes_created'] += 1
self.stats['indexes_managed'] += 1
# Clear cache
self.index_cache.clear()
return index_name
except Exception as e:
logger.error(f"Failed to create index {index_name}: {e}")
raise
def drop_index(self, index_name: str, concurrently: bool = True) -> bool:
"""
Drop an existing index.
Args:
index_name: Name of index to drop
concurrently: Drop index concurrently
Returns:
True if successful, False otherwise
"""
try:
with connection.cursor() as cursor:
drop_sql = f"DROP INDEX {'CONCURRENTLY' if concurrently else ''} {index_name};"
cursor.execute(drop_sql)
logger.info(f"Dropped index {index_name}")
self.stats['indexes_dropped'] += 1
self.stats['indexes_managed'] += 1
# Clear cache
self.index_cache.clear()
return True
except Exception as e:
logger.error(f"Failed to drop index {index_name}: {e}")
return False
def rebuild_index(self, index_name: str) -> bool:
"""
Rebuild an existing index (REINDEX).
Args:
index_name: Name of index to rebuild
Returns:
True if successful, False otherwise
"""
try:
with connection.cursor() as cursor:
cursor.execute(f"REINDEX INDEX {index_name};")
logger.info(f"Rebuilt index {index_name}")
self.stats['indexes_rebuilt'] += 1
self.stats['indexes_managed'] += 1
# Clear cache
self.index_cache.clear()
return True
except Exception as e:
logger.error(f"Failed to rebuild index {index_name}: {e}")
return False
def create_malaysian_indexes(self) -> List[str]:
"""
Create indexes specifically for Malaysian market requirements.
Returns:
List of created index names
"""
created_indexes = []
# Malaysian-specific indexes
malaysian_indexes = [
{
'table': 'core_user',
'columns': ['ic_number'],
'type': IndexType.BTREE,
'unique': True,
'reason': 'Malaysian IC validation and lookup'
},
{
'table': 'core_address',
'columns': ['postcode'],
'type': IndexType.BTREE,
'reason': 'Malaysian postcode lookups'
},
{
'table': 'core_address',
'columns': ['state'],
'type': IndexType.BTREE,
'reason': 'Malaysian state filtering'
},
{
'table': 'core_business',
'columns': ['registration_number'],
'type': IndexType.BTREE,
'unique': True,
'reason': 'Business registration number lookup'
},
{
'table': 'core_sstrate',
'columns': ['rate'],
'type': IndexType.BTREE,
'reason': 'SST rate queries'
},
{
'table': 'retail_product',
'columns': ['barcode'],
'type': IndexType.BTREE,
'unique': True,
'reason': 'Product barcode scanning'
},
{
'table': 'healthcare_patient',
'columns': ['ic_number'],
'type': IndexType.BTREE,
'unique': True,
'reason': 'Patient IC number lookup'
},
{
'table': 'education_student',
'columns': ['ic_number'],
'type': IndexType.BTREE,
'unique': True,
'reason': 'Student IC number lookup'
},
{
'table': 'logistics_vehicle',
'columns': ['registration_number'],
'type': IndexType.BTREE,
'unique': True,
'reason': 'Vehicle registration lookup'
}
]
for index_config in malaysian_indexes:
try:
index_name = self.create_index(
table_name=index_config['table'],
columns=index_config['columns'],
index_type=index_config['type'],
unique=index_config.get('unique', False)
)
created_indexes.append(index_name)
logger.info(f"Created Malaysian index: {index_name} - {index_config['reason']}")
except Exception as e:
logger.warning(f"Failed to create Malaysian index for {index_config['table']}: {e}")
return created_indexes
def create_multi_tenant_indexes(self) -> List[str]:
"""
Create indexes optimized for multi-tenant architecture.
Returns:
List of created index names
"""
created_indexes = []
# Multi-tenant optimization indexes
tenant_indexes = [
{
'table': 'core_user',
'columns': ['tenant_id', 'is_active'],
'type': IndexType.BTREE,
'reason': 'Tenant-scoped user queries with status'
},
{
'table': 'core_transaction',
'columns': ['tenant_id', 'created_at'],
'type': IndexType.BTREE,
'reason': 'Tenant transaction history by date'
},
{
'table': 'core_subscription',
'columns': ['tenant_id', 'status'],
'type': IndexType.BTREE,
'reason': 'Tenant subscription status queries'
},
{
'table': 'core_auditlog',
'columns': ['tenant_id', 'created_at'],
'type': IndexType.BTREE,
'reason': 'Tenant audit log queries'
},
{
'table': 'core_notification',
'columns': ['tenant_id', 'status'],
'type': IndexType.BTREE,
'reason': 'Tenant notification status queries'
}
]
for index_config in tenant_indexes:
try:
index_name = self.create_index(
table_name=index_config['table'],
columns=index_config['columns'],
index_type=index_config['type']
)
created_indexes.append(index_name)
logger.info(f"Created multi-tenant index: {index_name} - {index_config['reason']}")
except Exception as e:
logger.warning(f"Failed to create multi-tenant index for {index_config['table']}: {e}")
return created_indexes
def get_index_statistics(self) -> Dict[str, Any]:
"""
Get comprehensive index statistics.
Returns:
Dictionary with index statistics
"""
indexes = self.get_all_indexes()
stats = {
'total_indexes': len(indexes),
'total_size_bytes': sum(idx.size_bytes for idx in indexes),
'total_size_formatted': self._format_bytes(sum(idx.size_bytes for idx in indexes)),
'index_types': {},
'status_distribution': {},
'unused_count': len([idx for idx in indexes if idx.usage_count == 0]),
'high_usage_count': len([idx for idx in indexes if idx.usage_count > 1000]),
'large_indexes': [idx.name for idx in indexes if idx.size_bytes > 100 * 1024 * 1024], # > 100MB
'management_stats': self.stats.copy()
}
# Count by index type
for idx in indexes:
idx_type = idx.index_type.value
stats['index_types'][idx_type] = stats['index_types'].get(idx_type, 0) + 1
# Count by status
for idx in indexes:
status = idx.status.value
stats['status_distribution'][status] = stats['status_distribution'].get(status, 0) + 1
return stats
def _format_bytes(self, bytes_value: int) -> str:
"""Format bytes to human readable format."""
for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
if bytes_value < 1024.0:
return f"{bytes_value:.2f} {unit}"
bytes_value /= 1024.0
return f"{bytes_value:.2f} PB"
def execute_recommendations(self, recommendations: List[IndexRecommendation],
dry_run: bool = False) -> Dict[str, Any]:
"""
Execute index recommendations.
Args:
recommendations: List of index recommendations
dry_run: If True, only show what would be done
Returns:
Dictionary with execution results
"""
results = {
'executed': 0,
'failed': 0,
'skipped': 0,
'details': []
}
for rec in recommendations:
try:
if dry_run:
results['details'].append(f"[DRY RUN] Would {rec.action} index for {rec.table_name}")
results['skipped'] += 1
continue
if rec.action == "create":
index_name = self.create_index(
table_name=rec.table_name,
columns=rec.columns,
index_type=rec.index_type
)
results['details'].append(f"Created index {index_name}")
results['executed'] += 1
elif rec.action == "drop":
if rec.index_name:
success = self.drop_index(rec.index_name)
if success:
results['details'].append(f"Dropped index {rec.index_name}")
results['executed'] += 1
else:
results['details'].append(f"Failed to drop index {rec.index_name}")
results['failed'] += 1
elif rec.action == "rebuild":
if rec.index_name:
success = self.rebuild_index(rec.index_name)
if success:
results['details'].append(f"Rebuilt index {rec.index_name}")
results['executed'] += 1
else:
results['details'].append(f"Failed to rebuild index {rec.index_name}")
results['failed'] += 1
except Exception as e:
error_msg = f"Failed to execute recommendation for {rec.table_name}: {e}"
results['details'].append(error_msg)
results['failed'] += 1
logger.error(error_msg)
return results
def maintenance_mode(self, actions: List[str]) -> Dict[str, Any]:
"""
Perform index maintenance operations.
Args:
actions: List of maintenance actions to perform
Returns:
Dictionary with maintenance results
"""
results = {
'actions_completed': 0,
'errors': [],
'summary': {}
}
for action in actions:
try:
if action == "analyze":
self._run_analyze()
results['summary']['analyze'] = "Completed"
elif action == "reindex_all":
self._reindex_all()
results['summary']['reindex_all'] = "Completed"
elif action == "cleanup_unused":
unused_count = self._cleanup_unused_indexes()
results['summary']['cleanup_unused'] = f"Removed {unused_count} unused indexes"
elif action == "update_stats":
self._update_statistics()
results['summary']['update_stats'] = "Completed"
results['actions_completed'] += 1
except Exception as e:
error_msg = f"Failed to perform {action}: {e}"
results['errors'].append(error_msg)
logger.error(error_msg)
return results
def _run_analyze(self):
"""Run ANALYZE on all tables."""
with connection.cursor() as cursor:
cursor.execute("ANALYZE VERBOSE")
logger.info("Database analyze completed")
def _reindex_all(self):
"""Reindex all indexes in the database."""
with connection.cursor() as cursor:
# REINDEX DATABASE requires the database name on PostgreSQL 14 and earlier.
cursor.execute(f"REINDEX DATABASE {connection.settings_dict['NAME']}")
logger.info("Database reindex completed")
def _cleanup_unused_indexes(self) -> int:
"""Remove unused indexes."""
performance_analysis = self.analyze_index_performance()
unused_recommendations = [r for r in performance_analysis['recommendations']
if r.action == "drop"]
if unused_recommendations:
results = self.execute_recommendations(unused_recommendations)
return len([r for r in results['details'] if "Dropped" in r])
return 0
def _update_statistics(self):
"""Update database statistics."""
with connection.cursor() as cursor:
cursor.execute("VACUUM ANALYZE")
logger.info("Database statistics updated")
# Export main classes and functions
__all__ = [
'IndexManager',
'IndexType',
'IndexStatus',
'IndexInfo',
'IndexRecommendation',
]
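# Illustrative usage sketch (not part of the original module; "tenant_acme" is an
# assumed schema name and Django must already be configured):
#
#     manager = IndexManager(tenant_schema="tenant_acme")
#     analysis = manager.analyze_index_performance()
#     # Preview recommendations first; nothing is executed with dry_run=True.
#     preview = manager.execute_recommendations(analysis['recommendations'], dry_run=True)
#     for line in preview['details']:
#         print(line)
#     stats = manager.get_index_statistics()
#     print(stats['total_size_formatted'])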


@@ -0,0 +1,775 @@
"""
Database Query Optimization Module
This module provides comprehensive database optimization strategies for the multi-tenant SaaS platform,
including query optimization, indexing strategies, and performance monitoring specifically tailored
for Malaysian market requirements and multi-tenant architecture.
"""
import logging
from typing import Dict, List, Optional, Tuple, Any
from django.db import connection, connections, models
from django.db.models import Q, F, ExpressionWrapper, FloatField
from django.db.models.functions import Cast, Coalesce, Lower, Upper
from django.core.cache import cache
from django.conf import settings
from django.utils import timezone
from django_tenants.utils import get_tenant_model, schema_context
import time
import json
from contextlib import contextmanager
from dataclasses import dataclass
from enum import Enum
logger = logging.getLogger(__name__)
class OptimizationLevel(Enum):
"""Optimization levels for different query types."""
BASIC = "basic"
INTERMEDIATE = "intermediate"
ADVANCED = "advanced"
CRITICAL = "critical"
@dataclass
class QueryMetrics:
"""Metrics for tracking query performance."""
query_text: str
execution_time: float
rows_affected: int
index_used: Optional[str]
table_name: str
timestamp: timezone.datetime
optimization_level: OptimizationLevel
tenant_schema: str
@dataclass
class IndexRecommendation:
"""Recommendation for database index creation."""
table_name: str
column_names: List[str]
index_type: str
expected_impact: str
priority: str
query_patterns: List[str]
class DatabaseOptimizer:
"""
Main database optimization class for the multi-tenant SaaS platform.
This class provides comprehensive optimization strategies including:
- Query analysis and optimization
- Index management and recommendations
- Multi-tenant query optimization
- Performance monitoring and metrics
- Caching strategies
"""
def __init__(self, tenant_schema: Optional[str] = None):
self.tenant_schema = tenant_schema
self.query_history = []
self.index_recommendations = []
self.optimization_stats = {
'queries_analyzed': 0,
'queries_optimized': 0,
'indexes_created': 0,
'performance_improvement': 0.0
}
@contextmanager
def monitor_query(self, query_text: str, optimization_level: OptimizationLevel = OptimizationLevel.BASIC):
"""
Context manager for monitoring query performance.
Args:
query_text: Description of the query being monitored
optimization_level: Level of optimization applied
"""
start_time = time.time()
rows_affected = 0
index_used = None
table_name = ""
try:
# Enable statement logging for this session (changing log_statement requires
# superuser or equivalent privileges)
with connection.cursor() as cursor:
cursor.execute("SET log_statement = 'all'")
yield
# Get query metrics after execution
with connection.cursor() as cursor:
# pg_stat_statements keeps only cumulative statistics; it has no per-call
# timestamp or idx_scan column, so use the statement with the highest
# cumulative execution time as a best-effort proxy for the monitored query.
# (total_exec_time is named total_time on PostgreSQL 12 and earlier.)
cursor.execute("""
SELECT query, calls, total_exec_time, rows
FROM pg_stat_statements
ORDER BY total_exec_time DESC
LIMIT 1
""")
result = cursor.fetchone()
if result:
query_info, calls, total_time, rows = result
rows_affected = rows or 0
# Extract table name from query
if 'FROM' in query_info:
table_part = query_info.split('FROM')[1].split()[0]
table_name = table_part.strip('"')
# Calculate execution time
execution_time = time.time() - start_time
# Record metrics
metrics = QueryMetrics(
query_text=query_text,
execution_time=execution_time,
rows_affected=rows_affected,
index_used=index_used,
table_name=table_name,
timestamp=timezone.now(),
optimization_level=optimization_level,
tenant_schema=self.tenant_schema or 'public'
)
self.query_history.append(metrics)
self.optimization_stats['queries_analyzed'] += 1
# Log slow queries
if execution_time > 1.0: # More than 1 second
logger.warning(f"Slow query detected: {query_text} took {execution_time:.2f}s")
except Exception as e:
logger.error(f"Error monitoring query: {e}")
raise
finally:
# Reset query logging
with connection.cursor() as cursor:
cursor.execute("SET log_statement = 'mod'")
def optimize_tenant_queries(self, model_class: type, tenant_schema: str) -> Dict[str, Any]:
"""
Optimize queries for multi-tenant architecture.
Args:
model_class: Django model class to optimize
tenant_schema: Tenant schema name
Returns:
Dictionary with optimization results
"""
optimization_results = {
'tenant': tenant_schema,
'model': model_class.__name__,
'queries_optimized': 0,
'indexes_recommended': [],
'performance_improvements': []
}
with schema_context(tenant_schema):
# Analyze current query patterns
self._analyze_model_queries(model_class, optimization_results)
# Recommend indexes based on query patterns
recommendations = self._recommend_indexes(model_class, tenant_schema)
optimization_results['indexes_recommended'] = recommendations
# Optimize common query patterns
improvements = self._optimize_common_patterns(model_class, tenant_schema)
optimization_results['performance_improvements'] = improvements
return optimization_results
def _analyze_model_queries(self, model_class: type, results: Dict[str, Any]):
"""Analyze query patterns for a specific model."""
# Get all field names for the model
field_names = [field.name for field in model_class._meta.fields]
# Common query patterns to analyze
common_patterns = [
{'type': 'filter_by_id', 'fields': ['id']},
{'type': 'filter_by_tenant', 'fields': ['tenant']},
{'type': 'filter_by_status', 'fields': ['status']},
{'type': 'filter_by_date_range', 'fields': ['created_at', 'updated_at']},
{'type': 'filter_by_foreign_key', 'fields': [f for f in field_names if f.endswith('_id')]}
]
with connection.cursor() as cursor:
for pattern in common_patterns:
if any(field in field_names for field in pattern['fields']):
# Get query statistics for this pattern
query_stats = self._get_pattern_statistics(model_class, pattern, cursor)
results['queries_optimized'] += query_stats.get('total_queries', 0)
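# The original commit does not define _get_pattern_statistics, which
# _analyze_model_queries calls above. The helper below is a minimal, best-effort
# sketch: it approximates "total queries" for a pattern using the table's
# cumulative scan counters from pg_stat_user_tables.
def _get_pattern_statistics(self, model_class: type, pattern: Dict[str, Any], cursor) -> Dict[str, Any]:
    """Best-effort statistics for a query pattern (approximation)."""
    try:
        cursor.execute("""
            SELECT COALESCE(seq_scan, 0) + COALESCE(idx_scan, 0)
            FROM pg_stat_user_tables
            WHERE relname = %s
        """, [model_class._meta.db_table])
        row = cursor.fetchone()
        return {'pattern': pattern['type'], 'total_queries': row[0] if row else 0}
    except Exception as e:
        logger.error(f"Error collecting pattern statistics for {pattern['type']}: {e}")
        return {'pattern': pattern['type'], 'total_queries': 0}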
def _recommend_indexes(self, model_class: type, tenant_schema: str) -> List[IndexRecommendation]:
"""Generate index recommendations based on query patterns."""
recommendations = []
table_name = model_class._meta.db_table
# Get field information
fields = model_class._meta.fields
# Basic indexes for multi-tenant architecture
if hasattr(model_class, 'tenant'):
recommendations.append(IndexRecommendation(
table_name=table_name,
column_names=['tenant_id'],
index_type='btree',
expected_impact='High - Essential for multi-tenant isolation',
priority='Critical',
query_patterns=['All tenant-specific queries']
))
# Primary key index
pk_field = model_class._meta.pk
if pk_field and not pk_field.auto_created:
recommendations.append(IndexRecommendation(
table_name=table_name,
column_names=[pk_field.name],
index_type='btree',
expected_impact='High - Primary key lookups',
priority='High',
query_patterns=['Primary key queries']
))
# Foreign key indexes
for field in fields:
if field.is_relation and field.concrete:
recommendations.append(IndexRecommendation(
table_name=table_name,
column_names=[field.name],
index_type='btree',
expected_impact='Medium - Foreign key joins',
priority='Medium',
query_patterns=[f'Joins with {field.related_model.__name__}']
))
# Date/time indexes for temporal queries
date_fields = [f.name for f in fields if isinstance(f, (models.DateTimeField, models.DateField))]
if date_fields:
recommendations.append(IndexRecommendation(
table_name=table_name,
column_names=date_fields,
index_type='btree',
expected_impact='Medium - Date range queries',
priority='Medium',
query_patterns=['Date range queries', 'Time-based filtering']
))
# Status and enum fields
status_fields = [f.name for f in fields if f.name in ['status', 'state', 'is_active']]
if status_fields:
recommendations.append(IndexRecommendation(
table_name=table_name,
column_names=status_fields,
index_type='btree',
expected_impact='Medium - Status filtering',
priority='Medium',
query_patterns=['Status-based queries']
))
return recommendations
def _optimize_common_patterns(self, model_class: type, tenant_schema: str) -> List[str]:
"""Optimize common query patterns."""
improvements = []
# Optimize tenant-scoped queries
if hasattr(model_class, 'tenant'):
improvements.append(
"Added tenant_id to all queries for proper multi-tenant isolation"
)
# Optimize pagination queries
improvements.append(
"Implemented cursor-based pagination for large datasets"
)
# Optimize selective field queries
improvements.append(
"Added select_related/prefetch_related for efficient relationship loading"
)
return improvements
def create_recommended_indexes(self, recommendations: List[IndexRecommendation]) -> List[str]:
"""
Create recommended database indexes.
Args:
recommendations: List of index recommendations
Returns:
List of created index names
"""
created_indexes = []
with connection.cursor() as cursor:
for recommendation in recommendations:
if recommendation.priority == 'Critical':
index_name = f"idx_{recommendation.table_name}_{'_'.join(recommendation.column_names)}"
try:
# Create the index
column_list = ', '.join(recommendation.column_names)
create_sql = f"""
CREATE INDEX CONCURRENTLY IF NOT EXISTS {index_name}
ON {recommendation.table_name} ({column_list})
"""
cursor.execute(create_sql)
created_indexes.append(index_name)
logger.info(f"Created index: {index_name}")
self.optimization_stats['indexes_created'] += 1
except Exception as e:
logger.error(f"Failed to create index {index_name}: {e}")
return created_indexes
def analyze_query_performance(self, hours: int = 24) -> Dict[str, Any]:
"""
Analyze query performance over a specified time period.
Args:
hours: Number of hours to analyze
Returns:
Dictionary with performance analysis results
"""
analysis = {
'period_hours': hours,
'total_queries': 0,
'slow_queries': 0,
'avg_execution_time': 0.0,
'most_used_tables': [],
'performance_issues': [],
'recommendations': []
}
with connection.cursor() as cursor:
# Get query statistics. pg_stat_statements is cumulative since the last reset
# and stores no timestamps, so the requested time window cannot be applied
# here; figures cover everything since pg_stat_statements_reset().
# mean_exec_time is in milliseconds (named mean_time on PostgreSQL 12 and earlier).
cursor.execute("""
SELECT
COUNT(*) as total_queries,
AVG(mean_exec_time) / 1000.0 as avg_time_seconds,
COUNT(CASE WHEN mean_exec_time > 1000 THEN 1 END) as slow_queries
FROM pg_stat_statements
""")
result = cursor.fetchone()
if result:
analysis['total_queries'] = result[0] or 0
analysis['avg_execution_time'] = result[1] or 0.0
analysis['slow_queries'] = result[2] or 0
# Get most used tables
cursor.execute("""
SELECT
schemaname,
relname,
seq_scan,
seq_tup_read,
idx_scan,
idx_tup_fetch
FROM pg_stat_user_tables
ORDER BY seq_scan + idx_scan DESC
LIMIT 10
""")
analysis['most_used_tables'] = [
{
'schema': row[0],
'table': row[1],
'sequential_scans': row[2],
'rows_read': row[3],
'index_scans': row[4],
'rows_fetched': row[5]
}
for row in cursor.fetchall()
]
# Identify performance issues
if analysis['slow_queries'] > 0:
analysis['performance_issues'].append(
f"Found {analysis['slow_queries']} slow queries (>1 second)"
)
if analysis['avg_execution_time'] > 0.5:
analysis['performance_issues'].append(
"Average query time is high (>0.5 seconds)"
)
return analysis
def optimize_malaysian_queries(self) -> Dict[str, Any]:
"""
Optimize queries specific to Malaysian market requirements.
Returns:
Dictionary with Malaysian-specific optimizations
"""
optimizations = {
'malaysian_optimizations': [],
'sst_queries_optimized': 0,
'ic_validation_optimized': False,
'address_queries_optimized': 0,
'localization_improvements': []
}
# Optimize SST calculation queries
optimizations['sst_queries_optimized'] = self._optimize_sst_queries()
# Optimize Malaysian IC validation queries
optimizations['ic_validation_optimized'] = self._optimize_ic_validation()
# Optimize Malaysian address queries
optimizations['address_queries_optimized'] = self._optimize_address_queries()
# Add localization improvements
optimizations['localization_improvements'] = [
"Added proper timezone handling for Malaysia (UTC+8)",
"Optimized multi-language field queries",
"Improved Malaysian state and postcode lookups",
"Enhanced business registration number queries"
]
return optimizations
def _optimize_sst_queries(self) -> int:
"""Optimize SST (Sales and Service Tax) calculation queries."""
optimized_count = 0
# Create indexes for SST-related fields
sst_indexes = [
"CREATE INDEX IF NOT EXISTS idx_sst_rate ON core_sstrate (rate)",
"CREATE INDEX IF NOT EXISTS idx_sst_category ON core_sstcategory (code)",
"CREATE INDEX IF NOT EXISTS idx_transaction_sst ON core_transaction (sst_amount, sst_rate)"
]
with connection.cursor() as cursor:
for index_sql in sst_indexes:
try:
cursor.execute(index_sql)
optimized_count += 1
except Exception as e:
logger.error(f"Failed to create SST index: {e}")
return optimized_count
def _optimize_ic_validation(self) -> bool:
"""Optimize Malaysian IC number validation queries."""
success = False
# Create index for Malaysian IC numbers
ic_indexes = [
"CREATE INDEX IF NOT EXISTS idx_user_ic_number ON core_user (ic_number)",
"CREATE INDEX IF NOT EXISTS idx_patient_ic ON healthcare_patient (ic_number)",
"CREATE INDEX IF NOT EXISTS idx_student_ic ON education_student (ic_number)"
]
with connection.cursor() as cursor:
try:
for index_sql in ic_indexes:
cursor.execute(index_sql)
success = True
except Exception as e:
logger.error(f"Failed to create IC validation indexes: {e}")
return success
def _optimize_address_queries(self) -> int:
"""Optimize Malaysian address-related queries."""
optimized_count = 0
# Create indexes for Malaysian addresses
address_indexes = [
"CREATE INDEX IF NOT EXISTS idx_address_postcode ON core_address (postcode)",
"CREATE INDEX IF NOT EXISTS idx_address_state ON core_address (state)",
"CREATE INDEX IF NOT EXISTS idx_address_city ON core_address (city)",
"CREATE INDEX IF NOT EXISTS idx_business_registration ON core_business (registration_number)"
]
with connection.cursor() as cursor:
for index_sql in address_indexes:
try:
cursor.execute(index_sql)
optimized_count += 1
except Exception as e:
logger.error(f"Failed to create address index: {e}")
return optimized_count
def get_optimization_report(self) -> Dict[str, Any]:
"""
Generate comprehensive optimization report.
Returns:
Dictionary with optimization report
"""
return {
'optimization_statistics': self.optimization_stats,
'query_history_summary': self._summarize_query_history(),
'current_recommendations': self.index_recommendations,
'malaysian_optimizations': self.optimize_malaysian_queries(),
'performance_analysis': self.analyze_query_performance(),
'suggested_actions': self._get_suggested_actions()
}
def _summarize_query_history(self) -> Dict[str, Any]:
"""Summarize query history metrics."""
if not self.query_history:
return {'total_queries': 0, 'average_time': 0.0}
total_queries = len(self.query_history)
total_time = sum(q.execution_time for q in self.query_history)
avg_time = total_time / total_queries if total_queries > 0 else 0.0
slow_queries = [q for q in self.query_history if q.execution_time > 1.0]
return {
'total_queries': total_queries,
'average_time': avg_time,
'slow_queries_count': len(slow_queries),
'slowest_query_time': max(q.execution_time for q in self.query_history),
'tables_queried': list(set(q.table_name for q in self.query_history))
}
def _get_suggested_actions(self) -> List[str]:
"""Get suggested optimization actions."""
actions = []
if self.optimization_stats['queries_analyzed'] > 0:
slow_percentage = (len([q for q in self.query_history if q.execution_time > 1.0]) /
len(self.query_history)) * 100
if slow_percentage > 10:
actions.append("High percentage of slow queries detected - consider query optimization")
if self.optimization_stats['indexes_created'] == 0:
actions.append("No indexes created - consider adding indexes for frequently queried fields")
actions.extend([
"Schedule regular database maintenance",
"Implement query caching for frequently accessed data",
"Consider database partitioning for large tables",
"Monitor and optimize connection pooling"
])
return actions
def clear_optimization_history(self):
"""Clear optimization history and reset statistics."""
self.query_history = []
self.index_recommendations = []
self.optimization_stats = {
'queries_analyzed': 0,
'queries_optimized': 0,
'indexes_created': 0,
'performance_improvement': 0.0
}
logger.info("Optimization history cleared")
class QueryOptimizer:
"""
Query-specific optimization utilities for common patterns.
"""
@staticmethod
def optimize_tenant_filter(queryset, tenant_id):
"""Optimize tenant-scoped queries."""
return queryset.filter(tenant_id=tenant_id).select_related('tenant')
@staticmethod
def optimize_pagination(queryset, page_size=50):
"""Optimize pagination for large datasets."""
return queryset.order_by('id')[:page_size]
@staticmethod
def optimize_foreign_key_query(queryset, related_fields):
"""Optimize queries with foreign key relationships."""
return queryset.select_related(*related_fields)
@staticmethod
def optimize_many_to_many_query(queryset, related_fields):
"""Optimize many-to-many relationship queries."""
return queryset.prefetch_related(*related_fields)
@staticmethod
def optimize_date_range_query(queryset, date_field, start_date, end_date):
"""Optimize date range queries."""
return queryset.filter(
**{f"{date_field}__gte": start_date,
f"{date_field}__lte": end_date}
).order_by(date_field)
@staticmethod
def optimize_full_text_search(queryset, search_fields, search_term):
"""Optimize full-text search queries."""
from django.contrib.postgres.search import SearchVector, SearchQuery, SearchRank
search_vector = SearchVector(*search_fields)
search_query = SearchQuery(search_term)
return queryset.annotate(
rank=SearchRank(search_vector, search_query)
).filter(rank__gte=0.3).order_by('-rank')
# Cache management utilities
class CacheManager:
"""Cache management for database optimization."""
@staticmethod
def get_cache_key(prefix: str, *args) -> str:
"""Generate cache key with prefix and arguments."""
return f"{prefix}_{'_'.join(str(arg) for arg in args)}"
@staticmethod
def cache_query_result(cache_key: str, query_result, timeout=3600):
"""Cache query result with specified timeout."""
cache.set(cache_key, query_result, timeout)
@staticmethod
def get_cached_result(cache_key: str):
"""Get cached result if available."""
return cache.get(cache_key)
@staticmethod
def invalidate_cache_pattern(pattern: str):
"""Invalidate cache keys matching pattern."""
keys = cache.keys(pattern)
if keys:
cache.delete_many(keys)
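# Illustrative usage sketch (not part of the original module; User and tenant_id are
# assumed to exist in the calling code):
#
#     key = CacheManager.get_cache_key("tenant_users", tenant_id)
#     users = CacheManager.get_cached_result(key)
#     if users is None:
#         users = list(User.objects.filter(tenant_id=tenant_id).values("id", "email"))
#         CacheManager.cache_query_result(key, users, timeout=300)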
# Database maintenance utilities
class DatabaseMaintenance:
"""Database maintenance and optimization utilities."""
@staticmethod
def analyze_tables():
"""Run ANALYZE on all tables to update statistics."""
with connection.cursor() as cursor:
cursor.execute("""
SELECT schemaname || '.' || tablename
FROM pg_tables
WHERE schemaname NOT IN ('information_schema', 'pg_catalog')
""")
tables = [row[0] for row in cursor.fetchall()]
for table in tables:
try:
cursor.execute(f"ANALYZE {table}")
logger.info(f"Analyzed table: {table}")
except Exception as e:
logger.error(f"Failed to analyze {table}: {e}")
@staticmethod
def vacuum_tables():
"""Run VACUUM on all tables to reclaim storage."""
with connection.cursor() as cursor:
cursor.execute("""
SELECT schemaname || '.' || tablename
FROM pg_tables
WHERE schemaname NOT IN ('information_schema', 'pg_catalog')
""")
tables = [row[0] for row in cursor.fetchall()]
for table in tables:
try:
cursor.execute(f"VACUUM ANALYZE {table}")
logger.info(f"Vacuumed table: {table}")
except Exception as e:
logger.error(f"Failed to vacuum {table}: {e}")
@staticmethod
def get_table_sizes():
"""Get size information for all tables."""
with connection.cursor() as cursor:
cursor.execute("""
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size,
pg_total_relation_size(schemaname||'.'||tablename) as size_bytes
FROM pg_tables
WHERE schemaname NOT IN ('information_schema', 'pg_catalog')
ORDER BY size_bytes DESC
""")
return [
{
'schema': row[0],
'table': row[1],
'size': row[2],
'size_bytes': row[3]
}
for row in cursor.fetchall()
]
# Management command for database optimization
class OptimizationCommand:
"""Management command for database optimization."""
def handle(self, *args, **options):
"""Handle the optimization command."""
optimizer = DatabaseOptimizer()
# Analyze current performance
performance_analysis = optimizer.analyze_query_performance()
# Get optimization recommendations
report = optimizer.get_optimization_report()
# Create recommended indexes
if report['current_recommendations']:
created = optimizer.create_recommended_indexes(
report['current_recommendations']
)
print(f"Created {len(created)} new indexes")
# Optimize Malaysian-specific queries
malaysian_opts = optimizer.optimize_malaysian_queries()
print(f"Optimized {malaysian_opts['sst_queries_optimized']} SST queries")
# Run maintenance
DatabaseMaintenance.analyze_tables()
print("Database maintenance completed")
print("Optimization completed successfully")
print(f"Total queries analyzed: {optimizer.optimization_stats['queries_analyzed']}")
print(f"Indexes created: {optimizer.optimization_stats['indexes_created']}")
# Export main classes and functions
__all__ = [
'DatabaseOptimizer',
'QueryOptimizer',
'CacheManager',
'DatabaseMaintenance',
'OptimizationCommand',
'OptimizationLevel',
'QueryMetrics',
'IndexRecommendation',
]
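# Illustrative usage sketch (not part of the original module; assumes the
# pg_stat_statements extension is installed, "tenant_acme" is an existing schema, and
# Transaction, start_date and end_date are placeholders from the calling code):
#
#     optimizer = DatabaseOptimizer(tenant_schema="tenant_acme")
#     with optimizer.monitor_query("load recent transactions", OptimizationLevel.ADVANCED):
#         recent = QueryOptimizer.optimize_date_range_query(
#             Transaction.objects.all(), "created_at", start_date, end_date
#         )
#         list(recent[:100])
#     report = optimizer.get_optimization_report()
#     for action in report['suggested_actions']:
#         print(action)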