"""
Database Index Management Module

This module provides comprehensive index management utilities for the multi-tenant SaaS platform,
including index creation, monitoring, optimization, and maintenance specifically designed for
PostgreSQL with multi-tenant architecture and Malaysian market requirements.
"""

import logging
from typing import Dict, List, Optional, Tuple, Any, Set
from django.db import connection, connections
from django.core.cache import cache
from django.utils import timezone
from django_tenants.utils import schema_context
import time
import json
from dataclasses import dataclass, asdict
from enum import Enum
import re


logger = logging.getLogger(__name__)


class IndexType(Enum):
    """Types of database indexes."""
    BTREE = "btree"
    HASH = "hash"
    GIST = "gist"
    GIN = "gin"
    BRIN = "brin"
    SPGIST = "spgist"
    PARTIAL = "partial"
    EXPRESSION = "expression"
    UNIQUE = "unique"
    COMPOSITE = "composite"


class IndexStatus(Enum):
    """Status of database indexes."""
    ACTIVE = "active"
    INACTIVE = "inactive"
    INVALID = "invalid"
    CREATING = "creating"
    DROPPING = "dropping"
    REBUILDING = "rebuilding"


@dataclass
class IndexInfo:
    """Information about a database index."""
    name: str
    table_name: str
    column_names: List[str]
    index_type: IndexType
    status: IndexStatus
    is_unique: bool
    is_primary: bool
    size_bytes: int
    usage_count: int
    last_used: Optional[timezone.datetime]
    create_statement: str
    tenant_schema: str


@dataclass
class IndexRecommendation:
    """Recommendation for index management."""
    action: str  # 'create', 'drop', 'rebuild', 'modify'
    index_name: Optional[str]
    table_name: str
    columns: List[str]
    index_type: IndexType
    reason: str
    impact: str
    priority: str  # 'low', 'medium', 'high', 'critical'
    estimated_benefit: str


class IndexManager:
    """
    Comprehensive index management system for the multi-tenant SaaS platform.

    Features:
    - Automatic index creation and management
    - Performance monitoring and analysis
    - Multi-tenant index optimization
    - Malaysian market-specific indexing
    - Index maintenance and cleanup
    """

    def __init__(self, tenant_schema: Optional[str] = None):
        self.tenant_schema = tenant_schema
        self.index_cache = {}
        self.last_analysis = None
        self.stats = {
            'indexes_managed': 0,
            'indexes_created': 0,
            'indexes_dropped': 0,
            'indexes_rebuilt': 0,
            'performance_improvement': 0.0
        }

    def get_all_indexes(self, refresh: bool = False) -> List[IndexInfo]:
        """
        Get all indexes in the database.

        Args:
            refresh: Force refresh from database

        Returns:
            List of IndexInfo objects
        """
        cache_key = f"all_indexes_{self.tenant_schema or 'public'}"

        if not refresh and cache_key in self.index_cache:
            return self.index_cache[cache_key]

        indexes = []

        with connection.cursor() as cursor:
            # Get basic index information
            cursor.execute("""
                SELECT
                    i.relname as index_name,
                    t.relname as table_name,
                    am.amname as index_type,
                    idx.indisunique as is_unique,
                    idx.indisprimary as is_primary,
                    pg_get_indexdef(idx.indexrelid) as definition,
                    pg_relation_size(i.relid) as size_bytes,
                    schemaname
                FROM pg_index idx
                JOIN pg_class i ON i.oid = idx.indexrelid
                JOIN pg_class t ON t.oid = idx.indrelid
                JOIN pg_namespace n ON n.oid = t.relnamespace
                JOIN pg_am am ON am.oid = i.relam
                WHERE schemaname = %s
                ORDER BY t.relname, i.relname
            """, [self.tenant_schema or 'public'])

            results = cursor.fetchall()

            for row in results:
                index_name, table_name, index_type_str, is_unique, is_primary, definition, size_bytes, schema = row

                # Extract column names from definition
                column_names = self._extract_column_names(definition)

                # Get usage statistics
                usage_info = self._get_index_usage(cursor, index_name, schema)

                # Determine index type
                index_type = self._determine_index_type(definition, index_type_str)

                # Get index status
                status = self._get_index_status(cursor, index_name, schema)

                index_info = IndexInfo(
                    name=index_name,
                    table_name=table_name,
                    column_names=column_names,
                    index_type=index_type,
                    status=status,
                    is_unique=is_unique,
                    is_primary=is_primary,
                    size_bytes=size_bytes or 0,
                    usage_count=usage_info.get('usage_count', 0),
                    last_used=usage_info.get('last_used'),
                    create_statement=definition,
                    tenant_schema=schema
                )

                indexes.append(index_info)

        self.index_cache[cache_key] = indexes
        self.last_analysis = timezone.now()
        return indexes

    def _extract_column_names(self, definition: str) -> List[str]:
        """Extract column names from index definition."""
        # Extract column names from CREATE INDEX statement
        match = re.search(r'ON\s+\w+\s*\(([^)]+)\)', definition)
        if match:
            columns_part = match.group(1]
            # Split by commas and clean up
            columns = [col.strip().strip('"') for col in columns_part.split(',')]
            return columns
        return []

    def _get_index_usage(self, cursor, index_name: str, schema: str) -> Dict[str, Any]:
        """Get index usage statistics."""
        try:
            cursor.execute("""
                SELECT
                    idx_scan as usage_count,
                    idx_tup_read as tuples_read,
                    idx_tup_fetch as tuples_fetched
                FROM pg_stat_user_indexes
                WHERE schemaname = %s AND indexrelname = %s
            """, [schema, index_name])

            result = cursor.fetchone()
            if result:
                return {
                    'usage_count': result[0] or 0,
                    'tuples_read': result[1] or 0,
                    'tuples_fetched': result[2] or 0,
                    'last_used': timezone.now() if result[0] > 0 else None
                }
        except Exception as e:
            logger.error(f"Error getting index usage for {index_name}: {e}")

        return {'usage_count': 0, 'tuples_read': 0, 'tuples_fetched': 0}

    def _determine_index_type(self, definition: str, am_name: str) -> IndexType:
        """Determine index type from definition and access method."""
        if am_name == "btree":
            # Check for special cases
            if "UNIQUE" in definition.upper():
                return IndexType.UNIQUE
            elif "WHERE" in definition.upper():
                return IndexType.PARTIAL
            elif "(" in definition and ")" in definition:
                # Check if it's an expression index
                content_between_parens = re.search(r'\(([^)]+)\)', definition)
                if content_between_parens:
                    content = content_between_parens.group(1)
                    if not all(col.strip().isalnum() for col in content.split(',')):
                        return IndexType.EXPRESSION
            return IndexType.BTREE
        elif am_name == "hash":
            return IndexType.HASH
        elif am_name == "gist":
            return IndexType.GIST
        elif am_name == "gin":
            return IndexType.GIN
        elif am_name == "brin":
            return IndexType.BRIN
        elif am_name == "spgist":
            return IndexType.SPGIST

        return IndexType.BTREE

    def _get_index_status(self, cursor, index_name: str, schema: str) -> IndexStatus:
        """Get current status of an index."""
        try:
            cursor.execute("""
                SELECT indisvalid
                FROM pg_index
                WHERE indexrelid = (
                    SELECT oid FROM pg_class
                    WHERE relname = %s AND relnamespace = (
                        SELECT oid FROM pg_namespace WHERE nspname = %s
                    )
                )
            """, [index_name, schema])

            result = cursor.fetchone()
            if result:
                return IndexStatus.ACTIVE if result[0] else IndexStatus.INVALID
        except Exception as e:
            logger.error(f"Error getting index status for {index_name}: {e}")

        return IndexStatus.ACTIVE

    def analyze_index_performance(self) -> Dict[str, Any]:
        """
        Analyze index performance and generate recommendations.

        Returns:
            Dictionary with performance analysis and recommendations
        """
        indexes = self.get_all_indexes(refresh=True)
        recommendations = []

        # Analyze unused indexes
        unused_indexes = [
            idx for idx in indexes
            if idx.usage_count == 0 and not idx.is_primary
        ]

        for idx in unused_indexes:
            recommendations.append(IndexRecommendation(
                action="drop",
                index_name=idx.name,
                table_name=idx.table_name,
                columns=idx.column_names,
                index_type=idx.index_type,
                reason=f"Index {idx.name} has never been used",
                impact="Reduces storage and maintenance overhead",
                priority="medium",
                estimated_benefit=f"Save {self._format_bytes(idx.size_bytes)}"
            ))

        # Analyze duplicate indexes
        recommendations.extend(self._find_duplicate_indexes(indexes))

        # Analyze missing indexes
        recommendations.extend(self._find_missing_indexes())

        # Analyze fragmented indexes
        recommendations.extend(self._analyze_fragmentation(indexes))

        return {
            'total_indexes': len(indexes),
            'unused_indexes': len(unused_indexes),
            'total_index_size': sum(idx.size_bytes for idx in indexes),
            'recommendations': recommendations,
            'high_priority_count': len([r for r in recommendations if r.priority == 'critical']),
            'analysis_timestamp': timezone.now()
        }

    def _find_duplicate_indexes(self, indexes: List[IndexInfo]) -> List[IndexRecommendation]:
        """Find duplicate or redundant indexes."""
        recommendations = []
        index_groups = {}

        # Group indexes by table and columns
        for idx in indexes:
            key = (idx.table_name, tuple(sorted(idx.column_names)))
            if key not in index_groups:
                index_groups[key] = []
            index_groups[key].append(idx)

        for (table, columns), group in index_groups.items():
            if len(group) > 1:
                # Sort by usage and keep the most used
                group.sort(key=lambda x: x.usage_count, reverse=True)
                keep_idx = group[0]

                for drop_idx in group[1:]:
                    recommendations.append(IndexRecommendation(
                        action="drop",
                        index_name=drop_idx.name,
                        table_name=table,
                        columns=list(columns),
                        index_type=drop_idx.index_type,
                        reason=f"Duplicate index (redundant with {keep_idx.name})",
                        impact="Reduces storage and write overhead",
                        priority="low",
                        estimated_benefit=f"Save {self._format_bytes(drop_idx.size_bytes)}"
                    ))

        return recommendations

    def _find_missing_indexes(self) -> List[IndexRecommendation]:
        """Find potentially missing indexes based on query patterns."""
        recommendations = []

        with connection.cursor() as cursor:
            # Analyze sequential scans on large tables
            cursor.execute("""
                SELECT
                    schemaname,
                    tablename,
                    seq_scan,
                    seq_tup_read,
                    pg_total_relation_size(schemaname||'.'||tablename) as table_size
                FROM pg_stat_user_tables
                WHERE seq_scan > 1000
                AND pg_total_relation_size(schemaname||'.'||tablename) > 100 * 1024 * 1024
                ORDER BY seq_scan DESC
                LIMIT 10
            """)

            for row in cursor.fetchall():
                schema, table, seq_scan, seq_tup_read, table_size = row

                recommendations.append(IndexRecommendation(
                    action="create",
                    index_name=None,
                    table_name=table,
                    columns=["id"],  # Default recommendation
                    index_type=IndexType.BTREE,
                    reason=f"Table {table} has {seq_scan} sequential scans",
                    impact="Improve query performance for large table",
                    priority="high",
                    estimated_benefit=f"Reduce sequential scans by ~{int(seq_scan * 0.8)}"
                ))

        return recommendations

    def _analyze_fragmentation(self, indexes: List[IndexInfo]) -> List[IndexRecommendation]:
        """Analyze index fragmentation and recommend rebuilding."""
        recommendations = []

        with connection.cursor() as cursor:
            for idx in indexes:
                # Check index bloat (simplified check)
                if idx.size_bytes > 10 * 1024 * 1024:  # > 10MB
                    # Large indexes might benefit from rebuilding
                    if idx.usage_count > 1000:  # Heavily used
                        recommendations.append(IndexRecommendation(
                            action="rebuild",
                            index_name=idx.name,
                            table_name=idx.table_name,
                            columns=idx.column_names,
                            index_type=idx.index_type,
                            reason=f"Large index {idx.name} with high usage may be fragmented",
                            impact="Improve query performance and reduce storage",
                            priority="medium",
                            estimated_benefit="Optimize read performance"
                        ))

        return recommendations

    def create_index(self, table_name: str, columns: List[str],
                   index_type: IndexType = IndexType.BTREE,
                   unique: bool = False,
                   partial_condition: Optional[str] = None,
                   concurrently: bool = True) -> str:
        """
        Create a new index.

        Args:
            table_name: Name of the table
            columns: List of column names to index
            index_type: Type of index to create
            unique: Whether to create unique index
            partial_condition: WHERE clause for partial index
            concurrently: Create index concurrently (locks table less)

        Returns:
            Name of created index
        """
        # Generate index name
        index_name = f"idx_{table_name}_{'_'.join(columns)}"

        if unique:
            index_name = f"unq_{table_name}_{'_'.join(columns)}"

        # Build CREATE INDEX statement
        sql_parts = ["CREATE"]

        if concurrently:
            sql_parts.append("CONCURRENTLY")

        if unique:
            sql_parts.append("UNIQUE")

        sql_parts.append("INDEX")
        sql_parts.append(index_name)
        sql_parts.append("ON")
        sql_parts.append(table_name)

        # Add USING clause for non-BTREE indexes
        if index_type != IndexType.BTREE:
            sql_parts.append(f"USING {index_type.value}")

        # Add column list
        sql_parts.append(f"({', '.join(columns)})")

        # Add partial condition if specified
        if partial_condition:
            sql_parts.append(f"WHERE {partial_condition}")

        create_sql = " ".join(sql_parts) + ";"

        try:
            with connection.cursor() as cursor:
                cursor.execute(create_sql)

            logger.info(f"Created index {index_name} on {table_name}")
            self.stats['indexes_created'] += 1
            self.stats['indexes_managed'] += 1

            # Clear cache
            self.index_cache.clear()

            return index_name

        except Exception as e:
            logger.error(f"Failed to create index {index_name}: {e}")
            raise

    def drop_index(self, index_name: str, concurrently: bool = True) -> bool:
        """
        Drop an existing index.

        Args:
            index_name: Name of index to drop
            concurrently: Drop index concurrently

        Returns:
            True if successful, False otherwise
        """
        try:
            with connection.cursor() as cursor:
                drop_sql = f"DROP INDEX {'CONCURRENTLY' if concurrently else ''} {index_name};"
                cursor.execute(drop_sql)

            logger.info(f"Dropped index {index_name}")
            self.stats['indexes_dropped'] += 1
            self.stats['indexes_managed'] += 1

            # Clear cache
            self.index_cache.clear()

            return True

        except Exception as e:
            logger.error(f"Failed to drop index {index_name}: {e}")
            return False

    def rebuild_index(self, index_name: str) -> bool:
        """
        Rebuild an existing index (REINDEX).

        Args:
            index_name: Name of index to rebuild

        Returns:
            True if successful, False otherwise
        """
        try:
            with connection.cursor() as cursor:
                cursor.execute(f"REINDEX INDEX {index_name};")

            logger.info(f"Rebuilt index {index_name}")
            self.stats['indexes_rebuilt'] += 1
            self.stats['indexes_managed'] += 1

            # Clear cache
            self.index_cache.clear()

            return True

        except Exception as e:
            logger.error(f"Failed to rebuild index {index_name}: {e}")
            return False

    def create_malaysian_indexes(self) -> List[str]:
        """
        Create indexes specifically for Malaysian market requirements.

        Returns:
            List of created index names
        """
        created_indexes = []

        # Malaysian-specific indexes
        malaysian_indexes = [
            {
                'table': 'core_user',
                'columns': ['ic_number'],
                'type': IndexType.BTREE,
                'unique': True,
                'reason': 'Malaysian IC validation and lookup'
            },
            {
                'table': 'core_address',
                'columns': ['postcode'],
                'type': IndexType.BTREE,
                'reason': 'Malaysian postcode lookups'
            },
            {
                'table': 'core_address',
                'columns': ['state'],
                'type': IndexType.BTREE,
                'reason': 'Malaysian state filtering'
            },
            {
                'table': 'core_business',
                'columns': ['registration_number'],
                'type': IndexType.BTREE,
                'unique': True,
                'reason': 'Business registration number lookup'
            },
            {
                'table': 'core_sstrate',
                'columns': ['rate'],
                'type': IndexType.BTREE,
                'reason': 'SST rate queries'
            },
            {
                'table': 'retail_product',
                'columns': ['barcode'],
                'type': IndexType.BTREE,
                'unique': True,
                'reason': 'Product barcode scanning'
            },
            {
                'table': 'healthcare_patient',
                'columns': ['ic_number'],
                'type': IndexType.BTREE,
                'unique': True,
                'reason': 'Patient IC number lookup'
            },
            {
                'table': 'education_student',
                'columns': ['ic_number'],
                'type': IndexType.BTREE,
                'unique': True,
                'reason': 'Student IC number lookup'
            },
            {
                'table': 'logistics_vehicle',
                'columns': ['registration_number'],
                'type': IndexType.BTREE,
                'unique': True,
                'reason': 'Vehicle registration lookup'
            }
        ]

        for index_config in malaysian_indexes:
            try:
                index_name = self.create_index(
                    table_name=index_config['table'],
                    columns=index_config['columns'],
                    index_type=index_config['type'],
                    unique=index_config.get('unique', False)
                )
                created_indexes.append(index_name)
                logger.info(f"Created Malaysian index: {index_name} - {index_config['reason']}")

            except Exception as e:
                logger.warning(f"Failed to create Malaysian index for {index_config['table']}: {e}")

        return created_indexes

    def create_multi_tenant_indexes(self) -> List[str]:
        """
        Create indexes optimized for multi-tenant architecture.

        Returns:
            List of created index names
        """
        created_indexes = []

        # Multi-tenant optimization indexes
        tenant_indexes = [
            {
                'table': 'core_user',
                'columns': ['tenant_id', 'is_active'],
                'type': IndexType.BTREE,
                'reason': 'Tenant-scoped user queries with status'
            },
            {
                'table': 'core_transaction',
                'columns': ['tenant_id', 'created_at'],
                'type': IndexType.BTREE,
                'reason': 'Tenant transaction history by date'
            },
            {
                'table': 'core_subscription',
                'columns': ['tenant_id', 'status'],
                'type': IndexType.BTREE,
                'reason': 'Tenant subscription status queries'
            },
            {
                'table': 'core_auditlog',
                'columns': ['tenant_id', 'created_at'],
                'type': IndexType.BTREE,
                'reason': 'Tenant audit log queries'
            },
            {
                'table': 'core_notification',
                'columns': ['tenant_id', 'status'],
                'type': IndexType.BTREE,
                'reason': 'Tenant notification status queries'
            }
        ]

        for index_config in tenant_indexes:
            try:
                index_name = self.create_index(
                    table_name=index_config['table'],
                    columns=index_config['columns'],
                    index_type=index_config['type']
                )
                created_indexes.append(index_name)
                logger.info(f"Created multi-tenant index: {index_name} - {index_config['reason']}")

            except Exception as e:
                logger.warning(f"Failed to create multi-tenant index for {index_config['table']}: {e}")

        return created_indexes

    def get_index_statistics(self) -> Dict[str, Any]:
        """
        Get comprehensive index statistics.

        Returns:
            Dictionary with index statistics
        """
        indexes = self.get_all_indexes()

        stats = {
            'total_indexes': len(indexes),
            'total_size_bytes': sum(idx.size_bytes for idx in indexes),
            'total_size_formatted': self._format_bytes(sum(idx.size_bytes for idx in indexes)),
            'index_types': {},
            'status_distribution': {},
            'unused_count': len([idx for idx in indexes if idx.usage_count == 0]),
            'high_usage_count': len([idx for idx in indexes if idx.usage_count > 1000]),
            'large_indexes': [idx.name for idx in indexes if idx.size_bytes > 100 * 1024 * 1024],  # > 100MB
            'management_stats': self.stats.copy()
        }

        # Count by index type
        for idx in indexes:
            idx_type = idx.index_type.value
            stats['index_types'][idx_type] = stats['index_types'].get(idx_type, 0) + 1

        # Count by status
        for idx in indexes:
            status = idx.status.value
            stats['status_distribution'][status] = stats['status_distribution'].get(status, 0) + 1

        return stats

    def _format_bytes(self, bytes_value: int) -> str:
        """Format bytes to human readable format."""
        for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
            if bytes_value < 1024.0:
                return f"{bytes_value:.2f} {unit}"
            bytes_value /= 1024.0
        return f"{bytes_value:.2f} PB"

    def execute_recommendations(self, recommendations: List[IndexRecommendation],
                              dry_run: bool = False) -> Dict[str, Any]:
        """
        Execute index recommendations.

        Args:
            recommendations: List of index recommendations
            dry_run: If True, only show what would be done

        Returns:
            Dictionary with execution results
        """
        results = {
            'executed': 0,
            'failed': 0,
            'skipped': 0,
            'details': []
        }

        for rec in recommendations:
            try:
                if dry_run:
                    results['details'].append(f"[DRY RUN] Would {rec.action} index for {rec.table_name}")
                    results['skipped'] += 1
                    continue

                if rec.action == "create":
                    index_name = self.create_index(
                        table_name=rec.table_name,
                        columns=rec.columns,
                        index_type=rec.index_type
                    )
                    results['details'].append(f"Created index {index_name}")
                    results['executed'] += 1

                elif rec.action == "drop":
                    if rec.index_name:
                        success = self.drop_index(rec.index_name)
                        if success:
                            results['details'].append(f"Dropped index {rec.index_name}")
                            results['executed'] += 1
                        else:
                            results['details'].append(f"Failed to drop index {rec.index_name}")
                            results['failed'] += 1

                elif rec.action == "rebuild":
                    if rec.index_name:
                        success = self.rebuild_index(rec.index_name)
                        if success:
                            results['details'].append(f"Rebuilt index {rec.index_name}")
                            results['executed'] += 1
                        else:
                            results['details'].append(f"Failed to rebuild index {rec.index_name}")
                            results['failed'] += 1

            except Exception as e:
                error_msg = f"Failed to execute recommendation for {rec.table_name}: {e}"
                results['details'].append(error_msg)
                results['failed'] += 1
                logger.error(error_msg)

        return results

    def maintenance_mode(self, actions: List[str]) -> Dict[str, Any]:
        """
        Perform index maintenance operations.

        Args:
            actions: List of maintenance actions to perform

        Returns:
            Dictionary with maintenance results
        """
        results = {
            'actions_completed': 0,
            'errors': [],
            'summary': {}
        }

        for action in actions:
            try:
                if action == "analyze":
                    self._run_analyze()
                    results['summary']['analyze'] = "Completed"

                elif action == "reindex_all":
                    self._reindex_all()
                    results['summary']['reindex_all'] = "Completed"

                elif action == "cleanup_unused":
                    unused_count = self._cleanup_unused_indexes()
                    results['summary']['cleanup_unused'] = f"Removed {unused_count} unused indexes"

                elif action == "update_stats":
                    self._update_statistics()
                    results['summary']['update_stats'] = "Completed"

                results['actions_completed'] += 1

            except Exception as e:
                error_msg = f"Failed to perform {action}: {e}"
                results['errors'].append(error_msg)
                logger.error(error_msg)

        return results

    def _run_analyze(self):
        """Run ANALYZE on all tables."""
        with connection.cursor() as cursor:
            cursor.execute("ANALYZE VERBOSE")
        logger.info("Database analyze completed")

    def _reindex_all(self):
        """Reindex all indexes in the database."""
        with connection.cursor() as cursor:
            cursor.execute("REINDEX DATABASE")
        logger.info("Database reindex completed")

    def _cleanup_unused_indexes(self) -> int:
        """Remove unused indexes."""
        performance_analysis = self.analyze_index_performance()
        unused_recommendations = [r for r in performance_analysis['recommendations']
                                if r.action == "drop"]

        if unused_recommendations:
            results = self.execute_recommendations(unused_recommendations)
            return len([r for r in results['details'] if "Dropped" in r])

        return 0

    def _update_statistics(self):
        """Update database statistics."""
        with connection.cursor() as cursor:
            cursor.execute("VACUUM ANALYZE")
        logger.info("Database statistics updated")


# Export main classes and functions
__all__ = [
    'IndexManager',
    'IndexType',
    'IndexStatus',
    'IndexInfo',
    'IndexRecommendation',
]