Files
multitenetsaas/backend/monitoring/views.py
AHMET YILMAZ b3fff546e9
Some checks failed
System Monitoring / Health Checks (push) Has been cancelled
System Monitoring / Performance Monitoring (push) Has been cancelled
System Monitoring / Database Monitoring (push) Has been cancelled
System Monitoring / Cache Monitoring (push) Has been cancelled
System Monitoring / Log Monitoring (push) Has been cancelled
System Monitoring / Resource Monitoring (push) Has been cancelled
System Monitoring / Uptime Monitoring (push) Has been cancelled
System Monitoring / Backup Monitoring (push) Has been cancelled
System Monitoring / Security Monitoring (push) Has been cancelled
System Monitoring / Monitoring Dashboard (push) Has been cancelled
System Monitoring / Alerting (push) Has been cancelled
Security Scanning / Dependency Scanning (push) Has been cancelled
Security Scanning / Code Security Scanning (push) Has been cancelled
Security Scanning / Secrets Scanning (push) Has been cancelled
Security Scanning / Container Security Scanning (push) Has been cancelled
Security Scanning / Compliance Checking (push) Has been cancelled
Security Scanning / Security Dashboard (push) Has been cancelled
Security Scanning / Security Remediation (push) Has been cancelled
project initialization
2025-10-05 02:37:33 +08:00

481 lines
18 KiB
Python

"""
Django views for monitoring and metrics endpoints.
"""
import json
from datetime import datetime, timedelta
from typing import Dict, Any, Optional
from django.http import JsonResponse, HttpResponse
from django.views.generic import TemplateView
from django.contrib.auth.mixins import LoginRequiredMixin
from django.db import connection
from django.utils import timezone
from django.conf import settings
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status
from prometheus_client import generate_latest, REGISTRY, CONTENT_TYPE_LATEST
from prometheus_client.parser import text_string_to_metric_families
from .middleware import MetricsView
from .exporters import metrics_collector
from .alerts import alert_manager, Alert, AlertSeverity, AlertCategory
# NOTE(review): this class rebinds the name ``MetricsView`` that the file also
# imports from ``.middleware``; after this definition the imported object is no
# longer reachable under that name. Confirm which one the URL conf expects.
class MetricsView(APIView):
    """Expose the Prometheus registry in the text exposition format."""

    def get(self, request):
        """Refresh the collectors and return the serialized registry.

        Returns:
            An ``HttpResponse`` carrying the output of ``generate_latest`` with
            the Prometheus content type, or a JSON error payload with HTTP 500
            if collection or serialization raises.
        """
        try:
            # Refresh gauges before serializing so the scrape sees current values.
            metrics_collector.collect_once()
            payload = generate_latest(REGISTRY)
            response = HttpResponse(payload, content_type=CONTENT_TYPE_LATEST)
        except Exception as exc:
            error_body = {'error': f'Failed to generate metrics: {str(exc)}'}
            response = JsonResponse(
                error_body,
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )
        return response
class HealthCheckView(APIView):
    """Health check endpoint.

    Aggregates the results of several independent probes (database, cache,
    storage, external services, Malaysian services) into one JSON document.
    Any probe that is not ``healthy`` downgrades the overall status to
    ``degraded``.
    """

    def get(self, request):
        """Run every probe and return the aggregated health report.

        Returns:
            ``Response`` with HTTP 200 when every probe is healthy, HTTP 503
            when at least one probe is not, and HTTP 500 if building the
            report itself raises.
        """
        try:
            health_status = {
                'status': 'healthy',
                'timestamp': timezone.now().isoformat(),
                'version': getattr(settings, 'VERSION', '1.0.0'),
                'environment': getattr(settings, 'ENVIRONMENT', 'development'),
                'checks': {}
            }
            checks = health_status['checks']

            # Database health. The timed probe is the check itself, so the
            # query runs once rather than once to check and once to time.
            try:
                checks['database'] = {
                    'status': 'healthy',
                    'response_time': self._measure_response_time(self._check_database)
                }
            except Exception as e:
                checks['database'] = {'status': 'unhealthy', 'error': str(e)}
                health_status['status'] = 'degraded'

            # Cache health (same single timed probe as above).
            try:
                checks['cache'] = {
                    'status': 'healthy',
                    'response_time': self._measure_response_time(self._check_cache)
                }
            except Exception as e:
                checks['cache'] = {'status': 'unhealthy', 'error': str(e)}
                health_status['status'] = 'degraded'

            # Storage health
            try:
                storage_health = self._check_storage()
                checks['storage'] = storage_health
                if storage_health['status'] != 'healthy':
                    health_status['status'] = 'degraded'
            except Exception as e:
                checks['storage'] = {'status': 'unhealthy', 'error': str(e)}
                health_status['status'] = 'degraded'

            # External services health
            external_services = self._check_external_services()
            checks['external_services'] = external_services
            if any(service['status'] != 'healthy' for service in external_services.values()):
                health_status['status'] = 'degraded'

            # Malaysian services health
            malaysian_services = self._check_malaysian_services()
            checks['malaysian_services'] = malaysian_services
            if any(service['status'] != 'healthy' for service in malaysian_services.values()):
                health_status['status'] = 'degraded'

            # The overall status is only ever 'healthy' or 'degraded' here.
            if health_status['status'] == 'healthy':
                http_status = status.HTTP_200_OK
            else:
                http_status = status.HTTP_503_SERVICE_UNAVAILABLE
            return Response(health_status, status=http_status)
        except Exception as e:
            return Response(
                {'status': 'unhealthy', 'error': str(e)},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR
            )

    def _measure_response_time(self, func) -> float:
        """Call ``func`` with no arguments and return its duration in seconds.

        Exceptions raised by ``func`` propagate to the caller.
        """
        import time
        # perf_counter is monotonic, so clock adjustments cannot skew the result.
        start_time = time.perf_counter()
        func()
        return time.perf_counter() - start_time

    def _check_database(self):
        """Run ``SELECT 1`` on the default connection; raises on failure."""
        with connection.cursor() as cursor:
            cursor.execute("SELECT 1")
            cursor.fetchone()

    def _check_cache(self):
        """Write a key to the default cache and read it back.

        Raises:
            Exception: if the value read back differs from the value written.
        """
        from django.core.cache import cache
        cache.set('health_check', 'test', 1)
        result = cache.get('health_check')
        if result != 'test':
            raise Exception("Cache functionality failed")

    def _check_storage(self) -> Dict[str, Any]:
        """Check that the media directory exists, is writable and has space.

        Returns:
            A dict with a ``status`` key (``healthy``, ``degraded`` or
            ``unhealthy``) plus either ``free_space_percent`` or ``error``.
        """
        try:
            import os
            import shutil
            import tempfile

            # Check media directory
            media_path = getattr(settings, 'MEDIA_ROOT', '/media')
            if not os.path.exists(media_path):
                return {'status': 'unhealthy', 'error': 'Media directory not found'}

            # Check write permissions. A uniquely named temporary file avoids
            # concurrent health checks deleting each other's probe file, and
            # the context manager removes it even if the write fails.
            try:
                with tempfile.NamedTemporaryFile(
                    mode='w', dir=media_path, prefix='health_check_', suffix='.tmp'
                ) as f:
                    f.write('test')
            except Exception as e:
                return {'status': 'unhealthy', 'error': f'Write permission error: {str(e)}'}

            # Check disk space (portable replacement for os.statvfs).
            usage = shutil.disk_usage(media_path)
            if usage.total == 0:
                # Guard the division below; some filesystems report no capacity.
                return {'status': 'unhealthy', 'error': 'Unable to determine disk capacity'}
            free_space_percent = usage.free / usage.total * 100
            if free_space_percent < 10:
                return {
                    'status': 'degraded',
                    'error': f'Low disk space: {free_space_percent:.1f}% free'
                }
            return {'status': 'healthy', 'free_space_percent': free_space_percent}
        except Exception as e:
            return {'status': 'unhealthy', 'error': str(e)}

    def _check_external_services(self) -> Dict[str, Dict[str, Any]]:
        """Probe the email backend, Redis and any configured external APIs.

        Returns:
            A mapping of service name to a dict with ``status`` and, on
            failure, ``error``. Redis is only included when
            ``settings.REDIS_URL`` is set; external APIs come from
            ``settings.EXTERNAL_APIS``.
        """
        services = {}

        # Check email service. The local name deliberately differs from the
        # module-level ``connection`` imported from django.db.
        try:
            from django.core.mail import get_connection
            mail_connection = get_connection()
            try:
                mail_connection.open()
            finally:
                mail_connection.close()
            services['email'] = {'status': 'healthy'}
        except Exception as e:
            services['email'] = {'status': 'unhealthy', 'error': str(e)}

        # Check Redis (if configured). A deployment without REDIS_URL is not
        # reported as unhealthy; the entry is simply omitted.
        redis_url = getattr(settings, 'REDIS_URL', None)
        if redis_url:
            try:
                import redis
                redis_client = redis.from_url(redis_url)
                redis_client.ping()
                services['redis'] = {'status': 'healthy'}
            except Exception as e:
                services['redis'] = {'status': 'unhealthy', 'error': str(e)}

        # Check external APIs (if configured)
        external_apis = getattr(settings, 'EXTERNAL_APIS', {})
        for api_name, api_config in external_apis.items():
            try:
                import requests
                response = requests.get(
                    api_config['health_url'],
                    timeout=api_config.get('timeout', 5)
                )
                if response.status_code == 200:
                    services[api_name] = {'status': 'healthy'}
                else:
                    services[api_name] = {
                        'status': 'unhealthy',
                        'error': f'HTTP {response.status_code}'
                    }
            except Exception as e:
                services[api_name] = {'status': 'unhealthy', 'error': str(e)}
        return services

    def _check_malaysian_services(self) -> Dict[str, Dict[str, Any]]:
        """Smoke-test the postcode, SST and IC validation services.

        Each service is imported lazily and called with a fixed sample input.
        A call that raises is reported as ``unhealthy``; a call that returns
        an empty/None result is reported as ``degraded``.
        """
        services = {}

        # Check Malaysian postcode service
        try:
            from core.services.malaysian_services import MalaysianPostcodeService
            postcode_service = MalaysianPostcodeService()
            result = postcode_service.lookup_postcode('50000')
            services['postcode_service'] = {
                'status': 'healthy' if result else 'degraded'
            }
        except Exception as e:
            services['postcode_service'] = {'status': 'unhealthy', 'error': str(e)}

        # Check SST calculation service
        try:
            from core.services.malaysian_services import SSTCalculationService
            sst_service = SSTCalculationService()
            result = sst_service.calculate_sst(100, 'standard', 'Johor')
            services['sst_service'] = {
                'status': 'healthy' if result is not None else 'degraded'
            }
        except Exception as e:
            services['sst_service'] = {'status': 'unhealthy', 'error': str(e)}

        # Check IC validation service
        try:
            from core.services.malaysian_services import ICValidationService
            ic_service = ICValidationService()
            result = ic_service.validate_ic('1234567890')
            services['ic_validation_service'] = {
                'status': 'healthy' if result is not None else 'degraded'
            }
        except Exception as e:
            services['ic_validation_service'] = {'status': 'unhealthy', 'error': str(e)}
        return services
class AlertsView(APIView):
    """Alerts management endpoint."""

    def get(self, request):
        """List alerts, optionally filtered.

        Query parameters:
            severity: only return alerts with this severity.
            category: only return alerts with this category.
            status: ``active`` (default) for active alerts; any other value
                returns alert history.
            hours: history window in hours (default 24); must be an integer.

        Returns:
            ``Response`` with ``alerts``, ``summary`` and ``timestamp``;
            HTTP 400 if ``hours`` is not an integer; HTTP 500 on any other
            failure.
        """
        try:
            # Validate input before doing any work; a bad value is a client error.
            try:
                hours = int(request.query_params.get('hours', 24))
            except (TypeError, ValueError):
                return Response(
                    {'error': "Invalid 'hours' parameter: must be an integer"},
                    status=status.HTTP_400_BAD_REQUEST
                )

            # Check for new alerts
            alert_manager.check_rules()

            severity = request.query_params.get('severity')
            category = request.query_params.get('category')
            # Must not be named ``status``: that would shadow the
            # rest_framework ``status`` module used for the response codes
            # in this method, including the error handler below.
            alert_status = request.query_params.get('status', 'active')

            # Get alerts
            if alert_status == 'active':
                alerts = alert_manager.get_active_alerts(severity=severity, category=category)
            else:
                alerts = alert_manager.get_alert_history(hours=hours)
                if severity:
                    alerts = [a for a in alerts if a.severity == severity]
                if category:
                    alerts = [a for a in alerts if a.category == category]

            # Convert to response format
            response_data = {
                'alerts': [alert.to_dict() for alert in alerts],
                'summary': self._get_alerts_summary(alerts),
                'timestamp': timezone.now().isoformat()
            }
            return Response(response_data)
        except Exception as e:
            return Response(
                {'error': f'Failed to get alerts: {str(e)}'},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR
            )

    def post(self, request):
        """Create a manual alert from the request body.

        The body must contain ``title`` and ``description``; ``severity``,
        ``category`` and ``metadata`` are optional.

        Returns:
            HTTP 201 with the new alert id; HTTP 400 if a required field is
            missing; HTTP 500 if creating or triggering the alert fails.
        """
        try:
            data = request.data
            missing = [field for field in ('title', 'description') if field not in data]
            if missing:
                return Response(
                    {'error': f"Missing required field(s): {', '.join(missing)}"},
                    status=status.HTTP_400_BAD_REQUEST
                )
            alert = Alert(
                title=data['title'],
                description=data['description'],
                severity=data.get('severity', AlertSeverity.INFO),
                category=data.get('category', AlertCategory.SYSTEM),
                metadata=data.get('metadata', {})
            )
            alert_manager.trigger_alert(alert)
            return Response(
                {'message': 'Alert created successfully', 'alert_id': alert.id},
                status=status.HTTP_201_CREATED
            )
        except Exception as e:
            return Response(
                {'error': f'Failed to create alert: {str(e)}'},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR
            )

    def _get_alerts_summary(self, alerts) -> Dict[str, Any]:
        """Count ``alerts`` in total and grouped by severity, category and status.

        Args:
            alerts: a sized iterable of alert objects exposing ``severity``,
                ``category`` and ``get_status()``.
        """
        summary = {
            'total': len(alerts),
            'by_severity': {},
            'by_category': {},
            'by_status': {}
        }
        by_severity = summary['by_severity']
        by_category = summary['by_category']
        by_status = summary['by_status']
        for alert in alerts:
            by_severity[alert.severity] = by_severity.get(alert.severity, 0) + 1
            by_category[alert.category] = by_category.get(alert.category, 0) + 1
            alert_status = alert.get_status()
            by_status[alert_status] = by_status.get(alert_status, 0) + 1
        return summary
class AlertActionView(APIView):
    """Endpoint that applies a state change to a single alert."""

    def post(self, request, alert_id: str, action: str):
        """Acknowledge or resolve the alert identified by ``alert_id``.

        Args:
            request: the incoming request; its user name is recorded as the
                actor, or ``'api_user'`` when the user is not authenticated.
            alert_id: identifier passed through to the alert manager.
            action: ``'acknowledge'`` or ``'resolve'``.

        Returns:
            A confirmation message on success, HTTP 400 for any other
            ``action``, HTTP 500 if the alert manager raises.
        """
        try:
            # Reject unsupported actions first so nothing else is touched.
            if action not in ('acknowledge', 'resolve'):
                return Response(
                    {'error': f'Unknown action: {action}'},
                    status=status.HTTP_400_BAD_REQUEST
                )

            if request.user.is_authenticated:
                actor = request.user.username
            else:
                actor = 'api_user'

            if action == 'acknowledge':
                alert_manager.acknowledge_alert(alert_id, actor)
                return Response({'message': f'Alert {alert_id} acknowledged'})

            alert_manager.resolve_alert(alert_id, actor)
            return Response({'message': f'Alert {alert_id} resolved'})
        except Exception as exc:
            failure = f'Failed to perform action {action} on alert {alert_id}: {str(exc)}'
            return Response(
                {'error': failure},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR
            )
class MonitoringDashboardView(LoginRequiredMixin, TemplateView):
    """Monitoring dashboard template view (login required)."""

    template_name = 'monitoring/dashboard.html'

    def get_context_data(self, **kwargs):
        """Build the template context.

        Adds ``active_alerts``, ``alert_summary``, ``system_metrics``,
        ``business_metrics`` and ``malaysian_metrics`` to the base context.
        Each metrics section is best-effort and is an empty dict when its
        collection fails.
        """
        context = super().get_context_data(**kwargs)
        # Get current alerts
        context['active_alerts'] = alert_manager.get_active_alerts()
        context['alert_summary'] = self._get_alerts_summary(context['active_alerts'])
        # Get system metrics
        context['system_metrics'] = self._get_system_metrics()
        # Get business metrics
        context['business_metrics'] = self._get_business_metrics()
        # Malaysian-specific metrics
        context['malaysian_metrics'] = self._get_malaysian_metrics()
        return context

    def _get_alerts_summary(self, alerts) -> Dict[str, Any]:
        """Return the total number of ``alerts`` and a count per severity."""
        def count(severity) -> int:
            return sum(1 for a in alerts if a.severity == severity)

        return {
            'total': len(alerts),
            'critical': count(AlertSeverity.CRITICAL),
            'error': count(AlertSeverity.ERROR),
            'warning': count(AlertSeverity.WARNING),
            'info': count(AlertSeverity.INFO),
        }

    def _get_system_metrics(self) -> Dict[str, Any]:
        """Read CPU, memory, disk, load and uptime via psutil.

        Returns an empty dict (and logs the exception) if psutil is missing
        or any reading fails.
        """
        try:
            import psutil
            return {
                # NOTE: interval=1 blocks this request for one second while sampling.
                'cpu_usage': psutil.cpu_percent(interval=1),
                'memory_usage': psutil.virtual_memory().percent,
                'disk_usage': psutil.disk_usage('/').percent,
                'load_average': psutil.getloadavg()[0],
                'uptime': datetime.now() - datetime.fromtimestamp(psutil.boot_time()),
            }
        except Exception:
            self._log_metrics_failure('system')
            return {}

    def _get_business_metrics(self) -> Dict[str, Any]:
        """Count recently active users and today's completed transactions.

        Returns an empty dict (and logs the exception) on any failure.
        """
        try:
            from django.contrib.auth import get_user_model
            from core.models import Transaction
            User = get_user_model()
            # Active users: logged in within the last 30 minutes.
            active_users = User.objects.filter(
                is_active=True,
                last_login__gte=timezone.now() - timedelta(minutes=30)
            ).count()
            # Today's transactions
            today_transactions = Transaction.objects.filter(
                created_at__date=self._today(),
                status='completed'
            ).count()
            return {
                'active_users': active_users,
                'today_transactions': today_transactions,
            }
        except Exception:
            self._log_metrics_failure('business')
            return {}

    def _get_malaysian_metrics(self) -> Dict[str, Any]:
        """Count today's IC validations and SST calculations.

        Returns an empty dict (and logs the exception) on any failure.
        """
        try:
            from core.models import MalaysianICValidation, SSTCalculation
            today = self._today()
            return {
                'ic_validations_today': MalaysianICValidation.objects.filter(
                    created_at__date=today
                ).count(),
                'sst_calculations_today': SSTCalculation.objects.filter(
                    created_at__date=today
                ).count(),
            }
        except Exception:
            self._log_metrics_failure('malaysian')
            return {}

    @staticmethod
    def _today():
        """Return today's date in the current time zone.

        ``timezone.now().date()`` is the UTC date when time zone support is
        enabled, whereas the ``__date`` lookup compares in the current time
        zone; converting first keeps the two consistent. With time zone
        support disabled ``now()`` is naive and its date is used as-is.
        """
        now = timezone.now()
        if timezone.is_aware(now):
            return timezone.localtime(now).date()
        return now.date()

    @staticmethod
    def _log_metrics_failure(section: str) -> None:
        """Log the exception currently being handled for a metrics section."""
        import logging
        logging.getLogger(__name__).exception(
            'Failed to collect %s metrics for monitoring dashboard', section
        )
class MetricsDashboardView(LoginRequiredMixin, TemplateView):
"""Metrics dashboard template view."""
template_name = 'monitoring/metrics_dashboard.html'