Files
multitenetsaas/.github/workflows/monitoring.yml
AHMET YILMAZ b3fff546e9
Some checks failed
System Monitoring / Health Checks (push) Has been cancelled
System Monitoring / Performance Monitoring (push) Has been cancelled
System Monitoring / Database Monitoring (push) Has been cancelled
System Monitoring / Cache Monitoring (push) Has been cancelled
System Monitoring / Log Monitoring (push) Has been cancelled
System Monitoring / Resource Monitoring (push) Has been cancelled
System Monitoring / Uptime Monitoring (push) Has been cancelled
System Monitoring / Backup Monitoring (push) Has been cancelled
System Monitoring / Security Monitoring (push) Has been cancelled
System Monitoring / Monitoring Dashboard (push) Has been cancelled
System Monitoring / Alerting (push) Has been cancelled
Security Scanning / Dependency Scanning (push) Has been cancelled
Security Scanning / Code Security Scanning (push) Has been cancelled
Security Scanning / Secrets Scanning (push) Has been cancelled
Security Scanning / Container Security Scanning (push) Has been cancelled
Security Scanning / Compliance Checking (push) Has been cancelled
Security Scanning / Security Dashboard (push) Has been cancelled
Security Scanning / Security Remediation (push) Has been cancelled
project initialization
2025-10-05 02:37:33 +08:00

375 lines
11 KiB
YAML

# Workflow identity and triggers for the periodic system-monitoring run.
name: System Monitoring

on:
  # Scheduled run: minute 0 of every 6th hour.
  schedule:
    - cron: '0 */6 * * *'
  # Allow manual runs from the Actions UI / API.
  workflow_dispatch:
  # Also run on pushes to the long-lived branches.
  push:
    branches:
      - main
      - develop

jobs:
# Probes the public production and staging health endpoints with curl.
# A probe fails the step when curl exits non-zero (HTTP >= 400 via --fail,
# connection errors, or the per-request timeout).
health-checks:
  name: Health Checks
  runs-on: ubuntu-latest
  # Bound the whole job so an unresponsive endpoint cannot hold a runner
  # for the default job time limit.
  timeout-minutes: 10
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Run production health checks
      run: |
        # --max-time caps each request; --silent/--show-error hide the
        # progress meter but still print the reason for a failure.
        probe() {
          echo "Checking $1"
          curl --fail --silent --show-error --max-time 30 "$1" || exit 1
        }

        # API Health
        probe https://api.malaysian-sme-platform.com/health/
        probe https://api.malaysian-sme-platform.com/api/health/
        # Application Health
        probe https://app.malaysian-sme-platform.com/
        # Database Health
        probe https://api.malaysian-sme-platform.com/api/health/database/
        # Cache Health
        probe https://api.malaysian-sme-platform.com/api/health/cache/

    - name: Run staging health checks
      # Still probe staging when a production probe failed, so one run
      # reports the state of both environments.
      if: ${{ !cancelled() }}
      run: |
        probe() {
          echo "Checking $1"
          curl --fail --silent --show-error --max-time 30 "$1" || exit 1
        }

        probe https://staging.malaysian-sme-platform.com/health/
        probe https://staging.malaysian-sme-platform.com/api/health/
# Runs the k6 script tests/performance/monitoring.js against production and
# staging and publishes whatever it writes to tests/performance/results/.
performance-monitoring:
  name: Performance Monitoring
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # Install the k6 binary on the runner so the `k6 run` step below can
    # find it. The previous grafana/k6-action is a container action: it
    # executed the script itself (without the --env values) and left no
    # `k6` executable on PATH for later steps.
    - name: Set up k6
      uses: grafana/setup-k6-action@v1

    - name: Run performance monitoring
      run: |
        cd tests/performance
        k6 run monitoring.js \
          --env PROD_URL=https://api.malaysian-sme-platform.com \
          --env STAGING_URL=https://staging.malaysian-sme-platform.com

    - name: Upload performance results
      # Keep the results even when k6 exits non-zero; they are what is
      # needed to diagnose the failure.
      if: ${{ !cancelled() }}
      # upload-artifact v3 has been retired by GitHub; v4 is the supported line.
      uses: actions/upload-artifact@v4
      with:
        name: performance-monitoring-results
        path: tests/performance/results/
# Runs the repository's database health-check and metrics scripts against
# the database addressed by the PRODUCTION_DATABASE_URL secret and publishes
# the resulting JSON reports.
database-monitoring:
  name: Database Monitoring
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        # Must be quoted: a bare 3.10 is a YAML float and is read as 3.1.
        python-version: '3.10'

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install psycopg2-binary pandas matplotlib

    - name: Run database health checks
      env:
        DATABASE_URL: ${{ secrets.PRODUCTION_DATABASE_URL }}
      run: |
        python scripts/database-health-check.py

    - name: Generate database metrics report
      env:
        DATABASE_URL: ${{ secrets.PRODUCTION_DATABASE_URL }}
      run: |
        python scripts/database-metrics.py --output database-metrics.json

    - name: Upload database reports
      # Publish whatever reports exist even if a check above failed.
      if: ${{ !cancelled() }}
      # upload-artifact v3 has been retired by GitHub; v4 is the supported line.
      uses: actions/upload-artifact@v4
      with:
        name: database-monitoring-reports
        path: |
          database-metrics.json
          database-health-report.json
# Runs the repository's Redis health-check and cache-metrics scripts against
# the instance addressed by the PRODUCTION_REDIS_URL secret and publishes the
# resulting JSON reports.
cache-monitoring:
  name: Cache Monitoring
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        # Must be quoted: a bare 3.10 is a YAML float and is read as 3.1.
        python-version: '3.10'

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install redis pandas

    - name: Run Redis health checks
      env:
        REDIS_URL: ${{ secrets.PRODUCTION_REDIS_URL }}
      run: |
        python scripts/redis-health-check.py

    - name: Generate cache metrics report
      env:
        REDIS_URL: ${{ secrets.PRODUCTION_REDIS_URL }}
      run: |
        python scripts/cache-metrics.py --output cache-metrics.json

    - name: Upload cache reports
      # Publish whatever reports exist even if a check above failed.
      if: ${{ !cancelled() }}
      # upload-artifact v3 has been retired by GitHub; v4 is the supported line.
      uses: actions/upload-artifact@v4
      with:
        name: cache-monitoring-reports
        path: |
          cache-metrics.json
          redis-health-report.json
# Runs the repository's log-analysis and critical-error scripts and
# publishes their JSON reports.
# NOTE(review): this job has no Python setup or dependency-install step, so
# it relies on the runner image's default `python` — confirm the scripts
# need nothing beyond that.
log-monitoring:
  name: Log Monitoring
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Analyze application logs
      env:
        LOGS_ACCESS_KEY: ${{ secrets.LOGS_ACCESS_KEY }}
      run: |
        python scripts/log-analysis.py \
          --hours 6 \
          --error-threshold 10 \
          --warning-threshold 50 \
          --output log-analysis-report.json

    - name: Check for critical errors
      # Hand the secret to the shell through the environment and quote it,
      # instead of pasting its value into the script text.
      # NOTE(review): unlike the previous step, this one receives no
      # LOGS_ACCESS_KEY — confirm the script does not need it.
      env:
        SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
      run: |
        python scripts/critical-error-check.py \
          --hours 1 \
          --notification-webhook "$SLACK_WEBHOOK"

    - name: Upload log reports
      # Publish whatever reports exist even if a check above failed.
      if: ${{ !cancelled() }}
      # upload-artifact v3 has been retired by GitHub; v4 is the supported line.
      uses: actions/upload-artifact@v4
      with:
        name: log-monitoring-reports
        path: |
          log-analysis-report.json
          error-summary.json
# Runs the repository's resource-metrics and threshold-check scripts and
# publishes their JSON reports.
# NOTE(review): this job has no Python setup or dependency-install step, so
# it relies on the runner image's default `python` — confirm the scripts
# need nothing beyond that.
resource-monitoring:
  name: Resource Monitoring
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Monitor system resources
      env:
        MONITORING_API_KEY: ${{ secrets.MONITORING_API_KEY }}
      run: |
        python scripts/resource-monitoring.py \
          --output resource-metrics.json

    - name: Check resource thresholds
      # NOTE(review): unlike the previous step, this one receives no
      # MONITORING_API_KEY — confirm the script does not need it.
      run: |
        python scripts/resource-threshold-check.py \
          --cpu-threshold 80 \
          --memory-threshold 85 \
          --disk-threshold 90 \
          --output threshold-report.json

    - name: Upload resource reports
      # Publish whatever reports exist even if a check above failed.
      if: ${{ !cancelled() }}
      # upload-artifact v3 has been retired by GitHub; v4 is the supported line.
      uses: actions/upload-artifact@v4
      with:
        name: resource-monitoring-reports
        path: |
          resource-metrics.json
          threshold-report.json
# Runs the repository's uptime and SSL-certificate check scripts and
# publishes their JSON reports.
# NOTE(review): this job has no Python setup or dependency-install step, so
# it relies on the runner image's default `python` — confirm the scripts
# need nothing beyond that.
uptime-monitoring:
  name: Uptime Monitoring
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Check service uptime
      run: |
        python scripts/uptime-check.py \
          --services api,app,admin,static \
          --timeout 30 \
          --output uptime-report.json

    - name: Verify SSL certificates
      run: |
        python scripts/ssl-check.py \
          --domains api.malaysian-sme-platform.com,app.malaysian-sme-platform.com \
          --output ssl-report.json

    - name: Upload uptime reports
      # Publish whatever reports exist even if a check above failed.
      if: ${{ !cancelled() }}
      # upload-artifact v3 has been retired by GitHub; v4 is the supported line.
      uses: actions/upload-artifact@v4
      with:
        name: uptime-monitoring-reports
        path: |
          uptime-report.json
          ssl-report.json
# Runs the repository's backup-status and backup-integrity scripts and
# publishes their JSON reports.
# NOTE(review): this job has no Python setup or dependency-install step, so
# it relies on the runner image's default `python` — confirm the scripts
# need nothing beyond that.
backup-monitoring:
  name: Backup Monitoring
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Check backup status
      env:
        BACKUP_ACCESS_KEY: ${{ secrets.BACKUP_ACCESS_KEY }}
      run: |
        python scripts/backup-check.py \
          --hours 24 \
          --output backup-report.json

    - name: Verify backup integrity
      # NOTE(review): unlike the previous step, this one receives no
      # BACKUP_ACCESS_KEY — confirm the script does not need it.
      run: |
        python scripts/backup-integrity.py \
          --verify-latest 3 \
          --output integrity-report.json

    - name: Upload backup reports
      # Publish whatever reports exist even if a check above failed.
      if: ${{ !cancelled() }}
      # upload-artifact v3 has been retired by GitHub; v4 is the supported line.
      uses: actions/upload-artifact@v4
      with:
        name: backup-monitoring-reports
        path: |
          backup-report.json
          integrity-report.json
# Runs the repository's security-event and authentication-pattern scripts
# and publishes their JSON reports.
# NOTE(review): this job has no Python setup or dependency-install step, so
# it relies on the runner image's default `python` — confirm the scripts
# need nothing beyond that.
security-monitoring:
  name: Security Monitoring
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Check for security events
      env:
        SECURITY_API_KEY: ${{ secrets.SECURITY_API_KEY }}
      run: |
        python scripts/security-monitoring.py \
          --hours 6 \
          --output security-events.json

    - name: Analyze authentication patterns
      env:
        AUTH_LOGS_ACCESS_KEY: ${{ secrets.AUTH_LOGS_ACCESS_KEY }}
      run: |
        python scripts/auth-pattern-analysis.py \
          --hours 24 \
          --output auth-patterns.json

    - name: Upload security reports
      # Publish whatever reports exist even if a check above failed.
      if: ${{ !cancelled() }}
      # upload-artifact v3 has been retired by GitHub; v4 is the supported line.
      uses: actions/upload-artifact@v4
      with:
        name: security-monitoring-reports
        path: |
          security-events.json
          auth-patterns.json
# Collects the artifacts produced by every monitoring job, renders an HTML
# dashboard from them, and posts a summary to Slack. Runs regardless of how
# the monitoring jobs finished (`if: always()`).
monitoring-dashboard:
  name: Monitoring Dashboard
  runs-on: ubuntu-latest
  needs:
    - health-checks
    - performance-monitoring
    - database-monitoring
    - cache-monitoring
    - log-monitoring
    - resource-monitoring
    - uptime-monitoring
    - backup-monitoring
    - security-monitoring
  if: always()
  steps:
    # The generator script lives in the repository, so the job needs a
    # checkout. It must run before the download: checkout cleans the workspace.
    - name: Checkout code
      uses: actions/checkout@v4

    # With no `name` input, every artifact of the run is downloaded into a
    # directory named after the artifact.
    - name: Download all reports
      # download-artifact v3 has been retired by GitHub; v4 is the supported line.
      uses: actions/download-artifact@v4

    - name: Generate monitoring dashboard
      run: |
        python scripts/generate-monitoring-dashboard.py

    - name: Upload monitoring dashboard
      uses: actions/upload-artifact@v4
      with:
        name: monitoring-dashboard
        path: monitoring-dashboard.html

    - name: Send monitoring summary to Slack
      # Send the summary even if dashboard generation itself failed.
      if: ${{ !cancelled() }}
      uses: 8398a7/action-slack@v3
      with:
        # job.status only describes this job; report failure when any of
        # the monitoring jobs it depends on failed.
        status: ${{ contains(needs.*.result, 'failure') && 'failure' || job.status }}
        channel: '#monitoring'
      # action-slack v3 takes the webhook from this environment variable;
      # it has no `webhook_url` input.
      env:
        SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK }}
# Opens a GitHub issue and posts to the emergency Slack channel when the
# run failed. `failure()` is true when any job this one depends on,
# directly or transitively, has failed.
alerting:
  name: Alerting
  runs-on: ubuntu-latest
  needs: monitoring-dashboard
  if: failure()
  # The workflow token needs issue write access to create the alert issue.
  permissions:
    issues: write
  steps:
    - name: Create alert issue
      # v7 runs on a supported Node runtime; v6 targets the retired Node 16.
      uses: actions/github-script@v7
      with:
        script: |
          const repository = process.env.GITHUB_REPOSITORY;
          const runUrl = `${process.env.GITHUB_SERVER_URL}/${repository}/actions/runs/${context.runId}`;

          // Every report link points at the same workflow run page.
          const monitors = [
            'Health Checks',
            'Performance Monitoring',
            'Database Monitoring',
            'Cache Monitoring',
            'Log Monitoring',
            'Resource Monitoring',
            'Uptime Monitoring',
            'Backup Monitoring',
            'Security Monitoring',
          ];

          // The github context has no `run_started_at` property, so the
          // alert is stamped with the time this step runs instead.
          const body = [
            `Monitoring checks failed for run #${context.runNumber}.`,
            '',
            `**Time:** ${context.eventName} at ${new Date().toISOString()}`,
            `**Repository:** ${repository}`,
            '',
            'Please review the monitoring reports and investigate the issues.',
            '',
            '📋 **Monitoring Reports:**',
            ...monitors.map((monitor) => `- [${monitor}](${runUrl})`),
            '',
            '🎯 **Immediate Actions:**',
            '1. Review failed monitoring checks',
            '2. Investigate service availability',
            '3. Check system resources',
            '4. Verify backup integrity',
            '5. Address any security events',
            '',
            'This issue was automatically created by the monitoring system.',
          ].join('\n');

          // Await the request so an API error fails this step instead of
          // being lost as an unhandled promise.
          await github.rest.issues.create({
            owner: context.repo.owner,
            repo: context.repo.repo,
            title: `🚨 Monitoring Alert - ${context.runNumber}`,
            body,
            labels: ['monitoring', 'alert', 'priority-critical'],
          });

    - name: Send emergency notification
      # Notify even if the issue could not be created.
      if: ${{ !cancelled() }}
      uses: 8398a7/action-slack@v3
      with:
        status: failure
        channel: '#emergency'
      # action-slack v3 takes the webhook from this environment variable;
      # it has no `webhook_url` input.
      env:
        SLACK_WEBHOOK_URL: ${{ secrets.EMERGENCY_SLACK_WEBHOOK }}