diff --git a/backend/app/api/routes/alerts.py b/backend/app/api/routes/alerts.py index ada309f..bba8624 100644 --- a/backend/app/api/routes/alerts.py +++ b/backend/app/api/routes/alerts.py @@ -12,8 +12,10 @@ from app.schemas.alert import ( AlertDefinitionTestResponse, AlertDefinitionUpdate, AlertStatusResponse, + StandardAlertReferenceItem, ) from app.services.alerts import ( + get_standard_alert_reference, get_alert_status, invalidate_alert_cache, run_scalar_sql_for_target, @@ -44,6 +46,14 @@ async def list_alert_status( return payload +@router.get("/standard-reference", response_model=list[StandardAlertReferenceItem]) +async def list_standard_alert_reference( + user: User = Depends(get_current_user), +) -> list[StandardAlertReferenceItem]: + _ = user + return [StandardAlertReferenceItem(**item) for item in get_standard_alert_reference()] + + @router.get("/definitions", response_model=list[AlertDefinitionOut]) async def list_alert_definitions( user: User = Depends(get_current_user), db: AsyncSession = Depends(get_db) diff --git a/backend/app/schemas/alert.py b/backend/app/schemas/alert.py index 46f2a4d..e04f980 100644 --- a/backend/app/schemas/alert.py +++ b/backend/app/schemas/alert.py @@ -72,3 +72,12 @@ class AlertStatusResponse(BaseModel): alerts: list[AlertStatusItem] warning_count: int alert_count: int + + +class StandardAlertReferenceItem(BaseModel): + key: str + name: str + checks: str + comparison: str + warning: str + alert: str diff --git a/backend/app/services/alerts.py b/backend/app/services/alerts.py index 3f9327c..199205d 100644 --- a/backend/app/services/alerts.py +++ b/backend/app/services/alerts.py @@ -42,6 +42,106 @@ def invalidate_alert_cache() -> None: _status_cache["data"] = None +def get_standard_alert_reference() -> list[dict[str, str]]: + return [ + { + "key": "target_reachability", + "name": "Target Reachability", + "checks": "Connection to target database can be established.", + "comparison": "-", + "warning": "-", + "alert": "On connection failure", + }, + { + "key": "connectivity_rtt_ms", + "name": "Connectivity Latency", + "checks": "Connection handshake duration (milliseconds).", + "comparison": "gte", + "warning": "1000 ms", + "alert": "2500 ms", + }, + { + "key": "collector_freshness_seconds", + "name": "Collector Freshness", + "checks": "Age of newest metric sample.", + "comparison": "gte", + "warning": f"{settings.poll_interval_seconds * 2} s (poll interval x2)", + "alert": f"{settings.poll_interval_seconds * 4} s (poll interval x4)", + }, + { + "key": "active_connections_ratio", + "name": "Active Connection Ratio", + "checks": ( + "active_connections / total_connections " + f"(evaluated only when total sessions >= {settings.alert_active_connection_ratio_min_total_connections})." + ), + "comparison": "gte", + "warning": "0.70", + "alert": "0.90", + }, + { + "key": "cache_hit_ratio_low", + "name": "Cache Hit Ratio", + "checks": "Buffer cache efficiency (lower is worse).", + "comparison": "lte", + "warning": "0.95", + "alert": "0.90", + }, + { + "key": "locks_total", + "name": "Lock Pressure", + "checks": "Current total lock count.", + "comparison": "gte", + "warning": "50", + "alert": "100", + }, + { + "key": "checkpoints_req_15m", + "name": "Checkpoint Pressure (15m)", + "checks": "Increase of requested checkpoints in last 15 minutes.", + "comparison": "gte", + "warning": "5", + "alert": "15", + }, + { + "key": "rollback_ratio", + "name": "Rollback Ratio", + "checks": ( + f"rollback / (commit + rollback) in last {settings.alert_rollback_ratio_window_minutes} minutes " + f"(evaluated only when >= {settings.alert_rollback_ratio_min_total_transactions} transactions " + f"and >= {settings.alert_rollback_ratio_min_rollbacks} rollbacks)." + ), + "comparison": "gte", + "warning": "0.10", + "alert": "0.25", + }, + { + "key": "deadlocks_60m", + "name": "Deadlocks (60m)", + "checks": "Increase in deadlocks during last 60 minutes.", + "comparison": "gte", + "warning": "1", + "alert": "5", + }, + { + "key": "slowest_query_mean_ms", + "name": "Slowest Query Mean Time", + "checks": "Highest query mean execution time in latest snapshot.", + "comparison": "gte", + "warning": "300 ms", + "alert": "1000 ms", + }, + { + "key": "slowest_query_total_ms", + "name": "Slowest Query Total Time", + "checks": "Highest query total execution time in latest snapshot.", + "comparison": "gte", + "warning": "3000 ms", + "alert": "10000 ms", + }, + ] + + def validate_alert_thresholds(comparison: str, warning_threshold: float | None, alert_threshold: float) -> None: if comparison not in _ALLOWED_COMPARISONS: raise HTTPException(status_code=400, detail=f"Invalid comparison. Use one of {sorted(_ALLOWED_COMPARISONS)}") diff --git a/frontend/src/pages/AlertsPage.jsx b/frontend/src/pages/AlertsPage.jsx index 70759f2..06258e2 100644 --- a/frontend/src/pages/AlertsPage.jsx +++ b/frontend/src/pages/AlertsPage.jsx @@ -14,86 +14,6 @@ const initialForm = { enabled: true, }; -const STANDARD_ALERT_INFO = [ - { - name: "Target Reachability", - check: "Connection to target database can be established.", - comparison: "-", - warning: "-", - alert: "On connection failure", - }, - { - name: "Connectivity Latency", - check: "Connection handshake duration (ms).", - comparison: "gte", - warning: "1000 ms", - alert: "2500 ms", - }, - { - name: "Collector Freshness", - check: "Age of newest metric sample.", - comparison: "gte", - warning: "poll interval x2", - alert: "poll interval x4", - }, - { - name: "Active Connection Ratio", - check: "active_connections / total_connections.", - comparison: "gte", - warning: "0.70", - alert: "0.90", - }, - { - name: "Cache Hit Ratio", - check: "Buffer cache efficiency.", - comparison: "lte", - warning: "0.95", - alert: "0.90", - }, - { - name: "Lock Pressure", - check: "Current number of locks.", - comparison: "gte", - warning: "50", - alert: "100", - }, - { - name: "Checkpoint Pressure (15m)", - check: "Increase of requested checkpoints in last 15m.", - comparison: "gte", - warning: "5", - alert: "15", - }, - { - name: "Rollback Ratio", - check: "rollback / (commit + rollback) within rolling window.", - comparison: "gte", - warning: "0.10", - alert: "0.25", - }, - { - name: "Deadlocks (60m)", - check: "Increase in deadlocks in last 60 minutes.", - comparison: "gte", - warning: "1", - alert: "5", - }, - { - name: "Slowest Query Mean Time", - check: "Highest query mean execution time in latest snapshot.", - comparison: "gte", - warning: "300 ms", - alert: "1000 ms", - }, - { - name: "Slowest Query Total Time", - check: "Highest query total execution time in latest snapshot.", - comparison: "gte", - warning: "3000 ms", - alert: "10000 ms", - }, -]; - function formatAlertValue(value) { if (value === null || value === undefined) return "-"; if (Number.isInteger(value)) return String(value); @@ -164,6 +84,7 @@ export function AlertsPage() { const [expandedKey, setExpandedKey] = useState(""); const [error, setError] = useState(""); const [loading, setLoading] = useState(true); + const [standardReference, setStandardReference] = useState([]); const [testing, setTesting] = useState(false); const [testResult, setTestResult] = useState(""); const [saving, setSaving] = useState(false); @@ -173,8 +94,12 @@ export function AlertsPage() { const loadAll = async () => { try { setError(""); - const targetRows = await apiFetch("/targets", {}, tokens, refresh); + const [targetRows, referenceRows] = await Promise.all([ + apiFetch("/targets", {}, tokens, refresh), + apiFetch("/alerts/standard-reference", {}, tokens, refresh), + ]); setTargets(targetRows); + setStandardReference(Array.isArray(referenceRows) ? referenceRows : []); if (canManageAlerts) { const defs = await apiFetch("/alerts/definitions", {}, tokens, refresh); @@ -446,15 +371,21 @@ export function AlertsPage() {
- {STANDARD_ALERT_INFO.map((row) => ( -{row.comparison}{row.comparison}