Add dynamic loading of standard alert references
All checks were successful
PostgreSQL Compatibility Matrix / PG14 smoke (push) Successful in 8s
PostgreSQL Compatibility Matrix / PG15 smoke (push) Successful in 7s
PostgreSQL Compatibility Matrix / PG16 smoke (push) Successful in 8s
PostgreSQL Compatibility Matrix / PG17 smoke (push) Successful in 7s
PostgreSQL Compatibility Matrix / PG18 smoke (push) Successful in 7s
All checks were successful
PostgreSQL Compatibility Matrix / PG14 smoke (push) Successful in 8s
PostgreSQL Compatibility Matrix / PG15 smoke (push) Successful in 7s
PostgreSQL Compatibility Matrix / PG16 smoke (push) Successful in 8s
PostgreSQL Compatibility Matrix / PG17 smoke (push) Successful in 7s
PostgreSQL Compatibility Matrix / PG18 smoke (push) Successful in 7s
Replaced hardcoded standard alert metadata with API-driven data. This change ensures the standard alert information is dynamically loaded from the backend, improving maintainability and scalability. Also adjusted the frontend to handle cases where no data is available.
This commit is contained in:
@@ -12,8 +12,10 @@ from app.schemas.alert import (
|
||||
AlertDefinitionTestResponse,
|
||||
AlertDefinitionUpdate,
|
||||
AlertStatusResponse,
|
||||
StandardAlertReferenceItem,
|
||||
)
|
||||
from app.services.alerts import (
|
||||
get_standard_alert_reference,
|
||||
get_alert_status,
|
||||
invalidate_alert_cache,
|
||||
run_scalar_sql_for_target,
|
||||
@@ -44,6 +46,14 @@ async def list_alert_status(
|
||||
return payload
|
||||
|
||||
|
||||
@router.get("/standard-reference", response_model=list[StandardAlertReferenceItem])
async def list_standard_alert_reference(
    user: User = Depends(get_current_user),
) -> list[StandardAlertReferenceItem]:
    """Return the reference table describing the standard (built-in) alerts.

    The payload is identical for every caller; the ``user`` dependency exists
    only so the endpoint requires authentication.
    """
    _ = user  # auth-only dependency; value intentionally unused
    reference_rows = get_standard_alert_reference()
    return [StandardAlertReferenceItem(**row) for row in reference_rows]
|
||||
|
||||
|
||||
@router.get("/definitions", response_model=list[AlertDefinitionOut])
|
||||
async def list_alert_definitions(
|
||||
user: User = Depends(get_current_user), db: AsyncSession = Depends(get_db)
|
||||
|
||||
@@ -72,3 +72,12 @@ class AlertStatusResponse(BaseModel):
|
||||
alerts: list[AlertStatusItem]
|
||||
warning_count: int
|
||||
alert_count: int
|
||||
|
||||
|
||||
class StandardAlertReferenceItem(BaseModel):
    """One row of the standard-alert reference table served by the API.

    All fields are pre-rendered display strings (thresholds include units
    such as "1000 ms" or "-" when not applicable), so the frontend can show
    them verbatim.
    """

    # Stable machine identifier of the alert rule (e.g. "locks_total").
    key: str
    # Human-readable alert name.
    name: str
    # Description of what the rule checks.
    checks: str
    # Comparison direction, e.g. "gte"/"lte", or "-" when not applicable.
    comparison: str
    # Warning threshold as display text ("-" when the rule has none).
    warning: str
    # Alert threshold (or trigger condition) as display text.
    alert: str
|
||||
|
||||
@@ -42,6 +42,106 @@ def invalidate_alert_cache() -> None:
|
||||
_status_cache["data"] = None
|
||||
|
||||
|
||||
def get_standard_alert_reference() -> list[dict[str, str]]:
    """Describe the built-in (standard) alert rules as display-ready rows.

    Each row holds the rule key, human-readable name, what the rule checks,
    the comparison direction ("gte"/"lte", or "-" when not applicable), and
    the warning/alert thresholds as pre-formatted strings. Threshold text
    that depends on runtime configuration is rendered from ``settings`` at
    call time, so the table always reflects the current configuration.
    """

    def row(key: str, name: str, checks: str, comparison: str, warning: str, alert: str) -> dict[str, str]:
        # Build each entry with a fixed key order so serialized output stays stable.
        return {
            "key": key,
            "name": name,
            "checks": checks,
            "comparison": comparison,
            "warning": warning,
            "alert": alert,
        }

    poll = settings.poll_interval_seconds
    return [
        row(
            "target_reachability",
            "Target Reachability",
            "Connection to target database can be established.",
            "-",
            "-",
            "On connection failure",
        ),
        row(
            "connectivity_rtt_ms",
            "Connectivity Latency",
            "Connection handshake duration (milliseconds).",
            "gte",
            "1000 ms",
            "2500 ms",
        ),
        row(
            "collector_freshness_seconds",
            "Collector Freshness",
            "Age of newest metric sample.",
            "gte",
            # Freshness thresholds scale with the configured poll interval.
            f"{poll * 2} s (poll interval x2)",
            f"{poll * 4} s (poll interval x4)",
        ),
        row(
            "active_connections_ratio",
            "Active Connection Ratio",
            "active_connections / total_connections "
            f"(evaluated only when total sessions >= {settings.alert_active_connection_ratio_min_total_connections}).",
            "gte",
            "0.70",
            "0.90",
        ),
        row(
            "cache_hit_ratio_low",
            "Cache Hit Ratio",
            "Buffer cache efficiency (lower is worse).",
            "lte",
            "0.95",
            "0.90",
        ),
        row(
            "locks_total",
            "Lock Pressure",
            "Current total lock count.",
            "gte",
            "50",
            "100",
        ),
        row(
            "checkpoints_req_15m",
            "Checkpoint Pressure (15m)",
            "Increase of requested checkpoints in last 15 minutes.",
            "gte",
            "5",
            "15",
        ),
        row(
            "rollback_ratio",
            "Rollback Ratio",
            # Guard conditions mirror the evaluator's minimum-activity gates.
            f"rollback / (commit + rollback) in last {settings.alert_rollback_ratio_window_minutes} minutes "
            f"(evaluated only when >= {settings.alert_rollback_ratio_min_total_transactions} transactions "
            f"and >= {settings.alert_rollback_ratio_min_rollbacks} rollbacks).",
            "gte",
            "0.10",
            "0.25",
        ),
        row(
            "deadlocks_60m",
            "Deadlocks (60m)",
            "Increase in deadlocks during last 60 minutes.",
            "gte",
            "1",
            "5",
        ),
        row(
            "slowest_query_mean_ms",
            "Slowest Query Mean Time",
            "Highest query mean execution time in latest snapshot.",
            "gte",
            "300 ms",
            "1000 ms",
        ),
        row(
            "slowest_query_total_ms",
            "Slowest Query Total Time",
            "Highest query total execution time in latest snapshot.",
            "gte",
            "3000 ms",
            "10000 ms",
        ),
    ]
|
||||
|
||||
|
||||
def validate_alert_thresholds(comparison: str, warning_threshold: float | None, alert_threshold: float) -> None:
|
||||
if comparison not in _ALLOWED_COMPARISONS:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid comparison. Use one of {sorted(_ALLOWED_COMPARISONS)}")
|
||||
|
||||
Reference in New Issue
Block a user