Add dynamic loading of standard alert references
All checks were successful
PostgreSQL Compatibility Matrix / PG14 smoke (push) Successful in 8s
PostgreSQL Compatibility Matrix / PG15 smoke (push) Successful in 7s
PostgreSQL Compatibility Matrix / PG16 smoke (push) Successful in 8s
PostgreSQL Compatibility Matrix / PG17 smoke (push) Successful in 7s
PostgreSQL Compatibility Matrix / PG18 smoke (push) Successful in 7s
All checks were successful
PostgreSQL Compatibility Matrix / PG14 smoke (push) Successful in 8s
PostgreSQL Compatibility Matrix / PG15 smoke (push) Successful in 7s
PostgreSQL Compatibility Matrix / PG16 smoke (push) Successful in 8s
PostgreSQL Compatibility Matrix / PG17 smoke (push) Successful in 7s
PostgreSQL Compatibility Matrix / PG18 smoke (push) Successful in 7s
Replaced hardcoded standard alert metadata with API-driven data. This change ensures the standard alert information is dynamically loaded from the backend, improving maintainability and scalability. Also adjusted the frontend to handle cases where no data is available.
This commit is contained in:
@@ -12,8 +12,10 @@ from app.schemas.alert import (
|
||||
AlertDefinitionTestResponse,
|
||||
AlertDefinitionUpdate,
|
||||
AlertStatusResponse,
|
||||
StandardAlertReferenceItem,
|
||||
)
|
||||
from app.services.alerts import (
|
||||
get_standard_alert_reference,
|
||||
get_alert_status,
|
||||
invalidate_alert_cache,
|
||||
run_scalar_sql_for_target,
|
||||
@@ -44,6 +46,14 @@ async def list_alert_status(
|
||||
return payload
|
||||
|
||||
|
||||
@router.get("/standard-reference", response_model=list[StandardAlertReferenceItem])
async def list_standard_alert_reference(
    user: User = Depends(get_current_user),
) -> list[StandardAlertReferenceItem]:
    """Return the reference table describing the standard (built-in) alerts.

    The payload is identical for every caller; the ``user`` dependency exists
    only so the endpoint requires authentication.
    """
    _ = user  # auth-only dependency; value intentionally unused
    reference_rows = get_standard_alert_reference()
    return [StandardAlertReferenceItem(**row) for row in reference_rows]
|
||||
|
||||
|
||||
@router.get("/definitions", response_model=list[AlertDefinitionOut])
|
||||
async def list_alert_definitions(
|
||||
user: User = Depends(get_current_user), db: AsyncSession = Depends(get_db)
|
||||
|
||||
@@ -72,3 +72,12 @@ class AlertStatusResponse(BaseModel):
|
||||
alerts: list[AlertStatusItem]
|
||||
warning_count: int
|
||||
alert_count: int
|
||||
|
||||
|
||||
class StandardAlertReferenceItem(BaseModel):
    """One row of the standard-alert reference table served by the API.

    All fields are pre-rendered display strings (thresholds include units
    such as "1000 ms" or "-" when not applicable), so the frontend can show
    them verbatim.
    """

    # Stable machine identifier of the alert rule (e.g. "locks_total").
    key: str
    # Human-readable alert name.
    name: str
    # Description of what the rule checks.
    checks: str
    # Comparison direction, e.g. "gte"/"lte", or "-" when not applicable.
    comparison: str
    # Warning threshold as display text ("-" when the rule has none).
    warning: str
    # Alert threshold (or trigger condition) as display text.
    alert: str
|
||||
|
||||
@@ -42,6 +42,106 @@ def invalidate_alert_cache() -> None:
|
||||
_status_cache["data"] = None
|
||||
|
||||
|
||||
def get_standard_alert_reference() -> list[dict[str, str]]:
    """Describe the built-in (standard) alert rules as display-ready rows.

    Each row holds the rule key, human-readable name, what the rule checks,
    the comparison direction ("gte"/"lte", or "-" when not applicable), and
    the warning/alert thresholds as pre-formatted strings. Threshold text
    that depends on runtime configuration is rendered from ``settings`` at
    call time, so the table always reflects the current configuration.
    """

    def row(key: str, name: str, checks: str, comparison: str, warning: str, alert: str) -> dict[str, str]:
        # Build each entry with a fixed key order so serialized output stays stable.
        return {
            "key": key,
            "name": name,
            "checks": checks,
            "comparison": comparison,
            "warning": warning,
            "alert": alert,
        }

    poll = settings.poll_interval_seconds
    return [
        row(
            "target_reachability",
            "Target Reachability",
            "Connection to target database can be established.",
            "-",
            "-",
            "On connection failure",
        ),
        row(
            "connectivity_rtt_ms",
            "Connectivity Latency",
            "Connection handshake duration (milliseconds).",
            "gte",
            "1000 ms",
            "2500 ms",
        ),
        row(
            "collector_freshness_seconds",
            "Collector Freshness",
            "Age of newest metric sample.",
            "gte",
            # Freshness thresholds scale with the configured poll interval.
            f"{poll * 2} s (poll interval x2)",
            f"{poll * 4} s (poll interval x4)",
        ),
        row(
            "active_connections_ratio",
            "Active Connection Ratio",
            "active_connections / total_connections "
            f"(evaluated only when total sessions >= {settings.alert_active_connection_ratio_min_total_connections}).",
            "gte",
            "0.70",
            "0.90",
        ),
        row(
            "cache_hit_ratio_low",
            "Cache Hit Ratio",
            "Buffer cache efficiency (lower is worse).",
            "lte",
            "0.95",
            "0.90",
        ),
        row(
            "locks_total",
            "Lock Pressure",
            "Current total lock count.",
            "gte",
            "50",
            "100",
        ),
        row(
            "checkpoints_req_15m",
            "Checkpoint Pressure (15m)",
            "Increase of requested checkpoints in last 15 minutes.",
            "gte",
            "5",
            "15",
        ),
        row(
            "rollback_ratio",
            "Rollback Ratio",
            # Guard conditions mirror the evaluator's minimum-activity gates.
            f"rollback / (commit + rollback) in last {settings.alert_rollback_ratio_window_minutes} minutes "
            f"(evaluated only when >= {settings.alert_rollback_ratio_min_total_transactions} transactions "
            f"and >= {settings.alert_rollback_ratio_min_rollbacks} rollbacks).",
            "gte",
            "0.10",
            "0.25",
        ),
        row(
            "deadlocks_60m",
            "Deadlocks (60m)",
            "Increase in deadlocks during last 60 minutes.",
            "gte",
            "1",
            "5",
        ),
        row(
            "slowest_query_mean_ms",
            "Slowest Query Mean Time",
            "Highest query mean execution time in latest snapshot.",
            "gte",
            "300 ms",
            "1000 ms",
        ),
        row(
            "slowest_query_total_ms",
            "Slowest Query Total Time",
            "Highest query total execution time in latest snapshot.",
            "gte",
            "3000 ms",
            "10000 ms",
        ),
    ]
|
||||
|
||||
|
||||
def validate_alert_thresholds(comparison: str, warning_threshold: float | None, alert_threshold: float) -> None:
|
||||
if comparison not in _ALLOWED_COMPARISONS:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid comparison. Use one of {sorted(_ALLOWED_COMPARISONS)}")
|
||||
|
||||
Reference in New Issue
Block a user