Add dynamic loading of standard alert references
All checks were successful
PostgreSQL Compatibility Matrix / PG14 smoke (push) Successful in 8s
PostgreSQL Compatibility Matrix / PG15 smoke (push) Successful in 7s
PostgreSQL Compatibility Matrix / PG16 smoke (push) Successful in 8s
PostgreSQL Compatibility Matrix / PG17 smoke (push) Successful in 7s
PostgreSQL Compatibility Matrix / PG18 smoke (push) Successful in 7s

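Replaced the hardcoded standard alert metadata in the frontend with API-driven data. The standard alert information is now loaded dynamically from the backend through a new `/alerts/standard-reference` endpoint, so values that depend on settings (for example the poll-interval-based freshness thresholds) are rendered by the backend instead of being duplicated in the UI, improving maintainability and scalability. Also adjusted the frontend to show a placeholder row when no reference data is available.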
This commit is contained in:
2026-02-13 08:24:55 +01:00
parent 45d2173d1e
commit 7619757ed5
4 changed files with 139 additions and 89 deletions

View File

@@ -12,8 +12,10 @@ from app.schemas.alert import (
AlertDefinitionTestResponse, AlertDefinitionTestResponse,
AlertDefinitionUpdate, AlertDefinitionUpdate,
AlertStatusResponse, AlertStatusResponse,
StandardAlertReferenceItem,
) )
from app.services.alerts import ( from app.services.alerts import (
get_standard_alert_reference,
get_alert_status, get_alert_status,
invalidate_alert_cache, invalidate_alert_cache,
run_scalar_sql_for_target, run_scalar_sql_for_target,
@@ -44,6 +46,14 @@ async def list_alert_status(
return payload return payload
@router.get("/standard-reference", response_model=list[StandardAlertReferenceItem])
async def list_standard_alert_reference(
    user: User = Depends(get_current_user),
) -> list[StandardAlertReferenceItem]:
    """Return the standard alert reference rows as response models.

    The rows come from ``get_standard_alert_reference()`` and are each
    converted into a ``StandardAlertReferenceItem``.
    """
    # The dependency is declared for its side effect (presumably enforcing an
    # authenticated caller -- confirm against get_current_user); the resolved
    # user object itself is not used here.
    _ = user
    reference_rows = get_standard_alert_reference()
    return [StandardAlertReferenceItem(**fields) for fields in reference_rows]
@router.get("/definitions", response_model=list[AlertDefinitionOut]) @router.get("/definitions", response_model=list[AlertDefinitionOut])
async def list_alert_definitions( async def list_alert_definitions(
user: User = Depends(get_current_user), db: AsyncSession = Depends(get_db) user: User = Depends(get_current_user), db: AsyncSession = Depends(get_db)

View File

@@ -72,3 +72,12 @@ class AlertStatusResponse(BaseModel):
alerts: list[AlertStatusItem] alerts: list[AlertStatusItem]
warning_count: int warning_count: int
alert_count: int alert_count: int
class StandardAlertReferenceItem(BaseModel):
    """One row of the standard alert reference table (all fields are text)."""

    # Identifier of the standard alert, e.g. "target_reachability".
    key: str
    # Human-readable title of the alert.
    name: str
    # Description of what the alert evaluates.
    checks: str
    # Comparison label such as "gte" or "lte"; "-" when none applies.
    comparison: str
    # Warning threshold as display text (may include a unit); "-" when none.
    warning: str
    # Alert threshold or trigger condition as display text.
    alert: str

View File

@@ -42,6 +42,106 @@ def invalidate_alert_cache() -> None:
_status_cache["data"] = None _status_cache["data"] = None
def get_standard_alert_reference() -> list[dict[str, str]]:
    """Build the reference table describing the standard alerts.

    Returns:
        A list of dicts, one per standard alert, each with the string keys
        ``key``, ``name``, ``checks``, ``comparison``, ``warning`` and
        ``alert`` (in that order). Rows that mention configuration values
        read them from ``settings`` on every call.
    """
    field_names = ("key", "name", "checks", "comparison", "warning", "alert")

    # Configuration values interpolated into the descriptive text below.
    poll_interval = settings.poll_interval_seconds
    min_total_connections = settings.alert_active_connection_ratio_min_total_connections
    rollback_window = settings.alert_rollback_ratio_window_minutes
    rollback_min_transactions = settings.alert_rollback_ratio_min_total_transactions
    rollback_min_rollbacks = settings.alert_rollback_ratio_min_rollbacks

    # Each tuple lists its values in the same order as ``field_names``.
    rows = (
        (
            "target_reachability",
            "Target Reachability",
            "Connection to target database can be established.",
            "-",
            "-",
            "On connection failure",
        ),
        (
            "connectivity_rtt_ms",
            "Connectivity Latency",
            "Connection handshake duration (milliseconds).",
            "gte",
            "1000 ms",
            "2500 ms",
        ),
        (
            "collector_freshness_seconds",
            "Collector Freshness",
            "Age of newest metric sample.",
            "gte",
            f"{poll_interval * 2} s (poll interval x2)",
            f"{poll_interval * 4} s (poll interval x4)",
        ),
        (
            "active_connections_ratio",
            "Active Connection Ratio",
            "active_connections / total_connections "
            f"(evaluated only when total sessions >= {min_total_connections}).",
            "gte",
            "0.70",
            "0.90",
        ),
        (
            "cache_hit_ratio_low",
            "Cache Hit Ratio",
            "Buffer cache efficiency (lower is worse).",
            "lte",
            "0.95",
            "0.90",
        ),
        (
            "locks_total",
            "Lock Pressure",
            "Current total lock count.",
            "gte",
            "50",
            "100",
        ),
        (
            "checkpoints_req_15m",
            "Checkpoint Pressure (15m)",
            "Increase of requested checkpoints in last 15 minutes.",
            "gte",
            "5",
            "15",
        ),
        (
            "rollback_ratio",
            "Rollback Ratio",
            f"rollback / (commit + rollback) in last {rollback_window} minutes "
            f"(evaluated only when >= {rollback_min_transactions} transactions "
            f"and >= {rollback_min_rollbacks} rollbacks).",
            "gte",
            "0.10",
            "0.25",
        ),
        (
            "deadlocks_60m",
            "Deadlocks (60m)",
            "Increase in deadlocks during last 60 minutes.",
            "gte",
            "1",
            "5",
        ),
        (
            "slowest_query_mean_ms",
            "Slowest Query Mean Time",
            "Highest query mean execution time in latest snapshot.",
            "gte",
            "300 ms",
            "1000 ms",
        ),
        (
            "slowest_query_total_ms",
            "Slowest Query Total Time",
            "Highest query total execution time in latest snapshot.",
            "gte",
            "3000 ms",
            "10000 ms",
        ),
    )
    return [dict(zip(field_names, row)) for row in rows]
def validate_alert_thresholds(comparison: str, warning_threshold: float | None, alert_threshold: float) -> None: def validate_alert_thresholds(comparison: str, warning_threshold: float | None, alert_threshold: float) -> None:
if comparison not in _ALLOWED_COMPARISONS: if comparison not in _ALLOWED_COMPARISONS:
raise HTTPException(status_code=400, detail=f"Invalid comparison. Use one of {sorted(_ALLOWED_COMPARISONS)}") raise HTTPException(status_code=400, detail=f"Invalid comparison. Use one of {sorted(_ALLOWED_COMPARISONS)}")

View File

@@ -14,86 +14,6 @@ const initialForm = {
enabled: true, enabled: true,
}; };
// Static reference table of the standard alerts. Every entry carries display
// strings only: `name` (title), `check` (what is evaluated), `comparison`
// (operator label, "-" when none), and the `warning` / `alert` thresholds.
const STANDARD_ALERT_INFO = [
  {
    name: "Target Reachability",
    check: "Connection to target database can be established.",
    comparison: "-",
    warning: "-",
    alert: "On connection failure",
  },
  {
    name: "Connectivity Latency",
    check: "Connection handshake duration (ms).",
    comparison: "gte",
    warning: "1000 ms",
    alert: "2500 ms",
  },
  {
    name: "Collector Freshness",
    check: "Age of newest metric sample.",
    comparison: "gte",
    warning: "poll interval x2",
    alert: "poll interval x4",
  },
  {
    name: "Active Connection Ratio",
    check: "active_connections / total_connections.",
    comparison: "gte",
    warning: "0.70",
    alert: "0.90",
  },
  {
    name: "Cache Hit Ratio",
    check: "Buffer cache efficiency.",
    comparison: "lte",
    warning: "0.95",
    alert: "0.90",
  },
  {
    name: "Lock Pressure",
    check: "Current number of locks.",
    comparison: "gte",
    warning: "50",
    alert: "100",
  },
  {
    name: "Checkpoint Pressure (15m)",
    check: "Increase of requested checkpoints in last 15m.",
    comparison: "gte",
    warning: "5",
    alert: "15",
  },
  {
    name: "Rollback Ratio",
    check: "rollback / (commit + rollback) within rolling window.",
    comparison: "gte",
    warning: "0.10",
    alert: "0.25",
  },
  {
    name: "Deadlocks (60m)",
    check: "Increase in deadlocks in last 60 minutes.",
    comparison: "gte",
    warning: "1",
    alert: "5",
  },
  {
    name: "Slowest Query Mean Time",
    check: "Highest query mean execution time in latest snapshot.",
    comparison: "gte",
    warning: "300 ms",
    alert: "1000 ms",
  },
  {
    name: "Slowest Query Total Time",
    check: "Highest query total execution time in latest snapshot.",
    comparison: "gte",
    warning: "3000 ms",
    alert: "10000 ms",
  },
];
function formatAlertValue(value) { function formatAlertValue(value) {
if (value === null || value === undefined) return "-"; if (value === null || value === undefined) return "-";
if (Number.isInteger(value)) return String(value); if (Number.isInteger(value)) return String(value);
@@ -164,6 +84,7 @@ export function AlertsPage() {
const [expandedKey, setExpandedKey] = useState(""); const [expandedKey, setExpandedKey] = useState("");
const [error, setError] = useState(""); const [error, setError] = useState("");
const [loading, setLoading] = useState(true); const [loading, setLoading] = useState(true);
const [standardReference, setStandardReference] = useState([]);
const [testing, setTesting] = useState(false); const [testing, setTesting] = useState(false);
const [testResult, setTestResult] = useState(""); const [testResult, setTestResult] = useState("");
const [saving, setSaving] = useState(false); const [saving, setSaving] = useState(false);
@@ -173,8 +94,12 @@ export function AlertsPage() {
const loadAll = async () => { const loadAll = async () => {
try { try {
setError(""); setError("");
const targetRows = await apiFetch("/targets", {}, tokens, refresh); const [targetRows, referenceRows] = await Promise.all([
apiFetch("/targets", {}, tokens, refresh),
apiFetch("/alerts/standard-reference", {}, tokens, refresh),
]);
setTargets(targetRows); setTargets(targetRows);
setStandardReference(Array.isArray(referenceRows) ? referenceRows : []);
if (canManageAlerts) { if (canManageAlerts) {
const defs = await apiFetch("/alerts/definitions", {}, tokens, refresh); const defs = await apiFetch("/alerts/definitions", {}, tokens, refresh);
@@ -446,15 +371,21 @@ export function AlertsPage() {
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
{STANDARD_ALERT_INFO.map((row) => ( {standardReference.length > 0 ? (
<tr key={row.name}> standardReference.map((row) => (
<td>{row.name}</td> <tr key={row.key || row.name}>
<td>{row.check}</td> <td>{row.name}</td>
<td><code>{row.comparison}</code></td> <td>{row.checks}</td>
<td>{row.warning}</td> <td><code>{row.comparison}</code></td>
<td>{row.alert}</td> <td>{row.warning}</td>
<td>{row.alert}</td>
</tr>
))
) : (
<tr>
<td colSpan={5} className="muted">No standard alert metadata available.</td>
</tr> </tr>
))} )}
</tbody> </tbody>
</table> </table>
</div> </div>