Add rollback ratio alert tuning parameters
All checks were successful
PostgreSQL Compatibility Matrix / PG14 smoke (push) Successful in 7s
PostgreSQL Compatibility Matrix / PG15 smoke (push) Successful in 7s
PostgreSQL Compatibility Matrix / PG16 smoke (push) Successful in 7s
PostgreSQL Compatibility Matrix / PG17 smoke (push) Successful in 8s
PostgreSQL Compatibility Matrix / PG18 smoke (push) Successful in 9s
All checks were successful
PostgreSQL Compatibility Matrix / PG14 smoke (push) Successful in 7s
PostgreSQL Compatibility Matrix / PG15 smoke (push) Successful in 7s
PostgreSQL Compatibility Matrix / PG16 smoke (push) Successful in 7s
PostgreSQL Compatibility Matrix / PG17 smoke (push) Successful in 8s
PostgreSQL Compatibility Matrix / PG18 smoke (push) Successful in 9s
Introduced new parameters to fine-tune rollback ratio alerts and reduce false positives on low-traffic databases. Adjusted the evaluation logic so the ratio is only evaluated when both a minimum transaction volume and a minimum rollback count are reached within a configurable time window, ensuring more reliable alert thresholds. Updated .env.example and the alert rule descriptions for better configuration clarity.
This commit is contained in:
@@ -45,6 +45,10 @@ POLL_INTERVAL_SECONDS=30
|
||||
# Active Connection Ratio alert is only evaluated when total sessions
|
||||
# are at least this number (reduces false positives on low-traffic DBs).
|
||||
ALERT_ACTIVE_CONNECTION_RATIO_MIN_TOTAL_CONNECTIONS=5
|
||||
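# Rollback Ratio tuning to reduce false positives on low-traffic DBs: the ratio is only evaluated when, within the last WINDOW_MINUTES, at least MIN_TOTAL_TRANSACTIONS transactions and at least MIN_ROLLBACKS rollbacks occurred.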
|
||||
ALERT_ROLLBACK_RATIO_WINDOW_MINUTES=15
|
||||
ALERT_ROLLBACK_RATIO_MIN_TOTAL_TRANSACTIONS=100
|
||||
ALERT_ROLLBACK_RATIO_MIN_ROLLBACKS=10
|
||||
# Initial admin bootstrap user (created on first startup if not present).
|
||||
INIT_ADMIN_EMAIL=admin@example.com
|
||||
INIT_ADMIN_PASSWORD=ChangeMe123!
|
||||
|
||||
@@ -26,6 +26,9 @@ class Settings(BaseSettings):
|
||||
cors_origins: str = "http://localhost:5173"
|
||||
poll_interval_seconds: int = 30
|
||||
alert_active_connection_ratio_min_total_connections: int = 5
|
||||
alert_rollback_ratio_window_minutes: int = 15
|
||||
alert_rollback_ratio_min_total_transactions: int = 100
|
||||
alert_rollback_ratio_min_rollbacks: int = 10
|
||||
init_admin_email: str = "admin@example.com"
|
||||
init_admin_password: str = "ChangeMe123!"
|
||||
|
||||
|
||||
@@ -152,17 +152,20 @@ async def _metric_delta(db: AsyncSession, target_id: int, metric_name: str, minu
|
||||
|
||||
|
||||
async def _rollback_ratio_recent(
|
||||
db: AsyncSession, target_id: int, minutes: int, min_total_transactions: int
|
||||
) -> tuple[float | None, float]:
|
||||
db: AsyncSession, target_id: int, minutes: int, min_total_transactions: int, min_rollbacks: int
|
||||
) -> tuple[float | None, float, float]:
|
||||
commit_delta = await _metric_delta(db, target_id, "xact_commit", minutes=minutes)
|
||||
rollback_delta = await _metric_delta(db, target_id, "xact_rollback", minutes=minutes)
|
||||
if commit_delta is None or rollback_delta is None:
|
||||
return None, 0.0
|
||||
return None, 0.0, 0.0
|
||||
tx_total = commit_delta + rollback_delta
|
||||
if tx_total < float(min_total_transactions):
|
||||
# Too little traffic in window, ratio would be noisy and misleading.
|
||||
return None, tx_total
|
||||
return (rollback_delta / tx_total) if tx_total > 0 else None, tx_total
|
||||
return None, tx_total, rollback_delta
|
||||
if rollback_delta < float(min_rollbacks):
|
||||
# Ignore tiny rollback counts even if ratio appears high on low absolute numbers.
|
||||
return None, tx_total, rollback_delta
|
||||
return (rollback_delta / tx_total) if tx_total > 0 else None, tx_total, rollback_delta
|
||||
|
||||
|
||||
async def _latest_query_snapshot_max(db: AsyncSession, target_id: int, column_name: str) -> float | None:
|
||||
@@ -275,8 +278,13 @@ async def _build_standard_rules(db: AsyncSession, target: Target) -> tuple[list[
|
||||
):
|
||||
active_ratio = active_connections / total_connections
|
||||
|
||||
rollback_ratio_15m, tx_total_15m = await _rollback_ratio_recent(
|
||||
db, target.id, minutes=15, min_total_transactions=20
|
||||
rollback_ratio_window = settings.alert_rollback_ratio_window_minutes
|
||||
rollback_ratio_val, tx_total_window, rollback_count_window = await _rollback_ratio_recent(
|
||||
db,
|
||||
target.id,
|
||||
minutes=rollback_ratio_window,
|
||||
min_total_transactions=settings.alert_rollback_ratio_min_total_transactions,
|
||||
min_rollbacks=settings.alert_rollback_ratio_min_rollbacks,
|
||||
)
|
||||
|
||||
rules.extend(
|
||||
@@ -324,9 +332,13 @@ async def _build_standard_rules(db: AsyncSession, target: Target) -> tuple[list[
|
||||
_RuleInput(
|
||||
key="rollback_ratio",
|
||||
name="Rollback Ratio",
|
||||
description="Fraction of rolled back transactions in the last 15 minutes (evaluated only when at least 20 transactions occurred).",
|
||||
description=(
|
||||
f"Fraction of rolled back transactions in the last {rollback_ratio_window} minutes "
|
||||
f"(evaluated only when at least {settings.alert_rollback_ratio_min_total_transactions} "
|
||||
f"transactions and {settings.alert_rollback_ratio_min_rollbacks} rollbacks occurred)."
|
||||
),
|
||||
category="transactions",
|
||||
value=rollback_ratio_15m,
|
||||
value=rollback_ratio_val,
|
||||
warning_threshold=0.10,
|
||||
alert_threshold=0.25,
|
||||
),
|
||||
@@ -364,9 +376,20 @@ async def _build_standard_rules(db: AsyncSession, target: Target) -> tuple[list[
|
||||
_RuleInput(
|
||||
key="rollback_tx_volume_15m",
|
||||
name="Rollback Ratio Evaluation Volume",
|
||||
description="Total transactions in the last 15 minutes used for rollback-ratio evaluation.",
|
||||
description=f"Total transactions in the last {rollback_ratio_window} minutes used for rollback-ratio evaluation.",
|
||||
category="transactions",
|
||||
value=tx_total_15m,
|
||||
value=tx_total_window,
|
||||
warning_threshold=None,
|
||||
alert_threshold=None,
|
||||
)
|
||||
)
|
||||
rules.append(
|
||||
_RuleInput(
|
||||
key="rollback_count_window",
|
||||
name="Rollback Count (Window)",
|
||||
description=f"Rollback count in the last {rollback_ratio_window} minutes used for rollback-ratio evaluation.",
|
||||
category="transactions",
|
||||
value=rollback_count_window,
|
||||
warning_threshold=None,
|
||||
alert_threshold=None,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user