From 117710cc0adf1df8b6cdded2badd13a4ef890718 Mon Sep 17 00:00:00 2001 From: nessi Date: Sat, 14 Feb 2026 11:30:56 +0100 Subject: [PATCH] [NX-101 Issue] Refactor error handling to use consistent API error format Replaced all inline error messages with the standardized `api_error` helper for consistent error response formatting. This improves clarity, maintainability, and ensures uniform error structures across the application. Updated logging for collector failures to include error class and switched to warning level for target unreachable scenarios. --- README.md | 1 + backend/app/api/routes/admin_settings.py | 10 ++- backend/app/api/routes/admin_users.py | 11 +-- backend/app/api/routes/alerts.py | 9 +-- backend/app/api/routes/auth.py | 21 ++++-- backend/app/api/routes/me.py | 11 ++- backend/app/api/routes/targets.py | 88 +++++++++++++++++++----- backend/app/core/config.py | 2 +- backend/app/core/deps.py | 26 +++++-- backend/app/core/errors.py | 9 ++- backend/app/services/alerts.py | 28 ++++++-- backend/app/services/collector.py | 14 +++- 12 files changed, 178 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 1138054..39f502c 100644 --- a/README.md +++ b/README.md @@ -348,6 +348,7 @@ Common error codes: - `not_found` (`404`) - `conflict` (`409`) - `validation_error` (`422`) +- `target_unreachable` (`503`) - `internal_error` (`500`) ## `pg_stat_statements` Requirement diff --git a/backend/app/api/routes/admin_settings.py b/backend/app/api/routes/admin_settings.py index 5c227c5..6c8a4bc 100644 --- a/backend/app/api/routes/admin_settings.py +++ b/backend/app/api/routes/admin_settings.py @@ -9,6 +9,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.core.db import get_db from app.core.deps import require_roles +from app.core.errors import api_error from app.models.models import EmailNotificationSettings, User from app.schemas.admin_settings import EmailSettingsOut, EmailSettingsTestRequest, EmailSettingsUpdate from app.services.audit import write_audit_log @@ -96,9 +97,9 @@ async def test_email_settings( ) -> dict: settings = await _get_or_create_settings(db) if not settings.smtp_host: - raise HTTPException(status_code=400, detail="SMTP host is not configured") + raise HTTPException(status_code=400, detail=api_error("smtp_host_missing", "SMTP host is not configured")) if not settings.from_email: - raise HTTPException(status_code=400, detail="From email is not configured") + raise HTTPException(status_code=400, detail=api_error("smtp_from_email_missing", "From email is not configured")) password = decrypt_secret(settings.encrypted_smtp_password) if settings.encrypted_smtp_password else None message = EmailMessage() @@ -126,7 +127,10 @@ async def test_email_settings( smtp.login(settings.smtp_username, password or "") smtp.send_message(message) except Exception as exc: - raise HTTPException(status_code=400, detail=f"SMTP test failed: {exc}") + raise HTTPException( + status_code=400, + detail=api_error("smtp_test_failed", "SMTP test failed", {"error": str(exc)}), + ) from exc await write_audit_log(db, "admin.email_settings.test", admin.id, {"recipient": str(payload.recipient)}) return {"status": "sent", "recipient": str(payload.recipient)} diff --git a/backend/app/api/routes/admin_users.py b/backend/app/api/routes/admin_users.py index 22ff253..362b65e 100644 --- a/backend/app/api/routes/admin_users.py +++ b/backend/app/api/routes/admin_users.py @@ -3,6 +3,7 @@ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession from app.core.db import get_db from app.core.deps import require_roles +from app.core.errors import api_error from app.core.security import hash_password from app.models.models import User from app.schemas.user import UserCreate, UserOut, UserUpdate @@ -22,7 +23,7 @@ async def list_users(admin: User = Depends(require_roles("admin")), db: AsyncSes async def create_user(payload: UserCreate, admin: User = Depends(require_roles("admin")), db: AsyncSession = Depends(get_db)) -> UserOut: exists = await db.scalar(select(User).where(User.email == payload.email)) if exists: - raise HTTPException(status_code=409, detail="Email already exists") + raise HTTPException(status_code=409, detail=api_error("email_exists", "Email already exists")) user = User( email=payload.email, first_name=payload.first_name, @@ -46,13 +47,13 @@ async def update_user( ) -> UserOut: user = await db.scalar(select(User).where(User.id == user_id)) if not user: - raise HTTPException(status_code=404, detail="User not found") + raise HTTPException(status_code=404, detail=api_error("user_not_found", "User not found")) update_data = payload.model_dump(exclude_unset=True) next_email = update_data.get("email") if next_email and next_email != user.email: existing = await db.scalar(select(User).where(User.email == next_email)) if existing and existing.id != user.id: - raise HTTPException(status_code=409, detail="Email already exists") + raise HTTPException(status_code=409, detail=api_error("email_exists", "Email already exists")) if "password" in update_data: raw_password = update_data.pop("password") if raw_password: @@ -68,10 +69,10 @@ async def update_user( @router.delete("/{user_id}") async def delete_user(user_id: int, admin: User = Depends(require_roles("admin")), db: AsyncSession = Depends(get_db)) -> dict: if user_id == admin.id: - raise HTTPException(status_code=400, detail="Cannot delete yourself") + raise HTTPException(status_code=400, detail=api_error("cannot_delete_self", "Cannot delete yourself")) user = await db.scalar(select(User).where(User.id == user_id)) if not user: - raise HTTPException(status_code=404, detail="User not found") + raise HTTPException(status_code=404, detail=api_error("user_not_found", "User not found")) await db.delete(user) await db.commit() await write_audit_log(db, "admin.user.delete", admin.id, {"deleted_user_id": user_id}) diff --git a/backend/app/api/routes/alerts.py b/backend/app/api/routes/alerts.py index bba8624..2faa592 100644 --- a/backend/app/api/routes/alerts.py +++ b/backend/app/api/routes/alerts.py @@ -4,6 +4,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.core.db import get_db from app.core.deps import get_current_user, require_roles +from app.core.errors import api_error from app.models.models import AlertDefinition, Target, User from app.schemas.alert import ( AlertDefinitionCreate, @@ -33,7 +34,7 @@ async def _validate_target_exists(db: AsyncSession, target_id: int | None) -> No return target_exists = await db.scalar(select(Target.id).where(Target.id == target_id)) if target_exists is None: - raise HTTPException(status_code=404, detail="Target not found") + raise HTTPException(status_code=404, detail=api_error("target_not_found", "Target not found")) @router.get("/status", response_model=AlertStatusResponse) @@ -101,7 +102,7 @@ async def update_alert_definition( ) -> AlertDefinitionOut: definition = await db.scalar(select(AlertDefinition).where(AlertDefinition.id == definition_id)) if definition is None: - raise HTTPException(status_code=404, detail="Alert definition not found") + raise HTTPException(status_code=404, detail=api_error("alert_definition_not_found", "Alert definition not found")) updates = payload.model_dump(exclude_unset=True) if "target_id" in updates: @@ -131,7 +132,7 @@ async def delete_alert_definition( ) -> dict: definition = await db.scalar(select(AlertDefinition).where(AlertDefinition.id == definition_id)) if definition is None: - raise HTTPException(status_code=404, detail="Alert definition not found") + raise HTTPException(status_code=404, detail=api_error("alert_definition_not_found", "Alert definition not found")) await db.delete(definition) await db.commit() invalidate_alert_cache() @@ -148,7 +149,7 @@ async def test_alert_definition( _ = user target = await db.scalar(select(Target).where(Target.id == payload.target_id)) if target is None: - raise HTTPException(status_code=404, detail="Target not found") + raise HTTPException(status_code=404, detail=api_error("target_not_found", "Target not found")) try: value = await run_scalar_sql_for_target(target, payload.sql_text) return AlertDefinitionTestResponse(ok=True, value=value) diff --git a/backend/app/api/routes/auth.py b/backend/app/api/routes/auth.py index 32d3d98..ed97cea 100644 --- a/backend/app/api/routes/auth.py +++ b/backend/app/api/routes/auth.py @@ -5,6 +5,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.core.config import get_settings from app.core.db import get_db from app.core.deps import get_current_user +from app.core.errors import api_error from app.core.security import create_access_token, create_refresh_token, verify_password from app.models.models import User from app.schemas.auth import LoginRequest, RefreshRequest, TokenResponse @@ -19,7 +20,10 @@ settings = get_settings() async def login(payload: LoginRequest, db: AsyncSession = Depends(get_db)) -> TokenResponse: user = await db.scalar(select(User).where(User.email == payload.email)) if not user or not verify_password(payload.password, user.password_hash): - raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid credentials") + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=api_error("invalid_credentials", "Invalid credentials"), + ) await write_audit_log(db, action="auth.login", user_id=user.id, payload={"email": user.email}) return TokenResponse(access_token=create_access_token(str(user.id)), refresh_token=create_refresh_token(str(user.id))) @@ -30,14 +34,23 @@ async def refresh(payload: RefreshRequest, db: AsyncSession = Depends(get_db)) - try: token_payload = jwt.decode(payload.refresh_token, settings.jwt_secret_key, algorithms=[settings.jwt_algorithm]) except jwt.InvalidTokenError as exc: - raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid refresh token") from exc + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=api_error("invalid_refresh_token", "Invalid refresh token"), + ) from exc if token_payload.get("type") != "refresh": - raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid refresh token type") + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=api_error("invalid_refresh_token_type", "Invalid refresh token type"), + ) user_id = token_payload.get("sub") user = await db.scalar(select(User).where(User.id == int(user_id))) if not user: - raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="User not found") + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=api_error("user_not_found", "User not found"), + ) await write_audit_log(db, action="auth.refresh", user_id=user.id, payload={}) return TokenResponse(access_token=create_access_token(str(user.id)), refresh_token=create_refresh_token(str(user.id))) diff --git a/backend/app/api/routes/me.py b/backend/app/api/routes/me.py index 0b43821..14cd815 100644 --- a/backend/app/api/routes/me.py +++ b/backend/app/api/routes/me.py @@ -2,6 +2,7 @@ from fastapi import APIRouter, Depends, HTTPException, status from sqlalchemy.ext.asyncio import AsyncSession from app.core.db import get_db from app.core.deps import get_current_user +from app.core.errors import api_error from app.core.security import hash_password, verify_password from app.models.models import User from app.schemas.user import UserOut, UserPasswordChange @@ -22,10 +23,16 @@ async def change_password( db: AsyncSession = Depends(get_db), ) -> dict: if not verify_password(payload.current_password, user.password_hash): - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Current password is incorrect") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=api_error("invalid_current_password", "Current password is incorrect"), + ) if verify_password(payload.new_password, user.password_hash): - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="New password must be different") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=api_error("password_reuse_not_allowed", "New password must be different"), + ) user.password_hash = hash_password(payload.new_password) await db.commit() diff --git a/backend/app/api/routes/targets.py b/backend/app/api/routes/targets.py index 6ab0322..ea437a7 100644 --- a/backend/app/api/routes/targets.py +++ b/backend/app/api/routes/targets.py @@ -8,6 +8,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.core.db import get_db from app.core.deps import get_current_user, require_roles +from app.core.errors import api_error from app.models.models import Metric, QueryStat, Target, TargetOwner, User from app.schemas.metric import MetricOut, QueryStatOut from app.schemas.overview import DatabaseOverviewOut @@ -85,7 +86,10 @@ async def _discover_databases(payload: TargetCreate) -> list[str]: ) return [row["datname"] for row in rows if row["datname"]] except Exception as exc: - raise HTTPException(status_code=400, detail=f"Database discovery failed: {exc}") + raise HTTPException( + status_code=400, + detail=api_error("database_discovery_failed", "Database discovery failed", {"error": str(exc)}), + ) finally: if conn: await conn.close() @@ -131,7 +135,10 @@ async def test_target_connection( version = await conn.fetchval("SHOW server_version") return {"ok": True, "message": "Connection successful", "server_version": version} except Exception as exc: - raise HTTPException(status_code=400, detail=f"Connection failed: {exc}") + raise HTTPException( + status_code=400, + detail=api_error("connection_test_failed", "Connection test failed", {"error": str(exc)}), + ) finally: if conn: await conn.close() @@ -147,7 +154,10 @@ async def create_target( if owner_ids: owners_exist = (await db.scalars(select(User.id).where(User.id.in_(owner_ids)))).all() if len(set(owners_exist)) != len(owner_ids): - raise HTTPException(status_code=400, detail="One or more owner users were not found") + raise HTTPException( + status_code=400, + detail=api_error("owner_users_not_found", "One or more owner users were not found"), + ) encrypted_password = encrypt_secret(payload.password) created_targets: list[Target] = [] @@ -155,7 +165,10 @@ async def create_target( if payload.discover_all_databases: databases = await _discover_databases(payload) if not databases: - raise HTTPException(status_code=400, detail="No databases discovered on target") + raise HTTPException( + status_code=400, + detail=api_error("no_databases_discovered", "No databases discovered on target"), + ) group_id = str(uuid4()) base_tags = payload.tags or {} for dbname in databases: @@ -194,7 +207,10 @@ async def create_target( await _set_target_owners(db, target.id, owner_ids, user.id) if not created_targets: - raise HTTPException(status_code=400, detail="All discovered databases already exist as targets") + raise HTTPException( + status_code=400, + detail=api_error("all_discovered_databases_exist", "All discovered databases already exist as targets"), + ) await db.commit() for item in created_targets: await db.refresh(item) @@ -247,7 +263,7 @@ async def get_target(target_id: int, user: User = Depends(get_current_user), db: _ = user target = await db.scalar(select(Target).where(Target.id == target_id)) if not target: - raise HTTPException(status_code=404, detail="Target not found") + raise HTTPException(status_code=404, detail=api_error("target_not_found", "Target not found")) owner_map = await _owners_by_target_ids(db, [target.id]) return _target_out_with_owners(target, owner_map.get(target.id, [])) @@ -261,7 +277,7 @@ async def update_target( ) -> TargetOut: target = await db.scalar(select(Target).where(Target.id == target_id)) if not target: - raise HTTPException(status_code=404, detail="Target not found") + raise HTTPException(status_code=404, detail=api_error("target_not_found", "Target not found")) updates = payload.model_dump(exclude_unset=True) owner_user_ids = updates.pop("owner_user_ids", None) @@ -273,7 +289,10 @@ async def update_target( if owner_user_ids is not None: owners_exist = (await db.scalars(select(User.id).where(User.id.in_(owner_user_ids)))).all() if len(set(owners_exist)) != len(set(owner_user_ids)): - raise HTTPException(status_code=400, detail="One or more owner users were not found") + raise HTTPException( + status_code=400, + detail=api_error("owner_users_not_found", "One or more owner users were not found"), + ) await _set_target_owners(db, target.id, owner_user_ids, user.id) await db.commit() @@ -292,12 +311,15 @@ async def set_target_owners( ) -> list[TargetOwnerOut]: target = await db.scalar(select(Target).where(Target.id == target_id)) if not target: - raise HTTPException(status_code=404, detail="Target not found") + raise HTTPException(status_code=404, detail=api_error("target_not_found", "Target not found")) owner_user_ids = sorted(set(payload.user_ids)) if owner_user_ids: owners_exist = (await db.scalars(select(User.id).where(User.id.in_(owner_user_ids)))).all() if len(set(owners_exist)) != len(set(owner_user_ids)): - raise HTTPException(status_code=400, detail="One or more owner users were not found") + raise HTTPException( + status_code=400, + detail=api_error("owner_users_not_found", "One or more owner users were not found"), + ) await _set_target_owners(db, target_id, owner_user_ids, user.id) await db.commit() await write_audit_log(db, "target.owners.update", user.id, {"target_id": target_id, "owner_user_ids": owner_user_ids}) @@ -321,7 +343,7 @@ async def get_target_owners( _ = user target = await db.scalar(select(Target).where(Target.id == target_id)) if not target: - raise HTTPException(status_code=404, detail="Target not found") + raise HTTPException(status_code=404, detail=api_error("target_not_found", "Target not found")) rows = ( await db.execute( select(User.id, User.email, User.role) @@ -341,7 +363,7 @@ async def delete_target( ) -> dict: target = await db.scalar(select(Target).where(Target.id == target_id)) if not target: - raise HTTPException(status_code=404, detail="Target not found") + raise HTTPException(status_code=404, detail=api_error("target_not_found", "Target not found")) await db.delete(target) await db.commit() await write_audit_log(db, "target.delete", user.id, {"target_id": target_id}) @@ -369,7 +391,22 @@ async def get_metrics( async def _live_conn(target: Target) -> asyncpg.Connection: - return await asyncpg.connect(dsn=build_target_dsn(target)) + try: + return await asyncpg.connect(dsn=build_target_dsn(target)) + except (OSError, asyncpg.PostgresError) as exc: + raise HTTPException( + status_code=503, + detail=api_error( + "target_unreachable", + "Target database is not reachable", + { + "target_id": target.id, + "host": target.host, + "port": target.port, + "error": str(exc), + }, + ), + ) from exc @router.get("/{target_id}/locks") @@ -377,7 +414,7 @@ async def get_locks(target_id: int, user: User = Depends(get_current_user), db: _ = user target = await db.scalar(select(Target).where(Target.id == target_id)) if not target: - raise HTTPException(status_code=404, detail="Target not found") + raise HTTPException(status_code=404, detail=api_error("target_not_found", "Target not found")) conn = await _live_conn(target) try: rows = await conn.fetch( @@ -398,7 +435,7 @@ async def get_activity(target_id: int, user: User = Depends(get_current_user), d _ = user target = await db.scalar(select(Target).where(Target.id == target_id)) if not target: - raise HTTPException(status_code=404, detail="Target not found") + raise HTTPException(status_code=404, detail=api_error("target_not_found", "Target not found")) conn = await _live_conn(target) try: rows = await conn.fetch( @@ -420,7 +457,7 @@ async def get_top_queries(target_id: int, user: User = Depends(get_current_user) _ = user target = await db.scalar(select(Target).where(Target.id == target_id)) if not target: - raise HTTPException(status_code=404, detail="Target not found") + raise HTTPException(status_code=404, detail=api_error("target_not_found", "Target not found")) if not target.use_pg_stat_statements: return [] rows = ( @@ -450,5 +487,20 @@ async def get_overview(target_id: int, user: User = Depends(get_current_user), d _ = user target = await db.scalar(select(Target).where(Target.id == target_id)) if not target: - raise HTTPException(status_code=404, detail="Target not found") - return await get_target_overview(target) + raise HTTPException(status_code=404, detail=api_error("target_not_found", "Target not found")) + try: + return await get_target_overview(target) + except (OSError, asyncpg.PostgresError) as exc: + raise HTTPException( + status_code=503, + detail=api_error( + "target_unreachable", + "Target database is not reachable", + { + "target_id": target.id, + "host": target.host, + "port": target.port, + "error": str(exc), + }, + ), + ) from exc diff --git a/backend/app/core/config.py b/backend/app/core/config.py index e54f34a..899dba8 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -2,7 +2,7 @@ from functools import lru_cache from pydantic import field_validator from pydantic_settings import BaseSettings, SettingsConfigDict -NEXAPG_VERSION = "0.1.8" +NEXAPG_VERSION = "0.2.0" class Settings(BaseSettings): diff --git a/backend/app/core/deps.py b/backend/app/core/deps.py index 0260c1b..b91a92a 100644 --- a/backend/app/core/deps.py +++ b/backend/app/core/deps.py @@ -5,6 +5,7 @@ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession from app.core.config import get_settings from app.core.db import get_db +from app.core.errors import api_error from app.models.models import User settings = get_settings() @@ -16,27 +17,42 @@ async def get_current_user( db: AsyncSession = Depends(get_db), ) -> User: if not credentials: - raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Missing token") + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=api_error("missing_token", "Missing token"), + ) token = credentials.credentials try: payload = jwt.decode(token, settings.jwt_secret_key, algorithms=[settings.jwt_algorithm]) except jwt.InvalidTokenError as exc: - raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token") from exc + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=api_error("invalid_token", "Invalid token"), + ) from exc if payload.get("type") != "access": - raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token type") + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=api_error("invalid_token_type", "Invalid token type"), + ) user_id = payload.get("sub") user = await db.scalar(select(User).where(User.id == int(user_id))) if not user: - raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="User not found") + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=api_error("user_not_found", "User not found"), + ) return user def require_roles(*roles: str): async def role_dependency(user: User = Depends(get_current_user)) -> User: if user.role not in roles: - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail=api_error("forbidden", "Forbidden"), + ) return user return role_dependency diff --git a/backend/app/core/errors.py b/backend/app/core/errors.py index a4c1f3c..60c2bbc 100644 --- a/backend/app/core/errors.py +++ b/backend/app/core/errors.py @@ -12,6 +12,14 @@ def error_payload(code: str, message: str, details: Any, request_id: str) -> dic } +def api_error(code: str, message: str, details: Any = None) -> dict[str, Any]: + return { + "code": code, + "message": message, + "details": details, + } + + def http_status_to_code(status_code: int) -> str: mapping = { 400: "bad_request", @@ -28,4 +36,3 @@ def http_status_to_code(status_code: int) -> str: 504: "gateway_timeout", } return mapping.get(status_code, f"http_{status_code}") - diff --git a/backend/app/services/alerts.py b/backend/app/services/alerts.py index 199205d..0180961 100644 --- a/backend/app/services/alerts.py +++ b/backend/app/services/alerts.py @@ -11,6 +11,7 @@ from sqlalchemy import desc, func, select from sqlalchemy.ext.asyncio import AsyncSession from app.core.config import get_settings +from app.core.errors import api_error from app.models.models import AlertDefinition, Metric, QueryStat, Target from app.schemas.alert import AlertStatusItem, AlertStatusResponse from app.services.collector import build_target_dsn @@ -144,25 +145,40 @@ def get_standard_alert_reference() -> list[dict[str, str]]: def validate_alert_thresholds(comparison: str, warning_threshold: float | None, alert_threshold: float) -> None: if comparison not in _ALLOWED_COMPARISONS: - raise HTTPException(status_code=400, detail=f"Invalid comparison. Use one of {sorted(_ALLOWED_COMPARISONS)}") + raise HTTPException( + status_code=400, + detail=api_error( + "invalid_comparison", + f"Invalid comparison. Use one of {sorted(_ALLOWED_COMPARISONS)}", + ), + ) if warning_threshold is None: return if comparison in {"gte", "gt"} and warning_threshold > alert_threshold: - raise HTTPException(status_code=400, detail="For gte/gt, warning_threshold must be <= alert_threshold") + raise HTTPException( + status_code=400, + detail=api_error("invalid_thresholds", "For gte/gt, warning_threshold must be <= alert_threshold"), + ) if comparison in {"lte", "lt"} and warning_threshold < alert_threshold: - raise HTTPException(status_code=400, detail="For lte/lt, warning_threshold must be >= alert_threshold") + raise HTTPException( + status_code=400, + detail=api_error("invalid_thresholds", "For lte/lt, warning_threshold must be >= alert_threshold"), + ) def validate_alert_sql(sql_text: str) -> str: sql = sql_text.strip().rstrip(";") lowered = sql.lower().strip() if not lowered.startswith("select"): - raise HTTPException(status_code=400, detail="Alert SQL must start with SELECT") + raise HTTPException(status_code=400, detail=api_error("invalid_alert_sql", "Alert SQL must start with SELECT")) if _FORBIDDEN_SQL_WORDS.search(lowered): - raise HTTPException(status_code=400, detail="Only read-only SELECT statements are allowed") + raise HTTPException( + status_code=400, + detail=api_error("invalid_alert_sql", "Only read-only SELECT statements are allowed"), + ) if ";" in sql: - raise HTTPException(status_code=400, detail="Only a single SQL statement is allowed") + raise HTTPException(status_code=400, detail=api_error("invalid_alert_sql", "Only a single SQL statement is allowed")) return sql diff --git a/backend/app/services/collector.py b/backend/app/services/collector.py index 51bbe3c..6befd5c 100644 --- a/backend/app/services/collector.py +++ b/backend/app/services/collector.py @@ -195,6 +195,7 @@ async def collect_once() -> None: except (OSError, SQLAlchemyError, asyncpg.PostgresError, Exception) as exc: now = datetime.now(timezone.utc) current_error = str(exc) + error_class = exc.__class__.__name__ state = _failure_state.get(target.id) if state is None: _failure_state[target.id] = { @@ -202,7 +203,13 @@ async def collect_once() -> None: "last_log_at": now, "error": current_error, } - logger.exception("collector_error target=%s err=%s", target.id, exc) + logger.warning( + "collector_target_unreachable target=%s error_class=%s err=%s consecutive_failures=%s", + target.id, + error_class, + current_error, + 1, + ) continue count = int(state.get("count", 0)) + 1 @@ -220,9 +227,10 @@ async def collect_once() -> None: if should_log: state["last_log_at"] = now state["error"] = current_error - logger.error( - "collector_error_throttled target=%s err=%s consecutive_failures=%s", + logger.warning( + "collector_target_unreachable target=%s error_class=%s err=%s consecutive_failures=%s", target.id, + error_class, current_error, count, )