import asyncio import logging from datetime import datetime, timezone from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.exc import SQLAlchemyError from app.core.config import get_settings from app.core.db import SessionLocal from app.models.models import Metric, QueryStat, Target from app.services.crypto import decrypt_secret import asyncpg logger = logging.getLogger(__name__) settings = get_settings() def build_target_dsn(target: Target) -> str: password = decrypt_secret(target.encrypted_password) return ( f"postgresql://{target.username}:{password}" f"@{target.host}:{target.port}/{target.dbname}?sslmode={target.sslmode}" ) async def _store_metric(db: AsyncSession, target_id: int, name: str, value: float, labels: dict | None = None) -> None: db.add( Metric( target_id=target_id, ts=datetime.now(timezone.utc), metric_name=name, value=float(value), labels=labels or {}, ) ) async def collect_target(target: Target) -> None: dsn = build_target_dsn(target) conn = await asyncpg.connect(dsn=dsn) try: stat_db = await conn.fetchrow( """ SELECT numbackends, xact_commit, xact_rollback, blks_hit, blks_read, tup_returned, tup_fetched FROM pg_stat_database WHERE datname = current_database() """ ) activity = await conn.fetchrow( """ SELECT count(*) FILTER (WHERE state = 'active') AS active_connections, count(*) AS total_connections FROM pg_stat_activity WHERE datname = current_database() """ ) bgwriter = await conn.fetchrow( """ SELECT checkpoints_timed, checkpoints_req, buffers_checkpoint, buffers_clean, maxwritten_clean FROM pg_stat_bgwriter """ ) if stat_db is None: stat_db = { "numbackends": 0, "xact_commit": 0, "xact_rollback": 0, "blks_hit": 0, "blks_read": 0, "tup_returned": 0, "tup_fetched": 0, } if activity is None: activity = {"active_connections": 0, "total_connections": 0} if bgwriter is None: bgwriter = { "checkpoints_timed": 0, "checkpoints_req": 0, "buffers_checkpoint": 0, "buffers_clean": 0, "maxwritten_clean": 0, } lock_count = await conn.fetchval("SELECT count(*) FROM pg_locks") cache_hit_ratio = 0.0 if stat_db and (stat_db["blks_hit"] + stat_db["blks_read"]) > 0: cache_hit_ratio = stat_db["blks_hit"] / (stat_db["blks_hit"] + stat_db["blks_read"]) query_rows = [] try: query_rows = await conn.fetch( """ SELECT queryid::text, calls, total_exec_time, mean_exec_time, rows, left(query, 2000) AS query_text FROM pg_stat_statements ORDER BY total_exec_time DESC LIMIT 20 """ ) except Exception: # Extension may be disabled on monitored instance. query_rows = [] async with SessionLocal() as db: await _store_metric(db, target.id, "connections_total", activity["total_connections"], {}) await _store_metric(db, target.id, "connections_active", activity["active_connections"], {}) await _store_metric(db, target.id, "xacts_total", stat_db["xact_commit"] + stat_db["xact_rollback"], {}) await _store_metric(db, target.id, "cache_hit_ratio", cache_hit_ratio, {}) await _store_metric(db, target.id, "locks_total", lock_count, {}) await _store_metric(db, target.id, "checkpoints_timed", bgwriter["checkpoints_timed"], {}) await _store_metric(db, target.id, "checkpoints_req", bgwriter["checkpoints_req"], {}) for row in query_rows: db.add( QueryStat( target_id=target.id, ts=datetime.now(timezone.utc), queryid=row["queryid"] or "0", calls=row["calls"] or 0, total_time=row["total_exec_time"] or 0.0, mean_time=row["mean_exec_time"] or 0.0, rows=row["rows"] or 0, query_text=row["query_text"], ) ) await db.commit() finally: await conn.close() async def collect_once() -> None: async with SessionLocal() as db: targets = (await db.scalars(select(Target))).all() for target in targets: try: await collect_target(target) except (OSError, SQLAlchemyError, asyncpg.PostgresError, Exception) as exc: logger.exception("collector_error target=%s err=%s", target.id, exc) async def collector_loop(stop_event: asyncio.Event) -> None: while not stop_event.is_set(): await collect_once() try: await asyncio.wait_for(stop_event.wait(), timeout=settings.poll_interval_seconds) except asyncio.TimeoutError: pass