checking system…
Docs / back / src/maf/doctor.py · line 1
Python · 283 lines
  1"""``python -m maf doctor`` — preflight check for every external dependency.
  2
  3Output is shaped to be **actionable**: each failed check ships a hint pointing
  4at the env var to set or the service to start. Exit code is 0 only when every
  5required check passes; non-zero otherwise so CI can gate on it.
  6
  7Required checks
  8---------------
  9* ``redis``    — Redis reachable on ``REDIS_URL``
 10* ``ollama``   — Ollama Cloud reachable + API key valid (``OLLAMA_API_KEY``)
 11* ``configs``  — every arena YAML in ``config/arenas/`` is loadable
 12* ``streams``  — the event/control/actions streams can be XADDed to
 13
 14Optional checks (degrade-but-not-fail)
 15--------------------------------------
 16* ``trtools2`` — ``engine.db.questdb`` importable
 17* ``fomo2``    — ``fomo2`` package importable
 18* ``mirofish`` — ``/health`` returns 200 on the configured base_url
 19"""
 20
 21from __future__ import annotations
 22
 23import asyncio
 24import logging
 25import os
 26import sys
 27import time
 28from typing import Any
 29
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# ANSI colour helpers — best-effort, no dependency on a TTY detector.
# Each constant is a raw SGR escape sequence. They are only emitted through
# ``_colour``, which appends ``_RESET`` and skips colouring entirely when
# ``_use_color()`` is false.
_GREEN = "\033[32m"   # green foreground — passing checks
_RED = "\033[31m"     # red foreground — failed required checks
_YELLOW = "\033[33m"  # yellow foreground — failed optional checks
_DIM = "\033[2m"      # faint text — timings and section labels
_BOLD = "\033[1m"     # bold text — header and final verdict
_RESET = "\033[0m"    # clears every attribute set above
 40
 41
 42def _use_color() -> bool:
 43    return sys.stdout.isatty() and os.environ.get("NO_COLOR") is None
 44
 45
 46def _colour(s: str, code: str) -> str:
 47    return f"{code}{s}{_RESET}" if _use_color() else s
 48
 49
 50def _ok(s: str = "ok") -> str:    return _colour(s, _GREEN)
 51def _bad(s: str = "FAIL") -> str: return _colour(s, _RED)
 52def _warn(s: str = "warn") -> str: return _colour(s, _YELLOW)
 53def _dim(s: str) -> str:          return _colour(s, _DIM)
 54def _bold(s: str) -> str:         return _colour(s, _BOLD)
 55
 56
 57async def _check_redis() -> tuple[bool, str, float]:
 58    url = os.environ.get("REDIS_URL", "redis://localhost:6379/0")
 59    t0 = time.monotonic()
 60    try:
 61        import redis.asyncio as aioredis
 62        c = aioredis.from_url(url)
 63        await c.ping()
 64        try:
 65            ac = getattr(c, "aclose", None)
 66            if ac:
 67                await ac()
 68            else:
 69                await c.close()
 70        except Exception:
 71            pass
 72        return True, f"ping ok on {url}", (time.monotonic() - t0) * 1000
 73    except Exception as exc:
 74        return False, (
 75            f"{type(exc).__name__}: {exc}. "
 76            "Hint: start Redis (``docker run -p 6379:6379 redis:7``) or set REDIS_URL"
 77        ), (time.monotonic() - t0) * 1000
 78
 79
 80async def _check_ollama() -> tuple[bool, str, float]:
 81    key = os.environ.get("OLLAMA_API_KEY", "")
 82    if not key:
 83        return False, "OLLAMA_API_KEY not set. Hint: copy from Ollama Cloud → .env", 0.0
 84    t0 = time.monotonic()
 85    try:
 86        import httpx
 87        async with httpx.AsyncClient(timeout=10.0) as c:
 88            resp = await c.get(
 89                "https://ollama.com/v1/models",
 90                headers={"Authorization": f"Bearer {key}"},
 91            )
 92        ok = resp.status_code == 200
 93        if not ok:
 94            return False, (
 95                f"HTTP {resp.status_code} from ollama.com. "
 96                "Hint: verify OLLAMA_API_KEY at https://ollama.com/settings"
 97            ), (time.monotonic() - t0) * 1000
 98        models = resp.json().get("data", []) or resp.json().get("models", [])
 99        return True, f"{len(models)} models available", (time.monotonic() - t0) * 1000
100    except Exception as exc:
101        return False, (
102            f"{type(exc).__name__}: {exc}. Hint: check network / firewall"
103        ), (time.monotonic() - t0) * 1000
104
105
def _check_configs() -> tuple[bool, str, float]:
    """Load the MAF configuration and make sure it defines at least one arena.

    Returns:
        ``(ok, message, elapsed_ms)``. On success ``message`` lists the arena
        names; on failure it carries a hint.
    """
    started = time.monotonic()

    def elapsed_ms() -> float:
        return (time.monotonic() - started) * 1000

    try:
        from maf.config import load_config

        arena_names = [arena.name for arena in load_config().arenas]
        if arena_names:
            summary = f"{len(arena_names)} arenas: {', '.join(arena_names)}"
            return True, summary, elapsed_ms()
        nothing_found = (
            "config/default.yaml loaded but no arena YAMLs found. "
            "Hint: drop a YAML in config/arenas/"
        )
        return False, nothing_found, elapsed_ms()
    except Exception as err:
        problem = (
            f"{type(err).__name__}: {err}. Hint: check config/default.yaml YAML syntax"
        )
        return False, problem, elapsed_ms()
122
123
async def _check_streams() -> tuple[bool, str, float]:
    """Verify that every configured MAF stream accepts an ``XADD``.

    A probe entry is appended to the events, control-in, control-out and
    actions-out streams and then deleted again. Nothing is read back, so
    only write access is verified.

    Returns:
        ``(ok, message, elapsed_ms)``. A configuration that cannot be loaded
        is reported as a failed check rather than raised.
    """
    t0 = time.monotonic()
    try:
        from maf.config import load_config

        cfg = load_config()
        url = cfg.redis_url
        streams = [
            cfg.streams.events_stream,
            cfg.streams.control_in,
            cfg.streams.control_out,
            cfg.streams.actions_out,
        ]
    except Exception as exc:
        return False, (
            f"{type(exc).__name__}: {exc}. "
            "Hint: config not loadable; fix the ``configs`` check first"
        ), (time.monotonic() - t0) * 1000

    # Time only the Redis work, not the config load above.
    t0 = time.monotonic()
    client = None
    try:
        import redis.asyncio as aioredis

        client = aioredis.from_url(url)
        for stream in streams:
            # No MAXLEN here: these are live streams, and a trimming XADD
            # would evict real entries just to run a health check.
            entry_id = await client.xadd(stream, {"data": "{\"probe\": true}"})
            try:
                # Remove the probe so consumers are not left with junk.
                await client.xdel(stream, entry_id)
            except Exception:
                # Best-effort: the XADD already proved the stream is
                # writable, which is all this check promises.
                pass
        return True, f"probed {len(streams)} streams ok", (time.monotonic() - t0) * 1000
    except Exception as exc:
        return False, (
            f"{type(exc).__name__}: {exc}. Hint: Redis writable? Streams permissions?"
        ), (time.monotonic() - t0) * 1000
    finally:
        # Close on every path so a failed XADD does not leak the pool.
        # ``aclose`` only exists on newer redis-py; older ones use ``close``.
        if client is not None:
            try:
                close = getattr(client, "aclose", None) or client.close
                await close()
            except Exception:
                pass
155
156
def _check_trtools2() -> tuple[bool, str, float]:
    """Report whether ``engine.db.questdb`` can be imported.

    Returns:
        ``(ok, message, elapsed_ms)``; the failure message explains how to
        make the package importable.
    """
    started = time.monotonic()
    try:
        from importlib import import_module

        import_module("engine.db.questdb")
    except Exception as err:
        problem = (
            f"{type(err).__name__}: {err}. Hint: add trtools2/src to PYTHONPATH "
            "or install with pip install -e ../trtools2"
        )
        return False, problem, (time.monotonic() - started) * 1000
    return True, "engine.db.questdb importable", (time.monotonic() - started) * 1000
168
169
def _check_fomo2() -> tuple[bool, str, float]:
    """Report whether the ``fomo2`` package can be imported.

    Returns:
        ``(ok, message, elapsed_ms)``; the failure message explains how to
        install the package.
    """
    started = time.monotonic()
    try:
        from importlib import import_module

        import_module("fomo2")
    except Exception as err:
        problem = (
            f"{type(err).__name__}: {err}. "
            "Hint: pip install -e ../fomo2 to enable knowledge + enrichment adapters"
        )
        return False, problem, (time.monotonic() - started) * 1000
    return True, "fomo2 package importable", (time.monotonic() - started) * 1000
181
182
async def _check_mirofish() -> tuple[bool, str, float]:
    """Probe the mirofish service's ``/health`` endpoint.

    The base URL comes from the ``mirofish`` module entry in the MAF config
    (``base_url``), defaulting to ``http://localhost:5101``. A module that is
    present but disabled is reported as passing and is not contacted.

    Returns:
        ``(ok, message, elapsed_ms)``. A configuration that cannot be loaded
        is reported as a failed check rather than raised.
    """
    t0 = time.monotonic()
    base = "http://localhost:5101"
    try:
        from maf.config import load_config

        for module in load_config().modules:
            if module.name != "mirofish":
                continue
            if not module.enabled:
                return True, "disabled (skip)", 0.0
            base = module.config.get("base_url", base)
            break
    except Exception as exc:
        return False, (
            f"{type(exc).__name__}: {exc}. "
            "Hint: config not loadable; fix the ``configs`` check first"
        ), (time.monotonic() - t0) * 1000

    # Time only the HTTP round-trip, not the config load above.
    t0 = time.monotonic()
    try:
        import httpx

        async with httpx.AsyncClient(timeout=3.0) as client:
            resp = await client.get(f"{base}/health")
        ok = resp.status_code == 200
        return ok, (
            f"{base} → HTTP {resp.status_code}"
            if ok else
            f"{base} → HTTP {resp.status_code}. "
            "Hint: docker compose -f docker-compose.mirofish.yml up -d"
        ), (time.monotonic() - t0) * 1000
    except Exception as exc:
        return False, (
            f"{type(exc).__name__}: {exc}. "
            "Hint: mirofish container down; docker compose -f docker-compose.mirofish.yml up -d"
        ), (time.monotonic() - t0) * 1000
210
211
async def run_all_checks() -> int:
    """Run every doctor check and print a coloured summary.

    Required checks are run and printed first, then the optional ones. A
    failing optional check is shown with a warning marker but does not
    affect the result. A check that raises is reported as a failure of that
    check instead of aborting the run.

    Returns the exit code: 0 if every *required* check passed, else 1.
    """
    # Importing maf.config also auto-loads .env so the env-var checks below
    # see what an in-process MAFApp would see. Do this once, up front.
    try:
        from maf.config import load_config
        load_config()
    except Exception:
        # Deliberately ignored here: the ``configs`` check below loads the
        # config again and reports the failure with a hint.
        pass

    # ``_bold`` already falls back to plain text when colour is disabled.
    print(_bold("\nMAF doctor — preflight checks\n"))

    required = [
        ("redis",   _check_redis),
        ("ollama",  _check_ollama),
        ("configs", _check_configs),
        ("streams", _check_streams),
    ]
    optional = [
        ("trtools2", _check_trtools2),
        ("fomo2",    _check_fomo2),
        ("mirofish", _check_mirofish),
    ]

    async def _run(check: Any) -> tuple[bool, str, float]:
        """Call a sync or async check, turning a crash into a failed result."""
        t0 = time.monotonic()
        try:
            result = check()
            if asyncio.iscoroutine(result):
                result = await result
            return result  # type: ignore[return-value]
        except Exception as exc:
            return False, (
                f"{type(exc).__name__}: {exc}. "
                "Hint: the check itself crashed; fix earlier failures and re-run"
            ), (time.monotonic() - t0) * 1000

    async def _report(title: str, checks: Any, fail_text: str, fail_style: Any) -> bool:
        """Print one section of results; return True when every check passed."""
        print(_dim(title))
        all_ok = True
        for name, check in checks:
            ok, msg, ms = await _run(check)
            marker = _ok("[✓]") if ok else fail_style(fail_text)
            # Pad BEFORE colouring: the escape sequences are invisible but
            # would otherwise count towards the 14-column field width and
            # misalign the messages on a colour terminal.
            timing = f"({ms:.0f}ms)"
            print(f"  {marker} {name:<10} {_dim(f'{timing:<14}')} {msg}")
            all_ok = all_ok and ok
        return all_ok

    overall_ok = await _report("required:", required, "[✗]", _bad)

    print()
    # Optional failures are shown as warnings and never affect the exit code.
    await _report("optional:", optional, "[!]", _warn)

    print()
    if overall_ok:
        print(_ok(_bold("OK")) + " — engine is ready. Try:")
        print("  python -m maf --dashboard          (web UI at :8420)")
        print("  python -m maf --arena mastermind --ticker NVDA")
        print("  python -m maf                      (long-running service)")
        return 0

    print(
        _bad(_bold("NOT READY"))
        + " — fix the [✗] lines above and re-run ``python -m maf doctor``."
    )
    return 1
276
277
def main() -> int:
    """Run all doctor checks on a fresh event loop and return the exit code."""
    exit_code = asyncio.run(run_all_checks())
    return exit_code
280
281
282__all__ = ["main", "run_all_checks"]