checking system…
Docs / back / src/maf/config.py · line 182
Python · 414 lines
  1"""Configuration management — Pydantic models loaded from YAML with ${ENV_VAR} interpolation."""
  2
  3from __future__ import annotations
  4
  5import logging
  6import os
  7import re
  8from pathlib import Path
  9from typing import Any, Literal
 10
 11import yaml
 12from pydantic import BaseModel
 13
 14logger = logging.getLogger(__name__)
 15
 16# Auto-load .env
 17try:
 18    from dotenv import load_dotenv
 19
 20    for p in [Path.cwd() / ".env", Path(__file__).parent.parent.parent / ".env"]:
 21        if p.exists():
 22            load_dotenv(p)
 23            break
 24except ImportError:
 25    pass
 26
 27_ENV_VAR_RE = re.compile(r"\$\{(\w+)}")
 28
 29
 30def _interpolate_env(value: Any) -> Any:
 31    """Recursively replace ${ENV_VAR} placeholders with environment values."""
 32    if isinstance(value, str):
 33
 34        def _replace(m: re.Match[str]) -> str:
 35            var_name = m.group(1)
 36            val = os.environ.get(var_name)
 37            if val is None:
 38                logger.warning("Environment variable %s is not set (used in config)", var_name)
 39                return ""
 40            return val
 41
 42        return _ENV_VAR_RE.sub(_replace, value)
 43    if isinstance(value, dict):
 44        return {k: _interpolate_env(v) for k, v in value.items()}
 45    if isinstance(value, list):
 46        return [_interpolate_env(v) for v in value]
 47    return value
 48
 49
 50# ---------------------------------------------------------------------------
 51# LLM configuration
 52# ---------------------------------------------------------------------------
 53
 54
 55class LLMProviderConfig(BaseModel):
 56    api_key: str = ""
 57    model: str = ""
 58    base_url: str = ""
 59    max_output_tokens: int = 4096
 60    temperature: float = 0.4
 61
 62
 63class LLMConfig(BaseModel):
 64    default_provider: str = "anthropic"
 65    providers: dict[str, LLMProviderConfig] = {}
 66    quick_provider: str | None = None
 67    quick_model: str | None = None
 68    deep_provider: str | None = None
 69    deep_model: str | None = None
 70
 71
 72# ---------------------------------------------------------------------------
 73# Memory configuration
 74# ---------------------------------------------------------------------------
 75
 76
 77class MemoryInstanceConfig(BaseModel):
 78    name: str
 79    type: Literal["bm25", "chroma", "hybrid"] = "bm25"
 80    persist_path: str = ""
 81
 82
 83class MemoryConfig(BaseModel):
 84    instances: list[MemoryInstanceConfig] = []
 85    chromadb_path: str = "./data/chromadb"
 86    embedding_provider: str = "gemini"
 87    embedding_model: str = "text-embedding-004"
 88
 89
 90# ---------------------------------------------------------------------------
 91# Source configuration
 92# ---------------------------------------------------------------------------
 93
 94
 95# ---------------------------------------------------------------------------
 96# Target abstraction
 97# ---------------------------------------------------------------------------
 98#
 99# Every arena runs over a target. Until now ``target_key`` was the only
100# discriminator and most arenas defaulted to "ticker". That made the
101# framework feel ticker-centric even though it was always generic.
102#
103# The new model: arenas, agents, and source bindings can declare which
104# target types they apply to. ``applicable_target_types: []`` (the
105# default) means "applies to everything" — backwards-compatible.
106#
107# Canonical target types (extend as needed; we don't validate the list):
108#
109#   ticker      — a single tradeable instrument (NVDA, BTC-USD, …)
110#   sector      — a sector or thematic basket ("AI semis", with peers)
111#   tickers     — a peer set without a sector framing (basket trade)
112#   question    — a free-text question to deliberate on
113#   document    — a document / RFC / paper to review
114#   event       — a news event or earnings event id
115#   deal        — an M&A / PE deal under diligence
116#   free_text   — anything else; the intent_router classifies it
117
118
# Canonical target type names, in the order they are documented above.
# Kept as a plain list; nothing in this module validates against it.
TARGET_TYPES = [
    "ticker",
    "sector",
    "tickers",
    "question",
    "document",
    "event",
    "deal",
    "free_text",
]
123
124
class Target(BaseModel):
    """Typed wrapper around an arena's run target.

    Most arena runs still receive ``target`` as a plain dict (legacy);
    :func:`Target.from_dict` reconstructs a Target from that dict so
    the auto-prune logic at :meth:`Arena.run` has a typed view.

    Fields:
        type:           One of :data:`TARGET_TYPES` (not validated here).
                        ``"free_text"`` is the default and is what
                        :meth:`from_dict` uses when no known shape matches.
        primary_id:     The main identifier — e.g. ``NVDA`` for ticker,
                        ``AI semiconductors`` for sector, a question_id, etc.
        secondary_ids:  Optional related ids — e.g. tickers in a sector.
        metadata:       Free-form extras (angle, date, …).
    """

    type: str = "free_text"
    primary_id: str = ""
    secondary_ids: list[str] = []
    metadata: dict[str, Any] = {}

    @classmethod
    def from_dict(cls, raw: dict[str, Any] | None) -> "Target":
        """Build a :class:`Target` from a plain target dict.

        Args:
            raw: The target dict, or ``None``. A falsy value yields a
                default ``Target()``.

        Returns:
            A Target built by the first rule that matches:

            1. ``type`` and ``primary_id`` both present — ``raw`` is passed
               to the constructor unchanged.
            2. ``ticker`` / ``sector`` / ``tickers`` / ``question_id`` or
               ``question`` / ``document_id`` / ``event_id`` / ``deal_id``
               — the type is inferred from the key, the identifier is
               coerced to ``str``, and every key not consumed by the rule
               is kept in ``metadata``.
            3. Otherwise ``free_text``, with ``raw["text"]`` (if truthy) as
               the id and a copy of the whole dict as ``metadata``.
        """
        if not raw:
            return cls()
        # Already typed? trust it.
        if "type" in raw and "primary_id" in raw:
            return cls(**raw)

        def leftover(*consumed: str) -> dict[str, Any]:
            # Keys that were not turned into typed fields stay available
            # to callers through ``metadata``.
            return {k: v for k, v in raw.items() if k not in consumed}

        def as_ids(value: Any) -> list[str]:
            # Normalise a "tickers"-style value to a list of string ids.
            # ``None`` means "no ids" (it must not become the id "None");
            # a bare scalar is treated as a one-element list.
            if value is None:
                return []
            if isinstance(value, (list, tuple)):
                return [str(item) for item in value]
            return [str(value)]

        # Legacy shapes — infer type from the first matching field.
        if "ticker" in raw:
            return cls(
                type="ticker",
                primary_id=str(raw["ticker"]),
                metadata=leftover("ticker"),
            )
        if "sector" in raw:
            return cls(
                type="sector",
                primary_id=str(raw["sector"]),
                # Any falsy ``tickers`` value counts as "no peers".
                secondary_ids=as_ids(raw.get("tickers") or None),
                metadata=leftover("sector", "tickers"),
            )
        if "tickers" in raw:
            ids = as_ids(raw["tickers"])
            return cls(
                type="tickers",
                primary_id=ids[0] if ids else "",
                secondary_ids=ids[1:],
                # Keep the remaining keys, as every other branch does.
                metadata=leftover("tickers"),
            )
        if "question_id" in raw or "question" in raw:
            qid = raw.get("question_id") or raw.get("question") or ""
            return cls(
                type="question",
                primary_id=str(qid),
                metadata=leftover("question_id", "question"),
            )
        if "document_id" in raw:
            return cls(
                type="document",
                primary_id=str(raw["document_id"]),
                metadata=leftover("document_id"),
            )
        if "event_id" in raw:
            return cls(
                type="event",
                primary_id=str(raw["event_id"]),
                metadata=leftover("event_id"),
            )
        if "deal_id" in raw:
            return cls(
                type="deal",
                primary_id=str(raw["deal_id"]),
                metadata=leftover("deal_id"),
            )
        # Fall through — free_text with the whole dict in metadata.
        return cls(
            type="free_text",
            primary_id=str(raw.get("text") or ""),
            metadata=leftover(),
        )
190
191
class SourceBinding(BaseModel):
    """A named data source declared on an arena (see ``ArenaConfig.sources``).

    ``name`` and ``adapter`` are required; the rest default to empty.
    """

    name: str
    # Adapter identifier, e.g. "questdb", "fomo2", "web_search", "alpaca",
    # "custom". Not validated against a fixed set here.
    adapter: str
    # Free-form adapter settings.
    config: dict[str, Any] = {}
    # Target types this binding is useful for. An empty list means the
    # binding applies to all targets (legacy default).
    applicable_target_types: list[str] = []
199
200
201# ---------------------------------------------------------------------------
202# Agent configuration
203# ---------------------------------------------------------------------------
204
205
class ToolConfig(BaseModel):
    """One tool entry in :attr:`AgentConfig.tools`.

    ``name`` and ``source`` are required.
    """

    name: str
    # Source name, taken from the arena's source bindings.
    source: str
    description: str = ""
    # Free-form tool parameters.
    params: dict[str, Any] = {}
211
212
class AgentConfig(BaseModel):
    """Declarative description of one agent inside a phase.

    ``name`` and ``role`` are required; ``role`` and ``llm_tier`` only
    accept the literal values listed on the field.
    """

    name: str
    role: Literal[
        "analyst",
        "specialist",
        "debater",
        "judge",
        "synthesis",
        "executor",
        "watcher",
        "replan",
    ]

    # Prompt given inline and/or as a file reference. How the two combine is
    # decided outside this module.
    system_prompt: str = ""
    system_prompt_file: str | None = None

    sources: list[str] = []
    tools: list[ToolConfig] = []
    # Memory instance name, or None for no memory.
    memory: str | None = None
    llm_tier: Literal["quick", "deep"] = "quick"
    # For debaters: "bullish", "bearish", etc.
    position: str | None = None
    # Max tool-calling iterations in the ReAct loop.
    max_react_steps: int = 5
    extra: dict[str, Any] = {}
    # Target types this agent contributes to. Empty = applies to all
    # targets. Auto-prune at runtime skips agents whose list doesn't
    # include the current target.type.
    applicable_target_types: list[str] = []
232
233
234# ---------------------------------------------------------------------------
235# Phase configuration
236# ---------------------------------------------------------------------------
237
238
class PhaseConfig(BaseModel):
    """One phase of an arena: a named group of agents run under a pattern.

    ``name``, ``pattern`` and ``agents`` are required.
    """

    name: str
    pattern: Literal[
        "parallel",
        "sequential",
        "debate",
    ]
    agents: list[AgentConfig]
    # Used by the debate pattern.
    max_rounds: int = 1
    # Name of the next phase, or "END".
    transition: str = "END"
    # Run SignalProcessor on judge output.
    signal_extract: bool = False
246
247
248# ---------------------------------------------------------------------------
249# Arena configuration
250# ---------------------------------------------------------------------------
251
252
class TriggerConfig(BaseModel):
    """One declarative trigger rule under ``arena.triggers``.

    Maps to :class:`maf.triggers.dispatcher.TriggerRule` at startup.
    Only ``on_stream`` is required.
    """

    on_stream: str
    # NOTE(review): looks like a condition expression kept as a string
    # (default "True"); how it is evaluated is not visible in this module.
    when: str = "True"
    target: dict[str, Any] = {}
    cooldown_s: int = 60
    action_mode: Literal[
        "auto",
        "semi",
        "manual",
    ] = "manual"
    name: str = ""
265
266
class ArenaConfig(BaseModel):
    """Full configuration of one arena.

    Built from a single YAML file by ``load_arena_config`` or listed inline
    under ``AppConfig.arenas``. Only ``name`` is required.
    """

    name: str
    description: str = ""
    phases: list[PhaseConfig] = []
    sources: list[SourceBinding] = []
    llm: LLMConfig = LLMConfig()
    memory: MemoryConfig = MemoryConfig()
    # Cron expression or interval.
    schedule: str | None = None
    # For loop-back arenas.
    max_iterations: int = 1
    # Configurable analyst selection.
    selected_analysts: list[str] | None = None
    # Redis stream for output.
    output_stream: str | None = None
    # Dotted path to a custom state class.
    state_class: str | None = None
    # Stream-driven reactivity.
    triggers: list[TriggerConfig] = []
    # Name of the field in ``target`` that uniquely identifies the run's
    # subject: ``ticker`` for trading arenas (the default), ``question_id``
    # for a research-debate arena, ``pr_id`` for a code-review arena.
    # MAFApp uses it to choose the outbox to publish on:
    #   target_key == "ticker"  → maf:actions:out  (TradingAction)
    #   otherwise               → maf:decisions:out (GenericDecision)
    target_key: str = "ticker"
    # Target types this arena can handle. Empty = applies to all (legacy
    # default). The intent_router uses this to pick which arena to
    # dispatch a free-text request to.
    applicable_target_types: list[str] = []
291
292
293# ---------------------------------------------------------------------------
294# App configuration
295# ---------------------------------------------------------------------------
296
297
class ModuleConfig(BaseModel):
    """Configuration for a data module (fomo2, trtools2, web, etc.).

    Only ``name`` is required; a module is enabled unless stated otherwise.
    """

    name: str
    enabled: bool = True
    # Free-form module settings.
    config: dict[str, Any] = {}
303
304
class StreamsConfig(BaseModel):
    """Redis Stream names MAF uses for its realtime layer.

    See ``maf.streaming.bus.EventBus`` (outbound events), ``maf.control.inbox``
    (inbound command stream), ``maf.actions.outbox`` (outbound trading actions).
    Stream names are configurable so multiple MAF instances can share a Redis
    without colliding. Every field is a plain string with a default.
    """

    # --- MAF's own streams -------------------------------------------------
    events_stream: str = "maf:events"
    control_in: str = "maf:control:in"
    control_out: str = "maf:control:out"
    actions_out: str = "maf:actions:out"
    # Generic decisions from non-trading arenas (research_debate, etc.)
    decisions_out: str = "maf:decisions:out"

    # --- Inbound streams MAF can subscribe to (set in arena source bindings)
    trtools2_bars_1m: str = "trtools2:bars:1m"
    trtools2_bars_1h: str = "trtools2:bars:1h"
    trtools2_news: str = "trtools2:news"
    trtools2_indicators: str = "trtools2:indicators"
    trtools2_strategy_events: str = "trtools2:strategy:events"
    fomo2_enriched: str = "fomo2:enriched"
    fomo2_reports: str = "fomo2:reports"
    fomo2_requests: str = "fomo2:requests:in"
    # Kronos sidecar — compact-emit stream when forecasts change significantly.
    kronos_forecasts_emitted: str = "kronos:forecasts:emitted"
    # MiroFish refresher — fires when a fresh crowd-sim lands.
    mirofish_sims_emitted: str = "mirofish:sims:emitted"
333
334
class AppConfig(BaseModel):
    """Top-level application config, as returned by ``load_config``.

    Every field has a default, so ``AppConfig()`` is a valid configuration.
    """

    name: str = "maf"
    log_level: str = "INFO"
    redis_url: str = "redis://localhost:6379/0"

    llm: LLMConfig = LLMConfig()
    streams: StreamsConfig = StreamsConfig()
    modules: list[ModuleConfig] = []

    # Arenas declared inline; ``load_config`` appends the ones it finds as
    # ``*.yaml`` files in ``arena_dir``.
    arenas: list[ArenaConfig] = []
    arena_dir: str = "config/arenas"
    data_dir: str = "./data"

    # Cost settings (a cap in EUR per hour and a USD→EUR factor, going by
    # the field names; enforcement lives outside this module).
    max_cost_per_hour_eur: float = 10.0
    usd_to_eur: float = 0.86
347
348
349# ---------------------------------------------------------------------------
350# Config loading
351# ---------------------------------------------------------------------------
352
353
def load_arena_config(path: str | Path) -> ArenaConfig:
    """Load a single arena config from a YAML file.

    ``${ENV_VAR}`` placeholders in string values are expanded before the
    data is validated.

    Args:
        path: Location of the arena YAML file.

    Returns:
        The validated :class:`ArenaConfig`.

    Raises:
        ValueError: If the file is empty (parses to a falsy value).
        TypeError: If the top-level YAML value is not a mapping.
        OSError: If the file cannot be opened.
    """
    path = Path(path)
    # Read as UTF-8 explicitly: the platform default encoding is
    # locale-dependent and can mis-decode non-ASCII config text.
    with path.open(encoding="utf-8") as f:
        raw = yaml.safe_load(f)
    if not raw:
        raise ValueError(f"Empty arena config: {path}")
    if not isinstance(raw, dict):
        # ``ArenaConfig(**raw)`` would fail with an opaque TypeError here;
        # keep the exception type but say which file is at fault.
        raise TypeError(
            f"Arena config must be a YAML mapping, got {type(raw).__name__}: {path}"
        )
    return ArenaConfig(**_interpolate_env(raw))
363
364
def load_config(path: str | Path | None = None) -> AppConfig:
    """Load the MAF app config from YAML with ``${ENV_VAR}`` interpolation.

    Args:
        path: Config file to load. When ``None``, the first existing file
            among ``config/default.yaml`` and ``config.yaml`` under the
            current working directory, then ``config/default.yaml`` three
            directory levels above this module, is used.

    Returns:
        The validated :class:`AppConfig`. A default ``AppConfig()`` is
        returned when no file is found, when ``path`` does not exist (a
        warning is logged), or when the file is empty. Arena configs found
        as ``*.yaml`` files in ``config.arena_dir`` are appended to
        ``config.arenas``; a file that fails to load is logged and skipped.

    Raises:
        TypeError: If the top-level YAML value is not a mapping.
    """
    default_redis_url = "redis://localhost:6379/0"

    if path is None:
        candidates = (
            Path.cwd() / "config" / "default.yaml",
            Path.cwd() / "config.yaml",
            Path(__file__).parent.parent.parent / "config" / "default.yaml",
        )
        path = next((c for c in candidates if c.exists()), None)

    if path is None:
        return AppConfig()

    path = Path(path)
    if not path.exists():
        # Still fall back to defaults, but make a mistyped path visible.
        logger.warning("Config file %s does not exist; using default configuration", path)
        return AppConfig()

    # Read as UTF-8 explicitly: the platform default encoding is
    # locale-dependent and can mis-decode non-ASCII config text.
    with path.open(encoding="utf-8") as f:
        raw = yaml.safe_load(f)

    if not raw:
        return AppConfig()
    if not isinstance(raw, dict):
        # ``AppConfig(**raw)`` would fail with an opaque TypeError here;
        # keep the exception type but say which file is at fault.
        raise TypeError(
            f"App config must be a YAML mapping, got {type(raw).__name__}: {path}"
        )

    data = _interpolate_env(raw)

    # Flatten redis.url → redis_url
    if isinstance(data.get("redis"), dict):
        redis_section = data.pop("redis")
        url = redis_section.get("url")
        if url is not None:
            data["redis_url"] = url
        else:
            # A ``redis`` section without ``url`` must not clobber an
            # explicit top-level ``redis_url``.
            data.setdefault("redis_url", default_redis_url)

    config = AppConfig(**data)

    # Auto-discover arena YAML files
    arena_dir = Path(config.arena_dir)
    if arena_dir.exists():
        for arena_file in sorted(arena_dir.glob("*.yaml")):
            try:
                arena_cfg = load_arena_config(arena_file)
            except Exception:
                # One broken arena file must not prevent the app from
                # starting; log it with the traceback and move on.
                logger.exception("Failed to load arena config: %s", arena_file)
                continue
            # Inherit app-level LLM config if arena doesn't specify providers
            if not arena_cfg.llm.providers:
                arena_cfg.llm = config.llm
            config.arenas.append(arena_cfg)

    return config