# ems/scripts/analysis/join_inverter_ote_snapshot.py

#!/usr/bin/env python3
"""Join Deye inverter export (wide xlsx) with OTE 15min sell prices for BA81-style analysis.

OTE CSV: regenerate from EMS DB (MCP or psql), example:

  SELECT string_agg(
    to_char((interval_start AT TIME ZONE 'Europe/Prague')::date, 'YYYY-MM-DD') || ',' ||
    to_char(interval_start AT TIME ZONE 'Europe/Prague', 'HH24:MI') || ',' ||
    trim(to_char(sell_raw_price_czk_kwh, 'FM9999990.0000')),
    chr(10) ORDER BY interval_start
  )
  FROM ems.market_interval_price
  WHERE market_source = 'OTE_CZ'
    AND (interval_start AT TIME ZONE 'Europe/Prague')::date IN (...);

Convention in sample logs: negative Battery Power(W) ≈ charging, positive ≈ discharging.
Total Grid Power(W): small positive ≈ little/no export (sign per site firmware).

Requires: openpyxl. Use read_only=False (these exports report max_row=1 in read_only mode).
"""
from __future__ import annotations

import argparse
import statistics as st
from collections import defaultdict
from datetime import datetime
from pathlib import Path

import openpyxl

# Worksheet column headers copied out of every inverter export row by
# load_inverter_rows; "Time" is additionally used there to drop rows that
# carry no timestamp.
COLS = [
    "Time",
    "Total Solar Power(W)",
    "Total Inverter Output Power(W)",
    "Total Grid Power(W)",
    "Battery Power(W)",
    "SoC(%)",
]


def load_ote_csv(path: Path) -> dict[tuple[str, str], float]:
    """Load OTE sell prices from a headerless ``date,HH:MM,price`` CSV file.

    Blank lines are skipped.  Whitespace around each field is ignored and a
    leading UTF-8 byte-order mark is tolerated, so the resulting keys are the
    bare date and time strings.

    Args:
        path: CSV file with one ``date,HH:MM,price`` record per line (the
            layout produced by the query shown in the module docstring).

    Returns:
        Mapping of ``(date, time)`` string pairs to the price as a float.
        When the same pair occurs more than once, the last occurrence wins.

    Raises:
        ValueError: If a non-blank line does not have exactly three fields or
            its price is not a valid float; the message names the file and
            the 1-based line number.
    """
    ote: dict[tuple[str, str], float] = {}
    # utf-8-sig drops a BOM when present and otherwise decodes plain UTF-8,
    # independent of the platform's locale encoding.
    text = path.read_text(encoding="utf-8-sig")
    for lineno, raw in enumerate(text.splitlines(), start=1):
        line = raw.strip()
        if not line:
            continue
        fields = [field.strip() for field in line.split(",")]
        if len(fields) != 3:
            raise ValueError(
                f"{path}:{lineno}: expected 'date,HH:MM,price', got {raw!r}"
            )
        day, hm, price = fields
        try:
            ote[(day, hm)] = float(price)
        except ValueError as err:
            raise ValueError(f"{path}:{lineno}: invalid price {price!r}") from err
    return ote


def floor_15(dt: datetime) -> datetime:
    """Return *dt* rounded down to the start of its quarter-hour.

    The minute becomes 0, 15, 30 or 45 and the second and microsecond fields
    are cleared; every other field is left as it was.
    """
    quarter_start = dt.minute - dt.minute % 15
    return dt.replace(minute=quarter_start, second=0, microsecond=0)


def slot_key(dt: datetime) -> tuple[str, str]:
    """Build the ``(date, time)`` key of the 15-minute slot containing *dt*.

    Returns:
        The floored timestamp formatted as a ``"%Y-%m-%d"`` string and a
        ``"%H:%M"`` string, in that order.
    """
    slot_start = floor_15(dt)
    day_part = slot_start.strftime("%Y-%m-%d")
    time_part = slot_start.strftime("%H:%M")
    return day_part, time_part


def load_inverter_rows(fp: Path) -> list[dict[str, object]]:
    """Read the active worksheet of an inverter export, keeping the ``COLS`` columns.

    The first row is treated as the header; header cells are matched to the
    names in ``COLS`` by their stripped text.  Rows that are empty or whose
    ``Time`` cell is ``None`` are dropped.

    Args:
        fp: Path to the xlsx export.

    Returns:
        One dict per retained row, mapping each name in ``COLS`` to the raw
        cell value.  A worksheet with no rows at all yields an empty list.

    Raises:
        KeyError: If the header row lacks any column named in ``COLS``; the
            message lists the file and the missing headers.
    """
    # read_only=False is deliberate: the module docstring notes these exports
    # report max_row=1 in read-only mode.
    wb = openpyxl.load_workbook(fp, read_only=False, data_only=True)
    try:
        ws = wb.active
        it = ws.iter_rows(values_only=True)
        header = next(it, None)
        if header is None:
            # Completely empty sheet: nothing to join, not an error.
            return []
        idx = {str(h).strip(): i for i, h in enumerate(header) if h}
        missing = [c for c in COLS if c not in idx]
        if missing:
            raise KeyError(f"{fp}: missing column(s) in header row: {missing}")
        # Resolve column positions once instead of on every row.
        time_col = idx["Time"]
        wanted = [(c, idx[c]) for c in COLS]
        rows: list[dict[str, object]] = []
        for r in it:
            if not r or r[time_col] is None:
                continue
            rows.append({c: r[i] for c, i in wanted})
        return rows
    finally:
        # Close the workbook even when header validation or row access raises.
        wb.close()


def main() -> None:
    """Command-line entry point: summarise inverter samples taken at negative OTE prices.

    Parses ``--ote-csv`` plus one or more xlsx paths.  For each workbook it
    keeps the samples whose 15-minute slot has a sell price below zero, prints
    mean/median style statistics for SoC, grid, battery and solar power, and
    then prints per-``HH:MM`` medians for the samples whose hour is 9 to 18
    inclusive.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--ote-csv", type=Path, required=True)
    parser.add_argument("xlsx", type=Path, nargs="+")
    cli = parser.parse_args()
    prices = load_ote_csv(cli.ote_csv)

    for workbook_path in cli.xlsx:
        samples = load_inverter_rows(workbook_path)

        # (sell price, timestamp, row) for every sample in a negative-price slot.
        negative: list[tuple[float, datetime, dict]] = []
        for sample in samples:
            stamp = sample["Time"]
            if isinstance(stamp, str):
                # Text timestamps are parsed; any other value is used as-is.
                stamp = datetime.strptime(stamp, "%Y/%m/%d %H:%M:%S")
            price = prices.get(slot_key(stamp))
            if price is None or price >= 0:
                continue
            negative.append((price, stamp, sample))

        print(f"\n=== {workbook_path.name}  rows={len(samples)}  OTE sell<0 samples={len(negative)}")
        if not negative:
            continue

        # Convert all four series before printing anything, so a bad cell
        # aborts before any partial statistics are emitted.
        soc_vals = [float(rec["SoC(%)"]) for _, _, rec in negative]
        grid_vals = [float(rec["Total Grid Power(W)"]) for _, _, rec in negative]
        bat_vals = [float(rec["Battery Power(W)"]) for _, _, rec in negative]
        solar_vals = [float(rec["Total Solar Power(W)"]) for _, _, rec in negative]
        print(f"  SoC %:   mean={st.mean(soc_vals):.1f}  min={min(soc_vals):.0f}  max={max(soc_vals):.0f}")
        print(f"  Grid W:  mean={st.mean(grid_vals):.0f}  med={st.median(grid_vals):.0f}")
        print(f"  Bat W:   mean={st.mean(bat_vals):.0f}  med={st.median(bat_vals):.0f}")
        print(f"  Solar W: mean={st.mean(solar_vals):.0f}  med={st.median(solar_vals):.0f}")

        # Group the daytime samples by their HH:MM slot label.
        by_slot: dict[str, list] = defaultdict(list)
        for price, stamp, sample in negative:
            if 9 <= stamp.hour <= 18:
                by_slot[slot_key(stamp)[1]].append((price, sample))

        print("  15min buckets (OTE<0, 09-18h) medians:")
        for hm in sorted(by_slot):
            members = by_slot[hm]
            # The price shown is the one attached to the bucket's first sample.
            first_price = members[0][0]
            soc_med = st.median([float(rec["SoC(%)"]) for _, rec in members])
            grid_med = st.median([float(rec["Total Grid Power(W)"]) for _, rec in members])
            solar_med = st.median([float(rec["Total Solar Power(W)"]) for _, rec in members])
            print(
                f"    {hm}  sell={first_price:+.3f}  n={len(members):2d}  "
                f"SoC_med={soc_med:.0f}%  "
                f"Pgrid_med={grid_med:.0f}W  "
                f"Psol_med={solar_med:.0f}W"
            )


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()