2 vzorky pro korekci predikce
Some checks failed
CI and deploy / migration-check (push) Failing after 11s
CI and deploy / deploy (push) Has been skipped

This commit is contained in:
Dusan Vojacek
2026-04-22 21:34:44 +02:00
parent 5a66cfa63f
commit 3cd8e44d37
2 changed files with 224 additions and 2 deletions

View File

@@ -11,7 +11,7 @@ create or replace function ems.fn_pv_forecast_delta_profile(
p_data_to timestamptz default now(),
p_half_life_days numeric default 14,
p_threshold_w int default 150,
p_top_n_days int default null,
p_top_n_days int default 2,
p_non_top_day_factor numeric default 0.02,
p_day_weight_gamma numeric default 1.0
)
@@ -227,4 +227,4 @@ as $fn$
$fn$;
comment on function ems.fn_pv_forecast_delta_profile is
'Aditivní delta profil chyby PV forecastu po 15min slotu dne (96 slotů). Zdroj: forecast_accuracy, vážení exp(-age/half_life_days) * day_weight (clear-ish dny) * volitelně top_n_days (jen N nejlepších kalendářních dní podle w_energy*w_smooth, ostatní ztlumené) * power(day_weight, day_weight_gamma). Vrací JSON {deltas:[{slot_of_day, delta_w, sample_count}], ...}. Cutoff dat od 2026-04-12 Europe/Prague.';
'Aditivní delta profil chyby PV forecastu po 15min slotu dne (96 slotů). Zdroj: forecast_accuracy, vážení exp(-age/half_life_days) * day_weight (clear-ish dny) * top_n_days (default 2 = jen 2 nejlepší kalendářní dny podle w_energy*w_smooth, ostatní ztlumené non_top_day_factor; explicitní NULL = tier vypnut, váží se všechny dny) * power(day_weight, day_weight_gamma). Vrací JSON {deltas:[{slot_of_day, delta_w, sample_count}], ...}. Cutoff dat od 2026-04-12 Europe/Prague.';

View File

@@ -0,0 +1,222 @@
-- Diagnostic: shows which calendar days (in the site's time zone, falling back to
-- Europe/Prague) make up the weight used for the PV forecast delta profile.
-- Written to follow the same pipeline as ems.fn_pv_forecast_delta_profile
-- (latest forecast -> slots -> per-day stats -> day rank -> weights w). The function body
-- is not part of this script, so the two have to be kept in sync by hand.
--
-- Usage: edit the values in params (site_id, window, half-life, threshold,
-- top_n_days / non-top factor / gamma) and run the statement in psql.
-- Output: one row per calendar day in the window. p_top_n_days only changes the weight
-- tier of a day, never the number of rows returned.
WITH params AS (
    -- User-editable inputs. Every later CTE reads its settings from this single row.
    SELECT
        2::int                                    AS site_id,
        (now() - interval '60 days')::timestamptz AS p_data_from,
        now()::timestamptz                        AS p_data_to,
        14::numeric                               AS half_life_days,
        150::int                                  AS threshold_w,
        NULL::int                                 AS p_top_n_days,
        0.02::numeric                             AS p_non_top_day_factor,
        1.0::numeric                              AS p_day_weight_gamma
),
site_tz AS (
    -- Time zone of the selected site; blank or NULL falls back to Europe/Prague.
    -- Yields no row (and therefore an empty report) when the site id does not exist.
    SELECT coalesce(nullif(trim(st.timezone), ''), 'Europe/Prague') AS tz_name
    FROM params AS cfg
    INNER JOIN ems.site AS st
        ON st.id = cfg.site_id
),
win AS (
    -- Effective window and sanitised settings:
    --   * never more than 120 days before p_data_to,
    --   * never before the hard data cutoff 2026-04-11 22:00 UTC
    --     (= 2026-04-12 00:00 in Europe/Prague, CEST),
    --   * half-life is at least 1 day, threshold is at least 0 W.
    SELECT
        greatest(
            cfg.p_data_from,
            cfg.p_data_to - interval '120 days',
            timestamptz '2026-04-11T22:00:00Z'
        )                               AS ts_from,
        cfg.p_data_to                   AS ts_to,
        greatest(cfg.half_life_days, 1) AS half_life_days,
        greatest(cfg.threshold_w, 0)    AS threshold_w
    FROM params AS cfg
),
ranked_forecast AS (
    -- Accuracy rows inside the window that have a measured value and whose forecast was
    -- created no later than the interval start, ranked newest-forecast-first per
    -- (interval, PV array).
    SELECT
        acc.interval_start,
        acc.pv_array_id,
        acc.forecast_power_w,
        acc.actual_power_w,
        row_number() OVER (
            PARTITION BY acc.interval_start, acc.pv_array_id
            ORDER BY acc.forecast_created_at DESC
        ) AS recency_rank
    FROM ems.forecast_accuracy AS acc
    INNER JOIN params AS cfg
        ON cfg.site_id = acc.site_id
    CROSS JOIN win
    WHERE acc.interval_start >= win.ts_from
      AND acc.interval_start < win.ts_to
      AND acc.actual_power_w IS NOT NULL
      AND acc.forecast_created_at <= acc.interval_start
),
latest_local AS (
    -- Only the most recent forecast per (interval, PV array), with the interval start
    -- converted to site-local wall-clock time.
    SELECT
        rf.interval_start,
        rf.forecast_power_w,
        rf.actual_power_w,
        site_tz.tz_name,
        (rf.interval_start AT TIME ZONE site_tz.tz_name) AS local_ts
    FROM ranked_forecast AS rf
    CROSS JOIN site_tz
    WHERE rf.recency_rank = 1
),
slots AS (
    -- Site totals per interval (summed over PV arrays) plus local calendar attributes.
    -- slot_of_day is the 15-minute slot index of the local time (integer division, 0..95).
    -- age_days is measured from now(), not from p_data_to.
    SELECT
        ll.interval_start,
        sum(ll.forecast_power_w)::numeric AS forecast_total_w,
        sum(ll.actual_power_w)::numeric   AS actual_total_w,
        (
            extract(hour FROM ll.local_ts)::int * 60
            + extract(minute FROM ll.local_ts)::int
        ) / 15                            AS slot_of_day,
        ll.local_ts::date                 AS day_local,
        extract(epoch FROM (now() - ll.interval_start)) / 86400.0 AS age_days
    FROM latest_local AS ll
    GROUP BY ll.interval_start, ll.tz_name, ll.local_ts
),
day_energy AS (
    -- Daily sum of measured slot power divided by 4000
    -- (W per 15-minute slot -> kWh, assuming 15-minute intervals).
    SELECT
        sl.day_local,
        sum(sl.actual_total_w)::numeric / 4000.0 AS energy_kwh
    FROM slots AS sl
    GROUP BY sl.day_local
),
energy_median AS (
    -- Median daily energy over the window; always exactly one row (NULL when no data).
    SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY de.energy_kwh) AS med_kwh
    FROM day_energy AS de
),
core_slot_steps AS (
    -- Absolute change of measured power against the previous retained slot of the same day.
    -- Only slots 20..80 (local 05:00 up to the slot starting at 20:00) above the threshold
    -- are retained, so "previous" means previous retained slot, not necessarily the
    -- adjacent 15-minute slot.
    SELECT
        sl.day_local,
        abs(sl.actual_total_w - lag(sl.actual_total_w) OVER day_chrono) AS step_w
    FROM slots AS sl
    CROSS JOIN win
    WHERE sl.slot_of_day >= 20
      AND sl.slot_of_day <= 80
      AND sl.actual_total_w > win.threshold_w
    WINDOW day_chrono AS (PARTITION BY sl.day_local ORDER BY sl.interval_start)
),
day_jump AS (
    -- Median slot-to-slot jump per day; the first retained slot of a day has no step.
    SELECT
        cs.day_local,
        percentile_cont(0.5) WITHIN GROUP (ORDER BY cs.step_w) AS med_jump_w
    FROM core_slot_steps AS cs
    WHERE cs.step_w IS NOT NULL
    GROUP BY cs.day_local
),
day_med AS (
    -- Median measured power per day over all slots above the threshold.
    SELECT
        sl.day_local,
        percentile_cont(0.5) WITHIN GROUP (ORDER BY sl.actual_total_w) AS p50_actual_w
    FROM slots AS sl
    CROSS JOIN win
    WHERE sl.actual_total_w > win.threshold_w
    GROUP BY sl.day_local
),
day_quality AS (
    -- Two per-day quality factors, each clamped to [0, 1]:
    --   w_energy: linear ramp from 0 at 55 % of the median daily energy to 1 at 90 %
    --             (0.55 + 0.35); fixed 0.5 when the median is missing or not positive.
    --   w_smooth: 1 - median jump / max(300 W, 25 % of the day's median power);
    --             fixed 0.35 when either statistic is missing for the day.
    SELECT
        de.day_local,
        de.energy_kwh,
        CASE
            WHEN coalesce(em.med_kwh, 0) <= 0 THEN 0.5
            ELSE greatest(
                0.0,
                least(
                    1.0,
                    (de.energy_kwh - em.med_kwh * 0.55)
                    / nullif(em.med_kwh * 0.35, 0)
                )
            )
        END AS w_energy,
        CASE
            WHEN dj.med_jump_w IS NULL OR dm.p50_actual_w IS NULL THEN 0.35
            ELSE greatest(
                0.0,
                least(
                    1.0,
                    1.0
                    - (
                        dj.med_jump_w
                        / nullif(greatest(300.0, dm.p50_actual_w * 0.25), 0)
                    )
                )
            )
        END AS w_smooth
    FROM day_energy AS de
    CROSS JOIN energy_median AS em
    LEFT JOIN day_jump AS dj
        ON dj.day_local = de.day_local
    LEFT JOIN day_med AS dm
        ON dm.day_local = de.day_local
),
day_scored AS (
    -- Combined day score = w_energy * w_smooth (a missing factor counts as 0.35).
    SELECT
        dq.day_local,
        dq.energy_kwh,
        dq.w_energy,
        dq.w_smooth,
        coalesce(dq.w_energy, 0.35) * coalesce(dq.w_smooth, 0.35) AS day_score
    FROM day_quality AS dq
),
day_ranked AS (
    -- Rank 1 = best day; ties are broken in favour of the more recent day.
    SELECT
        dsc.day_local,
        dsc.energy_kwh,
        dsc.w_energy,
        dsc.w_smooth,
        dsc.day_score,
        row_number() OVER (
            ORDER BY dsc.day_score DESC, dsc.day_local DESC
        ) AS rn
    FROM day_scored AS dsc
),
slot_weight AS (
    -- Weight components for every slot where the measured or the forecast power exceeds
    -- the threshold:
    --   decay   = exp(-age_days / half_life_days)
    --   tier    = 1 when the top-N tier is off (p_top_n_days NULL or < 1) or the day ranks
    --             within the top N; otherwise p_non_top_day_factor clamped to [0, 1]
    --             (0.02 when that parameter is NULL)
    --   quality = 0.05 + 0.95 * clamp(day score, 0, 1) ^ clamp(gamma, 0.25, 8)
    SELECT
        sl.day_local,
        exp(-sl.age_days / nullif(win.half_life_days, 0)) AS decay,
        CASE
            WHEN cfg.p_top_n_days IS NULL
              OR cfg.p_top_n_days < 1
              OR dr.rn <= cfg.p_top_n_days
                THEN 1::numeric
            ELSE greatest(
                0::numeric,
                least(1::numeric, coalesce(cfg.p_non_top_day_factor, 0.02))
            )
        END AS tier,
        0.05
        + 0.95
        * power(
            greatest(
                0.0,
                least(1.0, coalesce(dr.w_energy, 0.35) * coalesce(dr.w_smooth, 0.35))
            ),
            greatest(
                0.25,
                least(coalesce(cfg.p_day_weight_gamma, 1.0), 8.0)
            )
        ) AS quality
    FROM slots AS sl
    CROSS JOIN win
    CROSS JOIN params AS cfg
    LEFT JOIN day_ranked AS dr
        ON dr.day_local = sl.day_local
    WHERE sl.slot_of_day BETWEEN 0 AND 95
      AND (sl.actual_total_w > win.threshold_w OR sl.forecast_total_w > win.threshold_w)
),
day_weight AS (
    -- Total weight and number of contributing slots per day.
    SELECT
        sw.day_local,
        sum(sw.decay * sw.tier * sw.quality) AS w_sum,
        count(*)::bigint                     AS slot_rows
    FROM slot_weight AS sw
    GROUP BY sw.day_local
),
total_weight AS (
    -- Grand total over all days; always exactly one row (NULL when nothing qualified).
    SELECT sum(dw.w_sum) AS w_tot
    FROM day_weight AS dw
)
-- Report: every day in the window, best-ranked first. Days without any qualifying slot
-- show weight 0 and 0 slot rows; pct_of_total_weight is NULL when the total weight is 0.
SELECT
    dr.rn                                    AS day_rank,
    dr.day_local,
    round(dr.energy_kwh::numeric, 2)         AS energy_kwh,
    round(dr.w_energy::numeric, 3)           AS w_energy,
    round(dr.w_smooth::numeric, 3)           AS w_smooth,
    round(dr.day_score::numeric, 4)          AS day_score,
    round(coalesce(dw.w_sum, 0)::numeric, 2) AS sum_w_in_filtered,
    coalesce(dw.slot_rows, 0::bigint)        AS slot_rows,
    round(
        (100.0 * coalesce(dw.w_sum, 0) / nullif(tw.w_tot, 0))::numeric,
        2
    )                                        AS pct_of_total_weight
FROM day_ranked AS dr
LEFT JOIN day_weight AS dw
    ON dw.day_local = dr.day_local
CROSS JOIN total_weight AS tw
ORDER BY dr.rn;
-- Window summary + function output. The params CTE above is only visible inside the
-- statement that defines it, so pass the same values here as literals. Positional order:
-- site_id, p_data_from, p_data_to, half_life_days, threshold_w, p_top_n_days,
-- p_non_top_day_factor, p_day_weight_gamma.
-- SELECT
--     r.profile -> 'data_from' AS data_from,
--     r.profile -> 'data_to' AS data_to
-- FROM (
--     SELECT ems.fn_pv_forecast_delta_profile(
--         2,
--         now() - interval '60 days',
--         now(),
--         14,
--         150,
--         NULL::int,
--         0.02,
--         1.0
--     ) AS profile
-- ) AS r;