diff --git a/db/routines/R__078_fn_pv_forecast_delta_profile.sql b/db/routines/R__078_fn_pv_forecast_delta_profile.sql
index feb1110..bf6fe1c 100644
--- a/db/routines/R__078_fn_pv_forecast_delta_profile.sql
+++ b/db/routines/R__078_fn_pv_forecast_delta_profile.sql
@@ -11,7 +11,7 @@ create or replace function ems.fn_pv_forecast_delta_profile(
     p_data_to timestamptz default now(),
     p_half_life_days numeric default 14,
     p_threshold_w int default 150,
-    p_top_n_days int default null,
+    p_top_n_days int default 2,
     p_non_top_day_factor numeric default 0.02,
     p_day_weight_gamma numeric default 1.0
 )
@@ -227,4 +227,4 @@ as $fn$
 $fn$;
 
 comment on function ems.fn_pv_forecast_delta_profile is
-    'Aditivní delta profil chyby PV forecastu po 15min slotu dne (96 slotů). Zdroj: forecast_accuracy, vážení exp(-age/half_life_days) * day_weight (clear-ish dny) * volitelně top_n_days (jen N nejlepších kalendářních dní podle w_energy*w_smooth, ostatní ztlumené) * power(day_weight, day_weight_gamma). Vrací JSON {deltas:[{slot_of_day, delta_w, sample_count}], ...}. Cutoff dat od 2026-04-12 Europe/Prague.';
+    'Aditivní delta profil chyby PV forecastu po 15min slotu dne (96 slotů). Zdroj: forecast_accuracy, vážení exp(-age/half_life_days) * day_weight (clear-ish dny) * top_n_days (default 2 = jen 2 nejlepší kalendářní dny podle w_energy*w_smooth, ostatní ztlumené non_top_day_factor; explicitní NULL = tier vypnut, váží se všechny dny) * power(day_weight, day_weight_gamma). Vrací JSON {deltas:[{slot_of_day, delta_w, sample_count}], ...}. Cutoff dat od 2026-04-12 Europe/Prague.';
diff --git a/scripts/analysis/pv_delta_profile_diagnostics.sql b/scripts/analysis/pv_delta_profile_diagnostics.sql
new file mode 100644
index 0000000..7866a61
--- /dev/null
+++ b/scripts/analysis/pv_delta_profile_diagnostics.sql
@@ -0,0 +1,239 @@
+-- Diagnostics: which calendar days (Europe/Prague) make up the weight behind the delta profile
+-- (same logic as ems.fn_pv_forecast_delta_profile: best → slots → day_stats → day_rank → weights w).
+--
+-- Adjust params (site_id, window, half_life, threshold, top_n_days / non_top / gamma) and run in psql.
+-- One row = one calendar day in the window; p_top_n_days changes the weight tier (not the row count).
+
+WITH params AS (
+    SELECT
+        2::int AS site_id,
+        (now() - interval '60 days')::timestamptz AS p_data_from,
+        now()::timestamptz AS p_data_to,
+        14::numeric AS half_life_days,
+        150::int AS threshold_w,
+        NULL::int AS p_top_n_days,  -- NULL = top-N tier off; the function itself defaults to 2
+        0.02::numeric AS p_non_top_day_factor,
+        1.0::numeric AS p_day_weight_gamma
+),
+-- Site time zone; falls back to Europe/Prague when the site has none configured.
+tz AS (
+    SELECT coalesce(nullif(trim(s.timezone), ''), 'Europe/Prague') AS tz_name
+    FROM ems.site s
+    JOIN params p ON s.id = p.site_id
+),
+-- Hard lower bound on input data: 2026-04-12 00:00 Europe/Prague, written in UTC.
+cutoff AS (
+    SELECT timestamptz '2026-04-11T22:00:00Z' AS min_ts
+),
+-- Effective window (at most 120 days before p_data_to, never before the cutoff) plus clamped half-life and threshold.
+bounds AS (
+    SELECT
+        greatest(p.p_data_from, p.p_data_to - interval '120 days', (SELECT min_ts FROM cutoff)) AS ts_from,
+        p.p_data_to AS ts_to,
+        greatest(p.half_life_days, 1) AS half_life_days,
+        greatest(p.threshold_w, 0) AS threshold_w
+    FROM params p
+),
+-- Candidate forecasts with actuals; rn = 1 marks the most recent forecast issued no later than the interval start.
+best AS (
+    SELECT
+        fa.interval_start,
+        fa.pv_array_id,
+        fa.forecast_power_w,
+        fa.actual_power_w,
+        fa.forecast_created_at,
+        row_number() OVER (
+            PARTITION BY fa.interval_start, fa.pv_array_id
+            ORDER BY fa.forecast_created_at DESC
+        ) AS rn
+    FROM ems.forecast_accuracy fa
+    CROSS JOIN bounds b
+    JOIN params p ON fa.site_id = p.site_id
+    WHERE fa.interval_start >= b.ts_from
+        AND fa.interval_start < b.ts_to
+        AND fa.actual_power_w IS NOT NULL
+        AND fa.forecast_created_at <= fa.interval_start
+),
+-- Totals across arrays per interval, with local slot index (0..95), local date and age in days relative to now().
+slots AS (
+    SELECT
+        b.interval_start,
+        sum(b.forecast_power_w)::numeric AS forecast_total_w,
+        sum(b.actual_power_w)::numeric AS actual_total_w,
+        (
+            (extract(hour FROM (b.interval_start AT TIME ZONE tz.tz_name))::int * 60)
+            + extract(minute FROM (b.interval_start AT TIME ZONE tz.tz_name))::int
+        ) / 15 AS slot_of_day,
+        (b.interval_start AT TIME ZONE tz.tz_name)::date AS day_local,
+        extract(epoch FROM (now() - b.interval_start)) / 86400.0 AS age_days
+    FROM best b
+    CROSS JOIN tz
+    WHERE b.rn = 1
+    GROUP BY b.interval_start, slot_of_day, day_local, tz.tz_name
+),
+-- Energy per local day; /4000 converts a sum of W samples to kWh assuming 15-minute intervals.
+day_energy AS (
+    SELECT s.day_local, sum(s.actual_total_w)::numeric / 4000.0 AS energy_kwh
+    FROM slots s
+    GROUP BY s.day_local
+),
+-- Median daily energy over the window, used as the reference for w_energy.
+ref AS (
+    SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY de.energy_kwh) AS med_kwh
+    FROM day_energy de
+),
+-- Above-threshold slots 20..80 of each day paired with the previous such slot's actual power.
+slot_steps AS (
+    SELECT
+        s.*,
+        lag(s.actual_total_w) OVER (PARTITION BY s.day_local ORDER BY s.interval_start) AS prev_actual_w
+    FROM slots s
+    WHERE s.slot_of_day BETWEEN 20 AND 80
+        AND s.actual_total_w > (SELECT threshold_w FROM bounds)
+),
+-- Median absolute slot-to-slot change in actual power per day.
+day_jump AS (
+    SELECT
+        ss.day_local,
+        percentile_cont(0.5) WITHIN GROUP (ORDER BY abs(ss.actual_total_w - ss.prev_actual_w)) AS med_jump_w
+    FROM slot_steps ss
+    WHERE ss.prev_actual_w IS NOT NULL
+    GROUP BY ss.day_local
+),
+-- Median above-threshold actual power per day.
+day_med AS (
+    SELECT
+        s.day_local,
+        percentile_cont(0.5) WITHIN GROUP (ORDER BY s.actual_total_w) AS p50_actual_w
+    FROM slots s
+    WHERE s.actual_total_w > (SELECT threshold_w FROM bounds)
+    GROUP BY s.day_local
+),
+-- Per-day weights: w_energy ramps 0..1 between 55 % and 90 % of the median daily energy (0.5 when the median is missing or <= 0);
+-- w_smooth = 1 - med_jump_w / max(300, 0.25 * p50_actual_w), clamped to 0..1 (0.35 when either input is missing).
+day_stats AS (
+    SELECT
+        de.day_local,
+        de.energy_kwh,
+        dj.med_jump_w,
+        dm.p50_actual_w,
+        CASE
+            WHEN (SELECT med_kwh FROM ref) IS NULL OR (SELECT med_kwh FROM ref) <= 0 THEN 0.5
+            ELSE greatest(
+                0.0,
+                least(
+                    1.0,
+                    (de.energy_kwh - (SELECT med_kwh FROM ref) * 0.55)
+                    / nullif((SELECT med_kwh FROM ref) * 0.35, 0)
+                )
+            )
+        END AS w_energy,
+        CASE
+            WHEN dj.med_jump_w IS NULL OR dm.p50_actual_w IS NULL THEN 0.35
+            ELSE greatest(
+                0.0,
+                least(
+                    1.0,
+                    1.0
+                    - (
+                        dj.med_jump_w
+                        / nullif(greatest(300.0, dm.p50_actual_w * 0.25), 0)
+                    )
+                )
+            )
+        END AS w_smooth
+    FROM day_energy de
+    LEFT JOIN day_jump dj ON dj.day_local = de.day_local
+    LEFT JOIN day_med dm ON dm.day_local = de.day_local
+),
+-- Days ranked by day_score = w_energy * w_smooth, best first; ties go to the more recent day.
+day_rank AS (
+    SELECT
+        ds.day_local,
+        row_number() OVER (
+            ORDER BY
+                (coalesce(ds.w_energy, 0.35) * coalesce(ds.w_smooth, 0.35)) DESC,
+                ds.day_local DESC
+        ) AS rn,
+        (coalesce(ds.w_energy, 0.35) * coalesce(ds.w_smooth, 0.35)) AS day_score
+    FROM day_stats ds
+),
+-- Per-slot weight w = exp(-age / half_life) * top-N tier factor * (0.05 + 0.95 * day_score ^ gamma), gamma clamped to 0.25..8;
+-- kept only for slots where actual or forecast exceeds the threshold.
+filtered AS (
+    SELECT
+        s.day_local,
+        s.slot_of_day,
+        exp(-s.age_days / nullif((SELECT half_life_days FROM bounds), 0))
+        * (
+            CASE
+                WHEN (SELECT p_top_n_days FROM params) IS NULL THEN 1::numeric
+                WHEN (SELECT p_top_n_days FROM params) < 1 THEN 1::numeric
+                WHEN dr.rn <= (SELECT p_top_n_days FROM params) THEN 1::numeric
+                ELSE greatest(
+                    0::numeric,
+                    least(1::numeric, coalesce((SELECT p_non_top_day_factor FROM params), 0.02))
+                )
+            END
+        )
+        * (
+            0.05
+            + 0.95
+            * power(
+                greatest(
+                    0.0,
+                    least(1.0, coalesce(ds.w_energy, 0.35) * coalesce(ds.w_smooth, 0.35))
+                ),
+                greatest(
+                    0.25,
+                    least(coalesce((SELECT p_day_weight_gamma FROM params), 1.0), 8.0)
+                )
+            )
+        ) AS w
+    FROM slots s
+    CROSS JOIN bounds b
+    LEFT JOIN day_stats ds ON ds.day_local = s.day_local
+    LEFT JOIN day_rank dr ON dr.day_local = s.day_local
+    WHERE s.slot_of_day BETWEEN 0 AND 95
+        AND (s.actual_total_w > b.threshold_w OR s.forecast_total_w > b.threshold_w)
+),
+-- Weight and slot count contributed by each day.
+by_day AS (
+    SELECT day_local, sum(w) AS w_sum, count(*)::bigint AS slot_rows
+    FROM filtered
+    GROUP BY day_local
+),
+-- Total weight across all days (denominator of pct_of_total_weight).
+tot AS (
+    SELECT sum(w_sum) AS w_tot FROM by_day
+)
+SELECT
+    dr.rn AS day_rank,
+    ds.day_local,
+    round(ds.energy_kwh::numeric, 2) AS energy_kwh,
+    round(ds.w_energy::numeric, 3) AS w_energy,
+    round(ds.w_smooth::numeric, 3) AS w_smooth,
+    round(dr.day_score::numeric, 4) AS day_score,
+    round(coalesce(bd.w_sum, 0)::numeric, 2) AS sum_w_in_filtered,
+    coalesce(bd.slot_rows, 0::bigint) AS slot_rows,
+    round(
+        (100.0 * coalesce(bd.w_sum, 0) / nullif((SELECT w_tot FROM tot), 0))::numeric,
+        2
+    ) AS pct_of_total_weight
+FROM day_stats ds
+JOIN day_rank dr ON dr.day_local = ds.day_local
+LEFT JOIN by_day bd ON bd.day_local = ds.day_local
+ORDER BY dr.rn;
+
+-- Window summary + function output (pass the same values as in params above; the params CTE is not in scope here):
+-- SELECT (ems.fn_pv_forecast_delta_profile(
+--   (SELECT site_id FROM params),
+--   (SELECT p_data_from FROM params),
+--   (SELECT p_data_to FROM params),
+--   (SELECT half_life_days FROM params),
+--   (SELECT threshold_w FROM params),
+--   (SELECT p_top_n_days FROM params),
+--   (SELECT p_non_top_day_factor FROM params),
+--   (SELECT p_day_weight_gamma FROM params)
+-- ))->'data_from' AS data_from,
+-- (ems.fn_pv_forecast_delta_profile(...))->'data_to' AS data_to;