Skip to main content

xlog_logic/
wcoj_var_ordering.rs

1//! Variable-ordering cost model for WCOJ dispatch.
2//!
3//! The trait [`WcojVariableOrderingModel`] picks the kernel **leader
4//! slot** (slot 0) for triangle and 4-cycle WCOJ shapes. The leader
5//! is the input that drives the outer iteration; smaller leaders
6//! mean fewer iterations.
7//!
8//! The default implementation [`LeaderCardinalityModel`] picks the
9//! min-cardinality input as leader IFF the ratio
10//! `min_card / default_leader_card ≤
11//! config.effective_wcoj_var_ordering_threshold()`. Marginal
12//! cases (above threshold) return `None` so the promoter leaves
13//! `var_order = None` and default WCOJ dispatch behavior is
14//! preserved.
15//!
16//! The trait returns only the **leader index** in the promoter's
17//! canonical input order — the promoter is responsible for building
18//! the full [`xlog_ir::rir::VariableOrder`] using the canonical
19//! permutation tables (because computing `kernel_output_cols`
20//! requires the rule's head-variable order, which only the promoter
21//! has).
22//!
23//! # Canonical permutation tables
24//!
25//! Triangle (canonical inputs `[e_xy, e_yz, e_xz]`, vars `{X, Y, Z}`):
26//!
27//! | Leader | Slot 0 | Slot 1 | Slot 2 | Lookup col-swaps    | Kernel-direct output |
28//! |--------|--------|--------|--------|---------------------|----------------------|
29//! | e_xy 0 | e_xy   | e_yz   | e_xz   | none                | (X, Y, Z)            |
30//! | e_yz 1 | e_yz   | e_xz↔  | e_xy↔  | e_xz, e_xy          | (Y, Z, X)            |
31//! | e_xz 2 | e_xz   | e_yz↔  | e_xy   | e_yz                | (X, Z, Y)            |
32//!
33//! 4-cycle (canonical inputs `[e_wx, e_xy, e_yz, e_zw]`,
34//! vars `{W, X, Y, Z}`) — all rotation-only, no col-swaps:
35//!
36//! | Leader | Slot 0 | Slot 1 | Slot 2 | Slot 3 | Kernel-direct output |
37//! |--------|--------|--------|--------|--------|----------------------|
38//! | e_wx 0 | e_wx   | e_xy   | e_yz   | e_zw   | (W, X, Y, Z)         |
39//! | e_xy 1 | e_xy   | e_yz   | e_zw   | e_wx   | (X, Y, Z, W)         |
40//! | e_yz 2 | e_yz   | e_zw   | e_wx   | e_xy   | (Y, Z, W, X)         |
41//! | e_zw 3 | e_zw   | e_wx   | e_xy   | e_yz   | (Z, W, X, Y)         |
42
43use xlog_core::RelId;
44use xlog_ir::rir::{LookupPerm, ProjectExpr, VariableOrder};
45use xlog_stats::StatsManager;
46
47use crate::compiler_config::{CompilerConfig, WcojVarOrderingKind};
48
49/// Named K-clique WCOJ-vs-hash gate parameters.
50#[derive(Debug, Clone, Copy, PartialEq)]
51pub struct WcojCostGateParams {
52    /// WCOJ routes only when estimated WCOJ work is no greater than
53    /// this multiple of estimated hash-chain work. The equality
54    /// boundary keeps the gate one-sided and mirrors the planner's
55    /// lower-cost winner semantics.
56    pub wcoj_to_hash_cost_ratio_ceiling: f64,
57}
58
59/// Default K-clique cost-gate policy.
60pub const WCOJ_COST_GATE_PARAMS: WcojCostGateParams = WcojCostGateParams {
61    wcoj_to_hash_cost_ratio_ceiling: 1.0,
62};
63
64/// Returns true when the named K-clique cost-gate policy routes WCOJ.
65pub fn wcoj_cost_gate_predicts_wcoj(wcoj_cost: f64, hash_cost: f64) -> bool {
66    if !wcoj_cost.is_finite() || !hash_cost.is_finite() || hash_cost <= 0.0 {
67        return false;
68    }
69    wcoj_cost / hash_cost <= WCOJ_COST_GATE_PARAMS.wcoj_to_hash_cost_ratio_ceiling
70}
71
72/// Trait that picks a leader slot for triangle / 4-cycle WCOJ.
73///
74/// Implementations look at relation cardinalities (or other stats)
75/// and decide whether a non-default leader is worth selecting. They
76/// return `None` to mean "leave default leader" and preserve the
77/// no-reorder behavior.
78pub trait WcojVariableOrderingModel {
79    /// Pick a leader for the triangle WCOJ shape.
80    ///
81    /// `rel_ids` are in **promoter canonical order**:
82    /// `[e_xy, e_yz, e_xz]`. Returns leader index in `[0, 3)` or
83    /// `None` if the model declines to override the default.
84    fn pick_triangle_leader(
85        &self,
86        rel_ids: [RelId; 3],
87        stats: &StatsManager,
88        config: &CompilerConfig,
89    ) -> Option<u8>;
90
91    /// Pick a leader for the 4-cycle WCOJ shape.
92    ///
93    /// `rel_ids` are in **promoter canonical order**:
94    /// `[e_wx, e_xy, e_yz, e_zw]`. Returns leader index in
95    /// `[0, 4)` or `None` if the model declines to override the
96    /// default.
97    fn pick_4cycle_leader(
98        &self,
99        rel_ids: [RelId; 4],
100        stats: &StatsManager,
101        config: &CompilerConfig,
102    ) -> Option<u8>;
103}
104
105/// Default implementation: pick the min-cardinality input as leader,
106/// gated by the configurable `wcoj_var_ordering_threshold` ratio.
107#[derive(Debug, Clone, Copy, Default)]
108pub struct LeaderCardinalityModel;
109
110impl LeaderCardinalityModel {
111    /// Look up cardinality from `StatsManager`. Returns `None` for
112    /// unregistered or unseeded relations — the caller treats `None`
113    /// as "no useful stats, decline to reorder".
114    fn card_of(stats: &StatsManager, rel: RelId) -> Option<u64> {
115        let s = stats.get_relation_stats(rel)?;
116        // 0 cardinality is treated as "no info" rather than "empty"
117        // because the promoter runs at compile time when EDBs may
118        // not yet be loaded; the dispatcher's runtime layout helper
119        // handles truly empty inputs.
120        if s.cardinality == 0 {
121            None
122        } else {
123            Some(s.cardinality)
124        }
125    }
126
127    /// Returns the slot index whose cardinality is minimum, or
128    /// `None` if any input has missing/zero stats. This is the
129    /// safety floor — partial stats degrade to default-leader rather
130    /// than mis-picking.
131    fn argmin_card<const N: usize>(
132        rel_ids: &[RelId; N],
133        stats: &StatsManager,
134    ) -> Option<(u8, u64)> {
135        let mut best: Option<(u8, u64)> = None;
136        for (idx, rel) in rel_ids.iter().enumerate() {
137            let card = Self::card_of(stats, *rel)?;
138            best = match best {
139                None => Some((idx as u8, card)),
140                Some((_, b)) if card < b => Some((idx as u8, card)),
141                Some(prev) => Some(prev),
142            };
143        }
144        best
145    }
146
147    /// Apply the `Disabled`-config short-circuit and the threshold
148    /// gate. Returns `Some(leader_idx)` only when the model is
149    /// confident the non-default leader is worth the layout overhead.
150    fn pick_leader<const N: usize>(
151        &self,
152        rel_ids: [RelId; N],
153        stats: &StatsManager,
154        config: &CompilerConfig,
155    ) -> Option<u8> {
156        if config.wcoj_variable_ordering == WcojVarOrderingKind::Disabled {
157            return None;
158        }
159        let (min_idx, min_card) = Self::argmin_card(&rel_ids, stats)?;
160        // Default leader is index 0 (e_xy / e_wx). If the argmin IS
161        // index 0, no reordering would happen — return None to keep
162        // the IR `var_order = None` (bit-identical to the default
163        // no-reorder behavior).
164        if min_idx == 0 {
165            return None;
166        }
167        let default_card = Self::card_of(stats, rel_ids[0])?;
168        // Ratio gate: require min_card / default_card ≤ threshold.
169        // Use float math for clarity; cardinalities fit comfortably
170        // in f64 precision for the relation sizes the promoter sees.
171        let ratio = min_card as f64 / default_card as f64;
172        let threshold = config.effective_wcoj_var_ordering_threshold();
173        if ratio <= threshold {
174            Some(min_idx)
175        } else {
176            None
177        }
178    }
179}
180
181impl WcojVariableOrderingModel for LeaderCardinalityModel {
182    fn pick_triangle_leader(
183        &self,
184        rel_ids: [RelId; 3],
185        stats: &StatsManager,
186        config: &CompilerConfig,
187    ) -> Option<u8> {
188        self.pick_leader(rel_ids, stats, config)
189    }
190
191    fn pick_4cycle_leader(
192        &self,
193        rel_ids: [RelId; 4],
194        stats: &StatsManager,
195        config: &CompilerConfig,
196    ) -> Option<u8> {
197        self.pick_leader(rel_ids, stats, config)
198    }
199}
200
201// ---------------------------------------------------------------
202// HeatAwareLeaderModel
203// ---------------------------------------------------------------
204
205/// Heat-aware cost model: combines cardinality, access heat, and
206/// observed join selectivity into a fixed composite score formula.
207/// Higher score = more expensive to iterate over → demote from the
208/// leader (slot 0). The model picks `argmin(score)` as leader, gated
209/// by the same `effective_wcoj_var_ordering_threshold` as the
210/// leader-cardinality model.
211///
212/// **Composite score formula**:
213/// ```text
214/// score(rel) = cardinality(rel)
215///            * (1.0 + 4.0 * heat(rel))
216///            * Σ_{e ∈ edges(rel)} 1.0 / max(0.01, observed_sel_or_one(e))
217/// ```
218///
219/// * `cardinality` from `StatsManager::get_relation_stats(rel).cardinality`.
220/// * `heat` from `RelationStats.heat: f32` (EMA written by
221///   `record_access` at `node_dispatch.rs:26`).
222/// * `observed_sel_or_one(e)` from
223///   `StatsManager::get_join_selectivity(rel_a, rel_b)` (join-result
224///   feedback via `record_join_result`, EMA-smoothed).
225///   **Key-validation**: only consume a cached `JoinSelectivity`
226///   when its `(left_keys, right_keys)` match the candidate edge
227///   keys after `StatsManager::canonical_join_key` swap. On
228///   mismatch, default to `1.0` (no observed filter assumption).
229///
230/// **Heat weight 4.0**: with default threshold 0.5, gate fires when
231/// `min/default ≤ 0.5`. With cards equal + `heat = h` on the hot
232/// rel, ratio = `1 / (1 + 4h)`. For `1 + 4h ≥ 2.0` → `h ≥ 0.25`
233/// (~3 `record_access` calls).
234#[derive(Debug, Clone, Copy, Default)]
235pub struct HeatAwareLeaderModel;
236
237impl HeatAwareLeaderModel {
238    /// Heat weight used by the composite score formula.
239    pub const HEAT_WEIGHT: f32 = 4.0;
240
241    /// Default selectivity for an edge with no observed
242    /// `JoinSelectivity` record (or with mismatched keys).
243    pub const NO_OBSERVED_SEL: f64 = 1.0;
244
245    /// Floor used in `1/max(0.01, sel)` to avoid divide-by-zero
246    /// on tightly observed edges.
247    pub const SEL_FLOOR: f64 = 0.01;
248
249    /// Same `card_of` semantics as `LeaderCardinalityModel`:
250    /// returns None for unregistered or zero-card rels (safety
251    /// floor).
252    fn card_of(stats: &StatsManager, rel: RelId) -> Option<u64> {
253        let s = stats.get_relation_stats(rel)?;
254        if s.cardinality == 0 {
255            None
256        } else {
257            Some(s.cardinality)
258        }
259    }
260
261    /// Read heat from the cached `RelationStats`. Returns 0.0 for
262    /// unregistered rels — the formula treats unobserved heat as
263    /// cold.
264    fn heat_of(stats: &StatsManager, rel: RelId) -> f32 {
265        stats.get_relation_stats(rel).map(|s| s.heat).unwrap_or(0.0)
266    }
267
268    /// Look up the observed selectivity for an edge with explicit
269    /// key validation. The candidate keys describe how slot
270    /// `pair.0` joins with slot `pair.1` in the canonical kernel
271    /// topology; the cache key is canonicalized via
272    /// `StatsManager::canonical_join_key` (which may swap the rel
273    /// order). On a hit, the stored `left_keys` / `right_keys`
274    /// MUST match the candidate keys after the same swap;
275    /// otherwise return `NO_OBSERVED_SEL` (the model treats the
276    /// edge as having no useful filter info).
277    fn observed_sel_or_one(
278        stats: &StatsManager,
279        rel_a: RelId,
280        rel_b: RelId,
281        candidate_left_keys: &[usize],
282        candidate_right_keys: &[usize],
283    ) -> f64 {
284        let Some(js) = stats.get_join_selectivity(rel_a, rel_b) else {
285            return Self::NO_OBSERVED_SEL;
286        };
287        // `get_join_selectivity` canonicalizes internally so
288        // `js.left_rel <= js.right_rel`. The candidate keys must
289        // be swapped correspondingly when the stored canonical
290        // order differs from the candidate's `(rel_a, rel_b)`
291        // order.
292        let (expected_left_keys, expected_right_keys) = if js.left_rel == rel_a {
293            (candidate_left_keys, candidate_right_keys)
294        } else {
295            // Stored entry has rels swapped vs candidate; swap
296            // expected keys correspondingly.
297            (candidate_right_keys, candidate_left_keys)
298        };
299        if js.left_keys.as_slice() == expected_left_keys
300            && js.right_keys.as_slice() == expected_right_keys
301        {
302            js.selectivity
303        } else {
304            Self::NO_OBSERVED_SEL
305        }
306    }
307
308    /// Compute the composite score for a single rel.
309    ///
310    /// `incident_edges` is a slice of `(other_rel,
311    /// candidate_left_keys, candidate_right_keys)` for each edge
312    /// containing `rel`. The model derives this from the canonical
313    /// kernel topology (triangle: 3 edges, 4-cycle: 4 edges); the
314    /// key columns come from the canonical permutation tables.
315    fn composite_score(
316        rel: RelId,
317        card: u64,
318        stats: &StatsManager,
319        incident_edges: &[(RelId, &[usize], &[usize])],
320    ) -> f64 {
321        let heat = Self::heat_of(stats, rel);
322        let heat_factor = 1.0 + Self::HEAT_WEIGHT as f64 * heat as f64;
323        let sel_penalty: f64 = incident_edges
324            .iter()
325            .map(|(other, lk, rk)| {
326                let sel = Self::observed_sel_or_one(stats, rel, *other, lk, rk);
327                1.0 / sel.max(Self::SEL_FLOOR)
328            })
329            .sum();
330        card as f64 * heat_factor * sel_penalty
331    }
332
333    /// Generic argmin over composite scores with the same
334    /// short-circuits as `LeaderCardinalityModel::pick_leader`:
335    /// `Disabled` config → None; argmin == 0 (default leader
336    /// already optimal) → None; threshold-gate via
337    /// `min_score / default_score ≤ effective_threshold` else
338    /// None.
339    fn pick_leader_from_scores<const N: usize>(
340        rel_ids: [RelId; N],
341        scores: [f64; N],
342        config: &CompilerConfig,
343    ) -> Option<u8> {
344        if config.wcoj_variable_ordering == WcojVarOrderingKind::Disabled {
345            return None;
346        }
347        // Defensive: if any score is non-finite or zero, treat as
348        // missing data → safety floor returns None.
349        if scores.iter().any(|s| !s.is_finite() || *s <= 0.0) {
350            return None;
351        }
352        let _ = rel_ids; // future-proof param; unused today.
353        let (min_idx, &min_score) = scores
354            .iter()
355            .enumerate()
356            .min_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))?;
357        if min_idx == 0 {
358            return None;
359        }
360        let default_score = scores[0];
361        let ratio = min_score / default_score;
362        let threshold = config.effective_wcoj_var_ordering_threshold();
363        if ratio <= threshold {
364            Some(min_idx as u8)
365        } else {
366            None
367        }
368    }
369}
370
371impl WcojVariableOrderingModel for HeatAwareLeaderModel {
372    fn pick_triangle_leader(
373        &self,
374        rel_ids: [RelId; 3],
375        stats: &StatsManager,
376        config: &CompilerConfig,
377    ) -> Option<u8> {
378        // Triangle canonical edges: {(0,1), (1,2), (0,2)} —
379        // each rel ∈ 2 edges. Edge keys follow the canonical
380        // permutation tables: every triangle edge joins on
381        // [1]/[0] in the **canonical** layout (no swaps in the
382        // canonical default-leader topology). The HeatAware
383        // model's selectivity lookup uses these canonical-layout
384        // keys; non-default-leader rotated keys are only relevant
385        // on the FEEDBACK side.
386        let cards: [u64; 3] = match (
387            Self::card_of(stats, rel_ids[0]),
388            Self::card_of(stats, rel_ids[1]),
389            Self::card_of(stats, rel_ids[2]),
390        ) {
391            (Some(a), Some(b), Some(c)) => [a, b, c],
392            _ => return None, // safety floor on missing card
393        };
394        // Edges: indexed by (slot_a, slot_b) per canonical topology.
395        // edge01 = rel_ids[0] ⋈ rel_ids[1] on rel_ids[0].col1 ↔ rel_ids[1].col0
396        // edge12 = rel_ids[1] ⋈ rel_ids[2] on rel_ids[1].col1 ↔ rel_ids[2].col0
397        // edge02 = rel_ids[0] ⋈ rel_ids[2] on rel_ids[0].col0 ↔ rel_ids[2].col0
398        //   (canonical triangle slot_vars[2] = [a, c] — keys [0]/[0]).
399        let scores: [f64; 3] = [
400            Self::composite_score(
401                rel_ids[0],
402                cards[0],
403                stats,
404                &[(rel_ids[1], &[1], &[0]), (rel_ids[2], &[0], &[0])],
405            ),
406            Self::composite_score(
407                rel_ids[1],
408                cards[1],
409                stats,
410                &[(rel_ids[0], &[0], &[1]), (rel_ids[2], &[1], &[1])],
411            ),
412            Self::composite_score(
413                rel_ids[2],
414                cards[2],
415                stats,
416                &[(rel_ids[0], &[0], &[0]), (rel_ids[1], &[1], &[1])],
417            ),
418        ];
419        Self::pick_leader_from_scores(rel_ids, scores, config)
420    }
421
422    fn pick_4cycle_leader(
423        &self,
424        rel_ids: [RelId; 4],
425        stats: &StatsManager,
426        config: &CompilerConfig,
427    ) -> Option<u8> {
428        // 4-cycle canonical edges: {(0,1), (1,2), (2,3), (3,0)} —
429        // each rel ∈ 2 edges. Rotation-only (no swaps); every
430        // edge joins on [1]/[0] in canonical layout.
431        let cards: [u64; 4] = match (
432            Self::card_of(stats, rel_ids[0]),
433            Self::card_of(stats, rel_ids[1]),
434            Self::card_of(stats, rel_ids[2]),
435            Self::card_of(stats, rel_ids[3]),
436        ) {
437            (Some(a), Some(b), Some(c), Some(d)) => [a, b, c, d],
438            _ => return None,
439        };
440        let scores: [f64; 4] = [
441            Self::composite_score(
442                rel_ids[0],
443                cards[0],
444                stats,
445                &[
446                    (rel_ids[1], &[1], &[0]), // edge (0,1)
447                    (rel_ids[3], &[0], &[1]), // edge (3,0) — reversed: rel0.col0 ↔ rel3.col1
448                ],
449            ),
450            Self::composite_score(
451                rel_ids[1],
452                cards[1],
453                stats,
454                &[(rel_ids[0], &[0], &[1]), (rel_ids[2], &[1], &[0])],
455            ),
456            Self::composite_score(
457                rel_ids[2],
458                cards[2],
459                stats,
460                &[(rel_ids[1], &[0], &[1]), (rel_ids[3], &[1], &[0])],
461            ),
462            Self::composite_score(
463                rel_ids[3],
464                cards[3],
465                stats,
466                &[(rel_ids[2], &[0], &[1]), (rel_ids[0], &[1], &[0])],
467            ),
468        ];
469        Self::pick_leader_from_scores(rel_ids, scores, config)
470    }
471}
472
473/// Promoter helper: triangle canonical permutation table.
474///
475/// Returns `lookup_perms` (the 2 non-leader slot entries) for the
476/// given `leader_idx ∈ [0, 3)`. The promoter combines this with its
477/// own knowledge of the rule head to produce
478/// `kernel_output_cols`.
479///
480/// **Canonical semantics**:
481/// * 0 (e_xy default) — slots `[e_yz, e_xz]`, no col-swaps.
482/// * 1 (e_yz)         — slots `[e_xz↔, e_xy↔]`, both col-swapped.
483/// * 2 (e_xz)         — slots `[e_yz↔, e_xy]`, e_yz col-swapped only.
484pub fn triangle_lookup_perms(leader_idx: u8) -> Vec<LookupPerm> {
485    match leader_idx {
486        0 => vec![
487            // slot 1 = e_yz (input_idx 1), slot 2 = e_xz (input_idx 2)
488            LookupPerm {
489                input_idx: 1,
490                swap_cols: false,
491            },
492            LookupPerm {
493                input_idx: 2,
494                swap_cols: false,
495            },
496        ],
497        1 => vec![
498            // slot 1 = e_xz swapped (input_idx 2), slot 2 = e_xy swapped (input_idx 0)
499            LookupPerm {
500                input_idx: 2,
501                swap_cols: true,
502            },
503            LookupPerm {
504                input_idx: 0,
505                swap_cols: true,
506            },
507        ],
508        2 => vec![
509            // slot 1 = e_yz swapped (input_idx 1), slot 2 = e_xy as-is (input_idx 0)
510            LookupPerm {
511                input_idx: 1,
512                swap_cols: true,
513            },
514            LookupPerm {
515                input_idx: 0,
516                swap_cols: false,
517            },
518        ],
519        _ => panic!("triangle leader_idx must be in [0, 3): got {leader_idx}"),
520    }
521}
522
523/// Promoter helper: triangle canonical head-projection table.
524///
525/// Maps the kernel-direct output (in leader's `(a, b, c)` order)
526/// back to the canonical head order `(X, Y, Z)`.
527///
528/// **Canonical semantics**:
529/// * 0 (e_xy default) — identity `[Column(0), Column(1), Column(2)]`.
530/// * 1 (e_yz)         — `[Column(2), Column(0), Column(1)]`
531///   because kernel-direct = `(Y, Z, X)`.
532/// * 2 (e_xz)         — `[Column(0), Column(2), Column(1)]`
533///   because kernel-direct = `(X, Z, Y)`.
534pub fn triangle_kernel_output_cols(leader_idx: u8) -> Vec<ProjectExpr> {
535    match leader_idx {
536        0 => vec![
537            ProjectExpr::Column(0),
538            ProjectExpr::Column(1),
539            ProjectExpr::Column(2),
540        ],
541        1 => vec![
542            ProjectExpr::Column(2),
543            ProjectExpr::Column(0),
544            ProjectExpr::Column(1),
545        ],
546        2 => vec![
547            ProjectExpr::Column(0),
548            ProjectExpr::Column(2),
549            ProjectExpr::Column(1),
550        ],
551        _ => panic!("triangle leader_idx must be in [0, 3): got {leader_idx}"),
552    }
553}
554
555/// Promoter helper: 4-cycle canonical head-projection table.
556///
557/// Maps the kernel-direct output (in leader's rotated cycle order)
558/// back to the canonical head order `(W, X, Y, Z)`. All 4 entries
559/// are pure permutations; no col-swap is involved.
560pub fn cycle4_kernel_output_cols(leader_idx: u8) -> Vec<ProjectExpr> {
561    match leader_idx {
562        0 => vec![
563            ProjectExpr::Column(0),
564            ProjectExpr::Column(1),
565            ProjectExpr::Column(2),
566            ProjectExpr::Column(3),
567        ],
568        1 => vec![
569            // kernel-direct = (X, Y, Z, W) → head (W, X, Y, Z)
570            ProjectExpr::Column(3),
571            ProjectExpr::Column(0),
572            ProjectExpr::Column(1),
573            ProjectExpr::Column(2),
574        ],
575        2 => vec![
576            // kernel-direct = (Y, Z, W, X) → head (W, X, Y, Z)
577            ProjectExpr::Column(2),
578            ProjectExpr::Column(3),
579            ProjectExpr::Column(0),
580            ProjectExpr::Column(1),
581        ],
582        3 => vec![
583            // kernel-direct = (Z, W, X, Y) → head (W, X, Y, Z)
584            ProjectExpr::Column(1),
585            ProjectExpr::Column(2),
586            ProjectExpr::Column(3),
587            ProjectExpr::Column(0),
588        ],
589        _ => panic!("4-cycle leader_idx must be in [0, 4): got {leader_idx}"),
590    }
591}
592
593/// Promoter helper: build a complete `VariableOrder` for a triangle
594/// from the cost model's `leader_idx` decision.
595pub fn build_triangle_var_order(leader_idx: u8) -> VariableOrder {
596    VariableOrder::legacy(
597        leader_idx,
598        triangle_lookup_perms(leader_idx),
599        triangle_kernel_output_cols(leader_idx),
600    )
601}
602
603/// Promoter helper: build a complete `VariableOrder` for a 4-cycle
604/// from the cost model's `leader_idx` decision.
605pub fn build_cycle4_var_order(leader_idx: u8) -> VariableOrder {
606    VariableOrder::legacy(
607        leader_idx,
608        cycle4_lookup_perms(leader_idx),
609        cycle4_kernel_output_cols(leader_idx),
610    )
611}
612
613/// Promoter helper: 4-cycle canonical permutation table.
614///
615/// All 4 leaders are rotation-only — no col-swaps. `lookup_perms`
616/// rotates the input list so the leader is at slot 0 and subsequent
617/// slots follow the cycle direction.
618pub fn cycle4_lookup_perms(leader_idx: u8) -> Vec<LookupPerm> {
619    if leader_idx >= 4 {
620        panic!("4-cycle leader_idx must be in [0, 4): got {leader_idx}");
621    }
622    // Rotation: slot k holds input (leader_idx + k) mod 4.
623    // The leader itself (k=0) is recorded in VariableOrder::leader_idx
624    // and not duplicated here, so this returns the 3 non-leader slots.
625    (1..4)
626        .map(|k| LookupPerm {
627            input_idx: ((leader_idx as usize + k) % 4) as u8,
628            swap_cols: false,
629        })
630        .collect()
631}
632
633#[cfg(test)]
634mod tests {
635    //! Unit tests for the cost-model trait, default implementation,
636    //! and canonical permutation tables. End-to-end leader-pick tests
637    //! live alongside the leader-pick integration coverage.
638    use super::*;
639    use xlog_core::RelId;
640
641    fn stats_with(cards: &[(RelId, u64)]) -> StatsManager {
642        let mut mgr = StatsManager::new();
643        for (rel, card) in cards {
644            mgr.register_relation(*rel);
645            if *card > 0 {
646                mgr.update_cardinality(*rel, *card);
647            }
648        }
649        mgr
650    }
651
652    #[test]
653    fn disabled_config_returns_none_even_with_smaller_input() {
654        let stats = stats_with(&[(RelId(1), 1000), (RelId(2), 10), (RelId(3), 1000)]);
655        let config = CompilerConfig::default(); // Disabled
656        let m = LeaderCardinalityModel;
657        assert_eq!(
658            m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
659            None,
660            "Disabled config must short-circuit before the threshold gate"
661        );
662    }
663
664    #[test]
665    fn missing_stats_returns_none_safety_floor() {
666        // RelId(2) has no cardinality registered (registration without
667        // update_cardinality leaves card=0, treated as missing).
668        let stats = stats_with(&[(RelId(1), 1000), (RelId(2), 0), (RelId(3), 1000)]);
669        let config = CompilerConfig {
670            wcoj_variable_ordering: WcojVarOrderingKind::LeaderCardinality,
671            ..CompilerConfig::default()
672        };
673        let m = LeaderCardinalityModel;
674        assert_eq!(
675            m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
676            None,
677            "missing-stats safety floor: any None card → None decision"
678        );
679    }
680
681    #[test]
682    fn picks_min_when_clearly_under_threshold() {
683        let stats = stats_with(&[(RelId(1), 1000), (RelId(2), 100), (RelId(3), 800)]);
684        let config = CompilerConfig {
685            wcoj_variable_ordering: WcojVarOrderingKind::LeaderCardinality,
686            ..CompilerConfig::default()
687        };
688        let m = LeaderCardinalityModel;
689        // ratio = 100/1000 = 0.1 ≤ 0.5 → leader = 1
690        assert_eq!(
691            m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
692            Some(1)
693        );
694    }
695
696    #[test]
697    fn declines_when_min_is_default_leader() {
698        // RelId(1) is the default leader and is already the smallest.
699        let stats = stats_with(&[(RelId(1), 100), (RelId(2), 1000), (RelId(3), 1000)]);
700        let config = CompilerConfig {
701            wcoj_variable_ordering: WcojVarOrderingKind::LeaderCardinality,
702            ..CompilerConfig::default()
703        };
704        let m = LeaderCardinalityModel;
705        assert_eq!(
706            m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
707            None,
708            "min == default leader → no reorder needed → None"
709        );
710    }
711
712    #[test]
713    fn declines_marginal_above_threshold() {
714        // ratio = 700/1000 = 0.7 > 0.5 → no reorder
715        let stats = stats_with(&[(RelId(1), 1000), (RelId(2), 700), (RelId(3), 900)]);
716        let config = CompilerConfig {
717            wcoj_variable_ordering: WcojVarOrderingKind::LeaderCardinality,
718            ..CompilerConfig::default()
719        };
720        let m = LeaderCardinalityModel;
721        assert_eq!(
722            m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
723            None,
724        );
725    }
726
727    #[test]
728    fn picks_4cycle_min_under_threshold() {
729        let stats = stats_with(&[
730            (RelId(1), 1000),
731            (RelId(2), 1000),
732            (RelId(3), 100),
733            (RelId(4), 1000),
734        ]);
735        let config = CompilerConfig {
736            wcoj_variable_ordering: WcojVarOrderingKind::LeaderCardinality,
737            ..CompilerConfig::default()
738        };
739        let m = LeaderCardinalityModel;
740        assert_eq!(
741            m.pick_4cycle_leader([RelId(1), RelId(2), RelId(3), RelId(4)], &stats, &config),
742            Some(2)
743        );
744    }
745
746    #[test]
747    fn triangle_lookup_perms_default_leader_is_identity() {
748        let p = triangle_lookup_perms(0);
749        assert_eq!(p.len(), 2);
750        assert_eq!(p[0].input_idx, 1);
751        assert!(!p[0].swap_cols);
752        assert_eq!(p[1].input_idx, 2);
753        assert!(!p[1].swap_cols);
754    }
755
756    #[test]
757    fn triangle_lookup_perms_leader_yz_double_swap() {
758        let p = triangle_lookup_perms(1);
759        // slot 1 = e_xz swapped, slot 2 = e_xy swapped
760        assert_eq!(p[0].input_idx, 2);
761        assert!(p[0].swap_cols);
762        assert_eq!(p[1].input_idx, 0);
763        assert!(p[1].swap_cols);
764    }
765
766    #[test]
767    fn triangle_lookup_perms_leader_xz_partial_swap() {
768        let p = triangle_lookup_perms(2);
769        // slot 1 = e_yz swapped, slot 2 = e_xy as-is
770        assert_eq!(p[0].input_idx, 1);
771        assert!(p[0].swap_cols);
772        assert_eq!(p[1].input_idx, 0);
773        assert!(!p[1].swap_cols);
774    }
775
776    #[test]
777    fn cycle4_lookup_perms_default_is_identity_rotation() {
778        let p = cycle4_lookup_perms(0);
779        // slots 1, 2, 3 = inputs 1, 2, 3
780        assert_eq!(
781            p.iter().map(|lp| lp.input_idx).collect::<Vec<_>>(),
782            vec![1, 2, 3]
783        );
784        assert!(p.iter().all(|lp| !lp.swap_cols));
785    }
786
787    #[test]
788    fn cycle4_lookup_perms_leader_xy_rotates_to_yz_zw_wx() {
789        let p = cycle4_lookup_perms(1);
790        // leader = e_xy. slots 1, 2, 3 = e_yz (2), e_zw (3), e_wx (0).
791        assert_eq!(
792            p.iter().map(|lp| lp.input_idx).collect::<Vec<_>>(),
793            vec![2, 3, 0]
794        );
795        assert!(p.iter().all(|lp| !lp.swap_cols));
796    }
797
798    #[test]
799    fn cycle4_lookup_perms_leader_zw_rotates_to_wx_xy_yz() {
800        let p = cycle4_lookup_perms(3);
801        assert_eq!(
802            p.iter().map(|lp| lp.input_idx).collect::<Vec<_>>(),
803            vec![0, 1, 2]
804        );
805        assert!(p.iter().all(|lp| !lp.swap_cols));
806    }
807
808    // ===============================================================
809    // HeatAwareLeaderModel composite-score tests
810    // ===============================================================
811
812    /// Build stats with cards + per-rel heat values (heat injected
813    /// directly via `get_relation_stats_mut`; sidesteps EMA so unit
814    /// tests can pin exact values).
815    fn stats_with_heat(seeded: &[(RelId, u64, f32)]) -> StatsManager {
816        let mut mgr = StatsManager::new();
817        for (rel, card, heat) in seeded {
818            mgr.register_relation(*rel);
819            if *card > 0 {
820                mgr.update_cardinality(*rel, *card);
821            }
822            if let Some(s) = mgr.get_relation_stats_mut(*rel) {
823                s.heat = *heat;
824            }
825        }
826        mgr
827    }
828
829    fn heat_aware_config() -> CompilerConfig {
830        CompilerConfig {
831            wcoj_variable_ordering: WcojVarOrderingKind::HeatAware,
832            ..CompilerConfig::default()
833        }
834    }
835
836    #[test]
837    fn heat_aware_leader_picks_cold_when_hot_relation_at_default_idx() {
838        // Triangle. Cards equal at 100. Hot rel at idx 0 (default
839        // leader): heat = 0.5. Other rels heat = 0.
840        // Expected scores (per HEAT_WEIGHT = 4 formula):
841        //   idx0: 100 * (1+4*0.5) * 2 = 100 * 3 * 2 = 600
842        //   idx1: 100 * 1 * 2 = 200
843        //   idx2: 100 * 1 * 2 = 200
844        // argmin = idx 1 (first hit). default = 600. ratio
845        // 200/600 = 0.333 ≤ 0.5 → returns Some(1).
846        let stats = stats_with_heat(&[
847            (RelId(1), 100, 0.5),
848            (RelId(2), 100, 0.0),
849            (RelId(3), 100, 0.0),
850        ]);
851        let config = heat_aware_config();
852        let m = HeatAwareLeaderModel;
853        assert_eq!(
854            m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
855            Some(1)
856        );
857    }
858
859    #[test]
860    fn heat_aware_leader_demotes_relation_in_tight_edge() {
861        // Triangle. Cards 100 each. Heat 0 each. ONE
862        // JoinSelectivity record on edge (rel(1), rel(2))
863        // (canonical idx 0/1) with selectivity = 0.01.
864        // Expected penalties:
865        //   rel0 ∈ {(0,1) tight, (0,2) default}: 100 + 1 = 101.
866        //   rel1 ∈ {(0,1) tight, (1,2) default}: 101.
867        //   rel2 ∈ {(1,2) default, (0,2) default}: 1 + 1 = 2.
868        // Scores (heat factor 1):
869        //   idx0: 100 * 101 = 10100
870        //   idx1: 10100
871        //   idx2: 100 * 2 = 200
872        // argmin = idx 2. default = 10100. ratio 200/10100
873        // ≈ 0.0198 ≤ 0.5 → returns Some(2).
874        let mut stats = stats_with_heat(&[
875            (RelId(1), 100, 0.0),
876            (RelId(2), 100, 0.0),
877            (RelId(3), 100, 0.0),
878        ]);
879        // Tight edge on (rel0=RelId(1), rel1=RelId(2)) with keys
880        // [1]/[0] (canonical triangle slot_vars[0]=[a,b],
881        // slot_vars[1]=[b,c] → join key b at slot0.col1 / slot1.col0).
882        stats.set_join_selectivity(RelId(1), RelId(2), vec![1], vec![0], 0.01);
883        let config = heat_aware_config();
884        let m = HeatAwareLeaderModel;
885        assert_eq!(
886            m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
887            Some(2)
888        );
889    }
890
891    #[test]
892    fn heat_aware_leader_returns_none_when_heat_too_low() {
893        // Cards equal. Heat at idx 0 = 0.1 (one record_access from
894        // cold), others 0. Factor at idx 0 = 1.4, others 1.0.
895        // Score idx0 = 100*1.4*2 = 280. idx1/idx2 = 200.
896        // ratio 200/280 ≈ 0.714 > 0.5 → returns None.
897        let stats = stats_with_heat(&[
898            (RelId(1), 100, 0.1),
899            (RelId(2), 100, 0.0),
900            (RelId(3), 100, 0.0),
901        ]);
902        let config = heat_aware_config();
903        let m = HeatAwareLeaderModel;
904        assert_eq!(
905            m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
906            None
907        );
908    }
909
910    #[test]
911    fn heat_aware_leader_disabled_short_circuit() {
912        // Strong heat differential, but config = Disabled → None.
913        let stats = stats_with_heat(&[
914            (RelId(1), 100, 0.9),
915            (RelId(2), 100, 0.0),
916            (RelId(3), 100, 0.0),
917        ]);
918        let config = CompilerConfig::default(); // Disabled
919        let m = HeatAwareLeaderModel;
920        assert_eq!(
921            m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
922            None
923        );
924    }
925
926    #[test]
927    fn heat_aware_leader_missing_card_safety_floor() {
928        // RelId(2) has no card registered (zero). Cost model
929        // returns None per missing-card safety floor (matches
930        // LeaderCardinalityModel's behavior).
931        let stats = stats_with_heat(&[
932            (RelId(1), 100, 0.5),
933            (RelId(2), 0, 0.0), // missing card
934            (RelId(3), 100, 0.0),
935        ]);
936        let config = heat_aware_config();
937        let m = HeatAwareLeaderModel;
938        assert_eq!(
939            m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
940            None
941        );
942    }
943}