xlog_logic/wcoj_var_ordering.rs
1//! Variable-ordering cost model for WCOJ dispatch.
2//!
3//! The trait [`WcojVariableOrderingModel`] picks the kernel **leader
4//! slot** (slot 0) for triangle and 4-cycle WCOJ shapes. The leader
5//! is the input that drives the outer iteration; smaller leaders
6//! mean fewer iterations.
7//!
8//! The default implementation [`LeaderCardinalityModel`] picks the
9//! min-cardinality input as leader IFF the ratio
10//! `min_card / default_leader_card ≤
11//! config.effective_wcoj_var_ordering_threshold()`. Marginal
12//! cases (above threshold) return `None` so the promoter leaves
13//! `var_order = None` and default WCOJ dispatch behavior is
14//! preserved.
15//!
16//! The trait returns only the **leader index** in the promoter's
17//! canonical input order — the promoter is responsible for building
18//! the full [`xlog_ir::rir::VariableOrder`] using the canonical
19//! permutation tables (because computing `kernel_output_cols`
20//! requires the rule's head-variable order, which only the promoter
21//! has).
22//!
23//! # Canonical permutation tables
24//!
25//! Triangle (canonical inputs `[e_xy, e_yz, e_xz]`, vars `{X, Y, Z}`):
26//!
27//! | Leader | Slot 0 | Slot 1 | Slot 2 | Lookup col-swaps | Kernel-direct output |
28//! |--------|--------|--------|--------|---------------------|----------------------|
29//! | e_xy 0 | e_xy | e_yz | e_xz | none | (X, Y, Z) |
30//! | e_yz 1 | e_yz | e_xz↔ | e_xy↔ | e_xz, e_xy | (Y, Z, X) |
31//! | e_xz 2 | e_xz | e_yz↔ | e_xy | e_yz | (X, Z, Y) |
32//!
33//! 4-cycle (canonical inputs `[e_wx, e_xy, e_yz, e_zw]`,
34//! vars `{W, X, Y, Z}`) — all rotation-only, no col-swaps:
35//!
36//! | Leader | Slot 0 | Slot 1 | Slot 2 | Slot 3 | Kernel-direct output |
37//! |--------|--------|--------|--------|--------|----------------------|
38//! | e_wx 0 | e_wx | e_xy | e_yz | e_zw | (W, X, Y, Z) |
39//! | e_xy 1 | e_xy | e_yz | e_zw | e_wx | (X, Y, Z, W) |
40//! | e_yz 2 | e_yz | e_zw | e_wx | e_xy | (Y, Z, W, X) |
41//! | e_zw 3 | e_zw | e_wx | e_xy | e_yz | (Z, W, X, Y) |
42
43use xlog_core::RelId;
44use xlog_ir::rir::{LookupPerm, ProjectExpr, VariableOrder};
45use xlog_stats::StatsManager;
46
47use crate::compiler_config::{CompilerConfig, WcojVarOrderingKind};
48
49/// Named K-clique WCOJ-vs-hash gate parameters.
50#[derive(Debug, Clone, Copy, PartialEq)]
51pub struct WcojCostGateParams {
52 /// WCOJ routes only when estimated WCOJ work is no greater than
53 /// this multiple of estimated hash-chain work. The equality
54 /// boundary keeps the gate one-sided and mirrors the planner's
55 /// lower-cost winner semantics.
56 pub wcoj_to_hash_cost_ratio_ceiling: f64,
57}
58
59/// Default K-clique cost-gate policy.
60pub const WCOJ_COST_GATE_PARAMS: WcojCostGateParams = WcojCostGateParams {
61 wcoj_to_hash_cost_ratio_ceiling: 1.0,
62};
63
64/// Returns true when the named K-clique cost-gate policy routes WCOJ.
65pub fn wcoj_cost_gate_predicts_wcoj(wcoj_cost: f64, hash_cost: f64) -> bool {
66 if !wcoj_cost.is_finite() || !hash_cost.is_finite() || hash_cost <= 0.0 {
67 return false;
68 }
69 wcoj_cost / hash_cost <= WCOJ_COST_GATE_PARAMS.wcoj_to_hash_cost_ratio_ceiling
70}
71
72/// Trait that picks a leader slot for triangle / 4-cycle WCOJ.
73///
74/// Implementations look at relation cardinalities (or other stats)
75/// and decide whether a non-default leader is worth selecting. They
76/// return `None` to mean "leave default leader" and preserve the
77/// no-reorder behavior.
78pub trait WcojVariableOrderingModel {
79 /// Pick a leader for the triangle WCOJ shape.
80 ///
81 /// `rel_ids` are in **promoter canonical order**:
82 /// `[e_xy, e_yz, e_xz]`. Returns leader index in `[0, 3)` or
83 /// `None` if the model declines to override the default.
84 fn pick_triangle_leader(
85 &self,
86 rel_ids: [RelId; 3],
87 stats: &StatsManager,
88 config: &CompilerConfig,
89 ) -> Option<u8>;
90
91 /// Pick a leader for the 4-cycle WCOJ shape.
92 ///
93 /// `rel_ids` are in **promoter canonical order**:
94 /// `[e_wx, e_xy, e_yz, e_zw]`. Returns leader index in
95 /// `[0, 4)` or `None` if the model declines to override the
96 /// default.
97 fn pick_4cycle_leader(
98 &self,
99 rel_ids: [RelId; 4],
100 stats: &StatsManager,
101 config: &CompilerConfig,
102 ) -> Option<u8>;
103}
104
105/// Default implementation: pick the min-cardinality input as leader,
106/// gated by the configurable `wcoj_var_ordering_threshold` ratio.
107#[derive(Debug, Clone, Copy, Default)]
108pub struct LeaderCardinalityModel;
109
110impl LeaderCardinalityModel {
111 /// Look up cardinality from `StatsManager`. Returns `None` for
112 /// unregistered or unseeded relations — the caller treats `None`
113 /// as "no useful stats, decline to reorder".
114 fn card_of(stats: &StatsManager, rel: RelId) -> Option<u64> {
115 let s = stats.get_relation_stats(rel)?;
116 // 0 cardinality is treated as "no info" rather than "empty"
117 // because the promoter runs at compile time when EDBs may
118 // not yet be loaded; the dispatcher's runtime layout helper
119 // handles truly empty inputs.
120 if s.cardinality == 0 {
121 None
122 } else {
123 Some(s.cardinality)
124 }
125 }
126
127 /// Returns the slot index whose cardinality is minimum, or
128 /// `None` if any input has missing/zero stats. This is the
129 /// safety floor — partial stats degrade to default-leader rather
130 /// than mis-picking.
131 fn argmin_card<const N: usize>(
132 rel_ids: &[RelId; N],
133 stats: &StatsManager,
134 ) -> Option<(u8, u64)> {
135 let mut best: Option<(u8, u64)> = None;
136 for (idx, rel) in rel_ids.iter().enumerate() {
137 let card = Self::card_of(stats, *rel)?;
138 best = match best {
139 None => Some((idx as u8, card)),
140 Some((_, b)) if card < b => Some((idx as u8, card)),
141 Some(prev) => Some(prev),
142 };
143 }
144 best
145 }
146
147 /// Apply the `Disabled`-config short-circuit and the threshold
148 /// gate. Returns `Some(leader_idx)` only when the model is
149 /// confident the non-default leader is worth the layout overhead.
150 fn pick_leader<const N: usize>(
151 &self,
152 rel_ids: [RelId; N],
153 stats: &StatsManager,
154 config: &CompilerConfig,
155 ) -> Option<u8> {
156 if config.wcoj_variable_ordering == WcojVarOrderingKind::Disabled {
157 return None;
158 }
159 let (min_idx, min_card) = Self::argmin_card(&rel_ids, stats)?;
160 // Default leader is index 0 (e_xy / e_wx). If the argmin IS
161 // index 0, no reordering would happen — return None to keep
162 // the IR `var_order = None` (bit-identical to the default
163 // no-reorder behavior).
164 if min_idx == 0 {
165 return None;
166 }
167 let default_card = Self::card_of(stats, rel_ids[0])?;
168 // Ratio gate: require min_card / default_card ≤ threshold.
169 // Use float math for clarity; cardinalities fit comfortably
170 // in f64 precision for the relation sizes the promoter sees.
171 let ratio = min_card as f64 / default_card as f64;
172 let threshold = config.effective_wcoj_var_ordering_threshold();
173 if ratio <= threshold {
174 Some(min_idx)
175 } else {
176 None
177 }
178 }
179}
180
181impl WcojVariableOrderingModel for LeaderCardinalityModel {
182 fn pick_triangle_leader(
183 &self,
184 rel_ids: [RelId; 3],
185 stats: &StatsManager,
186 config: &CompilerConfig,
187 ) -> Option<u8> {
188 self.pick_leader(rel_ids, stats, config)
189 }
190
191 fn pick_4cycle_leader(
192 &self,
193 rel_ids: [RelId; 4],
194 stats: &StatsManager,
195 config: &CompilerConfig,
196 ) -> Option<u8> {
197 self.pick_leader(rel_ids, stats, config)
198 }
199}
200
201// ---------------------------------------------------------------
202// HeatAwareLeaderModel
203// ---------------------------------------------------------------
204
205/// Heat-aware cost model: combines cardinality, access heat, and
206/// observed join selectivity into a fixed composite score formula.
207/// Higher score = more expensive to iterate over → demote from the
208/// leader (slot 0). The model picks `argmin(score)` as leader, gated
209/// by the same `effective_wcoj_var_ordering_threshold` as the
210/// leader-cardinality model.
211///
212/// **Composite score formula**:
213/// ```text
214/// score(rel) = cardinality(rel)
215/// * (1.0 + 4.0 * heat(rel))
216/// * Σ_{e ∈ edges(rel)} 1.0 / max(0.01, observed_sel_or_one(e))
217/// ```
218///
219/// * `cardinality` from `StatsManager::get_relation_stats(rel).cardinality`.
220/// * `heat` from `RelationStats.heat: f32` (EMA written by
221/// `record_access` at `node_dispatch.rs:26`).
222/// * `observed_sel_or_one(e)` from
223/// `StatsManager::get_join_selectivity(rel_a, rel_b)` (join-result
224/// feedback via `record_join_result`, EMA-smoothed).
225/// **Key-validation**: only consume a cached `JoinSelectivity`
226/// when its `(left_keys, right_keys)` match the candidate edge
227/// keys after `StatsManager::canonical_join_key` swap. On
228/// mismatch, default to `1.0` (no observed filter assumption).
229///
230/// **Heat weight 4.0**: with default threshold 0.5, gate fires when
231/// `min/default ≤ 0.5`. With cards equal + `heat = h` on the hot
232/// rel, ratio = `1 / (1 + 4h)`. For `1 + 4h ≥ 2.0` → `h ≥ 0.25`
233/// (~3 `record_access` calls).
234#[derive(Debug, Clone, Copy, Default)]
235pub struct HeatAwareLeaderModel;
236
237impl HeatAwareLeaderModel {
238 /// Heat weight used by the composite score formula.
239 pub const HEAT_WEIGHT: f32 = 4.0;
240
241 /// Default selectivity for an edge with no observed
242 /// `JoinSelectivity` record (or with mismatched keys).
243 pub const NO_OBSERVED_SEL: f64 = 1.0;
244
245 /// Floor used in `1/max(0.01, sel)` to avoid divide-by-zero
246 /// on tightly observed edges.
247 pub const SEL_FLOOR: f64 = 0.01;
248
249 /// Same `card_of` semantics as `LeaderCardinalityModel`:
250 /// returns None for unregistered or zero-card rels (safety
251 /// floor).
252 fn card_of(stats: &StatsManager, rel: RelId) -> Option<u64> {
253 let s = stats.get_relation_stats(rel)?;
254 if s.cardinality == 0 {
255 None
256 } else {
257 Some(s.cardinality)
258 }
259 }
260
261 /// Read heat from the cached `RelationStats`. Returns 0.0 for
262 /// unregistered rels — the formula treats unobserved heat as
263 /// cold.
264 fn heat_of(stats: &StatsManager, rel: RelId) -> f32 {
265 stats.get_relation_stats(rel).map(|s| s.heat).unwrap_or(0.0)
266 }
267
268 /// Look up the observed selectivity for an edge with explicit
269 /// key validation. The candidate keys describe how slot
270 /// `pair.0` joins with slot `pair.1` in the canonical kernel
271 /// topology; the cache key is canonicalized via
272 /// `StatsManager::canonical_join_key` (which may swap the rel
273 /// order). On a hit, the stored `left_keys` / `right_keys`
274 /// MUST match the candidate keys after the same swap;
275 /// otherwise return `NO_OBSERVED_SEL` (the model treats the
276 /// edge as having no useful filter info).
277 fn observed_sel_or_one(
278 stats: &StatsManager,
279 rel_a: RelId,
280 rel_b: RelId,
281 candidate_left_keys: &[usize],
282 candidate_right_keys: &[usize],
283 ) -> f64 {
284 let Some(js) = stats.get_join_selectivity(rel_a, rel_b) else {
285 return Self::NO_OBSERVED_SEL;
286 };
287 // `get_join_selectivity` canonicalizes internally so
288 // `js.left_rel <= js.right_rel`. The candidate keys must
289 // be swapped correspondingly when the stored canonical
290 // order differs from the candidate's `(rel_a, rel_b)`
291 // order.
292 let (expected_left_keys, expected_right_keys) = if js.left_rel == rel_a {
293 (candidate_left_keys, candidate_right_keys)
294 } else {
295 // Stored entry has rels swapped vs candidate; swap
296 // expected keys correspondingly.
297 (candidate_right_keys, candidate_left_keys)
298 };
299 if js.left_keys.as_slice() == expected_left_keys
300 && js.right_keys.as_slice() == expected_right_keys
301 {
302 js.selectivity
303 } else {
304 Self::NO_OBSERVED_SEL
305 }
306 }
307
308 /// Compute the composite score for a single rel.
309 ///
310 /// `incident_edges` is a slice of `(other_rel,
311 /// candidate_left_keys, candidate_right_keys)` for each edge
312 /// containing `rel`. The model derives this from the canonical
313 /// kernel topology (triangle: 3 edges, 4-cycle: 4 edges); the
314 /// key columns come from the canonical permutation tables.
315 fn composite_score(
316 rel: RelId,
317 card: u64,
318 stats: &StatsManager,
319 incident_edges: &[(RelId, &[usize], &[usize])],
320 ) -> f64 {
321 let heat = Self::heat_of(stats, rel);
322 let heat_factor = 1.0 + Self::HEAT_WEIGHT as f64 * heat as f64;
323 let sel_penalty: f64 = incident_edges
324 .iter()
325 .map(|(other, lk, rk)| {
326 let sel = Self::observed_sel_or_one(stats, rel, *other, lk, rk);
327 1.0 / sel.max(Self::SEL_FLOOR)
328 })
329 .sum();
330 card as f64 * heat_factor * sel_penalty
331 }
332
333 /// Generic argmin over composite scores with the same
334 /// short-circuits as `LeaderCardinalityModel::pick_leader`:
335 /// `Disabled` config → None; argmin == 0 (default leader
336 /// already optimal) → None; threshold-gate via
337 /// `min_score / default_score ≤ effective_threshold` else
338 /// None.
339 fn pick_leader_from_scores<const N: usize>(
340 rel_ids: [RelId; N],
341 scores: [f64; N],
342 config: &CompilerConfig,
343 ) -> Option<u8> {
344 if config.wcoj_variable_ordering == WcojVarOrderingKind::Disabled {
345 return None;
346 }
347 // Defensive: if any score is non-finite or zero, treat as
348 // missing data → safety floor returns None.
349 if scores.iter().any(|s| !s.is_finite() || *s <= 0.0) {
350 return None;
351 }
352 let _ = rel_ids; // future-proof param; unused today.
353 let (min_idx, &min_score) = scores
354 .iter()
355 .enumerate()
356 .min_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))?;
357 if min_idx == 0 {
358 return None;
359 }
360 let default_score = scores[0];
361 let ratio = min_score / default_score;
362 let threshold = config.effective_wcoj_var_ordering_threshold();
363 if ratio <= threshold {
364 Some(min_idx as u8)
365 } else {
366 None
367 }
368 }
369}
370
371impl WcojVariableOrderingModel for HeatAwareLeaderModel {
372 fn pick_triangle_leader(
373 &self,
374 rel_ids: [RelId; 3],
375 stats: &StatsManager,
376 config: &CompilerConfig,
377 ) -> Option<u8> {
378 // Triangle canonical edges: {(0,1), (1,2), (0,2)} —
379 // each rel ∈ 2 edges. Edge keys follow the canonical
380 // permutation tables: every triangle edge joins on
381 // [1]/[0] in the **canonical** layout (no swaps in the
382 // canonical default-leader topology). The HeatAware
383 // model's selectivity lookup uses these canonical-layout
384 // keys; non-default-leader rotated keys are only relevant
385 // on the FEEDBACK side.
386 let cards: [u64; 3] = match (
387 Self::card_of(stats, rel_ids[0]),
388 Self::card_of(stats, rel_ids[1]),
389 Self::card_of(stats, rel_ids[2]),
390 ) {
391 (Some(a), Some(b), Some(c)) => [a, b, c],
392 _ => return None, // safety floor on missing card
393 };
394 // Edges: indexed by (slot_a, slot_b) per canonical topology.
395 // edge01 = rel_ids[0] ⋈ rel_ids[1] on rel_ids[0].col1 ↔ rel_ids[1].col0
396 // edge12 = rel_ids[1] ⋈ rel_ids[2] on rel_ids[1].col1 ↔ rel_ids[2].col0
397 // edge02 = rel_ids[0] ⋈ rel_ids[2] on rel_ids[0].col0 ↔ rel_ids[2].col0
398 // (canonical triangle slot_vars[2] = [a, c] — keys [0]/[0]).
399 let scores: [f64; 3] = [
400 Self::composite_score(
401 rel_ids[0],
402 cards[0],
403 stats,
404 &[(rel_ids[1], &[1], &[0]), (rel_ids[2], &[0], &[0])],
405 ),
406 Self::composite_score(
407 rel_ids[1],
408 cards[1],
409 stats,
410 &[(rel_ids[0], &[0], &[1]), (rel_ids[2], &[1], &[1])],
411 ),
412 Self::composite_score(
413 rel_ids[2],
414 cards[2],
415 stats,
416 &[(rel_ids[0], &[0], &[0]), (rel_ids[1], &[1], &[1])],
417 ),
418 ];
419 Self::pick_leader_from_scores(rel_ids, scores, config)
420 }
421
422 fn pick_4cycle_leader(
423 &self,
424 rel_ids: [RelId; 4],
425 stats: &StatsManager,
426 config: &CompilerConfig,
427 ) -> Option<u8> {
428 // 4-cycle canonical edges: {(0,1), (1,2), (2,3), (3,0)} —
429 // each rel ∈ 2 edges. Rotation-only (no swaps); every
430 // edge joins on [1]/[0] in canonical layout.
431 let cards: [u64; 4] = match (
432 Self::card_of(stats, rel_ids[0]),
433 Self::card_of(stats, rel_ids[1]),
434 Self::card_of(stats, rel_ids[2]),
435 Self::card_of(stats, rel_ids[3]),
436 ) {
437 (Some(a), Some(b), Some(c), Some(d)) => [a, b, c, d],
438 _ => return None,
439 };
440 let scores: [f64; 4] = [
441 Self::composite_score(
442 rel_ids[0],
443 cards[0],
444 stats,
445 &[
446 (rel_ids[1], &[1], &[0]), // edge (0,1)
447 (rel_ids[3], &[0], &[1]), // edge (3,0) — reversed: rel0.col0 ↔ rel3.col1
448 ],
449 ),
450 Self::composite_score(
451 rel_ids[1],
452 cards[1],
453 stats,
454 &[(rel_ids[0], &[0], &[1]), (rel_ids[2], &[1], &[0])],
455 ),
456 Self::composite_score(
457 rel_ids[2],
458 cards[2],
459 stats,
460 &[(rel_ids[1], &[0], &[1]), (rel_ids[3], &[1], &[0])],
461 ),
462 Self::composite_score(
463 rel_ids[3],
464 cards[3],
465 stats,
466 &[(rel_ids[2], &[0], &[1]), (rel_ids[0], &[1], &[0])],
467 ),
468 ];
469 Self::pick_leader_from_scores(rel_ids, scores, config)
470 }
471}
472
473/// Promoter helper: triangle canonical permutation table.
474///
475/// Returns `lookup_perms` (the 2 non-leader slot entries) for the
476/// given `leader_idx ∈ [0, 3)`. The promoter combines this with its
477/// own knowledge of the rule head to produce
478/// `kernel_output_cols`.
479///
480/// **Canonical semantics**:
481/// * 0 (e_xy default) — slots `[e_yz, e_xz]`, no col-swaps.
482/// * 1 (e_yz) — slots `[e_xz↔, e_xy↔]`, both col-swapped.
483/// * 2 (e_xz) — slots `[e_yz↔, e_xy]`, e_yz col-swapped only.
484pub fn triangle_lookup_perms(leader_idx: u8) -> Vec<LookupPerm> {
485 match leader_idx {
486 0 => vec![
487 // slot 1 = e_yz (input_idx 1), slot 2 = e_xz (input_idx 2)
488 LookupPerm {
489 input_idx: 1,
490 swap_cols: false,
491 },
492 LookupPerm {
493 input_idx: 2,
494 swap_cols: false,
495 },
496 ],
497 1 => vec![
498 // slot 1 = e_xz swapped (input_idx 2), slot 2 = e_xy swapped (input_idx 0)
499 LookupPerm {
500 input_idx: 2,
501 swap_cols: true,
502 },
503 LookupPerm {
504 input_idx: 0,
505 swap_cols: true,
506 },
507 ],
508 2 => vec![
509 // slot 1 = e_yz swapped (input_idx 1), slot 2 = e_xy as-is (input_idx 0)
510 LookupPerm {
511 input_idx: 1,
512 swap_cols: true,
513 },
514 LookupPerm {
515 input_idx: 0,
516 swap_cols: false,
517 },
518 ],
519 _ => panic!("triangle leader_idx must be in [0, 3): got {leader_idx}"),
520 }
521}
522
523/// Promoter helper: triangle canonical head-projection table.
524///
525/// Maps the kernel-direct output (in leader's `(a, b, c)` order)
526/// back to the canonical head order `(X, Y, Z)`.
527///
528/// **Canonical semantics**:
529/// * 0 (e_xy default) — identity `[Column(0), Column(1), Column(2)]`.
530/// * 1 (e_yz) — `[Column(2), Column(0), Column(1)]`
531/// because kernel-direct = `(Y, Z, X)`.
532/// * 2 (e_xz) — `[Column(0), Column(2), Column(1)]`
533/// because kernel-direct = `(X, Z, Y)`.
534pub fn triangle_kernel_output_cols(leader_idx: u8) -> Vec<ProjectExpr> {
535 match leader_idx {
536 0 => vec![
537 ProjectExpr::Column(0),
538 ProjectExpr::Column(1),
539 ProjectExpr::Column(2),
540 ],
541 1 => vec![
542 ProjectExpr::Column(2),
543 ProjectExpr::Column(0),
544 ProjectExpr::Column(1),
545 ],
546 2 => vec![
547 ProjectExpr::Column(0),
548 ProjectExpr::Column(2),
549 ProjectExpr::Column(1),
550 ],
551 _ => panic!("triangle leader_idx must be in [0, 3): got {leader_idx}"),
552 }
553}
554
555/// Promoter helper: 4-cycle canonical head-projection table.
556///
557/// Maps the kernel-direct output (in leader's rotated cycle order)
558/// back to the canonical head order `(W, X, Y, Z)`. All 4 entries
559/// are pure permutations; no col-swap is involved.
560pub fn cycle4_kernel_output_cols(leader_idx: u8) -> Vec<ProjectExpr> {
561 match leader_idx {
562 0 => vec![
563 ProjectExpr::Column(0),
564 ProjectExpr::Column(1),
565 ProjectExpr::Column(2),
566 ProjectExpr::Column(3),
567 ],
568 1 => vec![
569 // kernel-direct = (X, Y, Z, W) → head (W, X, Y, Z)
570 ProjectExpr::Column(3),
571 ProjectExpr::Column(0),
572 ProjectExpr::Column(1),
573 ProjectExpr::Column(2),
574 ],
575 2 => vec![
576 // kernel-direct = (Y, Z, W, X) → head (W, X, Y, Z)
577 ProjectExpr::Column(2),
578 ProjectExpr::Column(3),
579 ProjectExpr::Column(0),
580 ProjectExpr::Column(1),
581 ],
582 3 => vec![
583 // kernel-direct = (Z, W, X, Y) → head (W, X, Y, Z)
584 ProjectExpr::Column(1),
585 ProjectExpr::Column(2),
586 ProjectExpr::Column(3),
587 ProjectExpr::Column(0),
588 ],
589 _ => panic!("4-cycle leader_idx must be in [0, 4): got {leader_idx}"),
590 }
591}
592
593/// Promoter helper: build a complete `VariableOrder` for a triangle
594/// from the cost model's `leader_idx` decision.
595pub fn build_triangle_var_order(leader_idx: u8) -> VariableOrder {
596 VariableOrder::legacy(
597 leader_idx,
598 triangle_lookup_perms(leader_idx),
599 triangle_kernel_output_cols(leader_idx),
600 )
601}
602
603/// Promoter helper: build a complete `VariableOrder` for a 4-cycle
604/// from the cost model's `leader_idx` decision.
605pub fn build_cycle4_var_order(leader_idx: u8) -> VariableOrder {
606 VariableOrder::legacy(
607 leader_idx,
608 cycle4_lookup_perms(leader_idx),
609 cycle4_kernel_output_cols(leader_idx),
610 )
611}
612
613/// Promoter helper: 4-cycle canonical permutation table.
614///
615/// All 4 leaders are rotation-only — no col-swaps. `lookup_perms`
616/// rotates the input list so the leader is at slot 0 and subsequent
617/// slots follow the cycle direction.
618pub fn cycle4_lookup_perms(leader_idx: u8) -> Vec<LookupPerm> {
619 if leader_idx >= 4 {
620 panic!("4-cycle leader_idx must be in [0, 4): got {leader_idx}");
621 }
622 // Rotation: slot k holds input (leader_idx + k) mod 4.
623 // The leader itself (k=0) is recorded in VariableOrder::leader_idx
624 // and not duplicated here, so this returns the 3 non-leader slots.
625 (1..4)
626 .map(|k| LookupPerm {
627 input_idx: ((leader_idx as usize + k) % 4) as u8,
628 swap_cols: false,
629 })
630 .collect()
631}
632
633#[cfg(test)]
634mod tests {
635 //! Unit tests for the cost-model trait, default implementation,
636 //! and canonical permutation tables. End-to-end leader-pick tests
637 //! live alongside the leader-pick integration coverage.
638 use super::*;
639 use xlog_core::RelId;
640
641 fn stats_with(cards: &[(RelId, u64)]) -> StatsManager {
642 let mut mgr = StatsManager::new();
643 for (rel, card) in cards {
644 mgr.register_relation(*rel);
645 if *card > 0 {
646 mgr.update_cardinality(*rel, *card);
647 }
648 }
649 mgr
650 }
651
652 #[test]
653 fn disabled_config_returns_none_even_with_smaller_input() {
654 let stats = stats_with(&[(RelId(1), 1000), (RelId(2), 10), (RelId(3), 1000)]);
655 let config = CompilerConfig::default(); // Disabled
656 let m = LeaderCardinalityModel;
657 assert_eq!(
658 m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
659 None,
660 "Disabled config must short-circuit before the threshold gate"
661 );
662 }
663
664 #[test]
665 fn missing_stats_returns_none_safety_floor() {
666 // RelId(2) has no cardinality registered (registration without
667 // update_cardinality leaves card=0, treated as missing).
668 let stats = stats_with(&[(RelId(1), 1000), (RelId(2), 0), (RelId(3), 1000)]);
669 let config = CompilerConfig {
670 wcoj_variable_ordering: WcojVarOrderingKind::LeaderCardinality,
671 ..CompilerConfig::default()
672 };
673 let m = LeaderCardinalityModel;
674 assert_eq!(
675 m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
676 None,
677 "missing-stats safety floor: any None card → None decision"
678 );
679 }
680
681 #[test]
682 fn picks_min_when_clearly_under_threshold() {
683 let stats = stats_with(&[(RelId(1), 1000), (RelId(2), 100), (RelId(3), 800)]);
684 let config = CompilerConfig {
685 wcoj_variable_ordering: WcojVarOrderingKind::LeaderCardinality,
686 ..CompilerConfig::default()
687 };
688 let m = LeaderCardinalityModel;
689 // ratio = 100/1000 = 0.1 ≤ 0.5 → leader = 1
690 assert_eq!(
691 m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
692 Some(1)
693 );
694 }
695
696 #[test]
697 fn declines_when_min_is_default_leader() {
698 // RelId(1) is the default leader and is already the smallest.
699 let stats = stats_with(&[(RelId(1), 100), (RelId(2), 1000), (RelId(3), 1000)]);
700 let config = CompilerConfig {
701 wcoj_variable_ordering: WcojVarOrderingKind::LeaderCardinality,
702 ..CompilerConfig::default()
703 };
704 let m = LeaderCardinalityModel;
705 assert_eq!(
706 m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
707 None,
708 "min == default leader → no reorder needed → None"
709 );
710 }
711
712 #[test]
713 fn declines_marginal_above_threshold() {
714 // ratio = 700/1000 = 0.7 > 0.5 → no reorder
715 let stats = stats_with(&[(RelId(1), 1000), (RelId(2), 700), (RelId(3), 900)]);
716 let config = CompilerConfig {
717 wcoj_variable_ordering: WcojVarOrderingKind::LeaderCardinality,
718 ..CompilerConfig::default()
719 };
720 let m = LeaderCardinalityModel;
721 assert_eq!(
722 m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
723 None,
724 );
725 }
726
727 #[test]
728 fn picks_4cycle_min_under_threshold() {
729 let stats = stats_with(&[
730 (RelId(1), 1000),
731 (RelId(2), 1000),
732 (RelId(3), 100),
733 (RelId(4), 1000),
734 ]);
735 let config = CompilerConfig {
736 wcoj_variable_ordering: WcojVarOrderingKind::LeaderCardinality,
737 ..CompilerConfig::default()
738 };
739 let m = LeaderCardinalityModel;
740 assert_eq!(
741 m.pick_4cycle_leader([RelId(1), RelId(2), RelId(3), RelId(4)], &stats, &config),
742 Some(2)
743 );
744 }
745
746 #[test]
747 fn triangle_lookup_perms_default_leader_is_identity() {
748 let p = triangle_lookup_perms(0);
749 assert_eq!(p.len(), 2);
750 assert_eq!(p[0].input_idx, 1);
751 assert!(!p[0].swap_cols);
752 assert_eq!(p[1].input_idx, 2);
753 assert!(!p[1].swap_cols);
754 }
755
756 #[test]
757 fn triangle_lookup_perms_leader_yz_double_swap() {
758 let p = triangle_lookup_perms(1);
759 // slot 1 = e_xz swapped, slot 2 = e_xy swapped
760 assert_eq!(p[0].input_idx, 2);
761 assert!(p[0].swap_cols);
762 assert_eq!(p[1].input_idx, 0);
763 assert!(p[1].swap_cols);
764 }
765
766 #[test]
767 fn triangle_lookup_perms_leader_xz_partial_swap() {
768 let p = triangle_lookup_perms(2);
769 // slot 1 = e_yz swapped, slot 2 = e_xy as-is
770 assert_eq!(p[0].input_idx, 1);
771 assert!(p[0].swap_cols);
772 assert_eq!(p[1].input_idx, 0);
773 assert!(!p[1].swap_cols);
774 }
775
776 #[test]
777 fn cycle4_lookup_perms_default_is_identity_rotation() {
778 let p = cycle4_lookup_perms(0);
779 // slots 1, 2, 3 = inputs 1, 2, 3
780 assert_eq!(
781 p.iter().map(|lp| lp.input_idx).collect::<Vec<_>>(),
782 vec![1, 2, 3]
783 );
784 assert!(p.iter().all(|lp| !lp.swap_cols));
785 }
786
787 #[test]
788 fn cycle4_lookup_perms_leader_xy_rotates_to_yz_zw_wx() {
789 let p = cycle4_lookup_perms(1);
790 // leader = e_xy. slots 1, 2, 3 = e_yz (2), e_zw (3), e_wx (0).
791 assert_eq!(
792 p.iter().map(|lp| lp.input_idx).collect::<Vec<_>>(),
793 vec![2, 3, 0]
794 );
795 assert!(p.iter().all(|lp| !lp.swap_cols));
796 }
797
798 #[test]
799 fn cycle4_lookup_perms_leader_zw_rotates_to_wx_xy_yz() {
800 let p = cycle4_lookup_perms(3);
801 assert_eq!(
802 p.iter().map(|lp| lp.input_idx).collect::<Vec<_>>(),
803 vec![0, 1, 2]
804 );
805 assert!(p.iter().all(|lp| !lp.swap_cols));
806 }
807
808 // ===============================================================
809 // HeatAwareLeaderModel composite-score tests
810 // ===============================================================
811
812 /// Build stats with cards + per-rel heat values (heat injected
813 /// directly via `get_relation_stats_mut`; sidesteps EMA so unit
814 /// tests can pin exact values).
815 fn stats_with_heat(seeded: &[(RelId, u64, f32)]) -> StatsManager {
816 let mut mgr = StatsManager::new();
817 for (rel, card, heat) in seeded {
818 mgr.register_relation(*rel);
819 if *card > 0 {
820 mgr.update_cardinality(*rel, *card);
821 }
822 if let Some(s) = mgr.get_relation_stats_mut(*rel) {
823 s.heat = *heat;
824 }
825 }
826 mgr
827 }
828
829 fn heat_aware_config() -> CompilerConfig {
830 CompilerConfig {
831 wcoj_variable_ordering: WcojVarOrderingKind::HeatAware,
832 ..CompilerConfig::default()
833 }
834 }
835
836 #[test]
837 fn heat_aware_leader_picks_cold_when_hot_relation_at_default_idx() {
838 // Triangle. Cards equal at 100. Hot rel at idx 0 (default
839 // leader): heat = 0.5. Other rels heat = 0.
840 // Expected scores (per HEAT_WEIGHT = 4 formula):
841 // idx0: 100 * (1+4*0.5) * 2 = 100 * 3 * 2 = 600
842 // idx1: 100 * 1 * 2 = 200
843 // idx2: 100 * 1 * 2 = 200
844 // argmin = idx 1 (first hit). default = 600. ratio
845 // 200/600 = 0.333 ≤ 0.5 → returns Some(1).
846 let stats = stats_with_heat(&[
847 (RelId(1), 100, 0.5),
848 (RelId(2), 100, 0.0),
849 (RelId(3), 100, 0.0),
850 ]);
851 let config = heat_aware_config();
852 let m = HeatAwareLeaderModel;
853 assert_eq!(
854 m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
855 Some(1)
856 );
857 }
858
859 #[test]
860 fn heat_aware_leader_demotes_relation_in_tight_edge() {
861 // Triangle. Cards 100 each. Heat 0 each. ONE
862 // JoinSelectivity record on edge (rel(1), rel(2))
863 // (canonical idx 0/1) with selectivity = 0.01.
864 // Expected penalties:
865 // rel0 ∈ {(0,1) tight, (0,2) default}: 100 + 1 = 101.
866 // rel1 ∈ {(0,1) tight, (1,2) default}: 101.
867 // rel2 ∈ {(1,2) default, (0,2) default}: 1 + 1 = 2.
868 // Scores (heat factor 1):
869 // idx0: 100 * 101 = 10100
870 // idx1: 10100
871 // idx2: 100 * 2 = 200
872 // argmin = idx 2. default = 10100. ratio 200/10100
873 // ≈ 0.0198 ≤ 0.5 → returns Some(2).
874 let mut stats = stats_with_heat(&[
875 (RelId(1), 100, 0.0),
876 (RelId(2), 100, 0.0),
877 (RelId(3), 100, 0.0),
878 ]);
879 // Tight edge on (rel0=RelId(1), rel1=RelId(2)) with keys
880 // [1]/[0] (canonical triangle slot_vars[0]=[a,b],
881 // slot_vars[1]=[b,c] → join key b at slot0.col1 / slot1.col0).
882 stats.set_join_selectivity(RelId(1), RelId(2), vec![1], vec![0], 0.01);
883 let config = heat_aware_config();
884 let m = HeatAwareLeaderModel;
885 assert_eq!(
886 m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
887 Some(2)
888 );
889 }
890
891 #[test]
892 fn heat_aware_leader_returns_none_when_heat_too_low() {
893 // Cards equal. Heat at idx 0 = 0.1 (one record_access from
894 // cold), others 0. Factor at idx 0 = 1.4, others 1.0.
895 // Score idx0 = 100*1.4*2 = 280. idx1/idx2 = 200.
896 // ratio 200/280 ≈ 0.714 > 0.5 → returns None.
897 let stats = stats_with_heat(&[
898 (RelId(1), 100, 0.1),
899 (RelId(2), 100, 0.0),
900 (RelId(3), 100, 0.0),
901 ]);
902 let config = heat_aware_config();
903 let m = HeatAwareLeaderModel;
904 assert_eq!(
905 m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
906 None
907 );
908 }
909
910 #[test]
911 fn heat_aware_leader_disabled_short_circuit() {
912 // Strong heat differential, but config = Disabled → None.
913 let stats = stats_with_heat(&[
914 (RelId(1), 100, 0.9),
915 (RelId(2), 100, 0.0),
916 (RelId(3), 100, 0.0),
917 ]);
918 let config = CompilerConfig::default(); // Disabled
919 let m = HeatAwareLeaderModel;
920 assert_eq!(
921 m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
922 None
923 );
924 }
925
926 #[test]
927 fn heat_aware_leader_missing_card_safety_floor() {
928 // RelId(2) has no card registered (zero). Cost model
929 // returns None per missing-card safety floor (matches
930 // LeaderCardinalityModel's behavior).
931 let stats = stats_with_heat(&[
932 (RelId(1), 100, 0.5),
933 (RelId(2), 0, 0.0), // missing card
934 (RelId(3), 100, 0.0),
935 ]);
936 let config = heat_aware_config();
937 let m = HeatAwareLeaderModel;
938 assert_eq!(
939 m.pick_triangle_leader([RelId(1), RelId(2), RelId(3)], &stats, &config),
940 None
941 );
942 }
943}