Skip to main content

xlog_stats/
manager.rs

1//! Statistics manager for GPU-resident relation metadata.
2//!
3//! This module provides the [`StatsManager`] type which maintains statistics for all
4//! GPU-resident relations and their join selectivities. It is the central coordination
5//! point for optimizer cost models and solver heuristics.
6
7use std::collections::HashMap;
8use xlog_core::RelId;
9
10use crate::stats::{ColumnStats, JoinSelectivity, RelationStats};
11
12/// Serializable snapshot of collected statistics.
13///
14/// This is intended for feeding runtime observations back into the compiler/optimizer.
15#[derive(Debug, Clone, Default)]
16pub struct StatsSnapshot {
17    /// Per-relation statistics.
18    pub relations: Vec<RelationStats>,
19    /// Cached join selectivity models.
20    pub join_selectivities: Vec<JoinSelectivity>,
21    /// Optional mapping from runtime `RelId` to predicate name.
22    ///
23    /// When present, consumers should prefer this over raw `RelId` matching to avoid
24    /// misapplying statistics across different programs where `RelId`s may be reused.
25    pub rel_names: Vec<(RelId, String)>,
26}
27
28/// Manages GPU-resident statistics for all relations.
29///
30/// The `StatsManager` is the central repository for relation statistics and join
31/// selectivity information. It provides methods for:
32///
33/// - Registering new relations and tracking their statistics
34/// - Updating cardinality and access patterns
35/// - Estimating join cardinalities using cached selectivity data
36/// - Managing relation "heat" for LRU-style eviction
37///
38/// # Thread Safety
39///
40/// This type is not thread-safe. For concurrent access, wrap in appropriate
41/// synchronization primitives (e.g., `RwLock`).
42///
43/// # Example
44///
45/// ```ignore
46/// use xlog_stats::StatsManager;
47/// use xlog_core::RelId;
48///
49/// let mut mgr = StatsManager::new();
50///
51/// // Register relations
52/// mgr.register_relation(RelId(1));
53/// mgr.register_relation(RelId(2));
54///
55/// // Update statistics
56/// mgr.update_cardinality(RelId(1), 10_000);
57/// mgr.update_cardinality(RelId(2), 5_000);
58///
59/// // Estimate join cardinality
60/// let estimate = mgr.estimate_join_cardinality(RelId(1), RelId(2), &[0], &[0]);
61/// ```
62#[derive(Debug, Default)]
63pub struct StatsManager {
64    /// Per-relation statistics indexed by RelId
65    relations: HashMap<RelId, RelationStats>,
66    /// Join selectivity cache indexed by (smaller_rel_id, larger_rel_id) for canonical ordering
67    join_selectivities: HashMap<(RelId, RelId), JoinSelectivity>,
68}
69
70impl StatsManager {
71    /// Creates a new empty statistics manager.
72    ///
73    /// # Returns
74    ///
75    /// A new `StatsManager` with no registered relations.
76    pub fn new() -> Self {
77        Self::default()
78    }
79
80    /// Registers a new relation for statistics tracking.
81    ///
82    /// If the relation is already registered, this is a no-op.
83    ///
84    /// # Arguments
85    ///
86    /// * `rel_id` - The unique identifier for the relation
87    pub fn register_relation(&mut self, rel_id: RelId) {
88        self.relations
89            .entry(rel_id)
90            .or_insert_with(|| RelationStats::new(rel_id));
91    }
92
93    /// Create a snapshot of all currently tracked statistics.
94    pub fn snapshot(&self) -> StatsSnapshot {
95        StatsSnapshot {
96            relations: self.relations.values().cloned().collect(),
97            join_selectivities: self.join_selectivities.values().cloned().collect(),
98            rel_names: Vec::new(),
99        }
100    }
101
102    /// Merge a previously captured snapshot into this manager.
103    ///
104    /// Existing entries are overwritten with the snapshot values.
105    pub fn merge_snapshot(&mut self, snapshot: &StatsSnapshot) {
106        for rel in &snapshot.relations {
107            self.register_relation(rel.rel_id);
108            if let Some(stats) = self.relations.get_mut(&rel.rel_id) {
109                *stats = rel.clone();
110            }
111        }
112
113        for js in &snapshot.join_selectivities {
114            self.set_join_selectivity(
115                js.left_rel,
116                js.right_rel,
117                js.left_keys.clone(),
118                js.right_keys.clone(),
119                js.selectivity,
120            );
121        }
122    }
123
124    /// Unregisters a relation, removing all associated statistics.
125    ///
126    /// Also removes any join selectivity entries involving this relation.
127    ///
128    /// # Arguments
129    ///
130    /// * `rel_id` - The relation to unregister
131    ///
132    /// # Returns
133    ///
134    /// The removed statistics if the relation was registered
135    pub fn unregister_relation(&mut self, rel_id: RelId) -> Option<RelationStats> {
136        // Remove join selectivities involving this relation
137        self.join_selectivities
138            .retain(|(left, right), _| *left != rel_id && *right != rel_id);
139
140        self.relations.remove(&rel_id)
141    }
142
143    /// Gets immutable reference to relation statistics.
144    ///
145    /// # Arguments
146    ///
147    /// * `rel_id` - The relation to look up
148    ///
149    /// # Returns
150    ///
151    /// A reference to the statistics if the relation is registered
152    pub fn get_relation_stats(&self, rel_id: RelId) -> Option<&RelationStats> {
153        self.relations.get(&rel_id)
154    }
155
156    /// Gets mutable reference to relation statistics.
157    ///
158    /// # Arguments
159    ///
160    /// * `rel_id` - The relation to look up
161    ///
162    /// # Returns
163    ///
164    /// A mutable reference to the statistics if the relation is registered
165    pub fn get_relation_stats_mut(&mut self, rel_id: RelId) -> Option<&mut RelationStats> {
166        self.relations.get_mut(&rel_id)
167    }
168
169    /// Updates the cardinality (row count) for a relation.
170    ///
171    /// If the relation is not registered, this is a no-op.
172    ///
173    /// # Arguments
174    ///
175    /// * `rel_id` - The relation to update
176    /// * `rows` - The new cardinality estimate
177    pub fn update_cardinality(&mut self, rel_id: RelId, rows: u64) {
178        if let Some(stats) = self.relations.get_mut(&rel_id) {
179            stats.update_cardinality(rows);
180        }
181    }
182
183    /// Updates the byte size estimate for a relation.
184    ///
185    /// If the relation is not registered, this is a no-op.
186    ///
187    /// # Arguments
188    ///
189    /// * `rel_id` - The relation to update
190    /// * `bytes` - The estimated total size in bytes
191    pub fn update_byte_size(&mut self, rel_id: RelId, bytes: u64) {
192        if let Some(stats) = self.relations.get_mut(&rel_id) {
193            stats.update_byte_size(bytes);
194        }
195    }
196
197    /// Records an access to a relation, updating its heat and timestamp.
198    ///
199    /// If the relation is not registered, this is a no-op.
200    ///
201    /// # Arguments
202    ///
203    /// * `rel_id` - The relation that was accessed
204    pub fn record_access(&mut self, rel_id: RelId) {
205        if let Some(stats) = self.relations.get_mut(&rel_id) {
206            stats.record_access();
207        }
208    }
209
210    /// Adds column statistics to a relation.
211    ///
212    /// If the relation is not registered, this is a no-op.
213    ///
214    /// # Arguments
215    ///
216    /// * `rel_id` - The relation to update
217    /// * `col_stats` - The column statistics to add
218    pub fn add_column_stats(&mut self, rel_id: RelId, col_stats: ColumnStats) {
219        if let Some(stats) = self.relations.get_mut(&rel_id) {
220            stats.add_column(col_stats);
221        }
222    }
223
224    /// Estimates the output cardinality for a join between two relations.
225    ///
226    /// Uses cached selectivity if available, otherwise uses a default heuristic.
227    /// The estimation formula is: `left_card * right_card * selectivity`.
228    ///
229    /// # Arguments
230    ///
231    /// * `left_rel` - The left relation in the join
232    /// * `right_rel` - The right relation in the join
233    /// * `left_keys` - Column indices used as join keys on the left (currently for future use)
234    /// * `right_keys` - Column indices used as join keys on the right (currently for future use)
235    ///
236    /// # Returns
237    ///
238    /// The estimated output cardinality (minimum of 1)
239    pub fn estimate_join_cardinality(
240        &self,
241        left_rel: RelId,
242        right_rel: RelId,
243        left_keys: &[usize],
244        right_keys: &[usize],
245    ) -> u64 {
246        // Get cardinalities with sensible defaults
247        let left_card = self
248            .relations
249            .get(&left_rel)
250            .map(|s| s.cardinality)
251            .unwrap_or(1000);
252        let right_card = self
253            .relations
254            .get(&right_rel)
255            .map(|s| s.cardinality)
256            .unwrap_or(1000);
257
258        // Use canonical key ordering for selectivity lookup
259        let key = Self::canonical_join_key(left_rel, right_rel);
260
261        // Try to use cached selectivity
262        if let Some(js) = self.join_selectivities.get(&key) {
263            return js.estimate_output_rows(left_card, right_card);
264        }
265
266        // Try to estimate from column statistics
267        if !left_keys.is_empty() && !right_keys.is_empty() {
268            if let (Some(left_stats), Some(right_stats)) = (
269                self.relations.get(&left_rel),
270                self.relations.get(&right_rel),
271            ) {
272                // Use first key column for selectivity estimation
273                let left_distinct = left_stats
274                    .get_column(left_keys[0])
275                    .map(|c| c.distinct_estimate)
276                    .unwrap_or(0);
277                let right_distinct = right_stats
278                    .get_column(right_keys[0])
279                    .map(|c| c.distinct_estimate)
280                    .unwrap_or(0);
281
282                if left_distinct > 0 && right_distinct > 0 {
283                    let selectivity = JoinSelectivity::estimate_selectivity_from_stats(
284                        left_distinct,
285                        right_distinct,
286                    );
287                    return ((left_card as f64 * right_card as f64 * selectivity) as u64).max(1);
288                }
289            }
290        }
291
292        // Default: assume 10% selectivity as a reasonable heuristic
293        // This is conservative and avoids underestimating join sizes
294        let default_selectivity = 0.1;
295        ((left_card as f64 * right_card as f64 * default_selectivity) as u64).max(1)
296    }
297
298    /// Records the result of a join execution to improve future estimates.
299    ///
300    /// Updates the selectivity model using exponential moving average:
301    /// `new_selectivity = old_selectivity * 0.7 + observed_selectivity * 0.3`
302    ///
303    /// # Arguments
304    ///
305    /// * `left_rel` - The left relation in the join
306    /// * `right_rel` - The right relation in the join
307    /// * `left_keys` - Column indices used as join keys on the left
308    /// * `right_keys` - Column indices used as join keys on the right
309    /// * `input_rows` - Product of input relation cardinalities
310    /// * `output_rows` - Actual output row count
311    pub fn record_join_result(
312        &mut self,
313        left_rel: RelId,
314        right_rel: RelId,
315        left_keys: Vec<usize>,
316        right_keys: Vec<usize>,
317        input_rows: u64,
318        output_rows: u64,
319    ) {
320        let key = Self::canonical_join_key(left_rel, right_rel);
321
322        // Compute observed selectivity
323        let observed_selectivity = if input_rows > 0 {
324            (output_rows as f64 / input_rows as f64).clamp(0.0, 1.0)
325        } else {
326            0.1 // Default when no input
327        };
328
329        // Update or create the selectivity entry
330        let entry = self.join_selectivities.entry(key).or_insert_with(|| {
331            let (canonical_left, canonical_right) = key;
332            JoinSelectivity::new(canonical_left, canonical_right)
333        });
334
335        // Update keys (store in canonical order)
336        let (keys_left, keys_right) = if left_rel <= right_rel {
337            (left_keys, right_keys)
338        } else {
339            (right_keys, left_keys)
340        };
341        entry.left_keys = keys_left;
342        entry.right_keys = keys_right;
343
344        // Exponential moving average for selectivity
345        const EMA_OLD_WEIGHT: f64 = 0.7;
346        const EMA_NEW_WEIGHT: f64 = 0.3;
347        entry.selectivity =
348            entry.selectivity * EMA_OLD_WEIGHT + observed_selectivity * EMA_NEW_WEIGHT;
349    }
350
351    /// Set (or overwrite) the join selectivity between two relations.
352    ///
353    /// This is useful for seeding the optimizer from external observations (e.g., runtime stats).
354    pub fn set_join_selectivity(
355        &mut self,
356        left_rel: RelId,
357        right_rel: RelId,
358        left_keys: Vec<usize>,
359        right_keys: Vec<usize>,
360        selectivity: f64,
361    ) {
362        let key = Self::canonical_join_key(left_rel, right_rel);
363        let entry = self.join_selectivities.entry(key).or_insert_with(|| {
364            let (canonical_left, canonical_right) = key;
365            JoinSelectivity::new(canonical_left, canonical_right)
366        });
367
368        // Store keys in canonical order.
369        let (keys_left, keys_right) = if left_rel <= right_rel {
370            (left_keys, right_keys)
371        } else {
372            (right_keys, left_keys)
373        };
374        entry.set_keys(keys_left, keys_right);
375        entry.set_selectivity(selectivity);
376    }
377
378    /// Gets the cached join selectivity between two relations.
379    ///
380    /// # Arguments
381    ///
382    /// * `left_rel` - One relation in the join
383    /// * `right_rel` - The other relation in the join
384    ///
385    /// # Returns
386    ///
387    /// A reference to the cached selectivity if present
388    pub fn get_join_selectivity(
389        &self,
390        left_rel: RelId,
391        right_rel: RelId,
392    ) -> Option<&JoinSelectivity> {
393        let key = Self::canonical_join_key(left_rel, right_rel);
394        self.join_selectivities.get(&key)
395    }
396
397    /// Decays the heat of all relations by a multiplicative factor.
398    ///
399    /// This should be called periodically (e.g., during garbage collection
400    /// or memory pressure events) to allow unused relations to cool down.
401    ///
402    /// # Arguments
403    ///
404    /// * `factor` - Multiplicative decay factor (typically 0.0 to 1.0)
405    pub fn decay_all_heat(&mut self, factor: f32) {
406        for stats in self.relations.values_mut() {
407            stats.decay_heat(factor);
408        }
409    }
410
411    /// Returns the IDs of all "hot" relations above a given heat threshold.
412    ///
413    /// This is useful for identifying frequently accessed relations that should
414    /// be kept in GPU memory.
415    ///
416    /// # Arguments
417    ///
418    /// * `threshold` - The minimum heat value to be considered "hot"
419    ///
420    /// # Returns
421    ///
422    /// A vector of RelIds for all relations with heat >= threshold
423    pub fn hot_relations(&self, threshold: f32) -> Vec<RelId> {
424        self.relations
425            .iter()
426            .filter(|(_, s)| s.heat >= threshold)
427            .map(|(id, _)| *id)
428            .collect()
429    }
430
431    /// Returns the IDs of all "cold" relations below a given heat threshold.
432    ///
433    /// This is useful for identifying candidates for eviction from GPU memory.
434    ///
435    /// # Arguments
436    ///
437    /// * `threshold` - The maximum heat value to be considered "cold"
438    ///
439    /// # Returns
440    ///
441    /// A vector of RelIds for all relations with heat < threshold
442    pub fn cold_relations(&self, threshold: f32) -> Vec<RelId> {
443        self.relations
444            .iter()
445            .filter(|(_, s)| s.heat < threshold)
446            .map(|(id, _)| *id)
447            .collect()
448    }
449
450    /// Returns the total number of registered relations.
451    pub fn relation_count(&self) -> usize {
452        self.relations.len()
453    }
454
455    /// Returns an iterator over all registered relation IDs.
456    pub fn relation_ids(&self) -> impl Iterator<Item = RelId> + '_ {
457        self.relations.keys().copied()
458    }
459
460    /// Returns the total estimated bytes across all relations.
461    pub fn total_byte_size(&self) -> u64 {
462        self.relations.values().map(|s| s.byte_size).sum()
463    }
464
465    /// Returns the total cardinality across all relations.
466    pub fn total_cardinality(&self) -> u64 {
467        self.relations.values().map(|s| s.cardinality).sum()
468    }
469
470    /// Clears all statistics.
471    ///
472    /// Removes all relation statistics and join selectivities.
473    pub fn clear(&mut self) {
474        self.relations.clear();
475        self.join_selectivities.clear();
476    }
477
478    /// Returns canonical key for join selectivity lookup.
479    ///
480    /// Ensures (smaller_id, larger_id) ordering for consistent lookups.
481    #[inline]
482    fn canonical_join_key(left: RelId, right: RelId) -> (RelId, RelId) {
483        if left <= right {
484            (left, right)
485        } else {
486            (right, left)
487        }
488    }
489}
490
491#[cfg(test)]
492mod tests {
493    use super::*;
494    use xlog_core::ScalarType;
495
496    #[test]
497    fn test_stats_manager_new() {
498        let mgr = StatsManager::new();
499        assert!(mgr.get_relation_stats(RelId(1)).is_none());
500        assert_eq!(mgr.relation_count(), 0);
501    }
502
503    #[test]
504    fn test_stats_manager_default() {
505        let mgr = StatsManager::default();
506        assert_eq!(mgr.relation_count(), 0);
507        assert!(mgr.get_relation_stats(RelId(42)).is_none());
508    }
509
510    #[test]
511    fn test_stats_manager_register_relation() {
512        let mut mgr = StatsManager::new();
513        mgr.register_relation(RelId(1));
514        assert!(mgr.get_relation_stats(RelId(1)).is_some());
515        assert_eq!(mgr.relation_count(), 1);
516    }
517
518    #[test]
519    fn test_stats_manager_register_relation_idempotent() {
520        let mut mgr = StatsManager::new();
521        mgr.register_relation(RelId(1));
522        mgr.update_cardinality(RelId(1), 500);
523        mgr.register_relation(RelId(1)); // Should not reset stats
524        let stats = mgr.get_relation_stats(RelId(1)).unwrap();
525        assert_eq!(stats.cardinality, 500);
526    }
527
528    #[test]
529    fn test_stats_manager_register_multiple_relations() {
530        let mut mgr = StatsManager::new();
531        for i in 1..=10 {
532            mgr.register_relation(RelId(i));
533        }
534        assert_eq!(mgr.relation_count(), 10);
535        for i in 1..=10 {
536            assert!(mgr.get_relation_stats(RelId(i)).is_some());
537        }
538    }
539
540    #[test]
541    fn test_stats_manager_unregister_relation() {
542        let mut mgr = StatsManager::new();
543        mgr.register_relation(RelId(1));
544        mgr.update_cardinality(RelId(1), 1000);
545
546        let removed = mgr.unregister_relation(RelId(1));
547        assert!(removed.is_some());
548        assert_eq!(removed.unwrap().cardinality, 1000);
549        assert!(mgr.get_relation_stats(RelId(1)).is_none());
550        assert_eq!(mgr.relation_count(), 0);
551    }
552
553    #[test]
554    fn test_stats_manager_unregister_removes_join_selectivities() {
555        let mut mgr = StatsManager::new();
556        mgr.register_relation(RelId(1));
557        mgr.register_relation(RelId(2));
558        mgr.register_relation(RelId(3));
559        mgr.update_cardinality(RelId(1), 1000);
560        mgr.update_cardinality(RelId(2), 500);
561        mgr.update_cardinality(RelId(3), 200);
562
563        // Record join results to create selectivity entries
564        mgr.record_join_result(RelId(1), RelId(2), vec![0], vec![0], 500_000, 1000);
565        mgr.record_join_result(RelId(1), RelId(3), vec![0], vec![0], 200_000, 500);
566        mgr.record_join_result(RelId(2), RelId(3), vec![0], vec![0], 100_000, 250);
567
568        assert!(mgr.get_join_selectivity(RelId(1), RelId(2)).is_some());
569        assert!(mgr.get_join_selectivity(RelId(1), RelId(3)).is_some());
570        assert!(mgr.get_join_selectivity(RelId(2), RelId(3)).is_some());
571
572        // Unregister relation 1 - should remove join selectivities with 1
573        mgr.unregister_relation(RelId(1));
574
575        assert!(mgr.get_join_selectivity(RelId(1), RelId(2)).is_none());
576        assert!(mgr.get_join_selectivity(RelId(1), RelId(3)).is_none());
577        // Join between 2 and 3 should still exist
578        assert!(mgr.get_join_selectivity(RelId(2), RelId(3)).is_some());
579    }
580
581    #[test]
582    fn test_stats_manager_update_cardinality() {
583        let mut mgr = StatsManager::new();
584        mgr.register_relation(RelId(1));
585        mgr.update_cardinality(RelId(1), 5000);
586        let stats = mgr.get_relation_stats(RelId(1)).unwrap();
587        assert_eq!(stats.cardinality, 5000);
588    }
589
590    #[test]
591    fn test_stats_manager_update_cardinality_unregistered() {
592        let mut mgr = StatsManager::new();
593        // Should be a no-op for unregistered relation
594        mgr.update_cardinality(RelId(1), 5000);
595        assert!(mgr.get_relation_stats(RelId(1)).is_none());
596    }
597
598    #[test]
599    fn test_stats_manager_update_byte_size() {
600        let mut mgr = StatsManager::new();
601        mgr.register_relation(RelId(1));
602        mgr.update_byte_size(RelId(1), 1024 * 1024);
603        let stats = mgr.get_relation_stats(RelId(1)).unwrap();
604        assert_eq!(stats.byte_size, 1024 * 1024);
605    }
606
607    #[test]
608    fn test_stats_manager_record_access() {
609        let mut mgr = StatsManager::new();
610        mgr.register_relation(RelId(1));
611        mgr.record_access(RelId(1));
612        let stats = mgr.get_relation_stats(RelId(1)).unwrap();
613        assert!(stats.heat > 0.0);
614        assert!(stats.last_access > 0);
615    }
616
617    #[test]
618    fn test_stats_manager_record_access_multiple() {
619        let mut mgr = StatsManager::new();
620        mgr.register_relation(RelId(1));
621
622        for _ in 0..10 {
623            mgr.record_access(RelId(1));
624        }
625
626        let stats = mgr.get_relation_stats(RelId(1)).unwrap();
627        // After 10 accesses, heat should be quite high
628        assert!(stats.heat > 0.5);
629    }
630
631    #[test]
632    fn test_stats_manager_add_column_stats() {
633        let mut mgr = StatsManager::new();
634        mgr.register_relation(RelId(1));
635
636        let mut col_stats = ColumnStats::new(0, ScalarType::I64);
637        col_stats.update_distinct(100);
638        col_stats.update_range(0, 1000);
639        mgr.add_column_stats(RelId(1), col_stats);
640
641        let stats = mgr.get_relation_stats(RelId(1)).unwrap();
642        assert_eq!(stats.column_stats.len(), 1);
643        let col = stats.get_column(0).unwrap();
644        assert_eq!(col.distinct_estimate, 100);
645    }
646
647    #[test]
648    fn test_stats_manager_estimate_join() {
649        let mut mgr = StatsManager::new();
650        mgr.register_relation(RelId(1));
651        mgr.register_relation(RelId(2));
652        mgr.update_cardinality(RelId(1), 1000);
653        mgr.update_cardinality(RelId(2), 500);
654
655        let estimate = mgr.estimate_join_cardinality(RelId(1), RelId(2), &[0], &[0]);
656        // Default selectivity assumes 10%: 1000 * 500 * 0.1 = 50000
657        assert!(estimate > 0);
658        assert!(estimate <= 1000 * 500);
659    }
660
661    #[test]
662    fn test_stats_manager_estimate_join_with_column_stats() {
663        let mut mgr = StatsManager::new();
664        mgr.register_relation(RelId(1));
665        mgr.register_relation(RelId(2));
666        mgr.update_cardinality(RelId(1), 1000);
667        mgr.update_cardinality(RelId(2), 500);
668
669        // Add column stats with distinct values
670        let mut col_stats1 = ColumnStats::new(0, ScalarType::I64);
671        col_stats1.update_distinct(100);
672        mgr.add_column_stats(RelId(1), col_stats1);
673
674        let mut col_stats2 = ColumnStats::new(0, ScalarType::I64);
675        col_stats2.update_distinct(50);
676        mgr.add_column_stats(RelId(2), col_stats2);
677
678        let estimate = mgr.estimate_join_cardinality(RelId(1), RelId(2), &[0], &[0]);
679        // Selectivity = 1/max(100, 50) = 0.01
680        // Expected: 1000 * 500 * 0.01 = 5000
681        assert_eq!(estimate, 5000);
682    }
683
684    #[test]
685    fn test_stats_manager_estimate_join_unregistered() {
686        let mgr = StatsManager::new();
687        // Should use default cardinality of 1000 for unregistered relations
688        let estimate = mgr.estimate_join_cardinality(RelId(1), RelId(2), &[0], &[0]);
689        // 1000 * 1000 * 0.1 = 100000
690        assert_eq!(estimate, 100_000);
691    }
692
693    #[test]
694    fn test_stats_manager_estimate_join_minimum_one() {
695        let mut mgr = StatsManager::new();
696        mgr.register_relation(RelId(1));
697        mgr.register_relation(RelId(2));
698        mgr.update_cardinality(RelId(1), 1);
699        mgr.update_cardinality(RelId(2), 1);
700
701        // Add column stats with high distinct count to make selectivity very low
702        let mut col_stats1 = ColumnStats::new(0, ScalarType::I64);
703        col_stats1.update_distinct(1_000_000);
704        mgr.add_column_stats(RelId(1), col_stats1);
705
706        let mut col_stats2 = ColumnStats::new(0, ScalarType::I64);
707        col_stats2.update_distinct(1_000_000);
708        mgr.add_column_stats(RelId(2), col_stats2);
709
710        let estimate = mgr.estimate_join_cardinality(RelId(1), RelId(2), &[0], &[0]);
711        // Should be at least 1
712        assert!(estimate >= 1);
713    }
714
715    #[test]
716    fn test_stats_manager_record_join_result() {
717        let mut mgr = StatsManager::new();
718        mgr.register_relation(RelId(1));
719        mgr.register_relation(RelId(2));
720        mgr.update_cardinality(RelId(1), 1000);
721        mgr.update_cardinality(RelId(2), 500);
722
723        // Record a join result
724        mgr.record_join_result(RelId(1), RelId(2), vec![0], vec![0], 500_000, 1000);
725
726        // Should have created a selectivity entry
727        let js = mgr.get_join_selectivity(RelId(1), RelId(2)).unwrap();
728        assert_eq!(js.left_keys, vec![0]);
729        assert_eq!(js.right_keys, vec![0]);
730        // Observed selectivity: 1000/500000 = 0.002
731        // EMA: 1.0 * 0.7 + 0.002 * 0.3 ≈ 0.7006
732        assert!(js.selectivity < 1.0);
733    }
734
735    #[test]
736    fn test_stats_manager_record_join_result_canonical_order() {
737        let mut mgr = StatsManager::new();
738        mgr.register_relation(RelId(1));
739        mgr.register_relation(RelId(2));
740
741        // Record with reverse order - should use canonical key
742        mgr.record_join_result(RelId(2), RelId(1), vec![0], vec![1], 1000, 100);
743
744        // Should be able to look up with either order
745        assert!(mgr.get_join_selectivity(RelId(1), RelId(2)).is_some());
746        assert!(mgr.get_join_selectivity(RelId(2), RelId(1)).is_some());
747
748        // Both lookups should return the same entry
749        let js1 = mgr.get_join_selectivity(RelId(1), RelId(2)).unwrap();
750        let js2 = mgr.get_join_selectivity(RelId(2), RelId(1)).unwrap();
751        assert_eq!(js1.selectivity, js2.selectivity);
752    }
753
754    #[test]
755    fn test_stats_manager_record_join_result_ema_update() {
756        let mut mgr = StatsManager::new();
757        mgr.register_relation(RelId(1));
758        mgr.register_relation(RelId(2));
759
760        // First observation
761        mgr.record_join_result(RelId(1), RelId(2), vec![0], vec![0], 1000, 100);
762        let sel1 = mgr
763            .get_join_selectivity(RelId(1), RelId(2))
764            .unwrap()
765            .selectivity;
766
767        // Second observation with different selectivity
768        mgr.record_join_result(RelId(1), RelId(2), vec![0], vec![0], 1000, 500);
769        let sel2 = mgr
770            .get_join_selectivity(RelId(1), RelId(2))
771            .unwrap()
772            .selectivity;
773
774        // Selectivity should have moved via EMA
775        assert!(sel2 != sel1);
776    }
777
778    #[test]
779    fn test_stats_manager_decay_all_heat() {
780        let mut mgr = StatsManager::new();
781        mgr.register_relation(RelId(1));
782        mgr.register_relation(RelId(2));
783
784        // Heat up relations
785        for _ in 0..10 {
786            mgr.record_access(RelId(1));
787            mgr.record_access(RelId(2));
788        }
789
790        let heat1_before = mgr.get_relation_stats(RelId(1)).unwrap().heat;
791        let heat2_before = mgr.get_relation_stats(RelId(2)).unwrap().heat;
792
793        mgr.decay_all_heat(0.5);
794
795        let heat1_after = mgr.get_relation_stats(RelId(1)).unwrap().heat;
796        let heat2_after = mgr.get_relation_stats(RelId(2)).unwrap().heat;
797
798        assert!((heat1_after - heat1_before * 0.5).abs() < 0.001);
799        assert!((heat2_after - heat2_before * 0.5).abs() < 0.001);
800    }
801
802    #[test]
803    fn test_stats_manager_hot_relations() {
804        let mut mgr = StatsManager::new();
805        mgr.register_relation(RelId(1));
806        mgr.register_relation(RelId(2));
807        mgr.register_relation(RelId(3));
808
809        // Heat up only relation 1
810        for _ in 0..20 {
811            mgr.record_access(RelId(1));
812        }
813
814        let hot = mgr.hot_relations(0.5);
815        assert_eq!(hot.len(), 1);
816        assert_eq!(hot[0], RelId(1));
817    }
818
819    #[test]
820    fn test_stats_manager_cold_relations() {
821        let mut mgr = StatsManager::new();
822        mgr.register_relation(RelId(1));
823        mgr.register_relation(RelId(2));
824        mgr.register_relation(RelId(3));
825
826        // Heat up only relation 1
827        for _ in 0..20 {
828            mgr.record_access(RelId(1));
829        }
830
831        let cold = mgr.cold_relations(0.5);
832        // Relations 2 and 3 should be cold
833        assert_eq!(cold.len(), 2);
834        assert!(cold.contains(&RelId(2)));
835        assert!(cold.contains(&RelId(3)));
836    }
837
838    #[test]
839    fn test_stats_manager_relation_ids() {
840        let mut mgr = StatsManager::new();
841        mgr.register_relation(RelId(5));
842        mgr.register_relation(RelId(10));
843        mgr.register_relation(RelId(15));
844
845        let ids: Vec<_> = mgr.relation_ids().collect();
846        assert_eq!(ids.len(), 3);
847        assert!(ids.contains(&RelId(5)));
848        assert!(ids.contains(&RelId(10)));
849        assert!(ids.contains(&RelId(15)));
850    }
851
852    #[test]
853    fn test_stats_manager_total_byte_size() {
854        let mut mgr = StatsManager::new();
855        mgr.register_relation(RelId(1));
856        mgr.register_relation(RelId(2));
857        mgr.update_byte_size(RelId(1), 1000);
858        mgr.update_byte_size(RelId(2), 2000);
859
860        assert_eq!(mgr.total_byte_size(), 3000);
861    }
862
863    #[test]
864    fn test_stats_manager_total_cardinality() {
865        let mut mgr = StatsManager::new();
866        mgr.register_relation(RelId(1));
867        mgr.register_relation(RelId(2));
868        mgr.update_cardinality(RelId(1), 1000);
869        mgr.update_cardinality(RelId(2), 2000);
870
871        assert_eq!(mgr.total_cardinality(), 3000);
872    }
873
874    #[test]
875    fn test_stats_manager_clear() {
876        let mut mgr = StatsManager::new();
877        mgr.register_relation(RelId(1));
878        mgr.register_relation(RelId(2));
879        mgr.record_join_result(RelId(1), RelId(2), vec![0], vec![0], 1000, 100);
880
881        mgr.clear();
882
883        assert_eq!(mgr.relation_count(), 0);
884        assert!(mgr.get_relation_stats(RelId(1)).is_none());
885        assert!(mgr.get_join_selectivity(RelId(1), RelId(2)).is_none());
886    }
887
888    #[test]
889    fn test_stats_manager_get_relation_stats_mut() {
890        let mut mgr = StatsManager::new();
891        mgr.register_relation(RelId(1));
892
893        if let Some(stats) = mgr.get_relation_stats_mut(RelId(1)) {
894            stats.update_cardinality(999);
895            stats.has_index = true;
896        }
897
898        let stats = mgr.get_relation_stats(RelId(1)).unwrap();
899        assert_eq!(stats.cardinality, 999);
900        assert!(stats.has_index);
901    }
902
903    #[test]
904    fn test_stats_manager_join_estimate_uses_cached_selectivity() {
905        let mut mgr = StatsManager::new();
906        mgr.register_relation(RelId(1));
907        mgr.register_relation(RelId(2));
908        mgr.update_cardinality(RelId(1), 1000);
909        mgr.update_cardinality(RelId(2), 500);
910
911        // Record a join with known selectivity
912        // Observed: 2500 / 500000 = 0.005
913        mgr.record_join_result(RelId(1), RelId(2), vec![0], vec![0], 500_000, 2500);
914
915        // Subsequent estimates should use the cached selectivity
916        let estimate = mgr.estimate_join_cardinality(RelId(1), RelId(2), &[0], &[0]);
917
918        // The cached selectivity is an EMA, initial 1.0 * 0.7 + 0.005 * 0.3 = 0.7015
919        // Estimate = 1000 * 500 * 0.7015 = 350750
920        let js = mgr.get_join_selectivity(RelId(1), RelId(2)).unwrap();
921        let expected = ((1000_f64 * 500_f64 * js.selectivity) as u64).max(1);
922        assert_eq!(estimate, expected);
923    }
924
925    #[test]
926    fn test_stats_manager_set_join_selectivity_canonicalizes_keys() {
927        let mut mgr = StatsManager::new();
928        mgr.register_relation(RelId(1));
929        mgr.register_relation(RelId(2));
930
931        // Set in reverse order; manager should store in canonical (1,2).
932        mgr.set_join_selectivity(RelId(2), RelId(1), vec![3], vec![7], 0.05);
933
934        let js = mgr.get_join_selectivity(RelId(1), RelId(2)).unwrap();
935        assert_eq!(js.left_rel, RelId(1));
936        assert_eq!(js.right_rel, RelId(2));
937        assert_eq!(js.left_keys, vec![7]);
938        assert_eq!(js.right_keys, vec![3]);
939        assert!((js.selectivity - 0.05).abs() < 1e-9);
940    }
941
942    #[test]
943    fn test_stats_manager_snapshot_and_merge() {
944        let mut mgr = StatsManager::new();
945        mgr.register_relation(RelId(1));
946        mgr.update_cardinality(RelId(1), 123);
947        mgr.record_access(RelId(1));
948        mgr.set_join_selectivity(RelId(1), RelId(2), vec![0], vec![0], 0.2);
949
950        let snap = mgr.snapshot();
951
952        let mut mgr2 = StatsManager::new();
953        mgr2.merge_snapshot(&snap);
954
955        let r1 = mgr2.get_relation_stats(RelId(1)).unwrap();
956        assert_eq!(r1.cardinality, 123);
957
958        let js = mgr2.get_join_selectivity(RelId(1), RelId(2)).unwrap();
959        assert_eq!(js.left_keys, vec![0]);
960        assert_eq!(js.right_keys, vec![0]);
961        assert!((js.selectivity - 0.2).abs() < 1e-9);
962    }
963
964    #[test]
965    fn test_canonical_join_key() {
966        assert_eq!(
967            StatsManager::canonical_join_key(RelId(1), RelId(2)),
968            (RelId(1), RelId(2))
969        );
970        assert_eq!(
971            StatsManager::canonical_join_key(RelId(2), RelId(1)),
972            (RelId(1), RelId(2))
973        );
974        assert_eq!(
975            StatsManager::canonical_join_key(RelId(5), RelId(5)),
976            (RelId(5), RelId(5))
977        );
978    }
979}