Skip to main content

xlog_runtime/
statistics.rs

1//! Query statistics tracking for adaptive optimization
2//!
3//! Tracks access patterns and selectivity to guide index building decisions.
4
5use std::collections::HashMap;
6
7/// Join execution strategy
8#[derive(Debug, Clone, Copy, PartialEq, Eq)]
9pub enum JoinStrategy {
10    /// Hash join - build hash table on right, probe with left
11    Hash,
12    /// Nested loop join - for small right tables
13    NestedLoop,
14    /// Sort-merge join - for pre-sorted data
15    SortMerge,
16    /// Index nested loop - use existing index
17    IndexNestedLoop,
18}
19
20impl JoinStrategy {
21    /// Threshold for switching to nested loop (right table size)
22    const NESTED_LOOP_THRESHOLD: u64 = 1000;
23
24    /// Select optimal join strategy based on table sizes and data characteristics
25    pub fn select(
26        _left_rows: u64,
27        right_rows: u64,
28        pre_sorted: Option<bool>,
29        _stats: &QueryStatistics,
30    ) -> Self {
31        // If data is pre-sorted, sort-merge is efficient
32        if pre_sorted == Some(true) {
33            return JoinStrategy::SortMerge;
34        }
35
36        // For small right tables, nested loop avoids hash table overhead
37        if right_rows < Self::NESTED_LOOP_THRESHOLD {
38            return JoinStrategy::NestedLoop;
39        }
40
41        // Default to hash join
42        JoinStrategy::Hash
43    }
44}
45
46/// Statistics for a specific join pair
47#[derive(Debug, Clone, Default)]
48pub struct JoinStats {
49    /// Number of times this join pair was executed.
50    pub count: u64,
51    /// Cumulative selectivity across all executions.
52    pub total_selectivity: f64,
53    /// Average selectivity (total_selectivity / count).
54    pub avg_selectivity: f64,
55}
56
57/// Query statistics tracker
58#[derive(Debug, Default)]
59pub struct QueryStatistics {
60    scan_counts: HashMap<String, u64>,
61    join_stats: HashMap<(String, String), JoinStats>,
62    total_ops: u64,
63}
64
65impl QueryStatistics {
66    /// Create an empty statistics tracker.
67    pub fn new() -> Self {
68        Self::default()
69    }
70
71    /// Record a scan access to a relation.
72    pub fn record_scan(&mut self, relation: &str) {
73        *self.scan_counts.entry(relation.to_string()).or_insert(0) += 1;
74        self.total_ops += 1;
75    }
76
77    /// Record a join execution and its observed selectivity.
78    pub fn record_join(&mut self, left: &str, right: &str, selectivity: f64) {
79        let key = (left.to_string(), right.to_string());
80        let stats = self.join_stats.entry(key).or_default();
81        stats.count += 1;
82        stats.total_selectivity += selectivity;
83        stats.avg_selectivity = stats.total_selectivity / stats.count as f64;
84        self.total_ops += 1;
85    }
86
87    /// Return the number of scan accesses for a relation.
88    pub fn scan_count(&self, relation: &str) -> u64 {
89        self.scan_counts.get(relation).copied().unwrap_or(0)
90    }
91
92    /// Look up join statistics for a specific pair of relations.
93    pub fn join_stats(&self, left: &str, right: &str) -> Option<&JoinStats> {
94        self.join_stats.get(&(left.to_string(), right.to_string()))
95    }
96
97    /// Compute the access heat for a relation (scans + 2*join accesses).
98    pub fn heat(&self, relation: &str) -> u64 {
99        let scan_heat = self.scan_count(relation);
100        let join_heat: u64 = self
101            .join_stats
102            .iter()
103            .filter(|((l, r), _)| l == relation || r == relation)
104            .map(|(_, stats)| stats.count * 2)
105            .sum();
106        scan_heat + join_heat
107    }
108
109    /// Return all relations sorted by descending heat.
110    pub fn relations_by_heat(&self) -> Vec<(String, u64)> {
111        let mut relations: Vec<_> = self
112            .scan_counts
113            .keys()
114            .map(|r| (r.clone(), self.heat(r)))
115            .collect();
116
117        for (left, right) in self.join_stats.keys() {
118            if !self.scan_counts.contains_key(left) {
119                relations.push((left.clone(), self.heat(left)));
120            }
121            if !self.scan_counts.contains_key(right) {
122                relations.push((right.clone(), self.heat(right)));
123            }
124        }
125
126        relations.sort_by_key(|relation| std::cmp::Reverse(relation.1));
127        relations.dedup_by(|a, b| a.0 == b.0);
128        relations
129    }
130
131    /// Clear all tracked statistics.
132    pub fn clear(&mut self) {
133        self.scan_counts.clear();
134        self.join_stats.clear();
135        self.total_ops = 0;
136    }
137
138    /// Total number of recorded operations.
139    pub fn total_ops(&self) -> u64 {
140        self.total_ops
141    }
142}