1mod buffers;
16mod evidence;
17mod resident;
18mod results;
19
20pub use evidence::{EvidenceForcing, ForceabilityReason};
21pub use resident::{
22 compile_resident_plan, McNoHostStats, McResidentResult, ResidentPlan, ResidentRejectKind,
23 ResidentRejection,
24};
25
26#[cfg(feature = "host-io")]
27use std::collections::{HashMap, HashSet};
28use std::sync::Arc;
29
30#[cfg(feature = "host-io")]
31use cudarc::driver::DeviceSlice;
32use xlog_core::{MemoryBudget, Result, XlogError};
33use xlog_cuda::memory::TrackedCudaSlice;
34use xlog_cuda::{CudaDevice, CudaKernelProvider, GpuMemoryManager};
35#[cfg(feature = "host-io")]
36use xlog_logic::ast::{BodyLiteral, Rule};
37use xlog_logic::ast::{Directives, Evidence, ProbMethod, ProbQuery, Program};
38
39use crate::exact::GpuConfig;
40#[cfg(feature = "host-io")]
41use crate::provenance::Value;
42use crate::provenance::{atom_key_from_ground_atom, GroundAtom};
43
44#[derive(Debug, Clone, Copy, PartialEq, Eq)]
46pub enum McSamplingMethod {
47 Rejection,
49 EvidenceClamping,
51}
52
53impl McSamplingMethod {
54 pub fn as_str(self) -> &'static str {
55 match self {
56 McSamplingMethod::Rejection => "rejection",
57 McSamplingMethod::EvidenceClamping => "evidence_clamping",
58 }
59 }
60}
61
62impl From<ProbMethod> for McSamplingMethod {
63 fn from(value: ProbMethod) -> Self {
64 match value {
65 ProbMethod::Rejection => Self::Rejection,
66 ProbMethod::EvidenceClamping => Self::EvidenceClamping,
67 }
68 }
69}
70
71#[derive(Debug, Clone, Copy, PartialEq, Eq)]
77pub enum McCountStrategy {
78 QueriesAndEvidence,
80 QueriesOnly,
82}
83
84impl McCountStrategy {
85 pub fn from_method(method: McSamplingMethod) -> Self {
87 match method {
88 McSamplingMethod::Rejection => Self::QueriesAndEvidence,
89 McSamplingMethod::EvidenceClamping => Self::QueriesOnly,
90 }
91 }
92}
93
94#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
97pub struct McTimingBreakdown {
98 pub sampler_us: u64,
99 pub sample_reset_us: u64,
100 pub sample_build_us: u64,
101 pub eval_us: u64,
102 pub count_us: u64,
103}
104
105impl McTimingBreakdown {
106 pub fn total_us(&self) -> u64 {
107 self.sampler_us
108 .saturating_add(self.sample_reset_us)
109 .saturating_add(self.sample_build_us)
110 .saturating_add(self.eval_us)
111 .saturating_add(self.count_us)
112 }
113}
114
115pub const NONMONOTONE_SEMANTICS: &str = "Synchronous iteration per SCC; if a fixpoint is reached, use it; if a cycle is detected, use the intersection of all states in the cycle (skeptical tuples only); if the iteration budget is exceeded, use the intersection across all visited states (conservative).";
117
118#[derive(Debug, Clone)]
119#[non_exhaustive]
120pub struct McEvalConfig {
125 pub samples: usize,
127 pub seed: u64,
129 pub confidence: f64,
131 pub max_nonmonotone_iterations: usize,
133 pub sampling_method: Option<McSamplingMethod>,
135 pub allow_cpu_oracle_fallback: bool,
141}
142
143impl Default for McEvalConfig {
144 fn default() -> Self {
145 Self {
146 samples: 10000,
147 seed: 0,
148 confidence: 0.95,
149 max_nonmonotone_iterations: 1024,
150 sampling_method: None,
151 allow_cpu_oracle_fallback: false,
152 }
153 }
154}
155
156impl McEvalConfig {
157 pub fn from_directives(directives: &Directives) -> Result<Self> {
158 let mut cfg = Self::default();
159 if let Some(samples) = directives.prob_samples {
160 cfg.samples = samples;
161 }
162 if let Some(seed) = directives.prob_seed {
163 cfg.seed = seed;
164 }
165 if let Some(confidence) = directives.prob_confidence {
166 cfg.confidence = confidence;
167 }
168 if let Some(iterations) = directives.prob_max_nonmonotone_iterations {
169 cfg.max_nonmonotone_iterations = iterations;
170 }
171 cfg.sampling_method = directives.prob_method.map(McSamplingMethod::from);
172 cfg.validate()?;
173 Ok(cfg)
174 }
175
176 pub fn validate(&self) -> Result<()> {
177 if self.samples == 0 {
178 return Err(XlogError::Compilation(
179 "MC inference requires samples > 0".to_string(),
180 ));
181 }
182 if !(0.0 < self.confidence && self.confidence < 1.0) || self.confidence.is_nan() {
183 return Err(XlogError::Compilation(format!(
184 "MC inference requires 0 < confidence < 1, got {}",
185 self.confidence
186 )));
187 }
188 if self.max_nonmonotone_iterations == 0 {
189 return Err(XlogError::Compilation(
190 "MC inference requires max_nonmonotone_iterations > 0".to_string(),
191 ));
192 }
193 Ok(())
194 }
195}
196
197#[derive(Debug, Clone)]
198pub struct McQueryEstimate {
199 pub atom: GroundAtom,
200 pub prob: f64,
201 pub log_prob: f64,
202 pub stderr: f64,
203 pub ci_low: f64,
204 pub ci_high: f64,
205}
206
207#[derive(Debug, Clone, Copy, PartialEq, Eq)]
209pub enum McEngine {
210 GpuResident,
212 CpuOracle,
216}
217
218impl McEngine {
219 pub fn as_str(&self) -> &'static str {
221 match self {
222 McEngine::GpuResident => "gpu-resident",
223 McEngine::CpuOracle => "cpu-oracle",
224 }
225 }
226}
227
228#[derive(Debug, Clone)]
229pub struct McResult {
230 pub total_samples: usize,
231 pub evidence_samples: usize,
232 pub seed: u64,
233 pub confidence: f64,
234 pub query_estimates: Vec<McQueryEstimate>,
235 pub nonmonotone_sccs: usize,
236 pub nonmonotone_cycles: usize,
237 pub nonmonotone_iteration_limit_hits: usize,
238 pub sampling_method: McSamplingMethod,
239 pub engine: McEngine,
243}
244
245#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
257pub struct McHotLoopTransfers {
258 pub htod_calls: u64,
260 pub dtoh_calls: u64,
262 pub htod_bytes: u64,
264 pub dtoh_bytes: u64,
266}
267
268impl McHotLoopTransfers {
269 pub fn is_zero(&self) -> bool {
271 self.htod_calls == 0 && self.dtoh_calls == 0
272 }
273}
274
275pub struct McDeviceResult {
277 pub query_counts: TrackedCudaSlice<u32>,
278 pub evidence_count: TrackedCudaSlice<u32>,
279 pub total_samples: usize,
280 pub seed: u64,
281 pub confidence: f64,
282 pub nonmonotone_sccs: usize,
283 pub nonmonotone_cycles: usize,
284 pub nonmonotone_iteration_limit_hits: usize,
285 pub sampling_method: McSamplingMethod,
286 pub hot_loop_transfers: McHotLoopTransfers,
290 pub no_host: McNoHostStats,
292}
293
294#[derive(Debug, Clone)]
295pub(super) struct ProbFactSpec {
296 pub(super) var_idx: usize,
297 pub(super) atom: GroundAtom,
298}
299
300#[derive(Debug, Clone)]
301pub(super) struct AdSpec {
302 pub(super) decision_vars: Vec<usize>,
303 pub(super) choices: Vec<GroundAtom>,
304 #[cfg_attr(not(feature = "host-io"), allow(dead_code))]
305 pub(super) has_none: bool,
306}
307
308#[cfg(feature = "host-io")]
309#[derive(Debug, Clone, Default, PartialEq, Eq)]
310pub(super) struct Relation {
311 pub(super) tuples: HashSet<Vec<Value>>,
312}
313
314#[cfg(feature = "host-io")]
315impl Relation {
316 pub(super) fn insert_tuple(&mut self, tuple: Vec<Value>) {
317 self.tuples.insert(tuple);
318 }
319
320 pub(super) fn contains(&self, tuple: &[Value]) -> bool {
321 self.tuples.contains(tuple)
322 }
323
324 pub(super) fn is_empty(&self) -> bool {
325 self.tuples.is_empty()
326 }
327}
328
329#[cfg(feature = "host-io")]
330#[derive(Debug, Clone)]
331pub(super) enum SccKind {
332 MonotoneNonRecursive,
333 MonotoneRecursive,
334 NonMonotone,
335}
336
337#[cfg(feature = "host-io")]
338#[derive(Debug, Clone)]
339pub(super) struct SccPlan {
340 pub(super) predicates: Vec<String>,
341 pub(super) rules: Vec<Rule>,
342 pub(super) kind: SccKind,
343}
344
345#[cfg_attr(not(feature = "host-io"), allow(dead_code))]
346#[derive(Debug, Clone, Default)]
347pub(super) struct EvalStats {
348 pub(super) nonmonotone_sccs: usize,
349 pub(super) nonmonotone_cycles: usize,
350 pub(super) nonmonotone_iteration_limit_hits: usize,
351}
352
353#[derive(Clone)]
354pub struct McProgram {
355 pub(super) gpu_config: GpuConfig,
356 pub(super) program: Program,
357 #[cfg(feature = "host-io")]
358 pub(super) base_store: HashMap<String, Relation>,
359 #[cfg(feature = "host-io")]
360 pub(super) scc_plans: Vec<SccPlan>,
361 pub(super) queries: Vec<GroundAtom>,
362 pub(super) evidence: Vec<(GroundAtom, bool)>,
363 pub(super) bernoulli_probs: Vec<f32>,
364 pub(super) prob_facts: Vec<ProbFactSpec>,
365 pub(super) annotated_disjunctions: Vec<AdSpec>,
366}
367
368impl McProgram {
369 pub fn compile_source(source: &str) -> Result<Self> {
370 let program = xlog_logic::parse_program(source)?;
371 Self::compile_program(&program)
372 }
373
374 pub fn compile_source_with_gpu(source: &str, config: GpuConfig) -> Result<Self> {
375 let mut program = Self::compile_source(source)?;
376 program.gpu_config = config;
377 Ok(program)
378 }
379
380 pub fn num_vars(&self) -> usize {
381 self.bernoulli_probs.len()
382 }
383
384 #[cfg(feature = "host-io")]
399 pub fn evaluate(&self, cfg: McEvalConfig) -> Result<McResult> {
400 let provider = Arc::new(self.provider()?);
401 let cfg_clone = cfg.clone();
402 let device_result =
403 match self.evaluate_gpu_device_with_provider(cfg_clone, provider.clone()) {
404 Ok(result) => result,
405 Err(XlogError::Compilation(message))
406 if message.starts_with("resident MC engine rejected program") =>
407 {
408 if cfg.allow_cpu_oracle_fallback {
412 return self.evaluate_cpu(cfg);
413 }
414 return Err(XlogError::Compilation(format!(
415 "{message}; MC fail-closed: set \
416 McEvalConfig::allow_cpu_oracle_fallback to explicitly \
417 run the labeled CPU oracle instead"
418 )));
419 }
420 Err(err) => return Err(err),
421 };
422
423 let mut host_counts = vec![0u32; device_result.query_counts.len()];
424 if !host_counts.is_empty() {
425 provider
426 .device()
427 .inner()
428 .dtoh_sync_copy_into(&device_result.query_counts, &mut host_counts)
429 .map_err(|e| {
430 XlogError::Kernel(format!("Failed to download MC query counts: {}", e))
431 })?;
432 }
433
434 let mut host_evidence = [0u32];
435 provider
436 .device()
437 .inner()
438 .dtoh_sync_copy_into(&device_result.evidence_count, &mut host_evidence)
439 .map_err(|e| {
440 XlogError::Kernel(format!("Failed to download MC evidence count: {}", e))
441 })?;
442
443 let evidence_samples = if self.evidence.is_empty() {
444 cfg.samples
445 } else {
446 host_evidence[0] as usize
447 };
448
449 if device_result.sampling_method != McSamplingMethod::EvidenceClamping
450 && !self.evidence.is_empty()
451 && evidence_samples == 0
452 {
453 return Err(XlogError::Execution(format!(
454 "MC inference error: evidence was never satisfied across {} samples (seed={})",
455 cfg.samples, cfg.seed
456 )));
457 }
458
459 let z = results::normal_quantile(0.5 + cfg.confidence / 2.0);
460 let mut query_estimates: Vec<McQueryEstimate> = Vec::with_capacity(self.queries.len());
461 for (i, atom) in self.queries.iter().enumerate() {
462 let k = host_counts.get(i).copied().unwrap_or(0) as usize;
463 let (p, stderr, ci_low, ci_high) = results::binomial_estimate(k, evidence_samples, z);
464 let log_prob = if p == 0.0 { f64::NEG_INFINITY } else { p.ln() };
465
466 query_estimates.push(McQueryEstimate {
467 atom: atom.clone(),
468 prob: p,
469 log_prob,
470 stderr,
471 ci_low,
472 ci_high,
473 });
474 }
475
476 Ok(McResult {
477 total_samples: cfg.samples,
478 evidence_samples,
479 seed: cfg.seed,
480 confidence: cfg.confidence,
481 query_estimates,
482 nonmonotone_sccs: device_result.nonmonotone_sccs,
483 nonmonotone_cycles: device_result.nonmonotone_cycles,
484 engine: McEngine::GpuResident,
485 nonmonotone_iteration_limit_hits: device_result.nonmonotone_iteration_limit_hits,
486 sampling_method: device_result.sampling_method,
487 })
488 }
489
490 #[cfg(feature = "host-io")]
501 pub fn evaluate_cpu(&self, cfg: McEvalConfig) -> Result<McResult> {
502 if cfg.samples == 0 {
503 return Err(XlogError::Execution(
504 "MC inference requires samples > 0".to_string(),
505 ));
506 }
507 if !(0.0 < cfg.confidence && cfg.confidence < 1.0) || cfg.confidence.is_nan() {
508 return Err(XlogError::Execution(format!(
509 "MC inference requires 0 < confidence < 1, got {}",
510 cfg.confidence
511 )));
512 }
513 if cfg.max_nonmonotone_iterations == 0 {
514 return Err(XlogError::Execution(
515 "MC inference requires max_nonmonotone_iterations > 0".to_string(),
516 ));
517 }
518
519 let (method, forcing) = self.resolve_sampling_method(cfg.sampling_method)?;
520 let is_clamped = method == McSamplingMethod::EvidenceClamping;
521
522 let mut n_evidence: usize = 0;
523 let mut n_query_true: Vec<usize> = vec![0; self.queries.len()];
524 let mut stats = EvalStats::default();
525
526 let num_vars = self.bernoulli_probs.len();
527 let samples_matrix: Vec<u8> = if num_vars == 0 {
528 Vec::new()
529 } else {
530 let total = num_vars
531 .checked_mul(cfg.samples)
532 .ok_or_else(|| XlogError::Execution("MC sample matrix overflow".to_string()))?;
533 let provider = Arc::new(self.provider()?);
534
535 let mut d_force_mask = provider.memory().alloc::<u8>(num_vars.max(1))?;
537 let mut d_forced_value = provider.memory().alloc::<u8>(num_vars.max(1))?;
538 if is_clamped {
539 provider
540 .htod_sync_copy_into_tracked(&forcing.force_mask, &mut d_force_mask)
541 .map_err(|e| {
542 XlogError::Kernel(format!("Failed to upload force_mask: {}", e))
543 })?;
544 provider
545 .htod_sync_copy_into_tracked(&forcing.forced_value, &mut d_forced_value)
546 .map_err(|e| {
547 XlogError::Kernel(format!("Failed to upload forced_value: {}", e))
548 })?;
549 } else {
550 provider
551 .device()
552 .inner()
553 .memset_zeros(&mut d_force_mask)
554 .map_err(|e| XlogError::Kernel(format!("Failed to zero force_mask: {}", e)))?;
555 provider
556 .device()
557 .inner()
558 .memset_zeros(&mut d_forced_value)
559 .map_err(|e| {
560 XlogError::Kernel(format!("Failed to zero forced_value: {}", e))
561 })?;
562 }
563
564 let samples_device = provider.sample_bernoulli_matrix_device(
565 &self.bernoulli_probs,
566 cfg.samples,
567 cfg.seed,
568 &d_force_mask.slice(..),
569 &d_forced_value.slice(..),
570 )?;
571 let mut host = vec![0u8; total];
572 if !host.is_empty() {
573 provider
574 .device()
575 .inner()
576 .dtoh_sync_copy_into(&samples_device, &mut host)
577 .map_err(|e| {
578 XlogError::Kernel(format!("Failed to download MC samples: {}", e))
579 })?;
580 }
581 host
582 };
583
584 for sample_idx in 0..cfg.samples {
585 let sample_bits = if num_vars == 0 {
586 &[][..]
587 } else {
588 let start = sample_idx * num_vars;
589 let end = start + num_vars;
590 &samples_matrix[start..end]
591 };
592
593 let mut store = self.base_store.clone();
594 self.apply_sample_facts(&mut store, sample_bits)?;
595
596 let sample_stats = results::evaluate_program_inplace(
597 &self.scc_plans,
598 &mut store,
599 cfg.max_nonmonotone_iterations,
600 )?;
601 stats.nonmonotone_sccs += sample_stats.nonmonotone_sccs;
602 stats.nonmonotone_cycles += sample_stats.nonmonotone_cycles;
603 stats.nonmonotone_iteration_limit_hits += sample_stats.nonmonotone_iteration_limit_hits;
604
605 if !is_clamped && !results::evidence_satisfied(&store, &self.evidence) {
607 continue;
608 }
609
610 n_evidence += 1;
611 for (i, q) in self.queries.iter().enumerate() {
612 if results::atom_holds(&store, q) {
613 n_query_true[i] += 1;
614 }
615 }
616 }
617
618 if !is_clamped && !self.evidence.is_empty() && n_evidence == 0 {
619 return Err(XlogError::Execution(format!(
620 "MC inference error: evidence was never satisfied across {} samples (seed={})",
621 cfg.samples, cfg.seed
622 )));
623 }
624
625 let denom = if self.evidence.is_empty() || is_clamped {
627 cfg.samples
628 } else {
629 n_evidence
630 };
631
632 let z = results::normal_quantile(0.5 + cfg.confidence / 2.0);
633
634 let mut query_estimates: Vec<McQueryEstimate> = Vec::with_capacity(self.queries.len());
635 for (i, atom) in self.queries.iter().enumerate() {
636 let k = n_query_true[i];
637 let (p, stderr, ci_low, ci_high) = results::binomial_estimate(k, denom, z);
638 let log_prob = if p == 0.0 { f64::NEG_INFINITY } else { p.ln() };
639
640 query_estimates.push(McQueryEstimate {
641 atom: atom.clone(),
642 prob: p,
643 log_prob,
644 stderr,
645 ci_low,
646 ci_high,
647 });
648 }
649
650 Ok(McResult {
651 total_samples: cfg.samples,
652 evidence_samples: denom,
653 seed: cfg.seed,
654 confidence: cfg.confidence,
655 query_estimates,
656 nonmonotone_sccs: stats.nonmonotone_sccs,
657 nonmonotone_cycles: stats.nonmonotone_cycles,
658 nonmonotone_iteration_limit_hits: stats.nonmonotone_iteration_limit_hits,
659 sampling_method: method,
660 engine: McEngine::CpuOracle,
661 })
662 }
663
664 #[cfg(feature = "host-io")]
670 pub fn evaluate_gpu(&self, cfg: McEvalConfig) -> Result<McResult> {
671 self.evaluate(cfg)
672 }
673
674 pub fn evaluate_gpu_device(&self, cfg: McEvalConfig) -> Result<McDeviceResult> {
683 let provider = Arc::new(self.provider()?);
684 self.evaluate_gpu_device_with_provider(cfg, provider)
685 }
686
687 pub fn evaluate_gpu_device_with_provider(
698 &self,
699 cfg: McEvalConfig,
700 provider: Arc<CudaKernelProvider>,
701 ) -> Result<McDeviceResult> {
702 let r = self.evaluate_resident_with_provider(cfg, provider)?;
708 let no_host = r.no_host;
709 Ok(McDeviceResult {
710 query_counts: r.query_counts,
711 evidence_count: r.evidence_count,
712 total_samples: r.total_samples,
713 seed: r.seed,
714 confidence: r.confidence,
715 nonmonotone_sccs: 0,
719 nonmonotone_cycles: 0,
720 nonmonotone_iteration_limit_hits: 0,
721 sampling_method: r.sampling_method,
722 hot_loop_transfers: McHotLoopTransfers {
726 htod_calls: no_host.tracked_htod_calls,
727 dtoh_calls: no_host.tracked_dtoh_calls,
728 htod_bytes: 0,
729 dtoh_bytes: 0,
730 },
731 no_host,
732 })
733 }
734
735 fn compile_program(program: &Program) -> Result<Self> {
736 let mut queries: Vec<GroundAtom> = Vec::new();
737 for ProbQuery { atom } in &program.prob_queries {
738 queries.push(atom_key_from_ground_atom(atom)?);
739 }
740
741 let mut evidence: Vec<(GroundAtom, bool)> = Vec::new();
742 for Evidence { atom, value } in &program.evidence {
743 evidence.push((atom_key_from_ground_atom(atom)?, *value));
744 }
745
746 let mut prob_facts = program.prob_facts.clone();
747 buffers::extend_prob_facts_with_coin(program, &mut prob_facts)?;
748 let (bernoulli_probs, prob_facts, annotated_disjunctions) =
749 buffers::compile_sampling_plan(&prob_facts, &program.annotated_disjunctions)?;
750
751 #[cfg(feature = "host-io")]
752 let mut base_store: HashMap<String, Relation> = HashMap::new();
753
754 #[cfg(feature = "host-io")]
756 {
757 for fact in program.facts() {
758 let atom = atom_key_from_ground_atom(&fact.head)?;
759 base_store
760 .entry(atom.predicate.clone())
761 .or_default()
762 .insert_tuple(atom.args);
763 }
764 }
765
766 #[cfg(feature = "host-io")]
769 {
770 let mut referenced: HashSet<String> = HashSet::new();
771 for rule in &program.rules {
772 referenced.insert(rule.head.predicate.clone());
773 for lit in &rule.body {
774 match lit {
775 BodyLiteral::Positive(a) | BodyLiteral::Negated(a) => {
776 referenced.insert(a.predicate.clone());
777 }
778 BodyLiteral::Epistemic(lit) => {
779 referenced.insert(lit.atom.predicate.clone());
780 }
781 BodyLiteral::Comparison(_)
782 | BodyLiteral::IsExpr(_)
783 | BodyLiteral::Univ(_) => {}
784 }
785 }
786 }
787 for pf in &program.prob_facts {
788 referenced.insert(pf.atom.predicate.clone());
789 }
790 for ad in &program.annotated_disjunctions {
791 for pf in &ad.choices {
792 referenced.insert(pf.atom.predicate.clone());
793 }
794 }
795 for q in &queries {
796 referenced.insert(q.predicate.clone());
797 }
798 for (e, _) in &evidence {
799 referenced.insert(e.predicate.clone());
800 }
801 for pred in referenced {
802 base_store.entry(pred).or_default();
803 }
804 }
805
806 #[cfg(feature = "host-io")]
807 let scc_plans = results::build_scc_plans(program)?;
808
809 Ok(Self {
810 gpu_config: GpuConfig::default(),
811 program: program.clone(),
812 #[cfg(feature = "host-io")]
813 base_store,
814 #[cfg(feature = "host-io")]
815 scc_plans,
816 queries,
817 evidence,
818 bernoulli_probs,
819 prob_facts,
820 annotated_disjunctions,
821 })
822 }
823
824 fn provider(&self) -> Result<CudaKernelProvider> {
825 if self.gpu_config.memory_bytes == 0 {
826 return Err(XlogError::Kernel(
827 "GPU memory budget must be non-zero".to_string(),
828 ));
829 }
830
831 let device = Arc::new(CudaDevice::new(self.gpu_config.device_ordinal)?);
832 let memory = Arc::new(GpuMemoryManager::new(
833 device.clone(),
834 MemoryBudget::with_limit(self.gpu_config.memory_bytes),
835 ));
836 CudaKernelProvider::new(device, memory)
837 }
838}