1use std::{collections::BTreeSet, ffi::c_void, sync::Arc};
4
5use cudarc::driver::LaunchConfig;
6use xlog_core::{RelId, Result, ScalarType, Schema, XlogError};
7use xlog_cuda::provider::{
8 epistemic_kernels, HostLaunchMetadataTransferStats, HostTransferStats, EPISTEMIC_MODULE,
9};
10use xlog_cuda::{
11 memory::{validate_logical_row_count, TrackedCudaSlice},
12 sys, AsKernelParam, CudaBuffer, CudaColumn, DeviceSlice, DriverError, LaunchAsync,
13};
14use xlog_ir::rir::{MultiwayPlan, PlannedHashReason, RirNode, StreamGroupId};
15use xlog_ir::{
16 EirEpistemicMode, EirEpistemicOp, EirTerm, EpistemicCpuFallbackCounters,
17 EpistemicExecutablePlan, EpistemicGpuBufferKind, EpistemicGpuHotPathPhase, EpistemicGpuPlan,
18 EpistemicTupleMembershipBinding, EpistemicWcojReductionStatus,
19};
20
21use super::Executor;
22
23const XLOG_CONSTRAINT_RELATION_PREFIX: &str = "__xlog_constraint_";
24
25#[derive(Debug, Clone, Copy, PartialEq, Eq)]
27pub struct EpistemicGpuWorkspaceCapacities {
28 pub max_candidates: usize,
30 pub max_worlds: usize,
32 pub max_models_per_reduction: usize,
34}
35
36#[derive(Debug, Clone, Copy, PartialEq, Eq)]
38pub struct EpistemicGpuWorkspaceLayout {
39 pub candidate_assumption_bytes: usize,
41 pub world_view_bytes: usize,
43 pub model_membership_bytes: usize,
45 pub rejection_reason_slots: usize,
47}
48
49impl EpistemicGpuWorkspaceLayout {
50 pub fn for_plan(
52 plan: &EpistemicGpuPlan,
53 capacities: EpistemicGpuWorkspaceCapacities,
54 ) -> Result<Self> {
55 require_positive(
56 capacities.max_candidates,
57 "epistemic GPU workspace candidates",
58 )?;
59 require_positive(capacities.max_worlds, "epistemic GPU workspace worlds")?;
60 require_positive(
61 capacities.max_models_per_reduction,
62 "epistemic GPU workspace models",
63 )?;
64 require_positive(
65 plan.epistemic_literals.len(),
66 "epistemic GPU workspace literals",
67 )?;
68 require_positive(plan.reductions.len(), "epistemic GPU workspace reductions")?;
69
70 let literal_count = plan.epistemic_literals.len();
71 let reduction_count = plan.reductions.len();
72 let candidate_assumption_bytes = checked_product(capacities.max_candidates, literal_count)?;
73 let world_view_stride = capacities
74 .max_worlds
75 .max(world_view_bitset_bytes_per_candidate(literal_count)?);
76 let world_view_bytes = checked_product(capacities.max_candidates, world_view_stride)?;
77 let model_membership_bytes = checked_product(
78 checked_product(
79 checked_product(
80 capacities.max_candidates,
81 capacities.max_models_per_reduction,
82 )?,
83 reduction_count,
84 )?,
85 literal_count,
86 )?;
87
88 Ok(Self {
89 candidate_assumption_bytes,
90 world_view_bytes,
91 model_membership_bytes,
92 rejection_reason_slots: capacities.max_candidates,
93 })
94 }
95
96 pub fn total_bytes(&self) -> usize {
98 self.try_total_bytes()
99 .expect("epistemic GPU workspace layout byte total overflowed")
100 }
101
102 pub fn try_total_bytes(&self) -> Result<usize> {
104 let rejection_reason_bytes =
105 checked_product(self.rejection_reason_slots, std::mem::size_of::<u32>())?;
106 checked_sum(
107 checked_sum(
108 checked_sum(self.candidate_assumption_bytes, self.world_view_bytes)?,
109 self.model_membership_bytes,
110 )?,
111 rejection_reason_bytes,
112 )
113 }
114}
115
116pub struct EpistemicGpuWorkspace {
118 pub layout: EpistemicGpuWorkspaceLayout,
120 pub candidate_assumptions: TrackedCudaSlice<u8>,
122 pub world_views: TrackedCudaSlice<u8>,
124 pub model_membership: TrackedCudaSlice<u8>,
126 pub rejection_reasons: TrackedCudaSlice<u32>,
128 pub constraint_violation_index: TrackedCudaSlice<u32>,
135}
136
137impl EpistemicGpuWorkspace {
138 pub fn require_buffer_lengths_match_layout(&self, construct: &str) -> Result<()> {
140 if self.candidate_assumptions.len() != self.layout.candidate_assumption_bytes
141 || self.world_views.len() != self.layout.world_view_bytes
142 || self.model_membership.len() != self.layout.model_membership_bytes
143 || self.rejection_reasons.len() != self.layout.rejection_reason_slots
144 || self.constraint_violation_index.len() != self.layout.rejection_reason_slots
145 {
146 return Err(XlogError::UnsupportedEpistemicConstruct {
147 construct: construct.to_string(),
148 context: format!(
149 "prepared GPU workspace buffer lengths do not match layout: \
150 candidate_bytes={}/{} world_view_bytes={}/{} model_membership_bytes={}/{} \
151 rejection_reason_slots={}/{} constraint_violation_index_slots={}/{}",
152 self.candidate_assumptions.len(),
153 self.layout.candidate_assumption_bytes,
154 self.world_views.len(),
155 self.layout.world_view_bytes,
156 self.model_membership.len(),
157 self.layout.model_membership_bytes,
158 self.rejection_reasons.len(),
159 self.layout.rejection_reason_slots,
160 self.constraint_violation_index.len(),
161 self.layout.rejection_reason_slots
162 ),
163 });
164 }
165
166 Ok(())
167 }
168}
169
170#[derive(Debug, Clone, Copy, PartialEq, Eq)]
172pub struct EpistemicGpuWorkspaceResetTrace {
173 pub candidate_assumption_bytes: usize,
175 pub world_view_bytes: usize,
177 pub model_membership_bytes: usize,
179 pub rejection_reason_bytes: usize,
181 pub device_zero_ops: u32,
183 pub host_write_ops: u32,
185}
186
187#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
189pub struct EpistemicGpuKernelTimingTrace {
190 pub cuda_event_pairs: u32,
192 pub timing_sync_ops: u32,
194 pub kernel_elapsed_nanos: u64,
196}
197
198#[derive(Debug, Clone, Copy, PartialEq, Eq)]
200pub struct EpistemicGpuCandidateGenerationTrace {
201 pub literal_count: usize,
203 pub generated_candidates: usize,
205 pub candidate_assumption_bytes: usize,
207 pub kernel_launches: u32,
209 pub host_write_ops: u32,
211 pub kernel_timing: EpistemicGpuKernelTimingTrace,
213}
214
215#[derive(Debug, Clone, Copy, PartialEq, Eq)]
217pub struct EpistemicGpuCandidateValidationTrace {
218 pub literal_count: usize,
220 pub validated_candidates: usize,
222 pub candidate_assumption_bytes_checked: usize,
224 pub world_view_bytes_checked: usize,
226 pub rejection_reason_slots_written: usize,
228 pub kernel_launches: u32,
230 pub host_write_ops: u32,
232 pub kernel_timing: EpistemicGpuKernelTimingTrace,
234}
235
236#[derive(Debug, Clone, Copy, PartialEq, Eq)]
238pub struct EpistemicGpuMaterializationTrace {
239 pub materialized_candidates: usize,
241 pub world_view_slots_written: usize,
243 pub kernel_launches: u32,
245 pub host_write_ops: u32,
247 pub kernel_timing: EpistemicGpuKernelTimingTrace,
249}
250
251#[derive(Debug, Clone, Copy, PartialEq, Eq)]
253pub struct EpistemicGpuFinalResultMaterializationTrace {
254 pub materialized_candidates: usize,
256 pub output_row_count_device_reads: u32,
258 pub world_view_slots_written: usize,
260 pub kernel_launches: u32,
262 pub host_write_ops: u32,
264 pub kernel_timing: EpistemicGpuKernelTimingTrace,
266}
267
268#[derive(Debug, Clone, Copy, PartialEq, Eq)]
270pub struct EpistemicGpuFinalTupleMaterializationTrace {
271 pub output_column_count: usize,
273 pub output_row_capacity: usize,
275 pub tuple_bytes_capacity: usize,
277 pub output_row_count_device_reads: u32,
279 pub model_membership_bytes_checked: usize,
281 pub bounded_model_slots_per_reduction: usize,
283 pub row_specific_membership_row_capacity: usize,
285 pub row_filter_row_capacity_outside_model_slot_window: usize,
287 pub world_view_slots_checked: usize,
289 pub row_filter_count: usize,
291 pub negated_row_filter_count: usize,
293 pub final_row_count_device_writes: u32,
295 pub kernel_launches: u32,
297 pub host_write_ops: u32,
299 pub kernel_timing: EpistemicGpuKernelTimingTrace,
301}
302
303#[derive(Debug, Clone, Copy, PartialEq, Eq)]
305pub struct EpistemicGpuTransferBudgetTrace {
306 pub candidate_count: usize,
308 pub tracked_dtoh_bytes: u64,
310 pub tracked_htod_bytes: u64,
312 pub tracked_dtoh_calls: u64,
314 pub tracked_htod_calls: u64,
316 pub tracked_aggregate_htod_bytes: u64,
318 pub tracked_aggregate_htod_calls: u64,
320 pub tracked_launch_metadata_htod_bytes: u64,
322 pub tracked_launch_metadata_htod_calls: u64,
324 pub tracked_data_plane_htod_bytes: u64,
326 pub tracked_data_plane_htod_calls: u64,
328 pub per_candidate_host_round_trips: u64,
330}
331
332#[derive(Debug, Clone, Copy, PartialEq, Eq)]
334pub struct EpistemicGpuFinalResultTransferTrace {
335 pub final_output_rows: usize,
337 pub final_output_column_count: usize,
339 pub final_output_row_width_bytes: usize,
341 pub final_output_payload_bytes: u64,
343 pub row_count_device_reads: u32,
345 pub tracked_data_plane_dtoh_calls: u64,
347 pub tracked_data_plane_dtoh_bytes: u64,
349}
350
351#[derive(Debug, Clone, Copy, PartialEq, Eq)]
353pub struct EpistemicGpuConstraintValidationTrace {
354 pub checked_constraint_relations: usize,
356 pub violated_constraint_relations: usize,
358 pub row_count_device_reads: u32,
360}
361
362impl EpistemicGpuConstraintValidationTrace {
363 pub fn require_matches_preflight(
365 &self,
366 construct: &str,
367 preflight: &EpistemicGpuRuntimePreflight,
368 ) -> Result<()> {
369 if self.checked_constraint_relations != preflight.reduced_constraint_relation_count
370 || self.violated_constraint_relations != 0
371 || self.row_count_device_reads as usize > self.checked_constraint_relations
372 {
373 return Err(XlogError::UnsupportedEpistemicConstruct {
374 construct: construct.to_string(),
375 context: format!(
376 "constraint validation trace must match reduced runtime preflight, got \
377 checked={} expected_checked={} violations={} row_count_reads={}",
378 self.checked_constraint_relations,
379 preflight.reduced_constraint_relation_count,
380 self.violated_constraint_relations,
381 self.row_count_device_reads
382 ),
383 });
384 }
385
386 Ok(())
387 }
388}
389
390#[derive(Debug, Clone, Copy, PartialEq, Eq)]
392pub enum EpistemicGpuRejectionReason {
393 InactiveWorld,
395 InvalidCandidateBit,
397 MissingReducedModel,
399 UnsatisfiedMembership,
401 WorldViewConstraintViolation,
403}
404
405impl EpistemicGpuRejectionReason {
406 pub const fn code(self) -> u32 {
408 match self {
409 Self::InactiveWorld => 2,
410 Self::InvalidCandidateBit => 3,
411 Self::MissingReducedModel => 4,
412 Self::UnsatisfiedMembership => 5,
413 Self::WorldViewConstraintViolation => 6,
414 }
415 }
416
417 pub fn from_code(code: u32) -> Result<Self> {
419 match code {
420 2 => Ok(Self::InactiveWorld),
421 3 => Ok(Self::InvalidCandidateBit),
422 4 => Ok(Self::MissingReducedModel),
423 5 => Ok(Self::UnsatisfiedMembership),
424 6 => Ok(Self::WorldViewConstraintViolation),
425 other => Err(XlogError::UnsupportedEpistemicConstruct {
426 construct: "epistemic GPU rejection reason".to_string(),
427 context: format!("unknown device rejection code {other}"),
428 }),
429 }
430 }
431}
432
433#[derive(Debug, Clone, PartialEq, Eq)]
435pub struct EpistemicGpuSemanticTrace {
436 pub generated_candidates: usize,
438 pub guesses: usize,
440 pub propagated_candidates: usize,
442 pub pruned_candidates: usize,
444 pub tested_candidates: usize,
446 pub reduced_model_slots_checked: usize,
448 pub accepted_candidates: usize,
450 pub accepted_candidate_indices: Vec<usize>,
452 pub accepted_world_views: usize,
454 pub rejected_candidates: usize,
456 pub rejected_candidate_indices: Vec<usize>,
458 pub rejection_reasons: Vec<u32>,
460 pub constraint_violation_indices: Vec<Option<u32>>,
466 pub rejection_reason_device_reads: u32,
468 pub rejection_reason_metadata_bytes: u64,
470 pub cpu_candidate_enumerations: u32,
472 pub cpu_world_view_validations: u32,
474}
475
476#[derive(Debug, Clone, Copy, PartialEq, Eq)]
478pub struct EpistemicGpuModelMembershipTrace {
479 pub literal_count: usize,
481 pub candidates_checked: usize,
483 pub reduction_count: usize,
485 pub models_per_reduction: usize,
487 pub model_membership_bytes_written: usize,
489 pub output_row_count_device_reads: u32,
491 pub tuple_source_row_count_device_reads: u32,
493 pub tuple_source_key_column_device_reads: u32,
495 pub rejection_reason_slots_checked: usize,
497 pub membership_source: EpistemicGpuModelMembershipSource,
499 pub kernel_launches: u32,
501 pub host_write_ops: u32,
503 pub kernel_timing: EpistemicGpuKernelTimingTrace,
505}
506
507#[derive(Debug, Clone, Copy, PartialEq, Eq)]
509pub enum EpistemicGpuModelMembershipSource {
510 ReducedOutputRowCountOnly,
512 StableModelTupleBuffer,
514}
515
516#[derive(Debug, Clone, Copy, PartialEq, Eq)]
518pub struct EpistemicGpuWorldViewValidationTrace {
519 pub literal_count: usize,
521 pub candidates_checked: usize,
523 pub reduction_count: usize,
525 pub models_per_reduction: usize,
527 pub model_membership_bytes_checked: usize,
529 pub world_view_slots_checked: usize,
531 pub rejection_reason_slots_written: usize,
533 pub kernel_launches: u32,
535 pub host_write_ops: u32,
537 pub kernel_timing: EpistemicGpuKernelTimingTrace,
539}
540
541#[derive(Debug, Clone, Copy, PartialEq, Eq)]
549pub struct EpistemicGpuConstraintWorldViewValidationTrace {
550 pub constraint_count: usize,
552 pub constraint_literal_refs: usize,
554 pub candidates_checked: usize,
556 pub rejection_reason_slots_written: usize,
558 pub kernel_launches: u32,
560 pub host_write_ops: u32,
562 pub kernel_timing: EpistemicGpuKernelTimingTrace,
564}
565
566#[derive(Debug, Clone, Copy, PartialEq, Eq)]
568pub struct EpistemicGpuPropagationTrace {
569 pub literal_count: usize,
571 pub propagated_candidates: usize,
573 pub world_view_bytes_written: usize,
575 pub rejection_reason_slots_written: usize,
577 pub kernel_launches: u32,
579 pub host_write_ops: u32,
581 pub kernel_timing: EpistemicGpuKernelTimingTrace,
583}
584
585impl EpistemicGpuKernelTimingTrace {
586 pub const fn unrecorded() -> Self {
588 Self {
589 cuda_event_pairs: 0,
590 timing_sync_ops: 0,
591 kernel_elapsed_nanos: 0,
592 }
593 }
594
595 pub fn from_cuda_elapsed_ms(elapsed_ms: f32) -> Result<Self> {
597 if !elapsed_ms.is_finite() || elapsed_ms < 0.0 {
598 return Err(XlogError::Execution(format!(
599 "invalid epistemic GPU kernel elapsed time: {elapsed_ms}"
600 )));
601 }
602 let elapsed_nanos = ((elapsed_ms as f64) * 1_000_000.0).round();
603 if elapsed_nanos >= u64::MAX as f64 {
604 return Err(XlogError::UnsupportedEpistemicConstruct {
605 construct: "epistemic GPU kernel timing trace".to_string(),
606 context: format!(
607 "CUDA elapsed time {elapsed_ms}ms exceeds the u64 nanosecond trace counter"
608 ),
609 });
610 }
611
612 Ok(Self {
613 cuda_event_pairs: 1,
614 timing_sync_ops: 1,
615 kernel_elapsed_nanos: elapsed_nanos as u64,
616 })
617 }
618
619 pub const fn is_recorded(&self) -> bool {
621 self.cuda_event_pairs > 0 && self.timing_sync_ops > 0
622 }
623
624 pub fn saturating_add(self, other: Self) -> Self {
626 Self {
627 cuda_event_pairs: self.cuda_event_pairs.saturating_add(other.cuda_event_pairs),
628 timing_sync_ops: self.timing_sync_ops.saturating_add(other.timing_sync_ops),
629 kernel_elapsed_nanos: self
630 .kernel_elapsed_nanos
631 .saturating_add(other.kernel_elapsed_nanos),
632 }
633 }
634
635 pub fn checked_add(self, other: Self) -> Result<Self> {
637 Ok(Self {
638 cuda_event_pairs: self
639 .cuda_event_pairs
640 .checked_add(other.cuda_event_pairs)
641 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
642 construct: "epistemic GPU kernel timing trace".to_string(),
643 context: format!(
644 "CUDA event-pair counter overflowed while adding {} to {}",
645 other.cuda_event_pairs, self.cuda_event_pairs
646 ),
647 })?,
648 timing_sync_ops: self
649 .timing_sync_ops
650 .checked_add(other.timing_sync_ops)
651 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
652 construct: "epistemic GPU kernel timing trace".to_string(),
653 context: format!(
654 "CUDA timing-sync counter overflowed while adding {} to {}",
655 other.timing_sync_ops, self.timing_sync_ops
656 ),
657 })?,
658 kernel_elapsed_nanos: self
659 .kernel_elapsed_nanos
660 .checked_add(other.kernel_elapsed_nanos)
661 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
662 construct: "epistemic GPU kernel timing trace".to_string(),
663 context: format!(
664 "kernel elapsed-time counter overflowed while adding {} to {}",
665 other.kernel_elapsed_nanos, self.kernel_elapsed_nanos
666 ),
667 })?,
668 })
669 }
670
671 pub fn sum(traces: impl IntoIterator<Item = Self>) -> Self {
673 traces
674 .into_iter()
675 .fold(Self::unrecorded(), Self::saturating_add)
676 }
677
678 pub fn checked_sum(traces: impl IntoIterator<Item = Self>) -> Result<Self> {
680 traces
681 .into_iter()
682 .try_fold(Self::unrecorded(), Self::checked_add)
683 }
684}
685
686impl EpistemicGpuCandidateGenerationTrace {
687 pub fn for_counts(literal_count: usize, candidate_count: usize) -> Result<Self> {
689 require_positive(literal_count, "epistemic GPU candidate literals")?;
690 require_positive(candidate_count, "epistemic GPU candidate count")?;
691 if literal_count > 31 {
692 return Err(XlogError::UnsupportedEpistemicConstruct {
693 construct: "epistemic GPU candidate generation".to_string(),
694 context: format!("literal count {literal_count} exceeds 31-bit candidate mask"),
695 });
696 }
697 if candidate_count > (1usize << literal_count) {
698 return Err(XlogError::ResourceExhausted {
699 context: "epistemic GPU candidate count".to_string(),
700 estimated_bytes: candidate_count as u64,
701 budget_bytes: (1usize << literal_count) as u64,
702 });
703 }
704
705 let candidate_assumption_bytes = checked_product(literal_count, candidate_count)?;
706 require_u32_launch_bound(
707 candidate_assumption_bytes,
708 "epistemic GPU candidate generation launch",
709 )?;
710
711 Ok(Self {
712 literal_count,
713 generated_candidates: candidate_count,
714 candidate_assumption_bytes,
715 kernel_launches: 1,
716 host_write_ops: 0,
717 kernel_timing: EpistemicGpuKernelTimingTrace::unrecorded(),
718 })
719 }
720
721 pub const fn with_kernel_timing(
723 mut self,
724 kernel_timing: EpistemicGpuKernelTimingTrace,
725 ) -> Self {
726 self.kernel_timing = kernel_timing;
727 self
728 }
729}
730
731impl EpistemicGpuCandidateValidationTrace {
732 pub fn for_counts(literal_count: usize, candidate_count: usize) -> Result<Self> {
734 require_positive(literal_count, "epistemic GPU candidate validation literals")?;
735 require_positive(
736 candidate_count,
737 "epistemic GPU candidate validation candidates",
738 )?;
739 require_u32_launch_dimensions(
740 &[literal_count, candidate_count],
741 "epistemic GPU validation launch",
742 )?;
743 let candidate_assumption_bytes_checked = checked_product(literal_count, candidate_count)?;
744
745 Ok(Self {
746 literal_count,
747 validated_candidates: candidate_count,
748 candidate_assumption_bytes_checked,
749 world_view_bytes_checked: candidate_count,
750 rejection_reason_slots_written: candidate_count,
751 kernel_launches: 1,
752 host_write_ops: 0,
753 kernel_timing: EpistemicGpuKernelTimingTrace::unrecorded(),
754 })
755 }
756
757 pub const fn with_kernel_timing(
759 mut self,
760 kernel_timing: EpistemicGpuKernelTimingTrace,
761 ) -> Self {
762 self.kernel_timing = kernel_timing;
763 self
764 }
765
766 pub fn require_matches_candidate_generation(
768 &self,
769 construct: &str,
770 candidate_generation: &EpistemicGpuCandidateGenerationTrace,
771 ) -> Result<()> {
772 let expected_world_view_bytes = checked_product(
773 world_view_bitset_bytes_per_candidate(candidate_generation.literal_count)?,
774 candidate_generation.generated_candidates,
775 )?;
776 if self.literal_count != candidate_generation.literal_count
777 || self.validated_candidates != candidate_generation.generated_candidates
778 || self.candidate_assumption_bytes_checked
779 != candidate_generation.candidate_assumption_bytes
780 || self.world_view_bytes_checked != expected_world_view_bytes
781 || self.rejection_reason_slots_written != candidate_generation.generated_candidates
782 {
783 return Err(XlogError::UnsupportedEpistemicConstruct {
784 construct: construct.to_string(),
785 context: format!(
786 "candidate validation trace does not match generated GPU candidates: \
787 literals={}/{} candidates={}/{} candidate_bytes={}/{} \
788 world_view_bytes={}/{} rejection_slots={}/{}",
789 self.literal_count,
790 candidate_generation.literal_count,
791 self.validated_candidates,
792 candidate_generation.generated_candidates,
793 self.candidate_assumption_bytes_checked,
794 candidate_generation.candidate_assumption_bytes,
795 self.world_view_bytes_checked,
796 expected_world_view_bytes,
797 self.rejection_reason_slots_written,
798 candidate_generation.generated_candidates
799 ),
800 });
801 }
802
803 Ok(())
804 }
805}
806
807impl EpistemicGpuMaterializationTrace {
808 pub fn for_count(candidate_count: usize) -> Result<Self> {
810 require_positive(candidate_count, "epistemic GPU materialization candidates")?;
811 require_u32_launch_bound(candidate_count, "epistemic GPU materialization launch")?;
812
813 Ok(Self {
814 materialized_candidates: candidate_count,
815 world_view_slots_written: candidate_count,
816 kernel_launches: 1,
817 host_write_ops: 0,
818 kernel_timing: EpistemicGpuKernelTimingTrace::unrecorded(),
819 })
820 }
821
822 pub const fn with_kernel_timing(
824 mut self,
825 kernel_timing: EpistemicGpuKernelTimingTrace,
826 ) -> Self {
827 self.kernel_timing = kernel_timing;
828 self
829 }
830}
831
832impl EpistemicGpuFinalResultMaterializationTrace {
833 pub fn for_count(candidate_count: usize) -> Result<Self> {
835 require_positive(
836 candidate_count,
837 "epistemic GPU final-result materialization candidates",
838 )?;
839 require_u32_launch_bound(candidate_count, "epistemic GPU final-result launch")?;
840
841 Ok(Self {
842 materialized_candidates: candidate_count,
843 output_row_count_device_reads: 1,
844 world_view_slots_written: candidate_count,
845 kernel_launches: 1,
846 host_write_ops: 0,
847 kernel_timing: EpistemicGpuKernelTimingTrace::unrecorded(),
848 })
849 }
850
851 pub const fn with_kernel_timing(
853 mut self,
854 kernel_timing: EpistemicGpuKernelTimingTrace,
855 ) -> Self {
856 self.kernel_timing = kernel_timing;
857 self
858 }
859}
860
861impl EpistemicGpuFinalTupleMaterializationTrace {
862 pub fn for_counts(
864 output_column_count: usize,
865 output_row_capacity: usize,
866 tuple_bytes_capacity: usize,
867 literal_count: usize,
868 candidate_count: usize,
869 reduction_count: usize,
870 models_per_reduction: usize,
871 ) -> Result<Self> {
872 if output_column_count > u32::MAX as usize {
873 return Err(XlogError::ResourceExhausted {
874 context: "epistemic GPU final-tuple output columns".to_string(),
875 estimated_bytes: output_column_count as u64,
876 budget_bytes: u32::MAX as u64,
877 });
878 }
879 require_u32_launch_bound(output_row_capacity, "epistemic GPU final-tuple output rows")?;
880 require_positive(literal_count, "epistemic GPU final-tuple literals")?;
881 require_positive(candidate_count, "epistemic GPU final-tuple candidates")?;
882 require_positive(reduction_count, "epistemic GPU final-tuple reductions")?;
883 require_positive(models_per_reduction, "epistemic GPU final-tuple models")?;
884 let model_membership_bytes_checked = checked_product(
885 checked_product(
886 checked_product(candidate_count, reduction_count)?,
887 models_per_reduction,
888 )?,
889 literal_count,
890 )?;
891 require_u32_launch_bound(
892 model_membership_bytes_checked,
893 "epistemic GPU final-tuple membership launch",
894 )?;
895 let output_row_count_device_reads = checked_sum(output_column_count, 1)?;
896 let kernel_launches = checked_sum(output_row_count_device_reads, 1)?;
897 if kernel_launches > u32::MAX as usize {
898 return Err(XlogError::ResourceExhausted {
899 context: "epistemic GPU final-tuple kernel launches".to_string(),
900 estimated_bytes: kernel_launches as u64,
901 budget_bytes: u32::MAX as u64,
902 });
903 }
904
905 Ok(Self {
906 output_column_count,
907 output_row_capacity,
908 tuple_bytes_capacity,
909 output_row_count_device_reads: output_row_count_device_reads as u32,
910 model_membership_bytes_checked,
911 bounded_model_slots_per_reduction: models_per_reduction,
912 row_specific_membership_row_capacity: 0,
913 row_filter_row_capacity_outside_model_slot_window: 0,
914 world_view_slots_checked: candidate_count,
915 row_filter_count: 0,
916 negated_row_filter_count: 0,
917 final_row_count_device_writes: 1,
918 kernel_launches: kernel_launches as u32,
919 host_write_ops: 0,
920 kernel_timing: EpistemicGpuKernelTimingTrace::unrecorded(),
921 })
922 }
923
924 pub const fn with_kernel_timing(
926 mut self,
927 kernel_timing: EpistemicGpuKernelTimingTrace,
928 ) -> Self {
929 self.kernel_timing = kernel_timing;
930 self
931 }
932
933 pub fn with_row_filter_counts(
935 mut self,
936 row_filter_count: usize,
937 negated_row_filter_count: usize,
938 ) -> Result<Self> {
939 if negated_row_filter_count > row_filter_count {
940 return Err(XlogError::ResourceExhausted {
941 context: "epistemic GPU final-tuple negated row filters".to_string(),
942 estimated_bytes: negated_row_filter_count as u64,
943 budget_bytes: row_filter_count as u64,
944 });
945 }
946 self.row_filter_count = row_filter_count;
947 self.negated_row_filter_count = negated_row_filter_count;
948 if row_filter_count > 0 {
949 self.row_specific_membership_row_capacity = self
950 .output_row_capacity
951 .min(self.bounded_model_slots_per_reduction);
952 self.row_filter_row_capacity_outside_model_slot_window = self
953 .output_row_capacity
954 .saturating_sub(self.row_specific_membership_row_capacity);
955 }
956 Ok(self)
957 }
958
959 pub fn require_row_filter_materialization_evidence(
961 &self,
962 construct: &str,
963 final_output_rows: usize,
964 ) -> Result<()> {
965 if final_output_rows > self.output_row_capacity {
966 return Err(XlogError::UnsupportedEpistemicConstruct {
967 construct: construct.to_string(),
968 context: format!(
969 "final tuple materialization reported {} logical rows for output row \
970 capacity {}",
971 final_output_rows, self.output_row_capacity
972 ),
973 });
974 }
975 if self.negated_row_filter_count > self.row_filter_count {
976 return Err(XlogError::UnsupportedEpistemicConstruct {
977 construct: construct.to_string(),
978 context: format!(
979 "row-filtered final tuple materialization reported {} negated row filters \
980 for {} total row filters",
981 self.negated_row_filter_count, self.row_filter_count
982 ),
983 });
984 }
985 if self.row_filter_count == 0 {
986 if self.row_specific_membership_row_capacity != 0
987 || self.row_filter_row_capacity_outside_model_slot_window != 0
988 {
989 return Err(XlogError::UnsupportedEpistemicConstruct {
990 construct: construct.to_string(),
991 context: format!(
992 "final tuple materialization without row filters reported row-filter \
993 coverage row_specific_capacity={} fallback_capacity={}",
994 self.row_specific_membership_row_capacity,
995 self.row_filter_row_capacity_outside_model_slot_window
996 ),
997 });
998 }
999 return Ok(());
1000 }
1001
1002 if final_output_rows == 0 && self.output_row_capacity == 0 {
1010 return Ok(());
1011 }
1012
1013 let covered_row_capacity = checked_sum(
1014 self.row_specific_membership_row_capacity,
1015 self.row_filter_row_capacity_outside_model_slot_window,
1016 )?;
1017 if self.output_row_capacity == 0
1018 || self.row_specific_membership_row_capacity == 0
1019 || covered_row_capacity != self.output_row_capacity
1020 {
1021 return Err(XlogError::UnsupportedEpistemicConstruct {
1022 construct: construct.to_string(),
1023 context: format!(
1024 "row-filtered final tuple materialization requires GPU row-filter coverage, \
1025 got row_filters={} final_output_rows={} output_row_capacity={} \
1026 row_specific_capacity={} fallback_capacity={} model_slots_per_reduction={}",
1027 self.row_filter_count,
1028 final_output_rows,
1029 self.output_row_capacity,
1030 self.row_specific_membership_row_capacity,
1031 self.row_filter_row_capacity_outside_model_slot_window,
1032 self.bounded_model_slots_per_reduction
1033 ),
1034 });
1035 }
1036
1037 let fallback_rows =
1038 final_output_rows.saturating_sub(self.row_specific_membership_row_capacity);
1039 if fallback_rows > self.row_filter_row_capacity_outside_model_slot_window {
1040 return Err(XlogError::UnsupportedEpistemicConstruct {
1041 construct: construct.to_string(),
1042 context: format!(
1043 "row-filtered final tuple materialization has {} logical rows beyond the \
1044 row-specific model-slot window but only {} fallback row-filter capacity",
1045 fallback_rows, self.row_filter_row_capacity_outside_model_slot_window
1046 ),
1047 });
1048 }
1049 Ok(())
1050 }
1051}
1052
1053impl EpistemicGpuTransferBudgetTrace {
1054 pub fn from_host_transfer_stats(
1056 candidate_count: usize,
1057 before: HostTransferStats,
1058 after: HostTransferStats,
1059 ) -> Result<Self> {
1060 Self::from_host_transfer_stats_with_launch_metadata(
1061 candidate_count,
1062 before,
1063 after,
1064 HostLaunchMetadataTransferStats::default(),
1065 HostLaunchMetadataTransferStats::default(),
1066 )
1067 }
1068
1069 pub fn from_host_transfer_stats_with_launch_metadata(
1072 candidate_count: usize,
1073 before: HostTransferStats,
1074 after: HostTransferStats,
1075 launch_metadata_before: HostLaunchMetadataTransferStats,
1076 launch_metadata_after: HostLaunchMetadataTransferStats,
1077 ) -> Result<Self> {
1078 require_positive(candidate_count, "epistemic GPU transfer-budget candidates")?;
1079
1080 let tracked_dtoh_bytes =
1081 transfer_counter_delta("dtoh_bytes", before.dtoh_bytes, after.dtoh_bytes)?;
1082 let tracked_data_plane_htod_bytes =
1083 transfer_counter_delta("htod_bytes", before.htod_bytes, after.htod_bytes)?;
1084 let tracked_dtoh_calls =
1085 transfer_counter_delta("dtoh_calls", before.dtoh_calls, after.dtoh_calls)?;
1086 let tracked_data_plane_htod_calls =
1087 transfer_counter_delta("htod_calls", before.htod_calls, after.htod_calls)?;
1088 let tracked_launch_metadata_htod_bytes = transfer_counter_delta(
1089 "launch_metadata_htod_bytes",
1090 launch_metadata_before.htod_bytes,
1091 launch_metadata_after.htod_bytes,
1092 )?;
1093 let tracked_launch_metadata_htod_calls = transfer_counter_delta(
1094 "launch_metadata_htod_calls",
1095 launch_metadata_before.htod_calls,
1096 launch_metadata_after.htod_calls,
1097 )?;
1098 let tracked_aggregate_htod_bytes = tracked_data_plane_htod_bytes
1099 .checked_add(tracked_launch_metadata_htod_bytes)
1100 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
1101 construct: "epistemic GPU transfer budget".to_string(),
1102 context: format!(
1103 "aggregate H2D bytes overflowed while adding launch metadata: \
1104 data_plane_htod_bytes={tracked_data_plane_htod_bytes}, \
1105 launch_metadata_htod_bytes={tracked_launch_metadata_htod_bytes}"
1106 ),
1107 })?;
1108 let tracked_aggregate_htod_calls = tracked_data_plane_htod_calls
1109 .checked_add(tracked_launch_metadata_htod_calls)
1110 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
1111 construct: "epistemic GPU transfer budget".to_string(),
1112 context: format!(
1113 "aggregate H2D calls overflowed while adding launch metadata: \
1114 data_plane_htod_calls={tracked_data_plane_htod_calls}, \
1115 launch_metadata_htod_calls={tracked_launch_metadata_htod_calls}"
1116 ),
1117 })?;
1118
1119 if tracked_launch_metadata_htod_bytes != 0 && tracked_launch_metadata_htod_calls == 0 {
1120 return Err(XlogError::UnsupportedEpistemicConstruct {
1121 construct: "epistemic GPU transfer budget".to_string(),
1122 context: format!(
1123 "launch metadata H2D bytes require matching H2D calls, got bytes={} calls=0",
1124 tracked_launch_metadata_htod_bytes
1125 ),
1126 });
1127 }
1128 if tracked_launch_metadata_htod_calls != 0 && tracked_launch_metadata_htod_bytes == 0 {
1129 return Err(XlogError::UnsupportedEpistemicConstruct {
1130 construct: "epistemic GPU transfer budget".to_string(),
1131 context: format!(
1132 "launch metadata H2D calls require matching payload bytes, got calls={} bytes=0",
1133 tracked_launch_metadata_htod_calls
1134 ),
1135 });
1136 }
1137
1138 if tracked_dtoh_bytes != 0
1139 || tracked_data_plane_htod_bytes != 0
1140 || tracked_dtoh_calls != 0
1141 || tracked_data_plane_htod_calls != 0
1142 {
1143 return Err(XlogError::UnsupportedEpistemicConstruct {
1144 construct: "epistemic GPU transfer budget".to_string(),
1145 context: format!(
1146 "tracked host transfer in GPU hot path: tracked data-plane host transfer: \
1147 dtoh_bytes={tracked_dtoh_bytes}, \
1148 data_plane_htod_bytes={tracked_data_plane_htod_bytes}, \
1149 dtoh_calls={tracked_dtoh_calls}, \
1150 data_plane_htod_calls={tracked_data_plane_htod_calls}, \
1151 launch_metadata_htod_bytes={tracked_launch_metadata_htod_bytes}, \
1152 launch_metadata_htod_calls={tracked_launch_metadata_htod_calls}"
1153 ),
1154 });
1155 }
1156
1157 Ok(Self {
1158 candidate_count,
1159 tracked_dtoh_bytes,
1160 tracked_htod_bytes: tracked_data_plane_htod_bytes,
1161 tracked_dtoh_calls,
1162 tracked_htod_calls: tracked_data_plane_htod_calls,
1163 tracked_aggregate_htod_bytes,
1164 tracked_aggregate_htod_calls,
1165 tracked_launch_metadata_htod_bytes,
1166 tracked_launch_metadata_htod_calls,
1167 tracked_data_plane_htod_bytes,
1168 tracked_data_plane_htod_calls,
1169 per_candidate_host_round_trips: 0,
1170 })
1171 }
1172}
1173
1174impl EpistemicGpuFinalResultTransferTrace {
1175 pub fn from_final_output(
1177 provider: &xlog_cuda::CudaKernelProvider,
1178 final_output: &CudaBuffer,
1179 ) -> Result<Self> {
1180 let row_count_was_cached = final_output.cached_row_count().is_some();
1181 let final_output_rows = provider.device_row_count(final_output)?;
1182 let final_output_column_count = final_output.arity();
1183 let final_output_row_width_bytes = final_output.schema().row_size_bytes();
1184 let final_output_payload_bytes =
1185 checked_product(final_output_rows, final_output_row_width_bytes)? as u64;
1186
1187 Ok(Self {
1188 final_output_rows,
1189 final_output_column_count,
1190 final_output_row_width_bytes,
1191 final_output_payload_bytes,
1192 row_count_device_reads: u32::from(!row_count_was_cached),
1193 tracked_data_plane_dtoh_calls: 0,
1194 tracked_data_plane_dtoh_bytes: 0,
1195 })
1196 }
1197
1198 pub fn require_matches_final_output(
1200 &self,
1201 construct: &str,
1202 final_output: &CudaBuffer,
1203 ) -> Result<()> {
1204 let Some(cached_rows) = final_output.cached_row_count() else {
1205 return Err(XlogError::UnsupportedEpistemicConstruct {
1206 construct: construct.to_string(),
1207 context:
1208 "final-result transfer certification requires cached device final row count"
1209 .to_string(),
1210 });
1211 };
1212 let logical_rows =
1213 validate_logical_row_count(final_output.num_rows(), cached_rows as usize).map_err(
1214 |err| XlogError::UnsupportedEpistemicConstruct {
1215 construct: construct.to_string(),
1216 context: format!("invalid final-output logical row count: {err}"),
1217 },
1218 )?;
1219 let row_width = final_output.schema().row_size_bytes();
1220 let payload_bytes = checked_product(logical_rows, row_width)? as u64;
1221 if self.final_output_rows != logical_rows
1222 || self.final_output_column_count != final_output.arity()
1223 || self.final_output_row_width_bytes != row_width
1224 || self.final_output_payload_bytes != payload_bytes
1225 {
1226 return Err(XlogError::UnsupportedEpistemicConstruct {
1227 construct: construct.to_string(),
1228 context: format!(
1229 "final-result transfer trace does not match final device output: rows={}/{} \
1230 columns={}/{} row_width={}/{} payload_bytes={}/{}",
1231 self.final_output_rows,
1232 logical_rows,
1233 self.final_output_column_count,
1234 final_output.arity(),
1235 self.final_output_row_width_bytes,
1236 row_width,
1237 self.final_output_payload_bytes,
1238 payload_bytes
1239 ),
1240 });
1241 }
1242 if self.row_count_device_reads > 1 {
1243 return Err(XlogError::UnsupportedEpistemicConstruct {
1244 construct: construct.to_string(),
1245 context: format!(
1246 "final-result transfer reads one device row-count scalar at most, got {}",
1247 self.row_count_device_reads
1248 ),
1249 });
1250 }
1251
1252 Ok(())
1253 }
1254}
1255
1256impl EpistemicGpuSemanticTrace {
1257 pub fn require_matches_execution_traces(
1259 &self,
1260 construct: &str,
1261 candidate_generation: &EpistemicGpuCandidateGenerationTrace,
1262 propagation: &EpistemicGpuPropagationTrace,
1263 model_membership: &EpistemicGpuModelMembershipTrace,
1264 world_view_validation: &EpistemicGpuWorldViewValidationTrace,
1265 ) -> Result<()> {
1266 let expected_pruned = self
1267 .generated_candidates
1268 .checked_sub(propagation.propagated_candidates)
1269 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
1270 construct: construct.to_string(),
1271 context: format!(
1272 "semantic trace phase counts cannot propagate more candidates than were \
1273 generated: generated={} propagated={}",
1274 self.generated_candidates, propagation.propagated_candidates
1275 ),
1276 })?;
1277 let expected_reduced_model_slots = checked_product(
1278 checked_product(
1279 world_view_validation.candidates_checked,
1280 model_membership.reduction_count,
1281 )?,
1282 model_membership.models_per_reduction,
1283 )?;
1284 let expected_guesses = checked_product(
1285 candidate_generation.generated_candidates,
1286 candidate_generation.literal_count,
1287 )?;
1288 if self.generated_candidates != candidate_generation.generated_candidates
1289 || self.guesses != expected_guesses
1290 || self.propagated_candidates != propagation.propagated_candidates
1291 || self.pruned_candidates != expected_pruned
1292 || self.tested_candidates != world_view_validation.candidates_checked
1293 || self.reduced_model_slots_checked != expected_reduced_model_slots
1294 {
1295 return Err(XlogError::UnsupportedEpistemicConstruct {
1296 construct: construct.to_string(),
1297 context: format!(
1298 "semantic trace phase counts must match retained GPU execution traces, got \
1299 generated={} expected_generated={} guesses={} expected_guesses={} \
1300 propagated={} expected_propagated={} pruned={} expected_pruned={} \
1301 tested={} expected_tested={} reduced_model_slots={} \
1302 expected_reduced_model_slots={}",
1303 self.generated_candidates,
1304 candidate_generation.generated_candidates,
1305 self.guesses,
1306 expected_guesses,
1307 self.propagated_candidates,
1308 propagation.propagated_candidates,
1309 self.pruned_candidates,
1310 expected_pruned,
1311 self.tested_candidates,
1312 world_view_validation.candidates_checked,
1313 self.reduced_model_slots_checked,
1314 expected_reduced_model_slots
1315 ),
1316 });
1317 }
1318
1319 Ok(())
1320 }
1321
1322 pub fn require_rejection_metadata_accounting(&self, construct: &str) -> Result<()> {
1324 let expected_metadata_bytes =
1325 checked_product(self.generated_candidates, std::mem::size_of::<u32>())? as u64;
1326 if self.rejection_reason_device_reads != 1
1327 || self.rejection_reason_metadata_bytes != expected_metadata_bytes
1328 {
1329 return Err(XlogError::UnsupportedEpistemicConstruct {
1330 construct: construct.to_string(),
1331 context: format!(
1332 "semantic trace rejection metadata accounting must match the bounded device \
1333 rejection-buffer read, got reads={} bytes={} expected_reads=1 \
1334 expected_bytes={}",
1335 self.rejection_reason_device_reads,
1336 self.rejection_reason_metadata_bytes,
1337 expected_metadata_bytes
1338 ),
1339 });
1340 }
1341
1342 Ok(())
1343 }
1344
1345 pub fn require_candidate_index_partition(&self, construct: &str) -> Result<()> {
1347 let accounted_candidates = self.accepted_candidates.checked_add(self.rejected_candidates).ok_or_else(|| {
1348 XlogError::UnsupportedEpistemicConstruct {
1349 construct: construct.to_string(),
1350 context: format!(
1351 "semantic trace candidate index partition accounting overflowed: accepted={} rejected={}",
1352 self.accepted_candidates, self.rejected_candidates
1353 ),
1354 }
1355 })?;
1356 if self.accepted_candidate_indices.len() != self.accepted_candidates
1357 || self.rejected_candidate_indices.len() != self.rejected_candidates
1358 || self.accepted_world_views != self.accepted_candidates
1359 || accounted_candidates != self.generated_candidates
1360 {
1361 return Err(XlogError::UnsupportedEpistemicConstruct {
1362 construct: construct.to_string(),
1363 context: format!(
1364 "semantic trace candidate index partition requires counts and index vectors \
1365 to match generated candidates, got generated={} accepted={} \
1366 accepted_indices={} accepted_world_views={} rejected={} rejected_indices={}",
1367 self.generated_candidates,
1368 self.accepted_candidates,
1369 self.accepted_candidate_indices.len(),
1370 self.accepted_world_views,
1371 self.rejected_candidates,
1372 self.rejected_candidate_indices.len()
1373 ),
1374 });
1375 }
1376 if self.rejection_reasons.len() != self.rejected_candidates {
1377 return Err(XlogError::UnsupportedEpistemicConstruct {
1378 construct: construct.to_string(),
1379 context: format!(
1380 "semantic trace rejection reason count must match rejected candidates, got \
1381 reasons={} rejected={}",
1382 self.rejection_reasons.len(),
1383 self.rejected_candidates
1384 ),
1385 });
1386 }
1387 self.typed_rejection_reasons()?;
1388
1389 let mut seen = BTreeSet::new();
1390 for (kind, indices) in [
1391 ("accepted", self.accepted_candidate_indices.as_slice()),
1392 ("rejected", self.rejected_candidate_indices.as_slice()),
1393 ] {
1394 for &index in indices {
1395 if index >= self.generated_candidates {
1396 return Err(XlogError::UnsupportedEpistemicConstruct {
1397 construct: construct.to_string(),
1398 context: format!(
1399 "semantic trace candidate index partition has out-of-range {kind} \
1400 index {index} for generated candidate count {}",
1401 self.generated_candidates
1402 ),
1403 });
1404 }
1405 if !seen.insert(index) {
1406 return Err(XlogError::UnsupportedEpistemicConstruct {
1407 construct: construct.to_string(),
1408 context: format!(
1409 "semantic trace candidate index partition contains duplicate \
1410 candidate index {index}"
1411 ),
1412 });
1413 }
1414 }
1415 }
1416 if seen.len() != self.generated_candidates {
1417 return Err(XlogError::UnsupportedEpistemicConstruct {
1418 construct: construct.to_string(),
1419 context: format!(
1420 "semantic trace candidate index partition covers {} of {} generated \
1421 candidates",
1422 seen.len(),
1423 self.generated_candidates
1424 ),
1425 });
1426 }
1427
1428 Ok(())
1429 }
1430
1431 pub fn typed_rejection_reasons(&self) -> Result<Vec<EpistemicGpuRejectionReason>> {
1433 self.rejection_reasons
1434 .iter()
1435 .copied()
1436 .map(EpistemicGpuRejectionReason::from_code)
1437 .collect()
1438 }
1439
1440 pub fn from_device_rejection_reasons(
1442 provider: &xlog_cuda::CudaKernelProvider,
1443 workspace: &EpistemicGpuWorkspace,
1444 candidate_generation: &EpistemicGpuCandidateGenerationTrace,
1445 propagation: &EpistemicGpuPropagationTrace,
1446 model_membership: &EpistemicGpuModelMembershipTrace,
1447 world_view_validation: &EpistemicGpuWorldViewValidationTrace,
1448 ) -> Result<Self> {
1449 let candidate_count = candidate_generation.generated_candidates;
1450 require_positive(candidate_count, "epistemic GPU semantic-trace candidates")?;
1451 if candidate_count > workspace.layout.rejection_reason_slots {
1452 return Err(XlogError::ResourceExhausted {
1453 context: "epistemic GPU semantic-trace rejection metadata".to_string(),
1454 estimated_bytes: candidate_count as u64,
1455 budget_bytes: workspace.layout.rejection_reason_slots as u64,
1456 });
1457 }
1458 if propagation.literal_count != candidate_generation.literal_count
1459 || model_membership.literal_count != candidate_generation.literal_count
1460 || world_view_validation.literal_count != candidate_generation.literal_count
1461 {
1462 return Err(XlogError::UnsupportedEpistemicConstruct {
1463 construct: "epistemic GPU semantic trace".to_string(),
1464 context: format!(
1465 "semantic trace requires all GPU stages to agree on literal count, got \
1466 generated={} propagated={} membership={} validation={}",
1467 candidate_generation.literal_count,
1468 propagation.literal_count,
1469 model_membership.literal_count,
1470 world_view_validation.literal_count
1471 ),
1472 });
1473 }
1474 let pruned_candidates = candidate_count
1475 .checked_sub(propagation.propagated_candidates)
1476 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
1477 construct: "epistemic GPU semantic trace".to_string(),
1478 context: format!(
1479 "semantic trace cannot prune more candidates than were generated: \
1480 generated={} propagated={}",
1481 candidate_count, propagation.propagated_candidates
1482 ),
1483 })?;
1484 if propagation.rejection_reason_slots_written < candidate_count {
1485 return Err(XlogError::UnsupportedEpistemicConstruct {
1486 construct: "epistemic GPU semantic trace".to_string(),
1487 context: format!(
1488 "semantic trace requires rejection metadata for every generated candidate, \
1489 got generated={} rejection_slots_initialized={}",
1490 candidate_count, propagation.rejection_reason_slots_written
1491 ),
1492 });
1493 }
1494 if model_membership.candidates_checked != candidate_count
1495 || world_view_validation.candidates_checked != candidate_count
1496 {
1497 return Err(XlogError::UnsupportedEpistemicConstruct {
1498 construct: "epistemic GPU semantic trace".to_string(),
1499 context: format!(
1500 "semantic trace requires GPU validation coverage for every generated \
1501 candidate, got generated={} membership_checked={} validation_checked={}",
1502 candidate_count,
1503 model_membership.candidates_checked,
1504 world_view_validation.candidates_checked
1505 ),
1506 });
1507 }
1508 if model_membership.reduction_count != world_view_validation.reduction_count
1509 || model_membership.models_per_reduction != world_view_validation.models_per_reduction
1510 {
1511 return Err(XlogError::UnsupportedEpistemicConstruct {
1512 construct: "epistemic GPU semantic trace".to_string(),
1513 context: format!(
1514 "semantic trace requires model-membership and world-view validation layouts \
1515 to match, got membership_reductions={} validation_reductions={} \
1516 membership_models_per_reduction={} validation_models_per_reduction={}",
1517 model_membership.reduction_count,
1518 world_view_validation.reduction_count,
1519 model_membership.models_per_reduction,
1520 world_view_validation.models_per_reduction
1521 ),
1522 });
1523 }
1524
1525 let raw_rejection_reasons = provider
1526 .dtoh_small_metadata_untracked(&workspace.rejection_reasons, candidate_count)?;
1527 let raw_constraint_violation_index = provider.dtoh_small_metadata_untracked(
1531 &workspace.constraint_violation_index,
1532 candidate_count,
1533 )?;
1534 let constraint_violation_code =
1535 EpistemicGpuRejectionReason::WorldViewConstraintViolation.code();
1536 let mut accepted_candidate_indices = Vec::new();
1537 let mut rejected_candidate_indices = Vec::new();
1538 let mut rejection_reasons = Vec::new();
1539 let mut constraint_violation_indices: Vec<Option<u32>> = Vec::new();
1540 for (candidate_index, reason) in raw_rejection_reasons.into_iter().enumerate() {
1541 if reason == 0 {
1542 accepted_candidate_indices.push(candidate_index);
1543 } else {
1544 EpistemicGpuRejectionReason::from_code(reason)?;
1545 rejected_candidate_indices.push(candidate_index);
1546 rejection_reasons.push(reason);
1547 let firing = raw_constraint_violation_index
1554 .get(candidate_index)
1555 .copied()
1556 .unwrap_or(u32::MAX);
1557 if reason == constraint_violation_code && firing != u32::MAX {
1558 constraint_violation_indices.push(Some(firing));
1559 } else {
1560 constraint_violation_indices.push(None);
1561 }
1562 }
1563 }
1564 let accepted_candidates = accepted_candidate_indices.len();
1565 let rejected_candidates = rejection_reasons.len();
1566 let reduced_model_slots_checked = checked_product(
1567 checked_product(
1568 world_view_validation.candidates_checked,
1569 model_membership.reduction_count,
1570 )?,
1571 model_membership.models_per_reduction,
1572 )?;
1573 let rejection_reason_metadata_bytes =
1574 checked_product(candidate_count, std::mem::size_of::<u32>())? as u64;
1575
1576 Ok(Self {
1577 generated_candidates: candidate_count,
1578 guesses: checked_product(candidate_count, candidate_generation.literal_count)?,
1579 propagated_candidates: propagation.propagated_candidates,
1580 pruned_candidates,
1581 tested_candidates: world_view_validation.candidates_checked,
1582 reduced_model_slots_checked,
1583 accepted_candidates,
1584 accepted_candidate_indices,
1585 accepted_world_views: accepted_candidates,
1586 rejected_candidates,
1587 rejected_candidate_indices,
1588 rejection_reasons,
1589 constraint_violation_indices,
1590 rejection_reason_device_reads: 1,
1596 rejection_reason_metadata_bytes,
1597 cpu_candidate_enumerations: 0,
1598 cpu_world_view_validations: 0,
1599 })
1600 }
1601}
1602
1603fn transfer_counter_delta(name: &str, before: u64, after: u64) -> Result<u64> {
1604 after
1605 .checked_sub(before)
1606 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
1607 construct: "epistemic GPU transfer budget".to_string(),
1608 context: format!(
1609 "host transfer counter decreased during GPU hot path: {name} before={before}, \
1610 after={after}"
1611 ),
1612 })
1613}
1614
1615impl EpistemicGpuModelMembershipTrace {
1616 pub fn for_counts(
1618 literal_count: usize,
1619 candidate_count: usize,
1620 reduction_count: usize,
1621 models_per_reduction: usize,
1622 ) -> Result<Self> {
1623 require_positive(literal_count, "epistemic GPU model-membership literals")?;
1624 require_positive(candidate_count, "epistemic GPU model-membership candidates")?;
1625 require_positive(reduction_count, "epistemic GPU model-membership reductions")?;
1626 require_positive(
1627 models_per_reduction,
1628 "epistemic GPU model-membership models",
1629 )?;
1630 let model_membership_bytes_written = checked_product(
1631 checked_product(
1632 checked_product(candidate_count, reduction_count)?,
1633 models_per_reduction,
1634 )?,
1635 literal_count,
1636 )?;
1637 require_u32_launch_bound(
1638 model_membership_bytes_written,
1639 "epistemic GPU model-membership launch",
1640 )?;
1641
1642 Ok(Self {
1643 literal_count,
1644 candidates_checked: candidate_count,
1645 reduction_count,
1646 models_per_reduction,
1647 model_membership_bytes_written,
1648 output_row_count_device_reads: 1,
1649 tuple_source_row_count_device_reads: 0,
1650 tuple_source_key_column_device_reads: 0,
1651 rejection_reason_slots_checked: candidate_count,
1652 membership_source: EpistemicGpuModelMembershipSource::ReducedOutputRowCountOnly,
1653 kernel_launches: 1,
1654 host_write_ops: 0,
1655 kernel_timing: EpistemicGpuKernelTimingTrace::unrecorded(),
1656 })
1657 }
1658
1659 pub fn for_stable_model_tuple_sources(
1661 literal_count: usize,
1662 candidate_count: usize,
1663 reduction_count: usize,
1664 models_per_reduction: usize,
1665 tuple_source_count: usize,
1666 ) -> Result<Self> {
1667 Self::for_stable_model_tuple_sources_with_key_columns(
1668 literal_count,
1669 candidate_count,
1670 reduction_count,
1671 models_per_reduction,
1672 tuple_source_count,
1673 0,
1674 )
1675 }
1676
1677 pub fn for_stable_model_tuple_sources_with_key_columns(
1679 literal_count: usize,
1680 candidate_count: usize,
1681 reduction_count: usize,
1682 models_per_reduction: usize,
1683 tuple_source_count: usize,
1684 tuple_source_key_column_count: usize,
1685 ) -> Result<Self> {
1686 require_positive(literal_count, "epistemic GPU model-membership literals")?;
1687 require_positive(candidate_count, "epistemic GPU model-membership candidates")?;
1688 require_positive(reduction_count, "epistemic GPU model-membership reductions")?;
1689 require_positive(
1690 models_per_reduction,
1691 "epistemic GPU model-membership models",
1692 )?;
1693 require_positive(
1694 tuple_source_count,
1695 "epistemic GPU model-membership tuple sources",
1696 )?;
1697 if tuple_source_count > u32::MAX as usize {
1698 return Err(XlogError::ResourceExhausted {
1699 context: "epistemic GPU model-membership tuple sources".to_string(),
1700 estimated_bytes: tuple_source_count as u64,
1701 budget_bytes: u32::MAX as u64,
1702 });
1703 }
1704 if tuple_source_key_column_count > u32::MAX as usize {
1705 return Err(XlogError::ResourceExhausted {
1706 context: "epistemic GPU model-membership tuple key columns".to_string(),
1707 estimated_bytes: tuple_source_key_column_count as u64,
1708 budget_bytes: u32::MAX as u64,
1709 });
1710 }
1711 let model_membership_bytes_written = checked_product(
1712 checked_product(
1713 checked_product(candidate_count, reduction_count)?,
1714 models_per_reduction,
1715 )?,
1716 literal_count,
1717 )?;
1718 require_u32_launch_bound(
1719 model_membership_bytes_written,
1720 "epistemic GPU model-membership launch",
1721 )?;
1722
1723 Ok(Self {
1724 literal_count,
1725 candidates_checked: candidate_count,
1726 reduction_count,
1727 models_per_reduction,
1728 model_membership_bytes_written,
1729 output_row_count_device_reads: 0,
1730 tuple_source_row_count_device_reads: tuple_source_count as u32,
1731 tuple_source_key_column_device_reads: tuple_source_key_column_count as u32,
1732 rejection_reason_slots_checked: candidate_count,
1733 membership_source: EpistemicGpuModelMembershipSource::StableModelTupleBuffer,
1734 kernel_launches: tuple_source_count as u32,
1735 host_write_ops: 0,
1736 kernel_timing: EpistemicGpuKernelTimingTrace::unrecorded(),
1737 })
1738 }
1739
1740 pub const fn with_kernel_timing(
1742 mut self,
1743 kernel_timing: EpistemicGpuKernelTimingTrace,
1744 ) -> Self {
1745 self.kernel_timing = kernel_timing;
1746 self
1747 }
1748
1749 pub fn require_stable_model_tuple_source(&self) -> Result<()> {
1751 if self.membership_source != EpistemicGpuModelMembershipSource::StableModelTupleBuffer {
1752 return Err(XlogError::UnsupportedEpistemicConstruct {
1753 construct: "epistemic GPU stable-model membership certification".to_string(),
1754 context: format!(
1755 "model-membership source {:?} is bounded staging only; actual reduced \
1756 stable-model tuple membership is required before returning accepted \
1757 epistemic execution",
1758 self.membership_source
1759 ),
1760 });
1761 }
1762
1763 Ok(())
1764 }
1765
1766 pub fn require_planned_tuple_key_column_reads(
1768 &self,
1769 expected_key_column_reads: usize,
1770 ) -> Result<()> {
1771 if self.tuple_source_key_column_device_reads as usize != expected_key_column_reads {
1772 return Err(XlogError::UnsupportedEpistemicConstruct {
1773 construct: "epistemic GPU stable-model membership certification".to_string(),
1774 context: format!(
1775 "model-membership tuple-key device column reads must match the planned \
1776 nonzero-arity tuple keys, got reads={} expected={}",
1777 self.tuple_source_key_column_device_reads, expected_key_column_reads
1778 ),
1779 });
1780 }
1781
1782 Ok(())
1783 }
1784}
1785
1786impl EpistemicGpuWorldViewValidationTrace {
1787 pub fn for_counts(
1789 literal_count: usize,
1790 candidate_count: usize,
1791 reduction_count: usize,
1792 models_per_reduction: usize,
1793 ) -> Result<Self> {
1794 require_positive(
1795 literal_count,
1796 "epistemic GPU world-view validation literals",
1797 )?;
1798 require_positive(
1799 candidate_count,
1800 "epistemic GPU world-view validation candidates",
1801 )?;
1802 require_positive(
1803 reduction_count,
1804 "epistemic GPU world-view validation reductions",
1805 )?;
1806 require_positive(
1807 models_per_reduction,
1808 "epistemic GPU world-view validation models",
1809 )?;
1810 let model_membership_bytes_checked = checked_product(
1811 checked_product(
1812 checked_product(candidate_count, reduction_count)?,
1813 models_per_reduction,
1814 )?,
1815 literal_count,
1816 )?;
1817 require_u32_launch_bound(
1818 model_membership_bytes_checked,
1819 "epistemic GPU world-view validation membership launch",
1820 )?;
1821
1822 Ok(Self {
1823 literal_count,
1824 candidates_checked: candidate_count,
1825 reduction_count,
1826 models_per_reduction,
1827 model_membership_bytes_checked,
1828 world_view_slots_checked: candidate_count,
1829 rejection_reason_slots_written: candidate_count,
1830 kernel_launches: 1,
1831 host_write_ops: 0,
1832 kernel_timing: EpistemicGpuKernelTimingTrace::unrecorded(),
1833 })
1834 }
1835
1836 pub const fn with_kernel_timing(
1838 mut self,
1839 kernel_timing: EpistemicGpuKernelTimingTrace,
1840 ) -> Self {
1841 self.kernel_timing = kernel_timing;
1842 self
1843 }
1844}
1845
1846impl EpistemicGpuPropagationTrace {
1847 pub fn for_counts(literal_count: usize, candidate_count: usize) -> Result<Self> {
1849 require_positive(literal_count, "epistemic GPU propagation literals")?;
1850 require_positive(candidate_count, "epistemic GPU propagation candidates")?;
1851 require_u32_launch_dimensions(
1852 &[literal_count, candidate_count],
1853 "epistemic GPU propagation launch",
1854 )?;
1855
1856 Ok(Self {
1857 literal_count,
1858 propagated_candidates: candidate_count,
1859 world_view_bytes_written: candidate_count,
1860 rejection_reason_slots_written: candidate_count,
1861 kernel_launches: 1,
1862 host_write_ops: 0,
1863 kernel_timing: EpistemicGpuKernelTimingTrace::unrecorded(),
1864 })
1865 }
1866
1867 pub const fn with_kernel_timing(
1869 mut self,
1870 kernel_timing: EpistemicGpuKernelTimingTrace,
1871 ) -> Self {
1872 self.kernel_timing = kernel_timing;
1873 self
1874 }
1875}
1876
1877impl EpistemicGpuWorkspaceResetTrace {
1878 pub fn for_layout(layout: EpistemicGpuWorkspaceLayout) -> Self {
1880 Self::try_for_layout(layout)
1881 .expect("epistemic GPU workspace reset trace byte total overflowed")
1882 }
1883
1884 pub fn try_for_layout(layout: EpistemicGpuWorkspaceLayout) -> Result<Self> {
1886 Ok(Self {
1887 candidate_assumption_bytes: layout.candidate_assumption_bytes,
1888 world_view_bytes: layout.world_view_bytes,
1889 model_membership_bytes: layout.model_membership_bytes,
1890 rejection_reason_bytes: checked_product(
1891 layout.rejection_reason_slots,
1892 std::mem::size_of::<u32>(),
1893 )?,
1894 device_zero_ops: 4,
1895 host_write_ops: 0,
1896 })
1897 }
1898
1899 pub fn total_zeroed_bytes(&self) -> usize {
1901 self.try_total_zeroed_bytes()
1902 .expect("epistemic GPU workspace reset byte total overflowed")
1903 }
1904
1905 pub fn try_total_zeroed_bytes(&self) -> Result<usize> {
1907 checked_sum(
1908 checked_sum(
1909 checked_sum(self.candidate_assumption_bytes, self.world_view_bytes)?,
1910 self.model_membership_bytes,
1911 )?,
1912 self.rejection_reason_bytes,
1913 )
1914 }
1915
1916 pub fn require_matches_layout(
1918 &self,
1919 construct: &str,
1920 layout: EpistemicGpuWorkspaceLayout,
1921 ) -> Result<()> {
1922 let expected = Self::try_for_layout(layout)?;
1923 if *self != expected {
1924 return Err(XlogError::UnsupportedEpistemicConstruct {
1925 construct: construct.to_string(),
1926 context: format!(
1927 "workspace reset trace does not match prepared GPU workspace layout: \
1928 candidate_bytes={}/{} world_view_bytes={}/{} model_membership_bytes={}/{} \
1929 rejection_reason_bytes={}/{} device_zero_ops={}/{} host_write_ops={}/{}",
1930 self.candidate_assumption_bytes,
1931 expected.candidate_assumption_bytes,
1932 self.world_view_bytes,
1933 expected.world_view_bytes,
1934 self.model_membership_bytes,
1935 expected.model_membership_bytes,
1936 self.rejection_reason_bytes,
1937 expected.rejection_reason_bytes,
1938 self.device_zero_ops,
1939 expected.device_zero_ops,
1940 self.host_write_ops,
1941 expected.host_write_ops
1942 ),
1943 });
1944 }
1945
1946 Ok(())
1947 }
1948}
1949
1950#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1952pub struct EpistemicGpuRuntimePreflight {
1953 pub epistemic_mode: EirEpistemicMode,
1955 pub workspace_layout: EpistemicGpuWorkspaceLayout,
1957 pub reduced_runtime_rule_count: usize,
1959 pub reduced_constraint_relation_count: usize,
1961 pub wcoj_required_reduction_count: usize,
1963 pub multiway_reduction_count: usize,
1965 pub kclique_wcoj_plan_count: usize,
1967 pub wcoj_triangle_route_count: usize,
1969 pub wcoj_4cycle_route_count: usize,
1971 pub free_join_route_count: usize,
1978 pub kclique_wcoj_plan_count_by_arity: [usize; 4],
1980 pub kclique_wcoj_max_arity: u8,
1982 pub kclique_wcoj_edge_permutation_count: usize,
1984 pub kclique_stream_group_count: usize,
1986 pub kclique_skew_scheduled_plan_count: usize,
1988 pub planned_hash_route_count: usize,
1990 pub planned_hash_planner_wins_count: usize,
1992 pub planned_hash_incomplete_stats_count: usize,
1994 pub planned_hash_cost_evidence_count: usize,
1996 pub sorted_layout_requirement_count: usize,
1998 pub helper_split_spec_count: usize,
2000 pub helper_relation_rule_count: usize,
2002 pub helper_relation_scan_count: usize,
2004 pub tuple_membership_binding_count: usize,
2006 pub solver_assumption_binding_count: usize,
2008 pub solver_required_capability_count: usize,
2010 pub solver_required_status_count: usize,
2012 pub know_operator_count: usize,
2014 pub possible_operator_count: usize,
2016 pub not_know_operator_count: usize,
2018 pub not_possible_operator_count: usize,
2020 pub cpu_fallbacks: EpistemicCpuFallbackCounters,
2022}
2023
2024impl EpistemicGpuRuntimePreflight {
2025 pub fn is_g91_mode(&self) -> bool {
2027 matches!(self.epistemic_mode, EirEpistemicMode::G91)
2028 }
2029
2030 pub fn is_faeel_mode(&self) -> bool {
2032 matches!(self.epistemic_mode, EirEpistemicMode::Faeel)
2033 }
2034
2035 pub fn for_executable_plan(
2037 executable: &EpistemicExecutablePlan,
2038 capacities: EpistemicGpuWorkspaceCapacities,
2039 ) -> Result<Self> {
2040 if !executable.gpu_plan.cpu_fallbacks.is_zero() {
2041 return Err(XlogError::UnsupportedEpistemicConstruct {
2042 construct: "epistemic GPU runtime preflight".to_string(),
2043 context: "nonzero CPU fallback counters".to_string(),
2044 });
2045 }
2046 executable.gpu_plan.validate_tuple_membership_bindings()?;
2047 executable.gpu_plan.validate_solver_contract()?;
2048 require_epistemic_gpu_kernel_phases(&executable.gpu_plan)?;
2056 require_epistemic_gpu_buffer_contract(&executable.gpu_plan)?;
2057
2058 let workspace_layout =
2059 EpistemicGpuWorkspaceLayout::for_plan(&executable.gpu_plan, capacities)?;
2060 let mut routes = RuntimeRouteSummary::default();
2061 let mut reduced_runtime_rule_count = 0usize;
2062 let mut reduced_constraint_relation_names = Vec::new();
2063 let wcoj_required_reduction_count = executable
2064 .gpu_plan
2065 .reductions
2066 .iter()
2067 .filter(|reduction| {
2068 matches!(
2069 reduction.wcoj_status,
2070 EpistemicWcojReductionStatus::RequiresPlannerEligibility
2071 )
2072 })
2073 .count();
2074 let helper_relation_ids = helper_relation_ids(executable);
2075 let mut helper_relation_rule_count = 0usize;
2076 let mut helper_relation_scan_count = 0usize;
2077
2078 for rule in executable
2079 .reduced_runtime_plan
2080 .rules_by_scc
2081 .iter()
2082 .flatten()
2083 {
2084 reduced_runtime_rule_count += 1;
2085 if rule.head.starts_with(XLOG_CONSTRAINT_RELATION_PREFIX)
2086 && !reduced_constraint_relation_names
2087 .iter()
2088 .any(|name| name == &rule.head)
2089 {
2090 reduced_constraint_relation_names.push(rule.head.as_str());
2091 }
2092 if rule.head.starts_with("__kclique_helper_") {
2093 helper_relation_rule_count += 1;
2094 }
2095 helper_relation_scan_count +=
2096 count_helper_relation_scans(&rule.body, &helper_relation_ids);
2097 summarize_runtime_routes(&rule.body, &mut routes);
2098 }
2099
2100 if wcoj_required_reduction_count > routes.multiway_reduction_count {
2101 return Err(XlogError::UnsupportedEpistemicConstruct {
2102 construct: "epistemic GPU WCOJ route certification".to_string(),
2103 context: format!(
2104 "plan requires {} WCOJ-eligible epistemic reductions, but reduced runtime \
2105 plan exposes {} MultiWayJoin routes",
2106 wcoj_required_reduction_count, routes.multiway_reduction_count
2107 ),
2108 });
2109 }
2110
2111 let planned_hash_reason_count = routes
2112 .planned_hash_planner_wins_count
2113 .checked_add(routes.planned_hash_incomplete_stats_count)
2114 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
2115 construct: "epistemic GPU planned-hash certification".to_string(),
2116 context: "planned-hash reason counters overflowed".to_string(),
2117 })?;
2118 if planned_hash_reason_count != routes.planned_hash_route_count
2119 || routes.planned_hash_cost_evidence_count < routes.planned_hash_planner_wins_count
2120 {
2121 return Err(XlogError::UnsupportedEpistemicConstruct {
2122 construct: "epistemic GPU planned-hash certification".to_string(),
2123 context: format!(
2124 "planned_hash_routes={}, planner_wins={}, incomplete_stats={}, \
2125 finite_cost_evidence={}",
2126 routes.planned_hash_route_count,
2127 routes.planned_hash_planner_wins_count,
2128 routes.planned_hash_incomplete_stats_count,
2129 routes.planned_hash_cost_evidence_count
2130 ),
2131 });
2132 }
2133
2134 if routes.kclique_wcoj_plan_count > 0 && routes.kclique_wcoj_edge_permutation_count == 0 {
2135 return Err(XlogError::UnsupportedEpistemicConstruct {
2136 construct: "epistemic GPU K-clique WCOJ certification".to_string(),
2137 context: format!(
2138 "K-clique WCOJ plans require live edge-permutation slots, got \
2139 kclique_plans={} edge_permutation_slots=0",
2140 routes.kclique_wcoj_plan_count
2141 ),
2142 });
2143 }
2144
2145 if routes.helper_split_spec_count > 0
2146 && (helper_relation_rule_count < routes.helper_split_spec_count
2147 || helper_relation_scan_count < routes.helper_split_spec_count)
2148 {
2149 return Err(XlogError::UnsupportedEpistemicConstruct {
2150 construct: "epistemic GPU helper-split certification".to_string(),
2151 context: format!(
2152 "helper_split_specs={}, helper_relation_rules={}, \
2153 helper_relation_scans={}",
2154 routes.helper_split_spec_count,
2155 helper_relation_rule_count,
2156 helper_relation_scan_count
2157 ),
2158 });
2159 }
2160
2161 let mut know_operator_count = 0usize;
2162 let mut possible_operator_count = 0usize;
2163 let mut not_know_operator_count = 0usize;
2164 let mut not_possible_operator_count = 0usize;
2165 for literal in &executable.gpu_plan.epistemic_literals {
2166 match (literal.op, literal.negated) {
2167 (EirEpistemicOp::Know, false) => know_operator_count += 1,
2168 (EirEpistemicOp::Possible, false) => possible_operator_count += 1,
2169 (EirEpistemicOp::Know, true) => not_know_operator_count += 1,
2170 (EirEpistemicOp::Possible, true) => not_possible_operator_count += 1,
2171 }
2172 }
2173
2174 Ok(Self {
2175 epistemic_mode: executable.gpu_plan.mode,
2176 workspace_layout,
2177 reduced_runtime_rule_count,
2178 reduced_constraint_relation_count: reduced_constraint_relation_names.len(),
2179 wcoj_required_reduction_count,
2180 multiway_reduction_count: routes.multiway_reduction_count,
2181 kclique_wcoj_plan_count: routes.kclique_wcoj_plan_count,
2182 wcoj_triangle_route_count: routes.wcoj_triangle_route_count,
2183 wcoj_4cycle_route_count: routes.wcoj_4cycle_route_count,
2184 free_join_route_count: routes.free_join_route_count,
2185 kclique_wcoj_plan_count_by_arity: routes.kclique_wcoj_plan_count_by_arity,
2186 kclique_wcoj_max_arity: routes.kclique_wcoj_max_arity,
2187 kclique_wcoj_edge_permutation_count: routes.kclique_wcoj_edge_permutation_count,
2188 kclique_stream_group_count: routes.kclique_stream_groups.len(),
2189 kclique_skew_scheduled_plan_count: routes.kclique_skew_scheduled_plan_count,
2190 planned_hash_route_count: routes.planned_hash_route_count,
2191 planned_hash_planner_wins_count: routes.planned_hash_planner_wins_count,
2192 planned_hash_incomplete_stats_count: routes.planned_hash_incomplete_stats_count,
2193 planned_hash_cost_evidence_count: routes.planned_hash_cost_evidence_count,
2194 sorted_layout_requirement_count: routes.sorted_layout_requirement_count,
2195 helper_split_spec_count: routes.helper_split_spec_count,
2196 helper_relation_rule_count,
2197 helper_relation_scan_count,
2198 tuple_membership_binding_count: executable.gpu_plan.tuple_membership_bindings.len(),
2199 solver_assumption_binding_count: executable
2200 .gpu_plan
2201 .solver_contract
2202 .assumption_bindings
2203 .len(),
2204 solver_required_capability_count: executable
2205 .gpu_plan
2206 .solver_contract
2207 .distinct_required_capability_count(),
2208 solver_required_status_count: executable
2209 .gpu_plan
2210 .solver_contract
2211 .distinct_required_status_count(),
2212 know_operator_count,
2213 possible_operator_count,
2214 not_know_operator_count,
2215 not_possible_operator_count,
2216 cpu_fallbacks: executable.gpu_plan.cpu_fallbacks,
2217 })
2218 }
2219}
2220
2221pub struct EpistemicGpuPreparedExecution {
2223 pub preflight: EpistemicGpuRuntimePreflight,
2225 pub tuple_membership_bindings: Vec<EpistemicTupleMembershipBinding>,
2227 pub workspace: EpistemicGpuWorkspace,
2229 pub workspace_reset: EpistemicGpuWorkspaceResetTrace,
2231}
2232
2233#[derive(Debug, Clone, Copy, PartialEq, Eq)]
2235pub struct EpistemicGpuRuntimeTrace {
2236 pub preflight: EpistemicGpuRuntimePreflight,
2238 pub counters_before: EpistemicGpuRuntimeCounters,
2240 pub counters_after: EpistemicGpuRuntimeCounters,
2242 pub counter_delta: EpistemicGpuRuntimeCounters,
2244 pub wcoj_certification: EpistemicGpuRuntimeWcojCertification,
2246}
2247
2248impl EpistemicGpuRuntimeTrace {
2249 pub fn from_preflight_and_counters(
2251 preflight: EpistemicGpuRuntimePreflight,
2252 counters_before: EpistemicGpuRuntimeCounters,
2253 counters_after: EpistemicGpuRuntimeCounters,
2254 ) -> Self {
2255 Self::try_from_preflight_and_counters(preflight, counters_before, counters_after)
2256 .expect("runtime counter snapshots must be monotonic")
2257 }
2258
2259 pub fn try_from_preflight_and_counters(
2262 preflight: EpistemicGpuRuntimePreflight,
2263 counters_before: EpistemicGpuRuntimeCounters,
2264 counters_after: EpistemicGpuRuntimeCounters,
2265 ) -> Result<Self> {
2266 let counter_delta = counters_after.checked_delta_since(counters_before)?;
2267 let wcoj_certification = EpistemicGpuRuntimeWcojCertification::try_for_preflight_and_delta(
2268 &preflight,
2269 &counter_delta,
2270 )?;
2271
2272 Ok(Self {
2273 preflight,
2274 counters_before,
2275 counters_after,
2276 counter_delta,
2277 wcoj_certification,
2278 })
2279 }
2280
2281 pub fn require_wcoj_certification(&self) -> Result<()> {
2283 match self.wcoj_certification {
2284 EpistemicGpuRuntimeWcojCertification::MissingRequiredWcojDispatch {
2285 required_multiway_reductions,
2286 required_kclique_plans,
2287 observed_wcoj_dispatches,
2288 observed_kclique_dispatches,
2289 } => Err(XlogError::UnsupportedEpistemicConstruct {
2290 construct: "epistemic GPU WCOJ dispatch certification".to_string(),
2291 context: format!(
2292 "required_multiway_reductions={required_multiway_reductions}, \
2293 required_kclique_plans={required_kclique_plans}, \
2294 observed_wcoj_dispatches={observed_wcoj_dispatches}, \
2295 observed_kclique_dispatches={observed_kclique_dispatches}"
2296 ),
2297 }),
2298 EpistemicGpuRuntimeWcojCertification::MissingRequiredWcojLayout {
2299 required_sorted_layouts,
2300 observed_layout_events,
2301 } => Err(XlogError::UnsupportedEpistemicConstruct {
2302 construct: "epistemic GPU WCOJ layout certification".to_string(),
2303 context: format!(
2304 "required_sorted_layouts={required_sorted_layouts}, \
2305 observed_layout_events={observed_layout_events}"
2306 ),
2307 }),
2308 EpistemicGpuRuntimeWcojCertification::MissingRequiredKcliqueMetadata {
2309 required_kclique_plans,
2310 observed_metadata_builds,
2311 observed_metadata_build_nanos,
2312 } => Err(XlogError::UnsupportedEpistemicConstruct {
2313 construct: "epistemic GPU K-clique metadata certification".to_string(),
2314 context: format!(
2315 "required_kclique_plans={required_kclique_plans}, \
2316 observed_metadata_builds={observed_metadata_builds}, \
2317 observed_metadata_build_nanos={observed_metadata_build_nanos}"
2318 ),
2319 }),
2320 EpistemicGpuRuntimeWcojCertification::NotRequired { .. }
2321 | EpistemicGpuRuntimeWcojCertification::Certified { .. } => Ok(()),
2322 }
2323 }
2324}
2325
2326#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
2328pub struct EpistemicGpuRuntimeCounters {
2329 pub wcoj_triangle_dispatch_count: u64,
2331 pub wcoj_4cycle_dispatch_count: u64,
2333 pub chain_dispatch_count: u64,
2335 pub wcoj_clique5_dispatch_count: u64,
2337 pub wcoj_clique6_dispatch_count: u64,
2339 pub wcoj_clique7_dispatch_count: u64,
2341 pub wcoj_clique8_dispatch_count: u64,
2343 pub free_join_dispatch_count: u64,
2349 pub factorized_delta_dispatch_count: u64,
2354 pub provider_wcoj_triangle_hg_dispatch_count: u64,
2356 pub wcoj_layout_sort_invocation_count: u64,
2358 pub wcoj_layout_fast_path_hit_count: u64,
2360 pub kclique_metadata_build_count: u64,
2362 pub kclique_metadata_build_nanos: u64,
2364 pub kclique_histogram_refresh_count: u64,
2366 pub kclique_histogram_refresh_nanos: u128,
2368}
2369
2370impl EpistemicGpuRuntimeCounters {
2371 fn checked_counter_delta(counter: &str, after: u64, before: u64) -> Result<u64> {
2372 after
2373 .checked_sub(before)
2374 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
2375 construct: "epistemic GPU runtime counter trace".to_string(),
2376 context: format!(
2377 "runtime proof counter {counter} decreased from {before} to {after}"
2378 ),
2379 })
2380 }
2381
2382 fn checked_counter_delta_u128(counter: &str, after: u128, before: u128) -> Result<u128> {
2383 after
2384 .checked_sub(before)
2385 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
2386 construct: "epistemic GPU runtime counter trace".to_string(),
2387 context: format!(
2388 "runtime proof counter {counter} decreased from {before} to {after}"
2389 ),
2390 })
2391 }
2392
2393 fn checked_counter_sum(counter: &str, values: &[u64]) -> Result<u64> {
2394 values.iter().try_fold(0u64, |acc, value| {
2395 acc.checked_add(*value)
2396 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
2397 construct: "epistemic GPU runtime counter trace".to_string(),
2398 context: format!(
2399 "runtime proof counter {counter} overflowed while adding {value} to {acc}"
2400 ),
2401 })
2402 })
2403 }
2404
2405 pub fn checked_delta_since(self, before: Self) -> Result<Self> {
2407 Ok(Self {
2408 wcoj_triangle_dispatch_count: Self::checked_counter_delta(
2409 "wcoj_triangle_dispatch_count",
2410 self.wcoj_triangle_dispatch_count,
2411 before.wcoj_triangle_dispatch_count,
2412 )?,
2413 wcoj_4cycle_dispatch_count: Self::checked_counter_delta(
2414 "wcoj_4cycle_dispatch_count",
2415 self.wcoj_4cycle_dispatch_count,
2416 before.wcoj_4cycle_dispatch_count,
2417 )?,
2418 chain_dispatch_count: Self::checked_counter_delta(
2419 "chain_dispatch_count",
2420 self.chain_dispatch_count,
2421 before.chain_dispatch_count,
2422 )?,
2423 wcoj_clique5_dispatch_count: Self::checked_counter_delta(
2424 "wcoj_clique5_dispatch_count",
2425 self.wcoj_clique5_dispatch_count,
2426 before.wcoj_clique5_dispatch_count,
2427 )?,
2428 wcoj_clique6_dispatch_count: Self::checked_counter_delta(
2429 "wcoj_clique6_dispatch_count",
2430 self.wcoj_clique6_dispatch_count,
2431 before.wcoj_clique6_dispatch_count,
2432 )?,
2433 wcoj_clique7_dispatch_count: Self::checked_counter_delta(
2434 "wcoj_clique7_dispatch_count",
2435 self.wcoj_clique7_dispatch_count,
2436 before.wcoj_clique7_dispatch_count,
2437 )?,
2438 wcoj_clique8_dispatch_count: Self::checked_counter_delta(
2439 "wcoj_clique8_dispatch_count",
2440 self.wcoj_clique8_dispatch_count,
2441 before.wcoj_clique8_dispatch_count,
2442 )?,
2443 factorized_delta_dispatch_count: Self::checked_counter_delta(
2444 "factorized_delta_dispatch_count",
2445 self.factorized_delta_dispatch_count,
2446 before.factorized_delta_dispatch_count,
2447 )?,
2448 free_join_dispatch_count: Self::checked_counter_delta(
2449 "free_join_dispatch_count",
2450 self.free_join_dispatch_count,
2451 before.free_join_dispatch_count,
2452 )?,
2453 provider_wcoj_triangle_hg_dispatch_count: Self::checked_counter_delta(
2454 "provider_wcoj_triangle_hg_dispatch_count",
2455 self.provider_wcoj_triangle_hg_dispatch_count,
2456 before.provider_wcoj_triangle_hg_dispatch_count,
2457 )?,
2458 wcoj_layout_sort_invocation_count: Self::checked_counter_delta(
2459 "wcoj_layout_sort_invocation_count",
2460 self.wcoj_layout_sort_invocation_count,
2461 before.wcoj_layout_sort_invocation_count,
2462 )?,
2463 wcoj_layout_fast_path_hit_count: Self::checked_counter_delta(
2464 "wcoj_layout_fast_path_hit_count",
2465 self.wcoj_layout_fast_path_hit_count,
2466 before.wcoj_layout_fast_path_hit_count,
2467 )?,
2468 kclique_metadata_build_count: Self::checked_counter_delta(
2469 "kclique_metadata_build_count",
2470 self.kclique_metadata_build_count,
2471 before.kclique_metadata_build_count,
2472 )?,
2473 kclique_metadata_build_nanos: Self::checked_counter_delta(
2474 "kclique_metadata_build_nanos",
2475 self.kclique_metadata_build_nanos,
2476 before.kclique_metadata_build_nanos,
2477 )?,
2478 kclique_histogram_refresh_count: Self::checked_counter_delta(
2479 "kclique_histogram_refresh_count",
2480 self.kclique_histogram_refresh_count,
2481 before.kclique_histogram_refresh_count,
2482 )?,
2483 kclique_histogram_refresh_nanos: Self::checked_counter_delta_u128(
2484 "kclique_histogram_refresh_nanos",
2485 self.kclique_histogram_refresh_nanos,
2486 before.kclique_histogram_refresh_nanos,
2487 )?,
2488 })
2489 }
2490
2491 pub fn saturating_delta_since(self, before: Self) -> Self {
2493 Self {
2494 wcoj_triangle_dispatch_count: self
2495 .wcoj_triangle_dispatch_count
2496 .saturating_sub(before.wcoj_triangle_dispatch_count),
2497 wcoj_4cycle_dispatch_count: self
2498 .wcoj_4cycle_dispatch_count
2499 .saturating_sub(before.wcoj_4cycle_dispatch_count),
2500 chain_dispatch_count: self
2501 .chain_dispatch_count
2502 .saturating_sub(before.chain_dispatch_count),
2503 wcoj_clique5_dispatch_count: self
2504 .wcoj_clique5_dispatch_count
2505 .saturating_sub(before.wcoj_clique5_dispatch_count),
2506 wcoj_clique6_dispatch_count: self
2507 .wcoj_clique6_dispatch_count
2508 .saturating_sub(before.wcoj_clique6_dispatch_count),
2509 wcoj_clique7_dispatch_count: self
2510 .wcoj_clique7_dispatch_count
2511 .saturating_sub(before.wcoj_clique7_dispatch_count),
2512 wcoj_clique8_dispatch_count: self
2513 .wcoj_clique8_dispatch_count
2514 .saturating_sub(before.wcoj_clique8_dispatch_count),
2515 free_join_dispatch_count: self
2516 .free_join_dispatch_count
2517 .saturating_sub(before.free_join_dispatch_count),
2518 factorized_delta_dispatch_count: self
2519 .factorized_delta_dispatch_count
2520 .saturating_sub(before.factorized_delta_dispatch_count),
2521 provider_wcoj_triangle_hg_dispatch_count: self
2522 .provider_wcoj_triangle_hg_dispatch_count
2523 .saturating_sub(before.provider_wcoj_triangle_hg_dispatch_count),
2524 wcoj_layout_sort_invocation_count: self
2525 .wcoj_layout_sort_invocation_count
2526 .saturating_sub(before.wcoj_layout_sort_invocation_count),
2527 wcoj_layout_fast_path_hit_count: self
2528 .wcoj_layout_fast_path_hit_count
2529 .saturating_sub(before.wcoj_layout_fast_path_hit_count),
2530 kclique_metadata_build_count: self
2531 .kclique_metadata_build_count
2532 .saturating_sub(before.kclique_metadata_build_count),
2533 kclique_metadata_build_nanos: self
2534 .kclique_metadata_build_nanos
2535 .saturating_sub(before.kclique_metadata_build_nanos),
2536 kclique_histogram_refresh_count: self
2537 .kclique_histogram_refresh_count
2538 .saturating_sub(before.kclique_histogram_refresh_count),
2539 kclique_histogram_refresh_nanos: self
2540 .kclique_histogram_refresh_nanos
2541 .saturating_sub(before.kclique_histogram_refresh_nanos),
2542 }
2543 }
2544
2545 pub fn wcoj_dispatch_count(&self) -> u64 {
2547 self.wcoj_triangle_dispatch_count
2548 .saturating_add(self.wcoj_4cycle_dispatch_count)
2549 .saturating_add(self.wcoj_clique_dispatch_count())
2550 }
2551
2552 pub fn checked_wcoj_dispatch_count(&self) -> Result<u64> {
2554 Self::checked_counter_sum(
2555 "wcoj_dispatch_count",
2556 &[
2557 self.wcoj_triangle_dispatch_count,
2558 self.wcoj_4cycle_dispatch_count,
2559 self.checked_wcoj_clique_dispatch_count()?,
2560 ],
2561 )
2562 }
2563
2564 pub fn wcoj_clique_dispatch_count(&self) -> u64 {
2566 self.wcoj_clique5_dispatch_count
2567 .saturating_add(self.wcoj_clique6_dispatch_count)
2568 .saturating_add(self.wcoj_clique7_dispatch_count)
2569 .saturating_add(self.wcoj_clique8_dispatch_count)
2570 }
2571
2572 pub fn checked_wcoj_clique_dispatch_count(&self) -> Result<u64> {
2574 Self::checked_counter_sum(
2575 "wcoj_clique_dispatch_count",
2576 &[
2577 self.wcoj_clique5_dispatch_count,
2578 self.wcoj_clique6_dispatch_count,
2579 self.wcoj_clique7_dispatch_count,
2580 self.wcoj_clique8_dispatch_count,
2581 ],
2582 )
2583 }
2584}
2585
2586#[derive(Debug, Clone, Copy, PartialEq, Eq)]
2588pub enum EpistemicGpuRuntimeWcojCertification {
2589 NotRequired {
2591 observed_wcoj_dispatches: u64,
2593 planned_hash_routes: usize,
2595 planned_hash_planner_wins: usize,
2597 planned_hash_incomplete_stats: usize,
2599 planned_hash_cost_evidence: usize,
2601 },
2602 Certified {
2604 observed_wcoj_dispatches: u64,
2606 certified_multiway_reductions: usize,
2608 observed_kclique_dispatches: u64,
2610 certified_edge_permutation_slots: usize,
2612 certified_stream_groups: usize,
2614 certified_skew_scheduled_plans: usize,
2616 certified_sorted_layout_requirements: usize,
2618 certified_helper_split_specs: usize,
2620 certified_helper_relation_rules: usize,
2622 certified_helper_relation_scans: usize,
2624 observed_layout_sorts: u64,
2626 observed_layout_fast_path_hits: u64,
2628 observed_metadata_builds: u64,
2630 observed_metadata_build_nanos: u64,
2632 observed_histogram_refreshes: u64,
2634 observed_histogram_refresh_nanos: u128,
2636 },
2637 MissingRequiredWcojLayout {
2639 required_sorted_layouts: usize,
2641 observed_layout_events: u64,
2643 },
2644 MissingRequiredKcliqueMetadata {
2646 required_kclique_plans: usize,
2648 observed_metadata_builds: u64,
2650 observed_metadata_build_nanos: u64,
2652 },
2653 MissingRequiredWcojDispatch {
2655 required_multiway_reductions: usize,
2657 required_kclique_plans: usize,
2659 observed_wcoj_dispatches: u64,
2661 observed_kclique_dispatches: u64,
2663 },
2664}
2665
2666#[derive(Debug, Clone, Copy, PartialEq, Eq)]
2668pub struct EpistemicGpuProviderIdentity {
2669 pub device_ordinal: usize,
2671 pub device_ptr: usize,
2673 pub memory_ptr: usize,
2675}
2676
2677impl EpistemicGpuProviderIdentity {
2678 pub fn from_provider(provider: &xlog_cuda::CudaKernelProvider) -> Self {
2680 Self {
2681 device_ordinal: provider.device().ordinal(),
2682 device_ptr: Arc::as_ptr(provider.device()) as usize,
2683 memory_ptr: Arc::as_ptr(provider.memory()) as usize,
2684 }
2685 }
2686}
2687
2688pub struct EpistemicGpuExecutionResult {
2690 pub provider_identity: EpistemicGpuProviderIdentity,
2692 pub prepared: EpistemicGpuPreparedExecution,
2694 pub candidate_generation: EpistemicGpuCandidateGenerationTrace,
2696 pub propagation: EpistemicGpuPropagationTrace,
2698 pub candidate_validation: EpistemicGpuCandidateValidationTrace,
2700 pub model_membership: EpistemicGpuModelMembershipTrace,
2702 pub world_view_validation: EpistemicGpuWorldViewValidationTrace,
2704 pub constraint_world_view_validation: EpistemicGpuConstraintWorldViewValidationTrace,
2706 pub materialization: EpistemicGpuMaterializationTrace,
2708 pub final_result_materialization: EpistemicGpuFinalResultMaterializationTrace,
2710 pub final_tuple_materialization: EpistemicGpuFinalTupleMaterializationTrace,
2712 pub transfer_budget: EpistemicGpuTransferBudgetTrace,
2714 pub final_result_transfer: EpistemicGpuFinalResultTransferTrace,
2716 pub constraint_validation: EpistemicGpuConstraintValidationTrace,
2718 pub semantic_trace: EpistemicGpuSemanticTrace,
2720 pub tuple_membership_bindings: Vec<EpistemicTupleMembershipBinding>,
2722 pub final_output: CudaBuffer,
2730 pub additional_head_outputs: Vec<(String, CudaBuffer)>,
2736 pub tuple_evidence_output: Option<CudaBuffer>,
2738 pub output: CudaBuffer,
2740 pub trace: EpistemicGpuRuntimeTrace,
2742}
2743
2744impl EpistemicGpuExecutionResult {
2745 pub fn tuple_evidence_output(&self) -> &CudaBuffer {
2747 self.tuple_evidence_output
2748 .as_ref()
2749 .unwrap_or(&self.final_output)
2750 }
2751
2752 pub fn require_runtime_dispatch_certification(&self) -> Result<()> {
2754 if self.trace.preflight != self.prepared.preflight {
2755 return Err(XlogError::UnsupportedEpistemicConstruct {
2756 construct: "epistemic GPU runtime dispatch certification".to_string(),
2757 context: "runtime trace preflight does not match prepared execution preflight"
2758 .to_string(),
2759 });
2760 }
2761 if self.prepared.workspace.layout != self.prepared.preflight.workspace_layout {
2762 return Err(XlogError::UnsupportedEpistemicConstruct {
2763 construct: "epistemic GPU runtime dispatch certification".to_string(),
2764 context: "prepared GPU workspace layout does not match preflight workspace layout"
2765 .to_string(),
2766 });
2767 }
2768 self.prepared
2769 .workspace
2770 .require_buffer_lengths_match_layout("epistemic GPU runtime dispatch certification")?;
2771 if self.tuple_membership_bindings.len()
2772 != self.prepared.preflight.tuple_membership_binding_count
2773 {
2774 return Err(XlogError::UnsupportedEpistemicConstruct {
2775 construct: "epistemic GPU runtime dispatch certification".to_string(),
2776 context: format!(
2777 "runtime tuple-membership bindings do not match prepared preflight, got {} \
2778 bindings for preflight count {}",
2779 self.tuple_membership_bindings.len(),
2780 self.prepared.preflight.tuple_membership_binding_count
2781 ),
2782 });
2783 }
2784 if self.tuple_membership_bindings != self.prepared.tuple_membership_bindings {
2785 return Err(XlogError::UnsupportedEpistemicConstruct {
2786 construct: "epistemic GPU runtime dispatch certification".to_string(),
2787 context: "runtime tuple-membership bindings do not match prepared GPU execution"
2788 .to_string(),
2789 });
2790 }
2791 self.model_membership
2792 .require_planned_tuple_key_column_reads(expected_tuple_key_column_reads(
2793 &self.prepared.tuple_membership_bindings,
2794 )?)?;
2795 self.prepared.workspace_reset.require_matches_layout(
2796 "epistemic GPU runtime dispatch certification",
2797 self.prepared.preflight.workspace_layout,
2798 )?;
2799 self.final_result_transfer.require_matches_final_output(
2800 "epistemic GPU runtime dispatch certification",
2801 &self.final_output,
2802 )?;
2803 self.constraint_validation.require_matches_preflight(
2804 "epistemic GPU runtime dispatch certification",
2805 &self.prepared.preflight,
2806 )?;
2807 self.candidate_validation
2808 .require_matches_candidate_generation(
2809 "epistemic GPU runtime dispatch certification",
2810 &self.candidate_generation,
2811 )?;
2812 self.semantic_trace.require_matches_execution_traces(
2813 "epistemic GPU runtime dispatch certification",
2814 &self.candidate_generation,
2815 &self.propagation,
2816 &self.model_membership,
2817 &self.world_view_validation,
2818 )?;
2819 self.semantic_trace.require_rejection_metadata_accounting(
2820 "epistemic GPU runtime dispatch certification",
2821 )?;
2822 self.semantic_trace
2823 .require_candidate_index_partition("epistemic GPU runtime dispatch certification")?;
2824 let aggregate_kernel_timing = self.try_aggregate_kernel_timing()?;
2825 if !aggregate_kernel_timing.is_recorded() {
2826 return Err(XlogError::UnsupportedEpistemicConstruct {
2827 construct: "epistemic GPU runtime dispatch certification".to_string(),
2828 context: "accepted GPU execution did not record CUDA-event timing".to_string(),
2829 });
2830 }
2831 self.trace.require_wcoj_certification()
2832 }
2833
2834 pub fn aggregate_kernel_timing(&self) -> EpistemicGpuKernelTimingTrace {
2836 self.try_aggregate_kernel_timing()
2837 .expect("epistemic GPU kernel timing aggregation overflowed")
2838 }
2839
2840 pub fn try_aggregate_kernel_timing(&self) -> Result<EpistemicGpuKernelTimingTrace> {
2842 let traces = [
2843 self.candidate_generation.kernel_timing,
2844 self.propagation.kernel_timing,
2845 self.candidate_validation.kernel_timing,
2846 self.model_membership.kernel_timing,
2847 self.world_view_validation.kernel_timing,
2848 self.materialization.kernel_timing,
2849 self.final_result_materialization.kernel_timing,
2850 self.final_tuple_materialization.kernel_timing,
2851 ];
2852
2853 if traces
2854 .iter()
2855 .all(EpistemicGpuKernelTimingTrace::is_recorded)
2856 {
2857 EpistemicGpuKernelTimingTrace::checked_sum(traces)
2858 } else {
2859 Ok(EpistemicGpuKernelTimingTrace::unrecorded())
2860 }
2861 }
2862}
2863
2864#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
2866pub struct EpistemicGpuBatchExecutionTrace {
2867 pub component_count: usize,
2869 pub gpu_runtime_component_executions: usize,
2871 pub cpu_recomposition_steps: u64,
2873 pub cpu_candidate_enumerations: u64,
2875 pub cpu_world_view_validations: u64,
2877 pub cpu_solver_search_fallbacks: u64,
2879 pub cpu_probability_recomputations: u64,
2881 pub tracked_dtoh_calls: u64,
2883 pub tracked_htod_calls: u64,
2885 pub tracked_aggregate_htod_calls: u64,
2887 pub tracked_launch_metadata_htod_calls: u64,
2889 pub tracked_data_plane_htod_calls: u64,
2891 pub per_candidate_host_round_trips: u64,
2893 pub final_output_rows: usize,
2895 pub final_output_payload_bytes: u64,
2897 pub final_result_row_count_device_reads: u32,
2899 pub final_result_data_plane_dtoh_calls: u64,
2901 pub final_result_data_plane_dtoh_bytes: u64,
2903 pub checked_constraint_relations: usize,
2905 pub violated_constraint_relations: usize,
2907 pub constraint_row_count_device_reads: u32,
2909 pub accepted_world_views: usize,
2911 pub rejected_candidates: usize,
2913 pub know_operator_count: usize,
2915 pub possible_operator_count: usize,
2917 pub not_know_operator_count: usize,
2919 pub not_possible_operator_count: usize,
2921 pub aggregate_kernel_timing: EpistemicGpuKernelTimingTrace,
2923}
2924
2925impl EpistemicGpuBatchExecutionTrace {
2926 pub fn from_component_results(results: &[EpistemicGpuExecutionResult]) -> Self {
2928 Self::try_from_component_results(results)
2929 .expect("epistemic GPU batch trace aggregation overflowed")
2930 }
2931
2932 pub fn try_from_component_results(results: &[EpistemicGpuExecutionResult]) -> Result<Self> {
2935 let component_kernel_timings = results
2936 .iter()
2937 .map(EpistemicGpuExecutionResult::try_aggregate_kernel_timing)
2938 .collect::<Result<Vec<_>>>()?;
2939 let aggregate_kernel_timing = if component_kernel_timings
2940 .iter()
2941 .all(EpistemicGpuKernelTimingTrace::is_recorded)
2942 {
2943 EpistemicGpuKernelTimingTrace::checked_sum(component_kernel_timings)
2944 } else {
2945 Ok(EpistemicGpuKernelTimingTrace::unrecorded())
2946 };
2947 let aggregate_kernel_timing = aggregate_kernel_timing?;
2948
2949 Ok(Self {
2950 component_count: results.len(),
2951 gpu_runtime_component_executions: results.len(),
2952 cpu_recomposition_steps: 0,
2953 cpu_candidate_enumerations: checked_batch_sum_u64(
2954 "cpu_candidate_enumerations",
2955 results
2956 .iter()
2957 .map(|result| u64::from(result.semantic_trace.cpu_candidate_enumerations)),
2958 )?,
2959 cpu_world_view_validations: checked_batch_sum_u64(
2960 "cpu_world_view_validations",
2961 results
2962 .iter()
2963 .map(|result| u64::from(result.semantic_trace.cpu_world_view_validations)),
2964 )?,
2965 cpu_solver_search_fallbacks: checked_batch_sum_u64(
2966 "cpu_solver_search_fallbacks",
2967 results
2968 .iter()
2969 .map(|result| result.prepared.preflight.cpu_fallbacks.solver_search),
2970 )?,
2971 cpu_probability_recomputations: checked_batch_sum_u64(
2972 "cpu_probability_recomputations",
2973 results.iter().map(|result| {
2974 result
2975 .prepared
2976 .preflight
2977 .cpu_fallbacks
2978 .probabilistic_recompute
2979 }),
2980 )?,
2981 tracked_dtoh_calls: checked_batch_sum_u64(
2982 "tracked_dtoh_calls",
2983 results
2984 .iter()
2985 .map(|result| result.transfer_budget.tracked_dtoh_calls),
2986 )?,
2987 tracked_htod_calls: checked_batch_sum_u64(
2988 "tracked_htod_calls",
2989 results
2990 .iter()
2991 .map(|result| result.transfer_budget.tracked_htod_calls),
2992 )?,
2993 tracked_aggregate_htod_calls: checked_batch_sum_u64(
2994 "tracked_aggregate_htod_calls",
2995 results
2996 .iter()
2997 .map(|result| result.transfer_budget.tracked_aggregate_htod_calls),
2998 )?,
2999 tracked_launch_metadata_htod_calls: checked_batch_sum_u64(
3000 "tracked_launch_metadata_htod_calls",
3001 results
3002 .iter()
3003 .map(|result| result.transfer_budget.tracked_launch_metadata_htod_calls),
3004 )?,
3005 tracked_data_plane_htod_calls: checked_batch_sum_u64(
3006 "tracked_data_plane_htod_calls",
3007 results
3008 .iter()
3009 .map(|result| result.transfer_budget.tracked_data_plane_htod_calls),
3010 )?,
3011 per_candidate_host_round_trips: checked_batch_sum_u64(
3012 "per_candidate_host_round_trips",
3013 results
3014 .iter()
3015 .map(|result| result.transfer_budget.per_candidate_host_round_trips),
3016 )?,
3017 final_output_rows: checked_batch_sum_usize(
3018 "final_output_rows",
3019 results
3020 .iter()
3021 .map(|result| result.final_result_transfer.final_output_rows),
3022 )?,
3023 final_output_payload_bytes: checked_batch_sum_u64(
3024 "final_output_payload_bytes",
3025 results
3026 .iter()
3027 .map(|result| result.final_result_transfer.final_output_payload_bytes),
3028 )?,
3029 final_result_row_count_device_reads: checked_batch_sum_u32(
3030 "final_result_row_count_device_reads",
3031 results
3032 .iter()
3033 .map(|result| result.final_result_transfer.row_count_device_reads),
3034 )?,
3035 final_result_data_plane_dtoh_calls: checked_batch_sum_u64(
3036 "final_result_data_plane_dtoh_calls",
3037 results
3038 .iter()
3039 .map(|result| result.final_result_transfer.tracked_data_plane_dtoh_calls),
3040 )?,
3041 final_result_data_plane_dtoh_bytes: checked_batch_sum_u64(
3042 "final_result_data_plane_dtoh_bytes",
3043 results
3044 .iter()
3045 .map(|result| result.final_result_transfer.tracked_data_plane_dtoh_bytes),
3046 )?,
3047 checked_constraint_relations: checked_batch_sum_usize(
3048 "checked_constraint_relations",
3049 results
3050 .iter()
3051 .map(|result| result.constraint_validation.checked_constraint_relations),
3052 )?,
3053 violated_constraint_relations: checked_batch_sum_usize(
3054 "violated_constraint_relations",
3055 results
3056 .iter()
3057 .map(|result| result.constraint_validation.violated_constraint_relations),
3058 )?,
3059 constraint_row_count_device_reads: checked_batch_sum_u32(
3060 "constraint_row_count_device_reads",
3061 results
3062 .iter()
3063 .map(|result| result.constraint_validation.row_count_device_reads),
3064 )?,
3065 accepted_world_views: checked_batch_sum_usize(
3066 "accepted_world_views",
3067 results
3068 .iter()
3069 .map(|result| result.semantic_trace.accepted_world_views),
3070 )?,
3071 rejected_candidates: checked_batch_sum_usize(
3072 "rejected_candidates",
3073 results
3074 .iter()
3075 .map(|result| result.semantic_trace.rejected_candidates),
3076 )?,
3077 know_operator_count: checked_batch_sum_usize(
3078 "know_operator_count",
3079 results
3080 .iter()
3081 .map(|result| result.prepared.preflight.know_operator_count),
3082 )?,
3083 possible_operator_count: checked_batch_sum_usize(
3084 "possible_operator_count",
3085 results
3086 .iter()
3087 .map(|result| result.prepared.preflight.possible_operator_count),
3088 )?,
3089 not_know_operator_count: checked_batch_sum_usize(
3090 "not_know_operator_count",
3091 results
3092 .iter()
3093 .map(|result| result.prepared.preflight.not_know_operator_count),
3094 )?,
3095 not_possible_operator_count: checked_batch_sum_usize(
3096 "not_possible_operator_count",
3097 results
3098 .iter()
3099 .map(|result| result.prepared.preflight.not_possible_operator_count),
3100 )?,
3101 aggregate_kernel_timing,
3102 })
3103 }
3104}
3105
3106fn checked_batch_sum_u64(
3107 counter: &'static str,
3108 values: impl IntoIterator<Item = u64>,
3109) -> Result<u64> {
3110 values.into_iter().try_fold(0u64, |acc, value| {
3111 acc.checked_add(value)
3112 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
3113 construct: "epistemic GPU batch execution trace".to_string(),
3114 context: format!(
3115 "batch counter {counter} overflowed while aggregating component traces: \
3116 acc={acc} next={value}"
3117 ),
3118 })
3119 })
3120}
3121
3122fn checked_batch_sum_u32(
3123 counter: &'static str,
3124 values: impl IntoIterator<Item = u32>,
3125) -> Result<u32> {
3126 values.into_iter().try_fold(0u32, |acc, value| {
3127 acc.checked_add(value)
3128 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
3129 construct: "epistemic GPU batch execution trace".to_string(),
3130 context: format!(
3131 "batch counter {counter} overflowed while aggregating component traces: \
3132 acc={acc} next={value}"
3133 ),
3134 })
3135 })
3136}
3137
3138fn checked_batch_sum_usize(
3139 counter: &'static str,
3140 values: impl IntoIterator<Item = usize>,
3141) -> Result<usize> {
3142 values.into_iter().try_fold(0usize, |acc, value| {
3143 acc.checked_add(value)
3144 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
3145 construct: "epistemic GPU batch execution trace".to_string(),
3146 context: format!(
3147 "batch counter {counter} overflowed while aggregating component traces: \
3148 acc={acc} next={value}"
3149 ),
3150 })
3151 })
3152}
3153
3154pub struct EpistemicGpuBatchExecutionResult {
3156 pub results: Vec<EpistemicGpuExecutionResult>,
3158 pub trace: EpistemicGpuBatchExecutionTrace,
3160}
3161
3162impl EpistemicGpuBatchExecutionResult {
3163 pub fn require_trace_matches_components(&self, construct: &str) -> Result<()> {
3165 if self.results.is_empty() {
3166 return Err(XlogError::UnsupportedEpistemicConstruct {
3167 construct: construct.to_string(),
3168 context: "batch evidence requires at least one GPU component".to_string(),
3169 });
3170 }
3171 let expected = EpistemicGpuBatchExecutionTrace::try_from_component_results(&self.results)?;
3172 if self.trace != expected {
3173 return Err(XlogError::UnsupportedEpistemicConstruct {
3174 construct: construct.to_string(),
3175 context: format!(
3176 "batch aggregate trace does not match component GPU execution results: \
3177 trace_components={}/{} expected_components={}/{} \
3178 trace_final_rows={} expected_final_rows={} trace_dtoh_calls={} \
3179 expected_dtoh_calls={} trace_data_plane_htod_calls={} \
3180 expected_data_plane_htod_calls={} trace_constraint_violations={} \
3181 expected_constraint_violations={} trace_accepted_world_views={} \
3182 expected_accepted_world_views={}",
3183 self.trace.gpu_runtime_component_executions,
3184 self.trace.component_count,
3185 expected.gpu_runtime_component_executions,
3186 expected.component_count,
3187 self.trace.final_output_rows,
3188 expected.final_output_rows,
3189 self.trace.tracked_dtoh_calls,
3190 expected.tracked_dtoh_calls,
3191 self.trace.tracked_data_plane_htod_calls,
3192 expected.tracked_data_plane_htod_calls,
3193 self.trace.violated_constraint_relations,
3194 expected.violated_constraint_relations,
3195 self.trace.accepted_world_views,
3196 expected.accepted_world_views
3197 ),
3198 });
3199 }
3200 if !self.trace.aggregate_kernel_timing.is_recorded() {
3201 return Err(XlogError::UnsupportedEpistemicConstruct {
3202 construct: construct.to_string(),
3203 context: "batch GPU execution did not record aggregate CUDA-event timing"
3204 .to_string(),
3205 });
3206 }
3207 Ok(())
3208 }
3209}
3210
3211impl EpistemicGpuRuntimeWcojCertification {
3212 pub fn for_preflight_and_delta(
3214 preflight: &EpistemicGpuRuntimePreflight,
3215 delta: &EpistemicGpuRuntimeCounters,
3216 ) -> Self {
3217 Self::try_for_preflight_and_delta(preflight, delta)
3218 .expect("runtime WCOJ certification counters must not overflow")
3219 }
3220
3221 pub fn try_for_preflight_and_delta(
3224 preflight: &EpistemicGpuRuntimePreflight,
3225 delta: &EpistemicGpuRuntimeCounters,
3226 ) -> Result<Self> {
3227 let observed_wcoj_dispatches = delta.checked_wcoj_dispatch_count()?;
3228 let observed_kclique_dispatches = delta.checked_wcoj_clique_dispatch_count()?;
3229 let wcoj_routed_reduction_count = preflight
3230 .multiway_reduction_count
3231 .checked_sub(preflight.planned_hash_route_count)
3232 .and_then(|count| count.checked_sub(preflight.free_join_route_count))
3237 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
3238 construct: "epistemic GPU WCOJ route certification".to_string(),
3239 context: format!(
3240 "planned hash + free-join routes exceed observed route obligations: \
3241 multiway_reductions={} planned_hash_routes={} free_join_routes={}",
3242 preflight.multiway_reduction_count,
3243 preflight.planned_hash_route_count,
3244 preflight.free_join_route_count
3245 ),
3246 })?;
3247 let required_multiway_reductions = wcoj_routed_reduction_count;
3248
3249 if required_multiway_reductions == 0 {
3250 return Ok(Self::NotRequired {
3251 observed_wcoj_dispatches,
3252 planned_hash_routes: preflight.planned_hash_route_count,
3253 planned_hash_planner_wins: preflight.planned_hash_planner_wins_count,
3254 planned_hash_incomplete_stats: preflight.planned_hash_incomplete_stats_count,
3255 planned_hash_cost_evidence: preflight.planned_hash_cost_evidence_count,
3256 });
3257 }
3258
3259 if observed_wcoj_dispatches < required_multiway_reductions as u64
3260 || observed_kclique_dispatches < preflight.kclique_wcoj_plan_count as u64
3261 || delta.wcoj_triangle_dispatch_count < preflight.wcoj_triangle_route_count as u64
3262 || delta.wcoj_4cycle_dispatch_count < preflight.wcoj_4cycle_route_count as u64
3263 || delta.wcoj_clique5_dispatch_count
3264 < preflight.kclique_wcoj_plan_count_by_arity[0] as u64
3265 || delta.wcoj_clique6_dispatch_count
3266 < preflight.kclique_wcoj_plan_count_by_arity[1] as u64
3267 || delta.wcoj_clique7_dispatch_count
3268 < preflight.kclique_wcoj_plan_count_by_arity[2] as u64
3269 || delta.wcoj_clique8_dispatch_count
3270 < preflight.kclique_wcoj_plan_count_by_arity[3] as u64
3271 {
3272 return Ok(Self::MissingRequiredWcojDispatch {
3273 required_multiway_reductions,
3274 required_kclique_plans: preflight.kclique_wcoj_plan_count,
3275 observed_wcoj_dispatches,
3276 observed_kclique_dispatches,
3277 });
3278 }
3279
3280 let observed_layout_events = EpistemicGpuRuntimeCounters::checked_counter_sum(
3281 "wcoj_layout_events",
3282 &[
3283 delta.wcoj_layout_sort_invocation_count,
3284 delta.wcoj_layout_fast_path_hit_count,
3285 ],
3286 )?;
3287 if observed_layout_events < preflight.sorted_layout_requirement_count as u64 {
3288 return Ok(Self::MissingRequiredWcojLayout {
3289 required_sorted_layouts: preflight.sorted_layout_requirement_count,
3290 observed_layout_events,
3291 });
3292 }
3293
3294 if preflight.kclique_wcoj_plan_count > 0
3295 && (delta.kclique_metadata_build_count < preflight.kclique_wcoj_plan_count as u64
3296 || delta.kclique_metadata_build_nanos == 0)
3297 {
3298 return Ok(Self::MissingRequiredKcliqueMetadata {
3299 required_kclique_plans: preflight.kclique_wcoj_plan_count,
3300 observed_metadata_builds: delta.kclique_metadata_build_count,
3301 observed_metadata_build_nanos: delta.kclique_metadata_build_nanos,
3302 });
3303 }
3304
3305 Ok(Self::Certified {
3306 observed_wcoj_dispatches,
3307 certified_multiway_reductions: required_multiway_reductions,
3308 observed_kclique_dispatches,
3309 certified_edge_permutation_slots: preflight.kclique_wcoj_edge_permutation_count,
3310 certified_stream_groups: preflight.kclique_stream_group_count,
3311 certified_skew_scheduled_plans: preflight.kclique_skew_scheduled_plan_count,
3312 certified_sorted_layout_requirements: preflight.sorted_layout_requirement_count,
3313 certified_helper_split_specs: preflight.helper_split_spec_count,
3314 certified_helper_relation_rules: preflight.helper_relation_rule_count,
3315 certified_helper_relation_scans: preflight.helper_relation_scan_count,
3316 observed_layout_sorts: delta.wcoj_layout_sort_invocation_count,
3317 observed_layout_fast_path_hits: delta.wcoj_layout_fast_path_hit_count,
3318 observed_metadata_builds: delta.kclique_metadata_build_count,
3319 observed_metadata_build_nanos: delta.kclique_metadata_build_nanos,
3320 observed_histogram_refreshes: delta.kclique_histogram_refresh_count,
3321 observed_histogram_refresh_nanos: delta.kclique_histogram_refresh_nanos,
3322 })
3323 }
3324}
3325
3326#[allow(clippy::large_enum_variant)]
3327enum TupleSourceLaunch<'a> {
3328 ArityZero {
3329 literal_index: u32,
3330 reduction_index: u32,
3331 negated: u8,
3332 row_count: &'a TrackedCudaSlice<u32>,
3333 },
3334 ArityOne {
3335 literal_index: u32,
3336 reduction_index: u32,
3337 negated: u8,
3338 row_count: &'a TrackedCudaSlice<u32>,
3339 key_col0: &'a CudaColumn,
3340 key_col0_width: u32,
3341 expected_key_col0_bits: u64,
3342 expected_key_col0_type_code: u8,
3343 },
3344 ArityTwo {
3345 literal_index: u32,
3346 reduction_index: u32,
3347 negated: u8,
3348 row_count: &'a TrackedCudaSlice<u32>,
3349 key_col0: &'a CudaColumn,
3350 key_col0_width: u32,
3351 expected_key_col0_bits: u64,
3352 expected_key_col0_type_code: u8,
3353 key_col1: &'a CudaColumn,
3354 key_col1_width: u32,
3355 expected_key_col1_bits: u64,
3356 expected_key_col1_type_code: u8,
3357 },
3358 ArityThree {
3359 literal_index: u32,
3360 reduction_index: u32,
3361 negated: u8,
3362 row_count: &'a TrackedCudaSlice<u32>,
3363 key_col0: &'a CudaColumn,
3364 key_col0_width: u32,
3365 expected_key_col0_bits: u64,
3366 expected_key_col0_type_code: u8,
3367 key_col1: &'a CudaColumn,
3368 key_col1_width: u32,
3369 expected_key_col1_bits: u64,
3370 expected_key_col1_type_code: u8,
3371 key_col2: &'a CudaColumn,
3372 key_col2_width: u32,
3373 expected_key_col2_bits: u64,
3374 expected_key_col2_type_code: u8,
3375 },
3376 ArityN {
3377 literal_index: u32,
3378 reduction_index: u32,
3379 negated: u8,
3380 row_count: &'a TrackedCudaSlice<u32>,
3381 bound_value_row_count: &'a TrackedCudaSlice<u32>,
3382 key_col_count: u32,
3383 key_col_ptrs: TrackedCudaSlice<u64>,
3384 key_col_widths: TrackedCudaSlice<u32>,
3385 expected_key_bits: TrackedCudaSlice<u64>,
3386 expected_key_type_codes: TrackedCudaSlice<u8>,
3387 tuple_key_match_modes: TrackedCudaSlice<u8>,
3388 bound_value_col_ptrs: TrackedCudaSlice<u64>,
3389 bound_value_col_widths: TrackedCudaSlice<u32>,
3390 has_bound_value_keys: u8,
3391 },
3392}
3393
3394const TUPLE_KEY_MATCH_MODE_GROUND: u8 = 0;
3395const TUPLE_KEY_MATCH_MODE_BOUND_OUTPUT: u8 = 1;
3396const TUPLE_KEY_MATCH_MODE_WILDCARD: u8 = 2;
3398
3399#[derive(Debug, Clone, Copy, PartialEq, Eq)]
3400struct TupleKeyExpectation {
3401 bits: u64,
3402 type_code: u8,
3403}
3404
3405impl TupleKeyExpectation {
3406 fn from_term(term: &EirTerm, column_type: ScalarType) -> Result<Self> {
3407 let bits = match (term, column_type) {
3408 (EirTerm::Integer(value), ScalarType::U32) => {
3409 u32::try_from(*value).map(u64::from).map_err(|_| {
3410 tuple_key_expectation_error(format!(
3411 "integer {value} is out of range for U32 tuple-key column"
3412 ))
3413 })?
3414 }
3415 (EirTerm::Integer(value), ScalarType::I32) => i32::try_from(*value)
3416 .map(|v| v as u32 as u64)
3417 .map_err(|_| {
3418 tuple_key_expectation_error(format!(
3419 "integer {value} is out of range for I32 tuple-key column"
3420 ))
3421 })?,
3422 (EirTerm::Integer(value), ScalarType::U64) => u64::try_from(*value).map_err(|_| {
3423 tuple_key_expectation_error(format!(
3424 "integer {value} is out of range for U64 tuple-key column"
3425 ))
3426 })?,
3427 (EirTerm::Integer(value), ScalarType::I64) => *value as u64,
3428 (EirTerm::Integer(value), ScalarType::Bool) => match *value {
3429 0 => 0,
3430 1 => 1,
3431 _ => {
3432 return Err(tuple_key_expectation_error(format!(
3433 "integer {value} is out of range for Bool tuple-key column"
3434 )))
3435 }
3436 },
3437 (EirTerm::Symbol(value), ScalarType::Symbol) => u64::from(*value),
3438 (EirTerm::String(value), ScalarType::Symbol) => {
3439 u64::from(xlog_core::symbol::intern(value))
3440 }
3441 (EirTerm::FloatBits(bits), ScalarType::F64) => *bits,
3442 (EirTerm::FloatBits(bits), ScalarType::F32) => {
3443 (f64::from_bits(*bits) as f32).to_bits() as u64
3444 }
3445 (EirTerm::Variable(_), _) => {
3446 return Err(tuple_key_expectation_error(format!(
3447 "term {term:?} cannot be encoded as a ground tuple-key expectation"
3448 )))
3449 }
3450 (
3451 EirTerm::Anonymous
3452 | EirTerm::List(_)
3453 | EirTerm::Cons { .. }
3454 | EirTerm::Compound { .. }
3455 | EirTerm::PredRef(_)
3456 | EirTerm::Aggregate { .. },
3457 _,
3458 ) => {
3459 return Err(tuple_key_expectation_error(format!(
3460 "term {term:?} cannot be used for GPU tuple-key matching"
3461 )))
3462 }
3463 _ => {
3464 return Err(tuple_key_expectation_error(format!(
3465 "term {term:?} cannot be encoded for {column_type:?} tuple-key column"
3466 )))
3467 }
3468 };
3469
3470 Ok(Self {
3471 bits,
3472 type_code: column_type.to_code(),
3473 })
3474 }
3475}
3476
3477fn tuple_key_expectation_error(context: String) -> XlogError {
3478 XlogError::UnsupportedEpistemicConstruct {
3479 construct: "epistemic GPU tuple-key expectation".to_string(),
3480 context,
3481 }
3482}
3483
3484impl Executor {
3485 fn resolve_modal_tuple_source(&self, predicate: &str, arity: usize) -> Option<&CudaBuffer> {
3504 let qualified = format!("{predicate}/{arity}");
3505 self.store()
3506 .get(qualified.as_str())
3507 .or_else(|| self.store().get(predicate))
3508 }
3509
3510 pub fn epistemic_gpu_runtime_counters(&self) -> EpistemicGpuRuntimeCounters {
3512 EpistemicGpuRuntimeCounters {
3513 wcoj_triangle_dispatch_count: self.wcoj_triangle_dispatch_count(),
3514 wcoj_4cycle_dispatch_count: self.wcoj_4cycle_dispatch_count(),
3515 chain_dispatch_count: self.chain_dispatch_count(),
3516 wcoj_clique5_dispatch_count: self.wcoj_clique5_dispatch_count(),
3517 wcoj_clique6_dispatch_count: self.wcoj_clique6_dispatch_count(),
3518 wcoj_clique7_dispatch_count: self.wcoj_clique7_dispatch_count(),
3519 wcoj_clique8_dispatch_count: self.wcoj_clique8_dispatch_count(),
3520 free_join_dispatch_count: self.free_join_dispatch_count(),
3521 factorized_delta_dispatch_count: self.factorized_delta_dispatch_count(),
3522 provider_wcoj_triangle_hg_dispatch_count: self
3523 .provider
3524 .wcoj_triangle_hg_dispatch_count(),
3525 wcoj_layout_sort_invocation_count: self.provider.wcoj_layout_sort_invocation_count(),
3526 wcoj_layout_fast_path_hit_count: self.provider.wcoj_layout_fast_path_hit_count(),
3527 kclique_metadata_build_count: self.provider.kclique_metadata_build_count(),
3528 kclique_metadata_build_nanos: self.provider.kclique_metadata_build_nanos(),
3529 kclique_histogram_refresh_count: self.kclique_histogram_refresh_count(),
3530 kclique_histogram_refresh_nanos: self.kclique_histogram_refresh_nanos(),
3531 }
3532 }
3533
3534 fn time_epistemic_gpu_kernel_launch(
3535 &self,
3536 operation: &str,
3537 launch: impl FnOnce() -> std::result::Result<(), DriverError>,
3538 ) -> Result<EpistemicGpuKernelTimingTrace> {
3539 let stream = self.provider.device().inner().stream().clone();
3540 let start = stream
3541 .record_event(Some(sys::CUevent_flags::CU_EVENT_DEFAULT))
3542 .map_err(|e| XlogError::execution_ctx(operation, "record start timing event", &e))?;
3543 launch().map_err(|e| XlogError::execution_ctx(operation, "launch kernel", &e))?;
3544 let end = stream
3545 .record_event(Some(sys::CUevent_flags::CU_EVENT_DEFAULT))
3546 .map_err(|e| XlogError::execution_ctx(operation, "record end timing event", &e))?;
3547 let elapsed_ms = start
3548 .elapsed_ms(&end)
3549 .map_err(|e| XlogError::execution_ctx(operation, "measure CUDA event elapsed", &e))?;
3550
3551 EpistemicGpuKernelTimingTrace::from_cuda_elapsed_ms(elapsed_ms)
3552 }
3553
3554 pub fn allocate_epistemic_gpu_workspace(
3556 &self,
3557 plan: &EpistemicGpuPlan,
3558 capacities: EpistemicGpuWorkspaceCapacities,
3559 ) -> Result<EpistemicGpuWorkspace> {
3560 let layout = EpistemicGpuWorkspaceLayout::for_plan(plan, capacities)?;
3561 let memory = self.provider.memory();
3562
3563 Ok(EpistemicGpuWorkspace {
3564 layout,
3565 candidate_assumptions: memory.alloc::<u8>(layout.candidate_assumption_bytes)?,
3566 world_views: memory.alloc::<u8>(layout.world_view_bytes)?,
3567 model_membership: memory.alloc::<u8>(layout.model_membership_bytes)?,
3568 rejection_reasons: memory.alloc::<u32>(layout.rejection_reason_slots)?,
3569 constraint_violation_index: memory.alloc::<u32>(layout.rejection_reason_slots)?,
3570 })
3571 }
3572
3573 pub fn reset_epistemic_gpu_workspace(
3575 &self,
3576 workspace: &mut EpistemicGpuWorkspace,
3577 ) -> Result<EpistemicGpuWorkspaceResetTrace> {
3578 let device = self.provider.device().inner();
3579
3580 device
3581 .memset_zeros(&mut workspace.candidate_assumptions)
3582 .map_err(|e| {
3583 XlogError::execution_ctx(
3584 "epistemic GPU workspace reset",
3585 "candidate assumptions memset",
3586 &e,
3587 )
3588 })?;
3589 device
3590 .memset_zeros(&mut workspace.world_views)
3591 .map_err(|e| {
3592 XlogError::execution_ctx("epistemic GPU workspace reset", "world views memset", &e)
3593 })?;
3594 device
3595 .memset_zeros(&mut workspace.model_membership)
3596 .map_err(|e| {
3597 XlogError::execution_ctx(
3598 "epistemic GPU workspace reset",
3599 "model membership memset",
3600 &e,
3601 )
3602 })?;
3603 device
3604 .memset_zeros(&mut workspace.rejection_reasons)
3605 .map_err(|e| {
3606 XlogError::execution_ctx(
3607 "epistemic GPU workspace reset",
3608 "rejection reasons memset",
3609 &e,
3610 )
3611 })?;
3612
3613 EpistemicGpuWorkspaceResetTrace::try_for_layout(workspace.layout)
3614 }
3615
3616 pub fn generate_epistemic_gpu_candidates(
3618 &self,
3619 workspace: &mut EpistemicGpuWorkspace,
3620 literal_count: usize,
3621 candidate_count: usize,
3622 ) -> Result<EpistemicGpuCandidateGenerationTrace> {
3623 let trace =
3624 EpistemicGpuCandidateGenerationTrace::for_counts(literal_count, candidate_count)?;
3625 if trace.candidate_assumption_bytes > workspace.layout.candidate_assumption_bytes {
3626 return Err(XlogError::ResourceExhausted {
3627 context: "epistemic GPU candidate assumption workspace".to_string(),
3628 estimated_bytes: trace.candidate_assumption_bytes as u64,
3629 budget_bytes: workspace.layout.candidate_assumption_bytes as u64,
3630 });
3631 }
3632 if trace.candidate_assumption_bytes > u32::MAX as usize {
3633 return Err(XlogError::ResourceExhausted {
3634 context: "epistemic GPU candidate generation launch".to_string(),
3635 estimated_bytes: trace.candidate_assumption_bytes as u64,
3636 budget_bytes: u32::MAX as u64,
3637 });
3638 }
3639
3640 let literal_count =
3641 checked_u32_dimension(literal_count, "epistemic GPU candidate generation literals")?;
3642 let candidate_count = checked_u32_dimension(
3643 candidate_count,
3644 "epistemic GPU candidate generation candidates",
3645 )?;
3646 let total = checked_u32_dimension(
3647 trace.candidate_assumption_bytes,
3648 "epistemic GPU candidate generation launch elements",
3649 )?;
3650 let func = self
3651 .provider
3652 .device()
3653 .inner()
3654 .get_func(
3655 EPISTEMIC_MODULE,
3656 epistemic_kernels::EPISTEMIC_GENERATE_CANDIDATE_ASSUMPTIONS_U8,
3657 )
3658 .ok_or_else(|| {
3659 XlogError::Execution("epistemic candidate generation kernel not found".to_string())
3660 })?;
3661 let config = LaunchConfig::for_num_elems(total);
3662
3663 let kernel_timing = self.time_epistemic_gpu_kernel_launch(
3664 "epistemic GPU candidate generation",
3665 || unsafe {
3666 func.clone().launch(
3669 config,
3670 (
3671 literal_count,
3672 candidate_count,
3673 &mut workspace.candidate_assumptions,
3674 ),
3675 )
3676 },
3677 )?;
3678
3679 Ok(trace.with_kernel_timing(kernel_timing))
3680 }
3681
3682 pub fn propagate_epistemic_gpu_candidates(
3684 &self,
3685 workspace: &mut EpistemicGpuWorkspace,
3686 literal_count: usize,
3687 candidate_count: usize,
3688 ) -> Result<EpistemicGpuPropagationTrace> {
3689 let mut trace = EpistemicGpuPropagationTrace::for_counts(literal_count, candidate_count)?;
3690 let candidate_assumption_bytes = checked_product(literal_count, candidate_count)?;
3691 if candidate_assumption_bytes > workspace.layout.candidate_assumption_bytes {
3692 return Err(XlogError::ResourceExhausted {
3693 context: "epistemic GPU propagation candidate workspace".to_string(),
3694 estimated_bytes: candidate_assumption_bytes as u64,
3695 budget_bytes: workspace.layout.candidate_assumption_bytes as u64,
3696 });
3697 }
3698 if trace.rejection_reason_slots_written > workspace.layout.rejection_reason_slots {
3699 return Err(XlogError::ResourceExhausted {
3700 context: "epistemic GPU propagation rejection workspace".to_string(),
3701 estimated_bytes: trace.rejection_reason_slots_written as u64,
3702 budget_bytes: workspace.layout.rejection_reason_slots as u64,
3703 });
3704 }
3705 if literal_count > u32::MAX as usize || candidate_count > u32::MAX as usize {
3706 return Err(XlogError::ResourceExhausted {
3707 context: "epistemic GPU propagation launch".to_string(),
3708 estimated_bytes: literal_count.max(candidate_count) as u64,
3709 budget_bytes: u32::MAX as u64,
3710 });
3711 }
3712
3713 let world_stride =
3714 workspace.layout.world_view_bytes / workspace.layout.rejection_reason_slots;
3715 if world_stride == 0 || world_stride > u32::MAX as usize {
3716 return Err(XlogError::ResourceExhausted {
3717 context: "epistemic GPU propagation world stride".to_string(),
3718 estimated_bytes: world_stride as u64,
3719 budget_bytes: u32::MAX as u64,
3720 });
3721 }
3722 let world_view_bitset_bytes_per_candidate =
3723 world_view_bitset_bytes_per_candidate(literal_count)?;
3724 if world_view_bitset_bytes_per_candidate > world_stride {
3725 return Err(XlogError::ResourceExhausted {
3726 context: "epistemic GPU propagation world-view bitset stride".to_string(),
3727 estimated_bytes: world_view_bitset_bytes_per_candidate as u64,
3728 budget_bytes: world_stride as u64,
3729 });
3730 }
3731 let world_view_bitset_bytes =
3732 checked_product(world_view_bitset_bytes_per_candidate, candidate_count)?;
3733 if world_view_bitset_bytes > workspace.layout.world_view_bytes {
3734 return Err(XlogError::ResourceExhausted {
3735 context: "epistemic GPU propagation world-view bitsets".to_string(),
3736 estimated_bytes: world_view_bitset_bytes as u64,
3737 budget_bytes: workspace.layout.world_view_bytes as u64,
3738 });
3739 }
3740 trace.world_view_bytes_written = checked_product(world_stride, candidate_count)?;
3741 if trace.world_view_bytes_written > workspace.layout.world_view_bytes {
3742 return Err(XlogError::ResourceExhausted {
3743 context: "epistemic GPU propagation world-view workspace".to_string(),
3744 estimated_bytes: trace.world_view_bytes_written as u64,
3745 budget_bytes: workspace.layout.world_view_bytes as u64,
3746 });
3747 }
3748
3749 let literal_count =
3750 checked_u32_dimension(literal_count, "epistemic GPU propagation literals")?;
3751 let candidate_count =
3752 checked_u32_dimension(candidate_count, "epistemic GPU propagation candidates")?;
3753 let world_stride =
3754 checked_u32_dimension(world_stride, "epistemic GPU propagation world stride")?;
3755 let func = self
3756 .provider
3757 .device()
3758 .inner()
3759 .get_func(
3760 EPISTEMIC_MODULE,
3761 epistemic_kernels::EPISTEMIC_PROPAGATE_CANDIDATES_U8,
3762 )
3763 .ok_or_else(|| {
3764 XlogError::Execution("epistemic candidate propagation kernel not found".to_string())
3765 })?;
3766 let config = LaunchConfig::for_num_elems(candidate_count);
3767
3768 let kernel_timing = self.time_epistemic_gpu_kernel_launch(
3769 "epistemic GPU candidate propagation",
3770 || unsafe {
3771 func.clone().launch(
3774 config,
3775 (
3776 literal_count,
3777 candidate_count,
3778 world_stride,
3779 &workspace.candidate_assumptions,
3780 &mut workspace.world_views,
3781 &mut workspace.rejection_reasons,
3782 ),
3783 )
3784 },
3785 )?;
3786
3787 Ok(trace.with_kernel_timing(kernel_timing))
3788 }
3789
3790 pub fn validate_epistemic_gpu_candidates(
3792 &self,
3793 workspace: &mut EpistemicGpuWorkspace,
3794 literal_count: usize,
3795 candidate_count: usize,
3796 ) -> Result<EpistemicGpuCandidateValidationTrace> {
3797 let mut trace =
3798 EpistemicGpuCandidateValidationTrace::for_counts(literal_count, candidate_count)?;
3799 if trace.candidate_assumption_bytes_checked > workspace.layout.candidate_assumption_bytes {
3800 return Err(XlogError::ResourceExhausted {
3801 context: "epistemic GPU validation candidate workspace".to_string(),
3802 estimated_bytes: trace.candidate_assumption_bytes_checked as u64,
3803 budget_bytes: workspace.layout.candidate_assumption_bytes as u64,
3804 });
3805 }
3806 if trace.rejection_reason_slots_written > workspace.layout.rejection_reason_slots {
3807 return Err(XlogError::ResourceExhausted {
3808 context: "epistemic GPU validation rejection workspace".to_string(),
3809 estimated_bytes: trace.rejection_reason_slots_written as u64,
3810 budget_bytes: workspace.layout.rejection_reason_slots as u64,
3811 });
3812 }
3813 if literal_count > u32::MAX as usize || candidate_count > u32::MAX as usize {
3814 return Err(XlogError::ResourceExhausted {
3815 context: "epistemic GPU validation launch".to_string(),
3816 estimated_bytes: literal_count.max(candidate_count) as u64,
3817 budget_bytes: u32::MAX as u64,
3818 });
3819 }
3820
3821 let world_stride =
3822 workspace.layout.world_view_bytes / workspace.layout.rejection_reason_slots;
3823 if world_stride == 0 || world_stride > u32::MAX as usize {
3824 return Err(XlogError::ResourceExhausted {
3825 context: "epistemic GPU validation world stride".to_string(),
3826 estimated_bytes: world_stride as u64,
3827 budget_bytes: u32::MAX as u64,
3828 });
3829 }
3830 let world_view_bitset_bytes_per_candidate =
3831 world_view_bitset_bytes_per_candidate(literal_count)?;
3832 if world_view_bitset_bytes_per_candidate > world_stride {
3833 return Err(XlogError::ResourceExhausted {
3834 context: "epistemic GPU validation world-view bitset stride".to_string(),
3835 estimated_bytes: world_view_bitset_bytes_per_candidate as u64,
3836 budget_bytes: world_stride as u64,
3837 });
3838 }
3839 let world_view_bitset_bytes =
3840 checked_product(world_view_bitset_bytes_per_candidate, candidate_count)?;
3841 if world_view_bitset_bytes > workspace.layout.world_view_bytes {
3842 return Err(XlogError::ResourceExhausted {
3843 context: "epistemic GPU validation world-view bitsets".to_string(),
3844 estimated_bytes: world_view_bitset_bytes as u64,
3845 budget_bytes: workspace.layout.world_view_bytes as u64,
3846 });
3847 }
3848 trace.world_view_bytes_checked = world_view_bitset_bytes;
3849 if trace.world_view_bytes_checked > workspace.layout.world_view_bytes {
3850 return Err(XlogError::ResourceExhausted {
3851 context: "epistemic GPU validation world-view workspace".to_string(),
3852 estimated_bytes: trace.world_view_bytes_checked as u64,
3853 budget_bytes: workspace.layout.world_view_bytes as u64,
3854 });
3855 }
3856
3857 let literal_count =
3858 checked_u32_dimension(literal_count, "epistemic GPU validation literals")?;
3859 let candidate_count =
3860 checked_u32_dimension(candidate_count, "epistemic GPU validation candidates")?;
3861 let world_stride =
3862 checked_u32_dimension(world_stride, "epistemic GPU validation world stride")?;
3863 let func = self
3864 .provider
3865 .device()
3866 .inner()
3867 .get_func(
3868 EPISTEMIC_MODULE,
3869 epistemic_kernels::EPISTEMIC_VALIDATE_CANDIDATE_BITS_U8,
3870 )
3871 .ok_or_else(|| {
3872 XlogError::Execution("epistemic candidate validation kernel not found".to_string())
3873 })?;
3874 let config = LaunchConfig::for_num_elems(candidate_count);
3875
3876 let kernel_timing = self.time_epistemic_gpu_kernel_launch(
3877 "epistemic GPU candidate validation",
3878 || unsafe {
3879 func.clone().launch(
3882 config,
3883 (
3884 literal_count,
3885 candidate_count,
3886 world_stride,
3887 &workspace.candidate_assumptions,
3888 &workspace.world_views,
3889 &mut workspace.rejection_reasons,
3890 ),
3891 )
3892 },
3893 )?;
3894
3895 Ok(trace.with_kernel_timing(kernel_timing))
3896 }
3897
3898 pub fn populate_epistemic_gpu_model_membership(
3900 &self,
3901 workspace: &mut EpistemicGpuWorkspace,
3902 output: &CudaBuffer,
3903 literal_count: usize,
3904 candidate_count: usize,
3905 reduction_count: usize,
3906 models_per_reduction: usize,
3907 ) -> Result<EpistemicGpuModelMembershipTrace> {
3908 let trace = EpistemicGpuModelMembershipTrace::for_counts(
3909 literal_count,
3910 candidate_count,
3911 reduction_count,
3912 models_per_reduction,
3913 )?;
3914 let candidate_assumption_bytes = checked_product(literal_count, candidate_count)?;
3915 if candidate_assumption_bytes > workspace.layout.candidate_assumption_bytes {
3916 return Err(XlogError::ResourceExhausted {
3917 context: "epistemic GPU model-membership candidate workspace".to_string(),
3918 estimated_bytes: candidate_assumption_bytes as u64,
3919 budget_bytes: workspace.layout.candidate_assumption_bytes as u64,
3920 });
3921 }
3922 if candidate_count > workspace.layout.world_view_bytes {
3923 return Err(XlogError::ResourceExhausted {
3924 context: "epistemic GPU model-membership world-view workspace".to_string(),
3925 estimated_bytes: candidate_count as u64,
3926 budget_bytes: workspace.layout.world_view_bytes as u64,
3927 });
3928 }
3929 if trace.model_membership_bytes_written > workspace.layout.model_membership_bytes {
3930 return Err(XlogError::ResourceExhausted {
3931 context: "epistemic GPU model-membership workspace".to_string(),
3932 estimated_bytes: trace.model_membership_bytes_written as u64,
3933 budget_bytes: workspace.layout.model_membership_bytes as u64,
3934 });
3935 }
3936 if trace.rejection_reason_slots_checked > workspace.layout.rejection_reason_slots {
3937 return Err(XlogError::ResourceExhausted {
3938 context: "epistemic GPU model-membership rejection workspace".to_string(),
3939 estimated_bytes: trace.rejection_reason_slots_checked as u64,
3940 budget_bytes: workspace.layout.rejection_reason_slots as u64,
3941 });
3942 }
3943 if trace.model_membership_bytes_written > u32::MAX as usize {
3944 return Err(XlogError::ResourceExhausted {
3945 context: "epistemic GPU model-membership launch".to_string(),
3946 estimated_bytes: trace.model_membership_bytes_written as u64,
3947 budget_bytes: u32::MAX as u64,
3948 });
3949 }
3950 if literal_count > u32::MAX as usize
3951 || candidate_count > u32::MAX as usize
3952 || reduction_count > u32::MAX as usize
3953 || models_per_reduction > u32::MAX as usize
3954 {
3955 return Err(XlogError::ResourceExhausted {
3956 context: "epistemic GPU model-membership dimensions".to_string(),
3957 estimated_bytes: literal_count
3958 .max(candidate_count)
3959 .max(reduction_count)
3960 .max(models_per_reduction) as u64,
3961 budget_bytes: u32::MAX as u64,
3962 });
3963 }
3964
3965 let world_stride =
3966 workspace.layout.world_view_bytes / workspace.layout.rejection_reason_slots;
3967 if world_stride == 0 || world_stride > u32::MAX as usize {
3968 return Err(XlogError::ResourceExhausted {
3969 context: "epistemic GPU model-membership world stride".to_string(),
3970 estimated_bytes: world_stride as u64,
3971 budget_bytes: u32::MAX as u64,
3972 });
3973 }
3974
3975 let literal_count = literal_count as u32;
3976 let candidate_count = candidate_count as u32;
3977 let reduction_count = reduction_count as u32;
3978 let models_per_reduction = models_per_reduction as u32;
3979 let world_stride = world_stride as u32;
3980 let total = trace.model_membership_bytes_written as u32;
3981 let func = self
3982 .provider
3983 .device()
3984 .inner()
3985 .get_func(
3986 EPISTEMIC_MODULE,
3987 epistemic_kernels::EPISTEMIC_POPULATE_MODEL_MEMBERSHIP_U8,
3988 )
3989 .ok_or_else(|| {
3990 XlogError::Execution("epistemic model-membership kernel not found".to_string())
3991 })?;
3992 let config = LaunchConfig::for_num_elems(total);
3993
3994 let kernel_timing =
3995 self.time_epistemic_gpu_kernel_launch("epistemic GPU model membership", || unsafe {
3996 func.clone().launch(
4000 config,
4001 (
4002 literal_count,
4003 candidate_count,
4004 reduction_count,
4005 models_per_reduction,
4006 world_stride,
4007 output.num_rows_device(),
4008 &workspace.candidate_assumptions,
4009 &workspace.world_views,
4010 &mut workspace.model_membership,
4011 &mut workspace.rejection_reasons,
4012 ),
4013 )
4014 })?;
4015
4016 Ok(trace.with_kernel_timing(kernel_timing))
4017 }
4018
4019 pub fn populate_epistemic_gpu_model_membership_from_tuple_sources(
4021 &self,
4022 workspace: &mut EpistemicGpuWorkspace,
4023 output: &CudaBuffer,
4024 gpu_plan: &EpistemicGpuPlan,
4025 candidate_count: usize,
4026 models_per_reduction: usize,
4027 ) -> Result<EpistemicGpuModelMembershipTrace> {
4028 gpu_plan.validate_tuple_membership_bindings()?;
4029
4030 let literal_count = gpu_plan.epistemic_literals.len();
4031 let reduction_count = gpu_plan.reductions.len();
4032 let tuple_source_key_column_count = gpu_plan
4033 .tuple_membership_bindings
4034 .iter()
4035 .try_fold(0usize, |acc, binding| {
4036 checked_sum(acc, binding.key_columns.len())
4037 })?;
4038 let mut trace =
4039 EpistemicGpuModelMembershipTrace::for_stable_model_tuple_sources_with_key_columns(
4040 literal_count,
4041 candidate_count,
4042 reduction_count,
4043 models_per_reduction,
4044 gpu_plan.tuple_membership_bindings.len(),
4045 tuple_source_key_column_count,
4046 )?;
4047 trace.output_row_count_device_reads = trace.kernel_launches;
4048 let candidate_assumption_bytes = checked_product(literal_count, candidate_count)?;
4049 if candidate_assumption_bytes > workspace.layout.candidate_assumption_bytes {
4050 return Err(XlogError::ResourceExhausted {
4051 context: "epistemic GPU model-membership candidate workspace".to_string(),
4052 estimated_bytes: candidate_assumption_bytes as u64,
4053 budget_bytes: workspace.layout.candidate_assumption_bytes as u64,
4054 });
4055 }
4056 if candidate_count > workspace.layout.world_view_bytes {
4057 return Err(XlogError::ResourceExhausted {
4058 context: "epistemic GPU model-membership world-view workspace".to_string(),
4059 estimated_bytes: candidate_count as u64,
4060 budget_bytes: workspace.layout.world_view_bytes as u64,
4061 });
4062 }
4063 if trace.model_membership_bytes_written > workspace.layout.model_membership_bytes {
4064 return Err(XlogError::ResourceExhausted {
4065 context: "epistemic GPU model-membership workspace".to_string(),
4066 estimated_bytes: trace.model_membership_bytes_written as u64,
4067 budget_bytes: workspace.layout.model_membership_bytes as u64,
4068 });
4069 }
4070 if trace.rejection_reason_slots_checked > workspace.layout.rejection_reason_slots {
4071 return Err(XlogError::ResourceExhausted {
4072 context: "epistemic GPU model-membership rejection workspace".to_string(),
4073 estimated_bytes: trace.rejection_reason_slots_checked as u64,
4074 budget_bytes: workspace.layout.rejection_reason_slots as u64,
4075 });
4076 }
4077 if trace.model_membership_bytes_written > u32::MAX as usize {
4078 return Err(XlogError::ResourceExhausted {
4079 context: "epistemic GPU model-membership launch".to_string(),
4080 estimated_bytes: trace.model_membership_bytes_written as u64,
4081 budget_bytes: u32::MAX as u64,
4082 });
4083 }
4084 if literal_count > u32::MAX as usize
4085 || candidate_count > u32::MAX as usize
4086 || reduction_count > u32::MAX as usize
4087 || models_per_reduction > u32::MAX as usize
4088 {
4089 return Err(XlogError::ResourceExhausted {
4090 context: "epistemic GPU model-membership dimensions".to_string(),
4091 estimated_bytes: literal_count
4092 .max(candidate_count)
4093 .max(reduction_count)
4094 .max(models_per_reduction) as u64,
4095 budget_bytes: u32::MAX as u64,
4096 });
4097 }
4098
4099 let world_stride =
4100 workspace.layout.world_view_bytes / workspace.layout.rejection_reason_slots;
4101 if world_stride == 0 || world_stride > u32::MAX as usize {
4102 return Err(XlogError::ResourceExhausted {
4103 context: "epistemic GPU model-membership world stride".to_string(),
4104 estimated_bytes: world_stride as u64,
4105 budget_bytes: u32::MAX as u64,
4106 });
4107 }
4108
4109 let per_binding_launch_elems = checked_product(candidate_count, models_per_reduction)?;
4110 if per_binding_launch_elems > u32::MAX as usize {
4111 return Err(XlogError::ResourceExhausted {
4112 context: "epistemic GPU model-membership tuple-source launch".to_string(),
4113 estimated_bytes: per_binding_launch_elems as u64,
4114 budget_bytes: u32::MAX as u64,
4115 });
4116 }
4117
4118 let mut tuple_sources = Vec::with_capacity(gpu_plan.tuple_membership_bindings.len());
4119 for binding in &gpu_plan.tuple_membership_bindings {
4120 let source_relation = self
4121 .resolve_modal_tuple_source(binding.predicate.as_str(), binding.arity)
4122 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
4123 construct: "epistemic GPU stable-model tuple membership".to_string(),
4124 context: format!(
4125 "missing reduced stable-model tuple source relation {} (arity {})",
4126 binding.predicate, binding.arity
4127 ),
4128 })?;
4129 if source_relation.arity() != binding.arity {
4130 return Err(XlogError::UnsupportedEpistemicConstruct {
4131 construct: "epistemic GPU stable-model tuple membership".to_string(),
4132 context: format!(
4133 "tuple source relation {} arity {} does not match binding arity {}",
4134 binding.predicate,
4135 source_relation.arity(),
4136 binding.arity
4137 ),
4138 });
4139 }
4140 let has_bound_value_keys = binding
4141 .key_terms
4142 .iter()
4143 .any(|term| matches!(term, EirTerm::Variable(_)));
4144 let has_value_level_keys = binding
4149 .key_terms
4150 .iter()
4151 .any(|term| matches!(term, EirTerm::Variable(_) | EirTerm::Anonymous));
4152 match binding.key_columns.as_slice() {
4153 [] => tuple_sources.push(TupleSourceLaunch::ArityZero {
4154 literal_index: binding.literal_index as u32,
4155 reduction_index: binding.reduction_index as u32,
4156 negated: binding.negated as u8,
4157 row_count: source_relation.num_rows_device(),
4158 }),
4159 &[key_col] if !has_value_level_keys => {
4160 let key_col0 = source_relation.column(key_col).ok_or_else(|| {
4161 XlogError::UnsupportedEpistemicConstruct {
4162 construct: "epistemic GPU stable-model tuple membership".to_string(),
4163 context: format!(
4164 "tuple source relation {} missing key column {}",
4165 binding.predicate, key_col
4166 ),
4167 }
4168 })?;
4169 let key_col0_type =
4170 source_relation
4171 .schema()
4172 .column_type(key_col)
4173 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
4174 construct: "epistemic GPU stable-model tuple membership"
4175 .to_string(),
4176 context: format!(
4177 "tuple source relation {} missing schema for key column {}",
4178 binding.predicate, key_col
4179 ),
4180 })?;
4181 let key_col0_width = key_col0_type.size_bytes();
4182 let key_col0_expectation =
4183 TupleKeyExpectation::from_term(&binding.key_terms[0], key_col0_type)?;
4184 if key_col0_width > u32::MAX as usize {
4185 return Err(XlogError::ResourceExhausted {
4186 context: "epistemic GPU tuple-key column width".to_string(),
4187 estimated_bytes: key_col0_width as u64,
4188 budget_bytes: u32::MAX as u64,
4189 });
4190 }
4191 tuple_sources.push(TupleSourceLaunch::ArityOne {
4192 literal_index: binding.literal_index as u32,
4193 reduction_index: binding.reduction_index as u32,
4194 negated: binding.negated as u8,
4195 row_count: source_relation.num_rows_device(),
4196 key_col0,
4197 key_col0_width: key_col0_width as u32,
4198 expected_key_col0_bits: key_col0_expectation.bits,
4199 expected_key_col0_type_code: key_col0_expectation.type_code,
4200 });
4201 }
4202 &[key_col0, key_col1] if !has_value_level_keys => {
4203 let key_col0_ref = source_relation.column(key_col0).ok_or_else(|| {
4204 XlogError::UnsupportedEpistemicConstruct {
4205 construct: "epistemic GPU stable-model tuple membership".to_string(),
4206 context: format!(
4207 "tuple source relation {} missing key column {}",
4208 binding.predicate, key_col0
4209 ),
4210 }
4211 })?;
4212 let key_col1_ref = source_relation.column(key_col1).ok_or_else(|| {
4213 XlogError::UnsupportedEpistemicConstruct {
4214 construct: "epistemic GPU stable-model tuple membership".to_string(),
4215 context: format!(
4216 "tuple source relation {} missing key column {}",
4217 binding.predicate, key_col1
4218 ),
4219 }
4220 })?;
4221 let key_col0_type =
4222 source_relation
4223 .schema()
4224 .column_type(key_col0)
4225 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
4226 construct: "epistemic GPU stable-model tuple membership"
4227 .to_string(),
4228 context: format!(
4229 "tuple source relation {} missing schema for key column {}",
4230 binding.predicate, key_col0
4231 ),
4232 })?;
4233 let key_col1_type =
4234 source_relation
4235 .schema()
4236 .column_type(key_col1)
4237 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
4238 construct: "epistemic GPU stable-model tuple membership"
4239 .to_string(),
4240 context: format!(
4241 "tuple source relation {} missing schema for key column {}",
4242 binding.predicate, key_col1
4243 ),
4244 })?;
4245 let key_col0_width = key_col0_type.size_bytes();
4246 let key_col1_width = key_col1_type.size_bytes();
4247 let key_col0_expectation =
4248 TupleKeyExpectation::from_term(&binding.key_terms[0], key_col0_type)?;
4249 let key_col1_expectation =
4250 TupleKeyExpectation::from_term(&binding.key_terms[1], key_col1_type)?;
4251 let max_width = key_col0_width.max(key_col1_width);
4252 if max_width > u32::MAX as usize {
4253 return Err(XlogError::ResourceExhausted {
4254 context: "epistemic GPU tuple-key column width".to_string(),
4255 estimated_bytes: max_width as u64,
4256 budget_bytes: u32::MAX as u64,
4257 });
4258 }
4259 tuple_sources.push(TupleSourceLaunch::ArityTwo {
4260 literal_index: binding.literal_index as u32,
4261 reduction_index: binding.reduction_index as u32,
4262 negated: binding.negated as u8,
4263 row_count: source_relation.num_rows_device(),
4264 key_col0: key_col0_ref,
4265 key_col0_width: key_col0_width as u32,
4266 expected_key_col0_bits: key_col0_expectation.bits,
4267 expected_key_col0_type_code: key_col0_expectation.type_code,
4268 key_col1: key_col1_ref,
4269 key_col1_width: key_col1_width as u32,
4270 expected_key_col1_bits: key_col1_expectation.bits,
4271 expected_key_col1_type_code: key_col1_expectation.type_code,
4272 });
4273 }
4274 &[key_col0, key_col1, key_col2] if !has_value_level_keys => {
4275 let key_col0_ref = source_relation.column(key_col0).ok_or_else(|| {
4276 XlogError::UnsupportedEpistemicConstruct {
4277 construct: "epistemic GPU stable-model tuple membership".to_string(),
4278 context: format!(
4279 "tuple source relation {} missing key column {}",
4280 binding.predicate, key_col0
4281 ),
4282 }
4283 })?;
4284 let key_col1_ref = source_relation.column(key_col1).ok_or_else(|| {
4285 XlogError::UnsupportedEpistemicConstruct {
4286 construct: "epistemic GPU stable-model tuple membership".to_string(),
4287 context: format!(
4288 "tuple source relation {} missing key column {}",
4289 binding.predicate, key_col1
4290 ),
4291 }
4292 })?;
4293 let key_col2_ref = source_relation.column(key_col2).ok_or_else(|| {
4294 XlogError::UnsupportedEpistemicConstruct {
4295 construct: "epistemic GPU stable-model tuple membership".to_string(),
4296 context: format!(
4297 "tuple source relation {} missing key column {}",
4298 binding.predicate, key_col2
4299 ),
4300 }
4301 })?;
4302 let key_col0_type =
4303 source_relation
4304 .schema()
4305 .column_type(key_col0)
4306 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
4307 construct: "epistemic GPU stable-model tuple membership"
4308 .to_string(),
4309 context: format!(
4310 "tuple source relation {} missing schema for key column {}",
4311 binding.predicate, key_col0
4312 ),
4313 })?;
4314 let key_col1_type =
4315 source_relation
4316 .schema()
4317 .column_type(key_col1)
4318 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
4319 construct: "epistemic GPU stable-model tuple membership"
4320 .to_string(),
4321 context: format!(
4322 "tuple source relation {} missing schema for key column {}",
4323 binding.predicate, key_col1
4324 ),
4325 })?;
4326 let key_col2_type =
4327 source_relation
4328 .schema()
4329 .column_type(key_col2)
4330 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
4331 construct: "epistemic GPU stable-model tuple membership"
4332 .to_string(),
4333 context: format!(
4334 "tuple source relation {} missing schema for key column {}",
4335 binding.predicate, key_col2
4336 ),
4337 })?;
4338 let key_col0_width = key_col0_type.size_bytes();
4339 let key_col1_width = key_col1_type.size_bytes();
4340 let key_col2_width = key_col2_type.size_bytes();
4341 let key_col0_expectation =
4342 TupleKeyExpectation::from_term(&binding.key_terms[0], key_col0_type)?;
4343 let key_col1_expectation =
4344 TupleKeyExpectation::from_term(&binding.key_terms[1], key_col1_type)?;
4345 let key_col2_expectation =
4346 TupleKeyExpectation::from_term(&binding.key_terms[2], key_col2_type)?;
4347 let max_width = key_col0_width.max(key_col1_width).max(key_col2_width);
4348 if max_width > u32::MAX as usize {
4349 return Err(XlogError::ResourceExhausted {
4350 context: "epistemic GPU tuple-key column width".to_string(),
4351 estimated_bytes: max_width as u64,
4352 budget_bytes: u32::MAX as u64,
4353 });
4354 }
4355 tuple_sources.push(TupleSourceLaunch::ArityThree {
4356 literal_index: binding.literal_index as u32,
4357 reduction_index: binding.reduction_index as u32,
4358 negated: binding.negated as u8,
4359 row_count: source_relation.num_rows_device(),
4360 key_col0: key_col0_ref,
4361 key_col0_width: key_col0_width as u32,
4362 expected_key_col0_bits: key_col0_expectation.bits,
4363 expected_key_col0_type_code: key_col0_expectation.type_code,
4364 key_col1: key_col1_ref,
4365 key_col1_width: key_col1_width as u32,
4366 expected_key_col1_bits: key_col1_expectation.bits,
4367 expected_key_col1_type_code: key_col1_expectation.type_code,
4368 key_col2: key_col2_ref,
4369 key_col2_width: key_col2_width as u32,
4370 expected_key_col2_bits: key_col2_expectation.bits,
4371 expected_key_col2_type_code: key_col2_expectation.type_code,
4372 });
4373 }
4374 key_columns => {
4375 if key_columns.len() > u32::MAX as usize {
4376 return Err(XlogError::ResourceExhausted {
4377 context: "epistemic GPU tuple-key arity".to_string(),
4378 estimated_bytes: key_columns.len() as u64,
4379 budget_bytes: u32::MAX as u64,
4380 });
4381 }
4382
4383 let mut key_col_ptrs_host = Vec::with_capacity(key_columns.len());
4384 let mut key_col_widths_host = Vec::with_capacity(key_columns.len());
4385 let mut expected_key_bits_host = Vec::with_capacity(key_columns.len());
4386 let mut expected_key_type_codes_host = Vec::with_capacity(key_columns.len());
4387 let mut tuple_key_match_modes_host = Vec::with_capacity(key_columns.len());
4388 let mut bound_value_col_ptrs_host = Vec::with_capacity(key_columns.len());
4389 let mut bound_value_col_widths_host = Vec::with_capacity(key_columns.len());
4390 for (term_index, &key_col) in key_columns.iter().enumerate() {
4391 let key_col_ref = source_relation.column(key_col).ok_or_else(|| {
4392 XlogError::UnsupportedEpistemicConstruct {
4393 construct: "epistemic GPU stable-model tuple membership"
4394 .to_string(),
4395 context: format!(
4396 "tuple source relation {} missing key column {}",
4397 binding.predicate, key_col
4398 ),
4399 }
4400 })?;
4401 let key_col_type = source_relation
4402 .schema()
4403 .column_type(key_col)
4404 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
4405 construct: "epistemic GPU stable-model tuple membership"
4406 .to_string(),
4407 context: format!(
4408 "tuple source relation {} missing schema for key column {}",
4409 binding.predicate, key_col
4410 ),
4411 })?;
4412 let key_col_width = key_col_type.size_bytes();
4413 if key_col_width > u32::MAX as usize {
4414 return Err(XlogError::ResourceExhausted {
4415 context: "epistemic GPU tuple-key column width".to_string(),
4416 estimated_bytes: key_col_width as u64,
4417 budget_bytes: u32::MAX as u64,
4418 });
4419 }
4420
4421 key_col_ptrs_host.push(*key_col_ref.device_ptr());
4422 key_col_widths_host.push(key_col_width as u32);
4423 match &binding.key_terms[term_index] {
4424 EirTerm::Variable(variable_name) => {
4425 let bound_col_index = binding.bound_output_columns[term_index]
4426 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
4427 construct: "epistemic GPU bound tuple-key matching"
4428 .to_string(),
4429 context: format!(
4430 "tuple key variable {variable_name} has no reduced \
4431 output column binding"
4432 ),
4433 })?;
4434 let bound_col =
4435 output.column(bound_col_index).ok_or_else(|| {
4436 XlogError::UnsupportedEpistemicConstruct {
4437 construct: "epistemic GPU bound tuple-key matching"
4438 .to_string(),
4439 context: format!(
4440 "reduced output is missing device column \
4441 {bound_col_index} for variable {variable_name}"
4442 ),
4443 }
4444 })?;
4445 let bound_col_type =
4446 output.schema().column_type(bound_col_index).ok_or_else(
4447 || XlogError::UnsupportedEpistemicConstruct {
4448 construct: "epistemic GPU bound tuple-key matching"
4449 .to_string(),
4450 context: format!(
4451 "reduced output is missing schema for variable \
4452 {variable_name}"
4453 ),
4454 },
4455 )?;
4456 if bound_col_type != key_col_type {
4457 return Err(XlogError::UnsupportedEpistemicConstruct {
4458 construct: "epistemic GPU bound tuple-key matching"
4459 .to_string(),
4460 context: format!(
4461 "bound variable {variable_name} has output type \
4462 {bound_col_type:?}, but tuple source {} key column \
4463 {} has type {key_col_type:?}",
4464 binding.predicate, key_col
4465 ),
4466 });
4467 }
4468 let bound_col_width = bound_col_type.size_bytes();
4469 if bound_col_width > u32::MAX as usize {
4470 return Err(XlogError::ResourceExhausted {
4471 context: "epistemic GPU bound tuple-key column width"
4472 .to_string(),
4473 estimated_bytes: bound_col_width as u64,
4474 budget_bytes: u32::MAX as u64,
4475 });
4476 }
4477
4478 expected_key_bits_host.push(0);
4479 expected_key_type_codes_host.push(key_col_type.to_code());
4480 tuple_key_match_modes_host.push(TUPLE_KEY_MATCH_MODE_BOUND_OUTPUT);
4481 bound_value_col_ptrs_host.push(*bound_col.device_ptr());
4482 bound_value_col_widths_host.push(bound_col_width as u32);
4483 }
4484 EirTerm::Anonymous => {
4485 expected_key_bits_host.push(0);
4490 expected_key_type_codes_host.push(key_col_type.to_code());
4491 tuple_key_match_modes_host.push(TUPLE_KEY_MATCH_MODE_WILDCARD);
4492 bound_value_col_ptrs_host.push(0);
4493 bound_value_col_widths_host.push(0);
4494 }
4495 term => {
4496 let expectation =
4497 TupleKeyExpectation::from_term(term, key_col_type)?;
4498 expected_key_bits_host.push(expectation.bits);
4499 expected_key_type_codes_host.push(expectation.type_code);
4500 tuple_key_match_modes_host.push(TUPLE_KEY_MATCH_MODE_GROUND);
4501 bound_value_col_ptrs_host.push(0);
4502 bound_value_col_widths_host.push(0);
4503 }
4504 }
4505 }
4506
4507 let memory = self.provider.memory();
4508 let mut key_col_ptrs = memory.alloc::<u64>(key_columns.len())?;
4509 let mut key_col_widths = memory.alloc::<u32>(key_columns.len())?;
4510 let mut expected_key_bits = memory.alloc::<u64>(key_columns.len())?;
4511 let mut expected_key_type_codes = memory.alloc::<u8>(key_columns.len())?;
4512 let mut tuple_key_match_modes = memory.alloc::<u8>(key_columns.len())?;
4513 let mut bound_value_col_ptrs = memory.alloc::<u64>(key_columns.len())?;
4514 let mut bound_value_col_widths = memory.alloc::<u32>(key_columns.len())?;
4515 self.provider
4516 .htod_launch_metadata_sync_copy_into(&key_col_ptrs_host, &mut key_col_ptrs)
4517 .map_err(|e| {
4518 XlogError::execution_ctx(
4519 "epistemic GPU tuple-key metadata",
4520 "upload key column pointers",
4521 &e,
4522 )
4523 })?;
4524 self.provider
4525 .htod_launch_metadata_sync_copy_into(
4526 &key_col_widths_host,
4527 &mut key_col_widths,
4528 )
4529 .map_err(|e| {
4530 XlogError::execution_ctx(
4531 "epistemic GPU tuple-key metadata",
4532 "upload key column widths",
4533 &e,
4534 )
4535 })?;
4536 self.provider
4537 .htod_launch_metadata_sync_copy_into(
4538 &expected_key_bits_host,
4539 &mut expected_key_bits,
4540 )
4541 .map_err(|e| {
4542 XlogError::execution_ctx(
4543 "epistemic GPU tuple-key metadata",
4544 "upload expected key bits",
4545 &e,
4546 )
4547 })?;
4548 self.provider
4549 .htod_launch_metadata_sync_copy_into(
4550 &expected_key_type_codes_host,
4551 &mut expected_key_type_codes,
4552 )
4553 .map_err(|e| {
4554 XlogError::execution_ctx(
4555 "epistemic GPU tuple-key metadata",
4556 "upload expected key type codes",
4557 &e,
4558 )
4559 })?;
4560 self.provider
4561 .htod_launch_metadata_sync_copy_into(
4562 &tuple_key_match_modes_host,
4563 &mut tuple_key_match_modes,
4564 )
4565 .map_err(|e| {
4566 XlogError::execution_ctx(
4567 "epistemic GPU tuple-key metadata",
4568 "upload tuple key match modes",
4569 &e,
4570 )
4571 })?;
4572 self.provider
4573 .htod_launch_metadata_sync_copy_into(
4574 &bound_value_col_ptrs_host,
4575 &mut bound_value_col_ptrs,
4576 )
4577 .map_err(|e| {
4578 XlogError::execution_ctx(
4579 "epistemic GPU tuple-key metadata",
4580 "upload bound value column pointers",
4581 &e,
4582 )
4583 })?;
4584 self.provider
4585 .htod_launch_metadata_sync_copy_into(
4586 &bound_value_col_widths_host,
4587 &mut bound_value_col_widths,
4588 )
4589 .map_err(|e| {
4590 XlogError::execution_ctx(
4591 "epistemic GPU tuple-key metadata",
4592 "upload bound value column widths",
4593 &e,
4594 )
4595 })?;
4596
4597 tuple_sources.push(TupleSourceLaunch::ArityN {
4598 literal_index: binding.literal_index as u32,
4599 reduction_index: binding.reduction_index as u32,
4600 negated: binding.negated as u8,
4601 row_count: source_relation.num_rows_device(),
4602 bound_value_row_count: output.num_rows_device(),
4603 key_col_count: key_columns.len() as u32,
4604 key_col_ptrs,
4605 key_col_widths,
4606 expected_key_bits,
4607 expected_key_type_codes,
4608 tuple_key_match_modes,
4609 bound_value_col_ptrs,
4610 bound_value_col_widths,
4611 has_bound_value_keys: has_bound_value_keys as u8,
4612 });
4613 }
4614 }
4615 }
4616
4617 let literal_count = literal_count as u32;
4618 let candidate_count = candidate_count as u32;
4619 let reduction_count = reduction_count as u32;
4620 let models_per_reduction = models_per_reduction as u32;
4621 let world_stride = world_stride as u32;
4622 let func = self
4623 .provider
4624 .device()
4625 .inner()
4626 .get_func(
4627 EPISTEMIC_MODULE,
4628 epistemic_kernels::EPISTEMIC_POPULATE_MODEL_MEMBERSHIP_FROM_TUPLE_SOURCE_U8,
4629 )
4630 .ok_or_else(|| {
4631 XlogError::Execution(
4632 "epistemic tuple-source model-membership kernel not found".to_string(),
4633 )
4634 })?;
4635 let func_arity1 = self
4636 .provider
4637 .device()
4638 .inner()
4639 .get_func(
4640 EPISTEMIC_MODULE,
4641 epistemic_kernels::EPISTEMIC_POPULATE_MODEL_MEMBERSHIP_FROM_TUPLE_SOURCE_ARITY1_U8,
4642 )
4643 .ok_or_else(|| {
4644 XlogError::Execution(
4645 "epistemic arity-one tuple-source model-membership kernel not found"
4646 .to_string(),
4647 )
4648 })?;
4649 let func_arity2 = self
4650 .provider
4651 .device()
4652 .inner()
4653 .get_func(
4654 EPISTEMIC_MODULE,
4655 epistemic_kernels::EPISTEMIC_POPULATE_MODEL_MEMBERSHIP_FROM_TUPLE_SOURCE_ARITY2_U8,
4656 )
4657 .ok_or_else(|| {
4658 XlogError::Execution(
4659 "epistemic arity-two tuple-source model-membership kernel not found"
4660 .to_string(),
4661 )
4662 })?;
4663 let func_arity3 = self
4664 .provider
4665 .device()
4666 .inner()
4667 .get_func(
4668 EPISTEMIC_MODULE,
4669 epistemic_kernels::EPISTEMIC_POPULATE_MODEL_MEMBERSHIP_FROM_TUPLE_SOURCE_ARITY3_U8,
4670 )
4671 .ok_or_else(|| {
4672 XlogError::Execution(
4673 "epistemic arity-three tuple-source model-membership kernel not found"
4674 .to_string(),
4675 )
4676 })?;
4677 let func_arity_n = self
4678 .provider
4679 .device()
4680 .inner()
4681 .get_func(
4682 EPISTEMIC_MODULE,
4683 epistemic_kernels::EPISTEMIC_POPULATE_MODEL_MEMBERSHIP_FROM_TUPLE_SOURCE_ARITY_N_U8,
4684 )
4685 .ok_or_else(|| {
4686 XlogError::Execution(
4687 "epistemic generic-arity tuple-source model-membership kernel not found"
4688 .to_string(),
4689 )
4690 })?;
4691 let config = LaunchConfig::for_num_elems(per_binding_launch_elems as u32);
4692
4693 let mut kernel_timings = Vec::with_capacity(tuple_sources.len());
4694 for tuple_source in &tuple_sources {
4695 let kernel_timing = self.time_epistemic_gpu_kernel_launch(
4696 "epistemic GPU tuple-source model membership",
4697 || unsafe {
4698 match tuple_source {
4699 TupleSourceLaunch::ArityZero {
4700 literal_index,
4701 reduction_index,
4702 negated,
4703 row_count,
4704 } => {
4705 let mut params: Vec<*mut c_void> = vec![
4709 literal_count.as_kernel_param(),
4710 candidate_count.as_kernel_param(),
4711 reduction_count.as_kernel_param(),
4712 models_per_reduction.as_kernel_param(),
4713 world_stride.as_kernel_param(),
4714 literal_index.as_kernel_param(),
4715 reduction_index.as_kernel_param(),
4716 negated.as_kernel_param(),
4717 output.num_rows_device().as_kernel_param(),
4718 row_count.as_kernel_param(),
4719 (&workspace.candidate_assumptions).as_kernel_param(),
4720 (&workspace.world_views).as_kernel_param(),
4721 (&workspace.model_membership).as_kernel_param(),
4722 (&workspace.rejection_reasons).as_kernel_param(),
4723 ];
4724 func.clone().launch(config, &mut params)?;
4725 }
4726 TupleSourceLaunch::ArityOne {
4727 literal_index,
4728 reduction_index,
4729 negated,
4730 row_count,
4731 key_col0,
4732 key_col0_width,
4733 expected_key_col0_bits,
4734 expected_key_col0_type_code,
4735 } => {
4736 let mut params: Vec<*mut c_void> = vec![
4740 literal_count.as_kernel_param(),
4741 candidate_count.as_kernel_param(),
4742 reduction_count.as_kernel_param(),
4743 models_per_reduction.as_kernel_param(),
4744 world_stride.as_kernel_param(),
4745 literal_index.as_kernel_param(),
4746 reduction_index.as_kernel_param(),
4747 negated.as_kernel_param(),
4748 output.num_rows_device().as_kernel_param(),
4749 row_count.as_kernel_param(),
4750 key_col0.as_kernel_param(),
4751 key_col0_width.as_kernel_param(),
4752 expected_key_col0_bits.as_kernel_param(),
4753 expected_key_col0_type_code.as_kernel_param(),
4754 (&workspace.candidate_assumptions).as_kernel_param(),
4755 (&workspace.world_views).as_kernel_param(),
4756 (&workspace.model_membership).as_kernel_param(),
4757 (&workspace.rejection_reasons).as_kernel_param(),
4758 ];
4759 func_arity1.clone().launch(config, &mut params)?;
4760 }
4761 TupleSourceLaunch::ArityTwo {
4762 literal_index,
4763 reduction_index,
4764 negated,
4765 row_count,
4766 key_col0,
4767 key_col0_width,
4768 expected_key_col0_bits,
4769 expected_key_col0_type_code,
4770 key_col1,
4771 key_col1_width,
4772 expected_key_col1_bits,
4773 expected_key_col1_type_code,
4774 } => {
4775 let mut params: Vec<*mut c_void> = vec![
4779 literal_count.as_kernel_param(),
4780 candidate_count.as_kernel_param(),
4781 reduction_count.as_kernel_param(),
4782 models_per_reduction.as_kernel_param(),
4783 world_stride.as_kernel_param(),
4784 literal_index.as_kernel_param(),
4785 reduction_index.as_kernel_param(),
4786 negated.as_kernel_param(),
4787 output.num_rows_device().as_kernel_param(),
4788 row_count.as_kernel_param(),
4789 key_col0.as_kernel_param(),
4790 key_col0_width.as_kernel_param(),
4791 expected_key_col0_bits.as_kernel_param(),
4792 expected_key_col0_type_code.as_kernel_param(),
4793 key_col1.as_kernel_param(),
4794 key_col1_width.as_kernel_param(),
4795 expected_key_col1_bits.as_kernel_param(),
4796 expected_key_col1_type_code.as_kernel_param(),
4797 (&workspace.candidate_assumptions).as_kernel_param(),
4798 (&workspace.world_views).as_kernel_param(),
4799 (&workspace.model_membership).as_kernel_param(),
4800 (&workspace.rejection_reasons).as_kernel_param(),
4801 ];
4802 func_arity2.clone().launch(config, &mut params)?;
4803 }
4804 TupleSourceLaunch::ArityThree {
4805 literal_index,
4806 reduction_index,
4807 negated,
4808 row_count,
4809 key_col0,
4810 key_col0_width,
4811 expected_key_col0_bits,
4812 expected_key_col0_type_code,
4813 key_col1,
4814 key_col1_width,
4815 expected_key_col1_bits,
4816 expected_key_col1_type_code,
4817 key_col2,
4818 key_col2_width,
4819 expected_key_col2_bits,
4820 expected_key_col2_type_code,
4821 } => {
4822 let mut params: Vec<*mut c_void> = vec![
4826 literal_count.as_kernel_param(),
4827 candidate_count.as_kernel_param(),
4828 reduction_count.as_kernel_param(),
4829 models_per_reduction.as_kernel_param(),
4830 world_stride.as_kernel_param(),
4831 literal_index.as_kernel_param(),
4832 reduction_index.as_kernel_param(),
4833 negated.as_kernel_param(),
4834 output.num_rows_device().as_kernel_param(),
4835 row_count.as_kernel_param(),
4836 key_col0.as_kernel_param(),
4837 key_col0_width.as_kernel_param(),
4838 expected_key_col0_bits.as_kernel_param(),
4839 expected_key_col0_type_code.as_kernel_param(),
4840 key_col1.as_kernel_param(),
4841 key_col1_width.as_kernel_param(),
4842 expected_key_col1_bits.as_kernel_param(),
4843 expected_key_col1_type_code.as_kernel_param(),
4844 key_col2.as_kernel_param(),
4845 key_col2_width.as_kernel_param(),
4846 expected_key_col2_bits.as_kernel_param(),
4847 expected_key_col2_type_code.as_kernel_param(),
4848 (&workspace.candidate_assumptions).as_kernel_param(),
4849 (&workspace.world_views).as_kernel_param(),
4850 (&workspace.model_membership).as_kernel_param(),
4851 (&workspace.rejection_reasons).as_kernel_param(),
4852 ];
4853 func_arity3.clone().launch(config, &mut params)?;
4854 }
4855 TupleSourceLaunch::ArityN {
4856 literal_index,
4857 reduction_index,
4858 negated,
4859 row_count,
4860 bound_value_row_count,
4861 key_col_count,
4862 key_col_ptrs,
4863 key_col_widths,
4864 expected_key_bits,
4865 expected_key_type_codes,
4866 tuple_key_match_modes,
4867 bound_value_col_ptrs,
4868 bound_value_col_widths,
4869 has_bound_value_keys,
4870 } => {
4871 let mut params: Vec<*mut c_void> = vec![
4876 literal_count.as_kernel_param(),
4877 candidate_count.as_kernel_param(),
4878 reduction_count.as_kernel_param(),
4879 models_per_reduction.as_kernel_param(),
4880 world_stride.as_kernel_param(),
4881 literal_index.as_kernel_param(),
4882 reduction_index.as_kernel_param(),
4883 negated.as_kernel_param(),
4884 output.num_rows_device().as_kernel_param(),
4885 row_count.as_kernel_param(),
4886 key_col_ptrs.as_kernel_param(),
4887 key_col_widths.as_kernel_param(),
4888 expected_key_bits.as_kernel_param(),
4889 expected_key_type_codes.as_kernel_param(),
4890 tuple_key_match_modes.as_kernel_param(),
4891 bound_value_col_ptrs.as_kernel_param(),
4892 bound_value_col_widths.as_kernel_param(),
4893 bound_value_row_count.as_kernel_param(),
4894 key_col_count.as_kernel_param(),
4895 has_bound_value_keys.as_kernel_param(),
4896 (&workspace.candidate_assumptions).as_kernel_param(),
4897 (&workspace.world_views).as_kernel_param(),
4898 (&workspace.model_membership).as_kernel_param(),
4899 (&workspace.rejection_reasons).as_kernel_param(),
4900 ];
4901 func_arity_n.clone().launch(config, &mut params)?;
4902 }
4903 };
4904 Ok(())
4905 },
4906 )?;
4907 kernel_timings.push(kernel_timing);
4908 }
4909 let kernel_timing = EpistemicGpuKernelTimingTrace::checked_sum(kernel_timings)?;
4910
4911 Ok(trace.with_kernel_timing(kernel_timing))
4912 }
4913
4914 pub fn validate_epistemic_gpu_world_views(
4916 &self,
4917 workspace: &mut EpistemicGpuWorkspace,
4918 gpu_plan: &EpistemicGpuPlan,
4919 candidate_count: usize,
4920 models_per_reduction: usize,
4921 ) -> Result<EpistemicGpuWorldViewValidationTrace> {
4922 gpu_plan.validate_tuple_membership_bindings()?;
4923 let literal_count = gpu_plan.epistemic_literals.len();
4924 let reduction_count = gpu_plan.reductions.len();
4925 let trace = EpistemicGpuWorldViewValidationTrace::for_counts(
4926 literal_count,
4927 candidate_count,
4928 reduction_count,
4929 models_per_reduction,
4930 )?;
4931 if trace.model_membership_bytes_checked > workspace.layout.model_membership_bytes {
4932 return Err(XlogError::ResourceExhausted {
4933 context: "epistemic GPU world-view validation membership workspace".to_string(),
4934 estimated_bytes: trace.model_membership_bytes_checked as u64,
4935 budget_bytes: workspace.layout.model_membership_bytes as u64,
4936 });
4937 }
4938 if trace.world_view_slots_checked > workspace.layout.world_view_bytes {
4939 return Err(XlogError::ResourceExhausted {
4940 context: "epistemic GPU world-view validation world-view workspace".to_string(),
4941 estimated_bytes: trace.world_view_slots_checked as u64,
4942 budget_bytes: workspace.layout.world_view_bytes as u64,
4943 });
4944 }
4945 if trace.rejection_reason_slots_written > workspace.layout.rejection_reason_slots {
4946 return Err(XlogError::ResourceExhausted {
4947 context: "epistemic GPU world-view validation rejection workspace".to_string(),
4948 estimated_bytes: trace.rejection_reason_slots_written as u64,
4949 budget_bytes: workspace.layout.rejection_reason_slots as u64,
4950 });
4951 }
4952 if trace.model_membership_bytes_checked > u32::MAX as usize {
4953 return Err(XlogError::ResourceExhausted {
4954 context: "epistemic GPU world-view validation membership launch".to_string(),
4955 estimated_bytes: trace.model_membership_bytes_checked as u64,
4956 budget_bytes: u32::MAX as u64,
4957 });
4958 }
4959 if literal_count > u32::MAX as usize
4960 || candidate_count > u32::MAX as usize
4961 || reduction_count > u32::MAX as usize
4962 || models_per_reduction > u32::MAX as usize
4963 {
4964 return Err(XlogError::ResourceExhausted {
4965 context: "epistemic GPU world-view validation dimensions".to_string(),
4966 estimated_bytes: literal_count
4967 .max(candidate_count)
4968 .max(reduction_count)
4969 .max(models_per_reduction) as u64,
4970 budget_bytes: u32::MAX as u64,
4971 });
4972 }
4973
4974 let mut literal_op_codes_host = vec![0u8; literal_count];
4975 let mut literal_negated_host = vec![0u8; literal_count];
4976 let mut literal_bound_to_output_host = vec![0u8; literal_count];
4977 let mut literal_reduction_indices_host = vec![0u32; literal_count];
4978 for binding in &gpu_plan.tuple_membership_bindings {
4979 literal_op_codes_host[binding.literal_index] = epistemic_operator_code(binding.op);
4980 literal_negated_host[binding.literal_index] = u8::from(binding.negated);
4981 literal_bound_to_output_host[binding.literal_index] =
4982 u8::from(binding.bound_output_columns.iter().any(Option::is_some));
4983 literal_reduction_indices_host[binding.literal_index] = binding.reduction_index as u32;
4984 }
4985 let memory = self.provider.memory();
4986 let mut literal_op_codes = memory.alloc::<u8>(literal_count)?;
4987 let mut literal_negated = memory.alloc::<u8>(literal_count)?;
4988 let mut literal_bound_to_output = memory.alloc::<u8>(literal_count)?;
4989 let mut literal_reduction_indices = memory.alloc::<u32>(literal_count)?;
4990 self.provider
4991 .htod_launch_metadata_sync_copy_into(&literal_op_codes_host, &mut literal_op_codes)
4992 .map_err(|e| {
4993 XlogError::execution_ctx(
4994 "epistemic GPU world-view validation metadata",
4995 "upload literal operator codes",
4996 &e,
4997 )
4998 })?;
4999 self.provider
5000 .htod_launch_metadata_sync_copy_into(&literal_negated_host, &mut literal_negated)
5001 .map_err(|e| {
5002 XlogError::execution_ctx(
5003 "epistemic GPU world-view validation metadata",
5004 "upload literal negation flags",
5005 &e,
5006 )
5007 })?;
5008 self.provider
5009 .htod_launch_metadata_sync_copy_into(
5010 &literal_bound_to_output_host,
5011 &mut literal_bound_to_output,
5012 )
5013 .map_err(|e| {
5014 XlogError::execution_ctx(
5015 "epistemic GPU world-view validation metadata",
5016 "upload literal output-binding flags",
5017 &e,
5018 )
5019 })?;
5020 self.provider
5021 .htod_launch_metadata_sync_copy_into(
5022 &literal_reduction_indices_host,
5023 &mut literal_reduction_indices,
5024 )
5025 .map_err(|e| {
5026 XlogError::execution_ctx(
5027 "epistemic GPU world-view validation metadata",
5028 "upload literal reduction indices",
5029 &e,
5030 )
5031 })?;
5032
5033 let world_stride =
5034 workspace.layout.world_view_bytes / workspace.layout.rejection_reason_slots;
5035 if world_stride == 0 || world_stride > u32::MAX as usize {
5036 return Err(XlogError::ResourceExhausted {
5037 context: "epistemic GPU world-view validation world stride".to_string(),
5038 estimated_bytes: world_stride as u64,
5039 budget_bytes: u32::MAX as u64,
5040 });
5041 }
5042
5043 let literal_count = literal_count as u32;
5044 let candidate_count = candidate_count as u32;
5045 let reduction_count = reduction_count as u32;
5046 let models_per_reduction = models_per_reduction as u32;
5047 let world_stride = world_stride as u32;
5048 let func = self
5049 .provider
5050 .device()
5051 .inner()
5052 .get_func(
5053 EPISTEMIC_MODULE,
5054 epistemic_kernels::EPISTEMIC_VALIDATE_WORLD_VIEWS_U8,
5055 )
5056 .ok_or_else(|| {
5057 XlogError::Execution("epistemic world-view validation kernel not found".to_string())
5058 })?;
5059 let config = LaunchConfig::for_num_elems(candidate_count);
5060
5061 let kernel_timing = self.time_epistemic_gpu_kernel_launch(
5062 "epistemic GPU world-view validation",
5063 || unsafe {
5064 let mut params: Vec<*mut c_void> = vec![
5068 literal_count.as_kernel_param(),
5069 candidate_count.as_kernel_param(),
5070 reduction_count.as_kernel_param(),
5071 models_per_reduction.as_kernel_param(),
5072 world_stride.as_kernel_param(),
5073 (&literal_op_codes).as_kernel_param(),
5074 (&literal_negated).as_kernel_param(),
5075 (&literal_bound_to_output).as_kernel_param(),
5076 (&literal_reduction_indices).as_kernel_param(),
5077 (&workspace.candidate_assumptions).as_kernel_param(),
5078 (&workspace.model_membership).as_kernel_param(),
5079 (&workspace.world_views).as_kernel_param(),
5080 (&workspace.rejection_reasons).as_kernel_param(),
5081 ];
5082 func.clone().launch(config, &mut params)
5083 },
5084 )?;
5085
5086 Ok(trace.with_kernel_timing(kernel_timing))
5087 }
5088
5089 pub fn validate_epistemic_gpu_world_view_constraints(
5099 &self,
5100 workspace: &mut EpistemicGpuWorkspace,
5101 gpu_plan: &EpistemicGpuPlan,
5102 candidate_count: usize,
5103 ) -> Result<EpistemicGpuConstraintWorldViewValidationTrace> {
5104 gpu_plan.validate_constraints()?;
5105 let literal_count = gpu_plan.epistemic_literals.len();
5106 let constraint_count = gpu_plan.constraints.len();
5107
5108 if candidate_count > workspace.layout.rejection_reason_slots {
5117 return Err(XlogError::ResourceExhausted {
5118 context: "epistemic GPU constraint-violation index workspace".to_string(),
5119 estimated_bytes: candidate_count as u64,
5120 budget_bytes: workspace.layout.rejection_reason_slots as u64,
5121 });
5122 }
5123 if candidate_count > 0 {
5124 let sentinel_host = vec![u32::MAX; candidate_count];
5125 let fill_len = candidate_count;
5126 let mut sentinel_view = workspace.constraint_violation_index.slice_mut(0..fill_len);
5127 self.provider
5128 .htod_launch_metadata_sync_copy_into(&sentinel_host, &mut sentinel_view)
5129 .map_err(|e| {
5130 XlogError::execution_ctx(
5131 "epistemic GPU world-view constraint metadata",
5132 "initialize constraint-violation index sentinel",
5133 &e,
5134 )
5135 })?;
5136 }
5137
5138 let mut offsets_host = Vec::with_capacity(constraint_count);
5140 let mut counts_host = Vec::with_capacity(constraint_count);
5141 let mut indices_host: Vec<u32> = Vec::new();
5142 for constraint in &gpu_plan.constraints {
5143 offsets_host.push(indices_host.len() as u32);
5144 counts_host.push(constraint.literal_indices.len() as u32);
5145 for &literal_index in &constraint.literal_indices {
5146 indices_host.push(literal_index as u32);
5147 }
5148 }
5149 let constraint_literal_refs = indices_host.len();
5150
5151 let trace = EpistemicGpuConstraintWorldViewValidationTrace {
5152 constraint_count,
5153 constraint_literal_refs,
5154 candidates_checked: candidate_count,
5155 rejection_reason_slots_written: candidate_count,
5156 kernel_launches: 0,
5157 host_write_ops: 0,
5158 kernel_timing: EpistemicGpuKernelTimingTrace::unrecorded(),
5159 };
5160
5161 if constraint_count == 0 {
5162 return Ok(trace);
5165 }
5166
5167 if candidate_count > workspace.layout.rejection_reason_slots {
5168 return Err(XlogError::ResourceExhausted {
5169 context: "epistemic GPU world-view constraint rejection workspace".to_string(),
5170 estimated_bytes: candidate_count as u64,
5171 budget_bytes: workspace.layout.rejection_reason_slots as u64,
5172 });
5173 }
5174 if candidate_count > u32::MAX as usize
5175 || literal_count > u32::MAX as usize
5176 || constraint_count > u32::MAX as usize
5177 || constraint_literal_refs > u32::MAX as usize
5178 {
5179 return Err(XlogError::ResourceExhausted {
5180 context: "epistemic GPU world-view constraint dimensions".to_string(),
5181 estimated_bytes: candidate_count
5182 .max(literal_count)
5183 .max(constraint_count)
5184 .max(constraint_literal_refs) as u64,
5185 budget_bytes: u32::MAX as u64,
5186 });
5187 }
5188
5189 let memory = self.provider.memory();
5190 let mut constraint_literal_offsets = memory.alloc::<u32>(constraint_count)?;
5191 let mut constraint_literal_counts = memory.alloc::<u32>(constraint_count)?;
5192 let mut constraint_literal_indices = memory.alloc::<u32>(constraint_literal_refs.max(1))?;
5193 self.provider
5194 .htod_launch_metadata_sync_copy_into(&offsets_host, &mut constraint_literal_offsets)
5195 .map_err(|e| {
5196 XlogError::execution_ctx(
5197 "epistemic GPU world-view constraint metadata",
5198 "upload constraint literal offsets",
5199 &e,
5200 )
5201 })?;
5202 self.provider
5203 .htod_launch_metadata_sync_copy_into(&counts_host, &mut constraint_literal_counts)
5204 .map_err(|e| {
5205 XlogError::execution_ctx(
5206 "epistemic GPU world-view constraint metadata",
5207 "upload constraint literal counts",
5208 &e,
5209 )
5210 })?;
5211 if !indices_host.is_empty() {
5212 self.provider
5213 .htod_launch_metadata_sync_copy_into(&indices_host, &mut constraint_literal_indices)
5214 .map_err(|e| {
5215 XlogError::execution_ctx(
5216 "epistemic GPU world-view constraint metadata",
5217 "upload constraint literal indices",
5218 &e,
5219 )
5220 })?;
5221 }
5222
5223 let literal_count_u32 = literal_count as u32;
5224 let candidate_count_u32 = candidate_count as u32;
5225 let constraint_count_u32 = constraint_count as u32;
5226 let func = self
5227 .provider
5228 .device()
5229 .inner()
5230 .get_func(
5231 EPISTEMIC_MODULE,
5232 epistemic_kernels::EPISTEMIC_VALIDATE_CONSTRAINTS_U8,
5233 )
5234 .ok_or_else(|| {
5235 XlogError::Execution(
5236 "epistemic world-view constraint validation kernel not found".to_string(),
5237 )
5238 })?;
5239 let config = LaunchConfig::for_num_elems(candidate_count_u32);
5240
5241 let kernel_timing = self.time_epistemic_gpu_kernel_launch(
5242 "epistemic GPU world-view constraint validation",
5243 || unsafe {
5244 let mut params: Vec<*mut c_void> = vec![
5249 literal_count_u32.as_kernel_param(),
5250 candidate_count_u32.as_kernel_param(),
5251 constraint_count_u32.as_kernel_param(),
5252 (&constraint_literal_offsets).as_kernel_param(),
5253 (&constraint_literal_counts).as_kernel_param(),
5254 (&constraint_literal_indices).as_kernel_param(),
5255 (&workspace.candidate_assumptions).as_kernel_param(),
5256 (&mut workspace.rejection_reasons).as_kernel_param(),
5257 (&mut workspace.constraint_violation_index).as_kernel_param(),
5258 ];
5259 func.clone().launch(config, &mut params)
5260 },
5261 )?;
5262
5263 Ok(EpistemicGpuConstraintWorldViewValidationTrace {
5264 kernel_launches: 1,
5265 kernel_timing,
5266 ..trace
5267 })
5268 }
5269
5270 pub fn materialize_epistemic_gpu_candidates(
5272 &self,
5273 workspace: &mut EpistemicGpuWorkspace,
5274 candidate_count: usize,
5275 ) -> Result<EpistemicGpuMaterializationTrace> {
5276 let trace = EpistemicGpuMaterializationTrace::for_count(candidate_count)?;
5277 if trace.world_view_slots_written > workspace.layout.world_view_bytes {
5278 return Err(XlogError::ResourceExhausted {
5279 context: "epistemic GPU materialization world-view workspace".to_string(),
5280 estimated_bytes: trace.world_view_slots_written as u64,
5281 budget_bytes: workspace.layout.world_view_bytes as u64,
5282 });
5283 }
5284 if candidate_count > workspace.layout.rejection_reason_slots {
5285 return Err(XlogError::ResourceExhausted {
5286 context: "epistemic GPU materialization rejection workspace".to_string(),
5287 estimated_bytes: candidate_count as u64,
5288 budget_bytes: workspace.layout.rejection_reason_slots as u64,
5289 });
5290 }
5291 if candidate_count > u32::MAX as usize {
5292 return Err(XlogError::ResourceExhausted {
5293 context: "epistemic GPU materialization launch".to_string(),
5294 estimated_bytes: candidate_count as u64,
5295 budget_bytes: u32::MAX as u64,
5296 });
5297 }
5298
5299 let world_stride =
5300 workspace.layout.world_view_bytes / workspace.layout.rejection_reason_slots;
5301 if world_stride == 0 || world_stride > u32::MAX as usize {
5302 return Err(XlogError::ResourceExhausted {
5303 context: "epistemic GPU materialization world stride".to_string(),
5304 estimated_bytes: world_stride as u64,
5305 budget_bytes: u32::MAX as u64,
5306 });
5307 }
5308
5309 let candidate_count = candidate_count as u32;
5310 let world_stride = world_stride as u32;
5311 let func = self
5312 .provider
5313 .device()
5314 .inner()
5315 .get_func(
5316 EPISTEMIC_MODULE,
5317 epistemic_kernels::EPISTEMIC_MATERIALIZE_ACCEPTED_CANDIDATES_U8,
5318 )
5319 .ok_or_else(|| {
5320 XlogError::Execution(
5321 "epistemic candidate materialization kernel not found".to_string(),
5322 )
5323 })?;
5324 let config = LaunchConfig::for_num_elems(candidate_count);
5325
5326 let kernel_timing = self.time_epistemic_gpu_kernel_launch(
5327 "epistemic GPU candidate materialization",
5328 || unsafe {
5329 func.clone().launch(
5332 config,
5333 (
5334 candidate_count,
5335 world_stride,
5336 &workspace.rejection_reasons,
5337 &mut workspace.world_views,
5338 ),
5339 )
5340 },
5341 )?;
5342
5343 Ok(trace.with_kernel_timing(kernel_timing))
5344 }
5345
5346 pub fn materialize_epistemic_gpu_final_results(
5348 &self,
5349 workspace: &mut EpistemicGpuWorkspace,
5350 output: &CudaBuffer,
5351 candidate_count: usize,
5352 ) -> Result<EpistemicGpuFinalResultMaterializationTrace> {
5353 let trace = EpistemicGpuFinalResultMaterializationTrace::for_count(candidate_count)?;
5354 if trace.world_view_slots_written > workspace.layout.world_view_bytes {
5355 return Err(XlogError::ResourceExhausted {
5356 context: "epistemic GPU final-result world-view workspace".to_string(),
5357 estimated_bytes: trace.world_view_slots_written as u64,
5358 budget_bytes: workspace.layout.world_view_bytes as u64,
5359 });
5360 }
5361 if candidate_count > workspace.layout.rejection_reason_slots {
5362 return Err(XlogError::ResourceExhausted {
5363 context: "epistemic GPU final-result rejection workspace".to_string(),
5364 estimated_bytes: candidate_count as u64,
5365 budget_bytes: workspace.layout.rejection_reason_slots as u64,
5366 });
5367 }
5368 if candidate_count > u32::MAX as usize {
5369 return Err(XlogError::ResourceExhausted {
5370 context: "epistemic GPU final-result launch".to_string(),
5371 estimated_bytes: candidate_count as u64,
5372 budget_bytes: u32::MAX as u64,
5373 });
5374 }
5375
5376 let world_stride =
5377 workspace.layout.world_view_bytes / workspace.layout.rejection_reason_slots;
5378 if world_stride == 0 || world_stride > u32::MAX as usize {
5379 return Err(XlogError::ResourceExhausted {
5380 context: "epistemic GPU final-result world stride".to_string(),
5381 estimated_bytes: world_stride as u64,
5382 budget_bytes: u32::MAX as u64,
5383 });
5384 }
5385
5386 let candidate_count = candidate_count as u32;
5387 let world_stride = world_stride as u32;
5388 let func = self
5389 .provider
5390 .device()
5391 .inner()
5392 .get_func(
5393 EPISTEMIC_MODULE,
5394 epistemic_kernels::EPISTEMIC_MATERIALIZE_FINAL_RESULT_FLAGS_U8,
5395 )
5396 .ok_or_else(|| {
5397 XlogError::Execution(
5398 "epistemic final-result materialization kernel not found".to_string(),
5399 )
5400 })?;
5401 let config = LaunchConfig::for_num_elems(candidate_count);
5402
5403 let kernel_timing = self.time_epistemic_gpu_kernel_launch(
5404 "epistemic GPU final result materialization",
5405 || unsafe {
5406 func.clone().launch(
5411 config,
5412 (
5413 candidate_count,
5414 world_stride,
5415 output.num_rows_device(),
5416 &workspace.rejection_reasons,
5417 &mut workspace.world_views,
5418 ),
5419 )
5420 },
5421 )?;
5422
5423 Ok(trace.with_kernel_timing(kernel_timing))
5424 }
5425
5426 #[allow(clippy::too_many_arguments)]
5428 pub fn materialize_epistemic_gpu_final_tuples(
5429 &self,
5430 workspace: &mut EpistemicGpuWorkspace,
5431 output: &CudaBuffer,
5432 gpu_plan: &EpistemicGpuPlan,
5433 literal_count: usize,
5434 candidate_count: usize,
5435 reduction_count: usize,
5436 models_per_reduction: usize,
5437 ) -> Result<(CudaBuffer, EpistemicGpuFinalTupleMaterializationTrace)> {
5438 self.materialize_epistemic_gpu_final_tuples_scoped(
5439 workspace,
5440 output,
5441 gpu_plan,
5442 literal_count,
5443 candidate_count,
5444 reduction_count,
5445 models_per_reduction,
5446 None,
5447 )
5448 }
5449
5450 #[allow(clippy::too_many_arguments)]
5463 fn materialize_epistemic_gpu_final_tuples_scoped(
5464 &self,
5465 workspace: &mut EpistemicGpuWorkspace,
5466 output: &CudaBuffer,
5467 gpu_plan: &EpistemicGpuPlan,
5468 literal_count: usize,
5469 candidate_count: usize,
5470 reduction_count: usize,
5471 models_per_reduction: usize,
5472 head_reduction_filter: Option<&BTreeSet<usize>>,
5473 ) -> Result<(CudaBuffer, EpistemicGpuFinalTupleMaterializationTrace)> {
5474 gpu_plan.validate_tuple_membership_bindings()?;
5475 if candidate_count > workspace.layout.rejection_reason_slots {
5476 return Err(XlogError::ResourceExhausted {
5477 context: "epistemic GPU final-tuple rejection workspace".to_string(),
5478 estimated_bytes: candidate_count as u64,
5479 budget_bytes: workspace.layout.rejection_reason_slots as u64,
5480 });
5481 }
5482 let literal_count_u32 =
5483 checked_u32_dimension(literal_count, "epistemic GPU final-tuple literals")?;
5484 let candidate_count_u32 =
5485 checked_u32_dimension(candidate_count, "epistemic GPU final-tuple candidates")?;
5486 let reduction_count_u32 =
5487 checked_u32_dimension(reduction_count, "epistemic GPU final-tuple reductions")?;
5488 let models_per_reduction_u32 = checked_u32_dimension(
5489 models_per_reduction,
5490 "epistemic GPU final-tuple models per reduction",
5491 )?;
5492 let output_row_capacity =
5493 usize::try_from(output.num_rows()).map_err(|_| XlogError::ResourceExhausted {
5494 context: "epistemic GPU final-tuple output rows".to_string(),
5495 estimated_bytes: output.num_rows(),
5496 budget_bytes: usize::MAX as u64,
5497 })?;
5498 let output_row_capacity_u32 =
5499 checked_u32_dimension(output_row_capacity, "epistemic GPU final-tuple output rows")?;
5500 let final_output_columns =
5501 final_output_columns_for_materialization(output, gpu_plan, head_reduction_filter)?;
5502 let mut tuple_bytes_capacity = 0usize;
5503 let mut source_columns: Vec<(&CudaColumn, u32, u32)> =
5504 Vec::with_capacity(final_output_columns.len());
5505 let mut result_columns_raw: Vec<TrackedCudaSlice<u8>> =
5506 Vec::with_capacity(final_output_columns.len());
5507 let mut final_schema_columns = Vec::with_capacity(final_output_columns.len());
5508 let mut final_schema_sort_labels = Vec::with_capacity(final_output_columns.len());
5509 for &col_idx in &final_output_columns {
5510 let src_col = output.column(col_idx).ok_or_else(|| {
5511 XlogError::Execution(format!("epistemic final tuple missing column {col_idx}"))
5512 })?;
5513 let (column_name, column_type) = output
5514 .schema()
5515 .columns
5516 .get(col_idx)
5517 .ok_or_else(|| {
5518 XlogError::Execution(format!(
5519 "epistemic final tuple missing schema column {col_idx}"
5520 ))
5521 })?
5522 .clone();
5523 let column_width = column_type.size_bytes();
5524 let expected_column_bytes = checked_product(output_row_capacity, column_width)?;
5525 if src_col.len() < expected_column_bytes {
5526 return Err(XlogError::ResourceExhausted {
5527 context: "epistemic GPU final-tuple column capacity".to_string(),
5528 estimated_bytes: expected_column_bytes as u64,
5529 budget_bytes: src_col.len() as u64,
5530 });
5531 }
5532 let column_byte_len =
5533 checked_u32_dimension(src_col.len(), "epistemic GPU final-tuple column")?;
5534 let column_width =
5535 checked_u32_dimension(column_width, "epistemic GPU final-tuple column width")?;
5536 tuple_bytes_capacity = checked_sum(tuple_bytes_capacity, src_col.len())?;
5537 source_columns.push((src_col, column_byte_len, column_width));
5538 result_columns_raw.push(self.provider.memory().alloc::<u8>(src_col.len())?);
5539 final_schema_columns.push((column_name, column_type));
5540 final_schema_sort_labels.push(
5541 output
5542 .schema()
5543 .column_sort_label(col_idx)
5544 .unwrap_or("")
5545 .to_string(),
5546 );
5547 }
5548
5549 let mut final_row_count = self.provider.memory().alloc::<u32>(1)?;
5550 let mut row_map = self
5551 .provider
5552 .memory()
5553 .alloc::<u32>(output_row_capacity.max(1))?;
5554 let row_filter_bindings: Vec<_> = gpu_plan
5555 .tuple_membership_bindings
5556 .iter()
5557 .filter(|binding| binding.bound_output_columns.iter().any(Option::is_some))
5558 .filter(|binding| {
5559 head_reduction_filter
5560 .map(|reductions| reductions.contains(&binding.reduction_index))
5561 .unwrap_or(true)
5562 })
5563 .collect();
5564 if row_filter_bindings.len() > u32::MAX as usize {
5565 return Err(XlogError::ResourceExhausted {
5566 context: "epistemic GPU final tuple row-filter count".to_string(),
5567 estimated_bytes: row_filter_bindings.len() as u64,
5568 budget_bytes: u32::MAX as u64,
5569 });
5570 }
5571 let negated_row_filter_count = row_filter_bindings
5572 .iter()
5573 .filter(|binding| binding.negated)
5574 .count();
5575 let trace = EpistemicGpuFinalTupleMaterializationTrace::for_counts(
5576 final_output_columns.len(),
5577 output_row_capacity,
5578 tuple_bytes_capacity,
5579 literal_count,
5580 candidate_count,
5581 reduction_count,
5582 models_per_reduction,
5583 )?
5584 .with_row_filter_counts(row_filter_bindings.len(), negated_row_filter_count)?;
5585 if trace.model_membership_bytes_checked > workspace.layout.model_membership_bytes {
5586 return Err(XlogError::ResourceExhausted {
5587 context: "epistemic GPU final-tuple membership workspace".to_string(),
5588 estimated_bytes: trace.model_membership_bytes_checked as u64,
5589 budget_bytes: workspace.layout.model_membership_bytes as u64,
5590 });
5591 }
5592 if trace.world_view_slots_checked > workspace.layout.world_view_bytes {
5593 return Err(XlogError::ResourceExhausted {
5594 context: "epistemic GPU final-tuple world-view workspace".to_string(),
5595 estimated_bytes: trace.world_view_slots_checked as u64,
5596 budget_bytes: workspace.layout.world_view_bytes as u64,
5597 });
5598 }
5599 if trace.model_membership_bytes_checked > u32::MAX as usize {
5600 return Err(XlogError::ResourceExhausted {
5601 context: "epistemic GPU final-tuple membership launch".to_string(),
5602 estimated_bytes: trace.model_membership_bytes_checked as u64,
5603 budget_bytes: u32::MAX as u64,
5604 });
5605 }
5606
5607 let world_stride =
5608 workspace.layout.world_view_bytes / workspace.layout.rejection_reason_slots;
5609 if world_stride == 0 || world_stride > u32::MAX as usize {
5610 return Err(XlogError::ResourceExhausted {
5611 context: "epistemic GPU final-tuple world stride".to_string(),
5612 estimated_bytes: world_stride as u64,
5613 budget_bytes: u32::MAX as u64,
5614 });
5615 }
5616 let world_stride =
5617 checked_u32_dimension(world_stride, "epistemic GPU final-tuple world stride")?;
5618 let mut metadata_len = 0usize;
5619 for binding in &row_filter_bindings {
5620 metadata_len = checked_sum(metadata_len, binding.key_columns.len())?;
5621 }
5622 let metadata_len = metadata_len.max(1);
5623 let row_filter_metadata_len = row_filter_bindings.len().max(1);
5624 checked_u32_dimension(
5625 metadata_len,
5626 "epistemic GPU final tuple row-filter key metadata",
5627 )?;
5628 checked_u32_dimension(
5629 row_filter_metadata_len,
5630 "epistemic GPU final tuple row-filter metadata",
5631 )?;
5632 let memory = self.provider.memory();
5633 let device = self.provider.device().inner();
5634 let mut tuple_source_row_count_ptrs = memory.alloc::<u64>(row_filter_metadata_len)?;
5635 let mut row_filter_negated = memory.alloc::<u8>(row_filter_metadata_len)?;
5636 let mut row_filter_key_offsets = memory.alloc::<u32>(row_filter_metadata_len)?;
5637 let mut row_filter_key_counts = memory.alloc::<u32>(row_filter_metadata_len)?;
5638 let mut key_col_ptrs = memory.alloc::<u64>(metadata_len)?;
5639 let mut key_col_widths = memory.alloc::<u32>(metadata_len)?;
5640 let mut expected_key_bits = memory.alloc::<u64>(metadata_len)?;
5641 let mut expected_key_type_codes = memory.alloc::<u8>(metadata_len)?;
5642 let mut tuple_key_match_modes = memory.alloc::<u8>(metadata_len)?;
5643 let mut bound_value_col_ptrs = memory.alloc::<u64>(metadata_len)?;
5644 let mut bound_value_col_widths = memory.alloc::<u32>(metadata_len)?;
5645 let row_filter_count = checked_u32_dimension(
5646 row_filter_bindings.len(),
5647 "epistemic GPU final tuple row-filter count",
5648 )?;
5649 let mut tuple_source_row_counts = Vec::with_capacity(row_filter_bindings.len());
5650
5651 if !row_filter_bindings.is_empty() {
5652 let mut tuple_source_row_count_ptrs_host =
5653 Vec::with_capacity(row_filter_bindings.len());
5654 let mut row_filter_negated_host = Vec::with_capacity(row_filter_bindings.len());
5655 let mut row_filter_key_offsets_host = Vec::with_capacity(row_filter_bindings.len());
5656 let mut row_filter_key_counts_host = Vec::with_capacity(row_filter_bindings.len());
5657 let mut key_col_ptrs_host = Vec::with_capacity(metadata_len);
5658 let mut key_col_widths_host = Vec::with_capacity(metadata_len);
5659 let mut expected_key_bits_host = Vec::with_capacity(metadata_len);
5660 let mut expected_key_type_codes_host = Vec::with_capacity(metadata_len);
5661 let mut tuple_key_match_modes_host = Vec::with_capacity(metadata_len);
5662 let mut bound_value_col_ptrs_host = Vec::with_capacity(metadata_len);
5663 let mut bound_value_col_widths_host = Vec::with_capacity(metadata_len);
5664
5665 for binding in &row_filter_bindings {
5666 let row_filter_key_offset = checked_u32_dimension(
5667 key_col_ptrs_host.len(),
5668 "epistemic GPU final tuple row-filter key offset",
5669 )?;
5670 let row_filter_key_count = checked_u32_dimension(
5671 binding.key_columns.len(),
5672 "epistemic GPU final tuple row-filter key arity",
5673 )?;
5674 row_filter_key_offsets_host.push(row_filter_key_offset);
5675 row_filter_key_counts_host.push(row_filter_key_count);
5676 row_filter_negated_host.push(binding.negated as u8);
5677
5678 let source_relation = self
5679 .resolve_modal_tuple_source(binding.predicate.as_str(), binding.arity)
5680 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
5681 construct: "epistemic GPU final tuple row filtering".to_string(),
5682 context: format!(
5683 "missing tuple source relation {} (arity {}) for final row filter",
5684 binding.predicate, binding.arity
5685 ),
5686 })?;
5687 let tuple_source_row_count = self.clone_device_row_count(source_relation)?;
5688 tuple_source_row_count_ptrs_host.push(*tuple_source_row_count.device_ptr());
5689 tuple_source_row_counts.push(tuple_source_row_count);
5690
5691 for (term_index, &key_col) in binding.key_columns.iter().enumerate() {
5692 let key_col_ref = source_relation.column(key_col).ok_or_else(|| {
5693 XlogError::UnsupportedEpistemicConstruct {
5694 construct: "epistemic GPU final tuple row filtering".to_string(),
5695 context: format!(
5696 "tuple source relation {} missing key column {}",
5697 binding.predicate, key_col
5698 ),
5699 }
5700 })?;
5701 let key_col_type =
5702 source_relation
5703 .schema()
5704 .column_type(key_col)
5705 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
5706 construct: "epistemic GPU final tuple row filtering".to_string(),
5707 context: format!(
5708 "tuple source relation {} missing schema for key column {}",
5709 binding.predicate, key_col
5710 ),
5711 })?;
5712 let key_col_width = key_col_type.size_bytes();
5713 if key_col_width > u32::MAX as usize {
5714 return Err(XlogError::ResourceExhausted {
5715 context: "epistemic GPU final tuple row-filter key width".to_string(),
5716 estimated_bytes: key_col_width as u64,
5717 budget_bytes: u32::MAX as u64,
5718 });
5719 }
5720
5721 key_col_ptrs_host.push(*key_col_ref.device_ptr());
5722 key_col_widths_host.push(key_col_width as u32);
5723 match &binding.key_terms[term_index] {
5724 EirTerm::Variable(variable_name) => {
5725 let bound_col_index = binding.bound_output_columns[term_index]
5726 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
5727 construct: "epistemic GPU final tuple row filtering"
5728 .to_string(),
5729 context: format!(
5730 "tuple key variable {variable_name} has no reduced \
5731 output column binding"
5732 ),
5733 })?;
5734 let bound_col = output.column(bound_col_index).ok_or_else(|| {
5735 XlogError::UnsupportedEpistemicConstruct {
5736 construct: "epistemic GPU final tuple row filtering"
5737 .to_string(),
5738 context: format!(
5739 "reduced output missing device column {bound_col_index} \
5740 for variable {variable_name}"
5741 ),
5742 }
5743 })?;
5744 let bound_col_type = output
5745 .schema()
5746 .column_type(bound_col_index)
5747 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
5748 construct: "epistemic GPU final tuple row filtering"
5749 .to_string(),
5750 context: format!(
5751 "reduced output missing schema for variable \
5752 {variable_name}"
5753 ),
5754 })?;
5755 if bound_col_type != key_col_type {
5756 return Err(XlogError::UnsupportedEpistemicConstruct {
5757 construct: "epistemic GPU final tuple row filtering"
5758 .to_string(),
5759 context: format!(
5760 "bound variable {variable_name} has output type \
5761 {bound_col_type:?}, but tuple source {} key column {} \
5762 has type {key_col_type:?}",
5763 binding.predicate, key_col
5764 ),
5765 });
5766 }
5767 let bound_col_width = bound_col_type.size_bytes();
5768 if bound_col_width > u32::MAX as usize {
5769 return Err(XlogError::ResourceExhausted {
5770 context: "epistemic GPU final tuple row-filter bound width"
5771 .to_string(),
5772 estimated_bytes: bound_col_width as u64,
5773 budget_bytes: u32::MAX as u64,
5774 });
5775 }
5776 expected_key_bits_host.push(0);
5777 expected_key_type_codes_host.push(key_col_type.to_code());
5778 tuple_key_match_modes_host.push(TUPLE_KEY_MATCH_MODE_BOUND_OUTPUT);
5779 bound_value_col_ptrs_host.push(*bound_col.device_ptr());
5780 bound_value_col_widths_host.push(bound_col_width as u32);
5781 }
5782 EirTerm::Anonymous => {
5783 expected_key_bits_host.push(0);
5786 expected_key_type_codes_host.push(key_col_type.to_code());
5787 tuple_key_match_modes_host.push(TUPLE_KEY_MATCH_MODE_WILDCARD);
5788 bound_value_col_ptrs_host.push(0);
5789 bound_value_col_widths_host.push(0);
5790 }
5791 term => {
5792 let expectation = TupleKeyExpectation::from_term(term, key_col_type)?;
5793 expected_key_bits_host.push(expectation.bits);
5794 expected_key_type_codes_host.push(expectation.type_code);
5795 tuple_key_match_modes_host.push(TUPLE_KEY_MATCH_MODE_GROUND);
5796 bound_value_col_ptrs_host.push(0);
5797 bound_value_col_widths_host.push(0);
5798 }
5799 }
5800 }
5801 }
5802
5803 let metadata_context = "epistemic GPU final tuple row-filter metadata";
5804 self.provider
5805 .htod_launch_metadata_sync_copy_into(
5806 &tuple_source_row_count_ptrs_host,
5807 &mut tuple_source_row_count_ptrs,
5808 )
5809 .map_err(|e| {
5810 XlogError::execution_ctx(
5811 metadata_context,
5812 "upload tuple source row-count pointers",
5813 &e,
5814 )
5815 })?;
5816 self.provider
5817 .htod_launch_metadata_sync_copy_into(
5818 &row_filter_negated_host,
5819 &mut row_filter_negated,
5820 )
5821 .map_err(|e| {
5822 XlogError::execution_ctx(metadata_context, "upload row-filter polarity", &e)
5823 })?;
5824 self.provider
5825 .htod_launch_metadata_sync_copy_into(
5826 &row_filter_key_offsets_host,
5827 &mut row_filter_key_offsets,
5828 )
5829 .map_err(|e| {
5830 XlogError::execution_ctx(metadata_context, "upload row-filter key offsets", &e)
5831 })?;
5832 self.provider
5833 .htod_launch_metadata_sync_copy_into(
5834 &row_filter_key_counts_host,
5835 &mut row_filter_key_counts,
5836 )
5837 .map_err(|e| {
5838 XlogError::execution_ctx(metadata_context, "upload row-filter key counts", &e)
5839 })?;
5840 self.provider
5841 .htod_launch_metadata_sync_copy_into(&key_col_ptrs_host, &mut key_col_ptrs)
5842 .map_err(|e| {
5843 XlogError::execution_ctx(metadata_context, "upload key column pointers", &e)
5844 })?;
5845 self.provider
5846 .htod_launch_metadata_sync_copy_into(&key_col_widths_host, &mut key_col_widths)
5847 .map_err(|e| {
5848 XlogError::execution_ctx(metadata_context, "upload key column widths", &e)
5849 })?;
5850 self.provider
5851 .htod_launch_metadata_sync_copy_into(
5852 &expected_key_bits_host,
5853 &mut expected_key_bits,
5854 )
5855 .map_err(|e| {
5856 XlogError::execution_ctx(metadata_context, "upload expected key bits", &e)
5857 })?;
5858 self.provider
5859 .htod_launch_metadata_sync_copy_into(
5860 &expected_key_type_codes_host,
5861 &mut expected_key_type_codes,
5862 )
5863 .map_err(|e| {
5864 XlogError::execution_ctx(metadata_context, "upload expected key type codes", &e)
5865 })?;
5866 self.provider
5867 .htod_launch_metadata_sync_copy_into(
5868 &tuple_key_match_modes_host,
5869 &mut tuple_key_match_modes,
5870 )
5871 .map_err(|e| {
5872 XlogError::execution_ctx(metadata_context, "upload tuple key match modes", &e)
5873 })?;
5874 self.provider
5875 .htod_launch_metadata_sync_copy_into(
5876 &bound_value_col_ptrs_host,
5877 &mut bound_value_col_ptrs,
5878 )
5879 .map_err(|e| {
5880 XlogError::execution_ctx(
5881 metadata_context,
5882 "upload bound value column pointers",
5883 &e,
5884 )
5885 })?;
5886 self.provider
5887 .htod_launch_metadata_sync_copy_into(
5888 &bound_value_col_widths_host,
5889 &mut bound_value_col_widths,
5890 )
5891 .map_err(|e| {
5892 XlogError::execution_ctx(
5893 metadata_context,
5894 "upload bound value column widths",
5895 &e,
5896 )
5897 })?;
5898 } else {
5899 let metadata_context = "epistemic GPU final tuple row-filter metadata";
5900 device
5901 .memset_zeros(&mut tuple_source_row_count_ptrs)
5902 .map_err(|e| {
5903 XlogError::execution_ctx(
5904 metadata_context,
5905 "tuple source row-count pointer memset",
5906 &e,
5907 )
5908 })?;
5909 device.memset_zeros(&mut row_filter_negated).map_err(|e| {
5910 XlogError::execution_ctx(metadata_context, "row-filter polarity memset", &e)
5911 })?;
5912 device
5913 .memset_zeros(&mut row_filter_key_offsets)
5914 .map_err(|e| {
5915 XlogError::execution_ctx(metadata_context, "row-filter key offset memset", &e)
5916 })?;
5917 device
5918 .memset_zeros(&mut row_filter_key_counts)
5919 .map_err(|e| {
5920 XlogError::execution_ctx(metadata_context, "row-filter key count memset", &e)
5921 })?;
5922 device.memset_zeros(&mut key_col_ptrs).map_err(|e| {
5923 XlogError::execution_ctx(metadata_context, "key column pointer memset", &e)
5924 })?;
5925 device.memset_zeros(&mut key_col_widths).map_err(|e| {
5926 XlogError::execution_ctx(metadata_context, "key column width memset", &e)
5927 })?;
5928 device.memset_zeros(&mut expected_key_bits).map_err(|e| {
5929 XlogError::execution_ctx(metadata_context, "expected key bits memset", &e)
5930 })?;
5931 device
5932 .memset_zeros(&mut expected_key_type_codes)
5933 .map_err(|e| {
5934 XlogError::execution_ctx(metadata_context, "expected key type code memset", &e)
5935 })?;
5936 device
5937 .memset_zeros(&mut tuple_key_match_modes)
5938 .map_err(|e| {
5939 XlogError::execution_ctx(metadata_context, "tuple key match mode memset", &e)
5940 })?;
5941 device
5942 .memset_zeros(&mut bound_value_col_ptrs)
5943 .map_err(|e| {
5944 XlogError::execution_ctx(
5945 metadata_context,
5946 "bound value column pointer memset",
5947 &e,
5948 )
5949 })?;
5950 device
5951 .memset_zeros(&mut bound_value_col_widths)
5952 .map_err(|e| {
5953 XlogError::execution_ctx(
5954 metadata_context,
5955 "bound value column width memset",
5956 &e,
5957 )
5958 })?;
5959 }
5960
5961 let mut is_constraint_literal = vec![false; literal_count.max(1)];
5979 for constraint in &gpu_plan.constraints {
5980 for &literal_index in &constraint.literal_indices {
5981 if literal_index < literal_count {
5982 is_constraint_literal[literal_index] = true;
5983 }
5984 }
5985 }
5986 let mut gate_literal_required_host = vec![0u8; literal_count.max(1)];
5987 for binding in &gpu_plan.tuple_membership_bindings {
5988 if !binding.bound_output_columns.iter().any(Option::is_some)
5989 && binding.literal_index < literal_count
5990 && !is_constraint_literal[binding.literal_index]
5991 {
5992 gate_literal_required_host[binding.literal_index] = 1u8;
5993 }
5994 }
5995 let mut gate_literal_required = memory.alloc::<u8>(literal_count.max(1))?;
6003 self.provider
6004 .htod_launch_metadata_sync_copy_into(
6005 &gate_literal_required_host,
6006 &mut gate_literal_required,
6007 )
6008 .map_err(|e| {
6009 XlogError::execution_ctx(
6010 "epistemic GPU final tuple gate metadata",
6011 "upload global-gate literal mask",
6012 &e,
6013 )
6014 })?;
6015
6016 let row_map_func = self
6017 .provider
6018 .device()
6019 .inner()
6020 .get_func(
6021 EPISTEMIC_MODULE,
6022 epistemic_kernels::EPISTEMIC_BUILD_FINAL_TUPLE_ROW_MAP_U8,
6023 )
6024 .ok_or_else(|| {
6025 XlogError::Execution("epistemic final tuple row-map kernel not found".to_string())
6026 })?;
6027 let close_rejections_func = self
6028 .provider
6029 .device()
6030 .inner()
6031 .get_func(
6032 EPISTEMIC_MODULE,
6033 epistemic_kernels::EPISTEMIC_CLOSE_FINAL_TUPLE_REJECTIONS_U8,
6034 )
6035 .ok_or_else(|| {
6036 XlogError::Execution(
6037 "epistemic final tuple rejection-close kernel not found".to_string(),
6038 )
6039 })?;
6040 let func = self
6041 .provider
6042 .device()
6043 .inner()
6044 .get_func(
6045 EPISTEMIC_MODULE,
6046 epistemic_kernels::EPISTEMIC_MATERIALIZE_FINAL_TUPLE_COLUMN_U8,
6047 )
6048 .ok_or_else(|| {
6049 XlogError::Execution(
6050 "epistemic final tuple materialization kernel not found".to_string(),
6051 )
6052 })?;
6053
6054 let mut kernel_timings = Vec::with_capacity(checked_sum(source_columns.len(), 2)?);
6055 let row_map_timing = self.time_epistemic_gpu_kernel_launch(
6056 "epistemic GPU final tuple row map",
6057 || unsafe {
6058 self.provider
6059 .device()
6060 .inner()
6061 .memset_zeros(&mut final_row_count)?;
6062 self.provider.device().inner().memset_zeros(&mut row_map)?;
6063 let mut row_map_params: Vec<*mut c_void> = vec![
6064 output_row_capacity_u32.as_kernel_param(),
6065 literal_count_u32.as_kernel_param(),
6066 candidate_count_u32.as_kernel_param(),
6067 reduction_count_u32.as_kernel_param(),
6068 models_per_reduction_u32.as_kernel_param(),
6069 world_stride.as_kernel_param(),
6070 output.num_rows_device().as_kernel_param(),
6071 (&workspace.rejection_reasons).as_kernel_param(),
6072 (&workspace.model_membership).as_kernel_param(),
6073 (&workspace.world_views).as_kernel_param(),
6074 (&tuple_source_row_count_ptrs).as_kernel_param(),
6075 (&row_filter_negated).as_kernel_param(),
6076 (&row_filter_key_offsets).as_kernel_param(),
6077 (&row_filter_key_counts).as_kernel_param(),
6078 (&key_col_ptrs).as_kernel_param(),
6079 (&key_col_widths).as_kernel_param(),
6080 (&expected_key_bits).as_kernel_param(),
6081 (&expected_key_type_codes).as_kernel_param(),
6082 (&tuple_key_match_modes).as_kernel_param(),
6083 (&bound_value_col_ptrs).as_kernel_param(),
6084 (&bound_value_col_widths).as_kernel_param(),
6085 row_filter_count.as_kernel_param(),
6086 (&row_map).as_kernel_param(),
6087 (&final_row_count).as_kernel_param(),
6088 (&workspace.candidate_assumptions).as_kernel_param(),
6089 (&gate_literal_required).as_kernel_param(),
6090 ];
6091 row_map_func.clone().launch(
6092 LaunchConfig::for_num_elems(output_row_capacity_u32.max(1)),
6093 &mut row_map_params,
6094 )?;
6095 Ok(())
6096 },
6097 )?;
6098 kernel_timings.push(row_map_timing);
6099
6100 let close_rejections_timing = self.time_epistemic_gpu_kernel_launch(
6101 "epistemic GPU final tuple rejection closeout",
6102 || unsafe {
6103 let mut close_rejections_params: Vec<*mut c_void> = vec![
6104 candidate_count_u32.as_kernel_param(),
6105 world_stride.as_kernel_param(),
6106 (&final_row_count).as_kernel_param(),
6107 (&workspace.rejection_reasons).as_kernel_param(),
6108 (&workspace.world_views).as_kernel_param(),
6109 ];
6110 close_rejections_func.clone().launch(
6111 LaunchConfig::for_num_elems(candidate_count_u32.max(1)),
6112 &mut close_rejections_params,
6113 )?;
6114 Ok(())
6115 },
6116 )?;
6117 kernel_timings.push(close_rejections_timing);
6118
6119 for ((src_col, column_byte_len, column_row_width), dst_col) in
6120 source_columns.iter().zip(result_columns_raw.iter_mut())
6121 {
6122 let column_timing = self.time_epistemic_gpu_kernel_launch(
6123 "epistemic GPU final tuple column materialization",
6124 || unsafe {
6125 let mut params: Vec<*mut c_void> = vec![
6130 column_byte_len.as_kernel_param(),
6131 column_row_width.as_kernel_param(),
6132 literal_count_u32.as_kernel_param(),
6133 candidate_count_u32.as_kernel_param(),
6134 reduction_count_u32.as_kernel_param(),
6135 models_per_reduction_u32.as_kernel_param(),
6136 world_stride.as_kernel_param(),
6137 output.num_rows_device().as_kernel_param(),
6138 (&workspace.rejection_reasons).as_kernel_param(),
6139 (&workspace.model_membership).as_kernel_param(),
6140 (&workspace.world_views).as_kernel_param(),
6141 (&row_map).as_kernel_param(),
6142 (*src_col).as_kernel_param(),
6143 dst_col.as_kernel_param(),
6144 (&final_row_count).as_kernel_param(),
6145 ];
6146 func.clone().launch(
6147 LaunchConfig::for_num_elems((*column_byte_len).max(1)),
6148 &mut params,
6149 )?;
6150 Ok(())
6151 },
6152 )?;
6153 kernel_timings.push(column_timing);
6154 }
6155 let kernel_timing = EpistemicGpuKernelTimingTrace::checked_sum(kernel_timings)?;
6156
6157 let result_columns: Vec<CudaColumn> =
6158 result_columns_raw.into_iter().map(Into::into).collect();
6159 let final_schema = Schema::new(final_schema_columns)
6160 .with_sort_labels(final_schema_sort_labels)
6161 .map_err(|err| XlogError::Execution(format!("epistemic final schema: {err}")))?;
6162 let final_output = CudaBuffer::from_columns(
6163 result_columns,
6164 output.num_rows(),
6165 final_row_count,
6166 final_schema,
6167 );
6168 let final_output = if gpu_plan.final_output_columns.is_none() {
6169 final_output
6170 } else {
6171 self.provider.dedup_full_row(&final_output)?
6172 };
6173
6174 Ok((final_output, trace.with_kernel_timing(kernel_timing)))
6175 }
6176
6177 pub fn prepare_epistemic_gpu_execution(
6179 &self,
6180 executable: &EpistemicExecutablePlan,
6181 capacities: EpistemicGpuWorkspaceCapacities,
6182 ) -> Result<EpistemicGpuPreparedExecution> {
6183 let preflight = EpistemicGpuRuntimePreflight::for_executable_plan(executable, capacities)?;
6184 let mut workspace =
6185 self.allocate_epistemic_gpu_workspace(&executable.gpu_plan, capacities)?;
6186 let workspace_reset = self.reset_epistemic_gpu_workspace(&mut workspace)?;
6187
6188 Ok(EpistemicGpuPreparedExecution {
6189 preflight,
6190 tuple_membership_bindings: executable.gpu_plan.tuple_membership_bindings.clone(),
6191 workspace,
6192 workspace_reset,
6193 })
6194 }
6195
6196 fn validate_epistemic_gpu_reduced_constraints(
6197 &self,
6198 executable: &EpistemicExecutablePlan,
6199 ) -> Result<EpistemicGpuConstraintValidationTrace> {
6200 let mut checked_constraint_relations = 0usize;
6201 let mut violated_constraint_relations = 0usize;
6202 let mut row_count_device_reads = 0u32;
6203 let mut violations = Vec::new();
6204
6205 let mut relation_names = Vec::new();
6206 for rule in executable
6207 .reduced_runtime_plan
6208 .rules_by_scc
6209 .iter()
6210 .flatten()
6211 {
6212 if rule.head.starts_with(XLOG_CONSTRAINT_RELATION_PREFIX)
6213 && !relation_names.iter().any(|name| name == &rule.head)
6214 {
6215 relation_names.push(rule.head.as_str());
6216 }
6217 }
6218
6219 for relation_name in relation_names {
6220 checked_constraint_relations += 1;
6221 let relation = self.store().get(relation_name).ok_or_else(|| {
6222 XlogError::Execution(format!(
6223 "missing reduced constraint relation {relation_name} after production runtime \
6224 dispatch"
6225 ))
6226 })?;
6227 let row_count_was_cached = relation.cached_row_count().is_some();
6228 let rows = self.provider.device_row_count(relation)?;
6229 row_count_device_reads += u32::from(!row_count_was_cached);
6230 if rows > 0 {
6231 violated_constraint_relations += 1;
6232 violations.push(format!("{relation_name}={rows}"));
6233 }
6234 }
6235
6236 if !violations.is_empty() {
6237 return Err(XlogError::Execution(format!(
6238 "epistemic GPU reduced constraint violation: {}",
6239 violations.join(", ")
6240 )));
6241 }
6242
6243 Ok(EpistemicGpuConstraintValidationTrace {
6244 checked_constraint_relations,
6245 violated_constraint_relations,
6246 row_count_device_reads,
6247 })
6248 }
6249
6250 pub fn materialize_epistemic_head_relation(
6263 &mut self,
6264 name: &str,
6265 gated_output: &CudaBuffer,
6266 ) -> Result<()> {
6267 let cloned = self.clone_buffer(gated_output)?;
6268 self.put_relation(name, cloned);
6269 Ok(())
6270 }
6271
6272 pub fn clone_store_relation(&self, buffer: &CudaBuffer) -> Result<CudaBuffer> {
6276 self.clone_buffer(buffer)
6277 }
6278
6279 pub fn execute_epistemic_gpu_execution(
6281 &mut self,
6282 executable: &EpistemicExecutablePlan,
6283 capacities: EpistemicGpuWorkspaceCapacities,
6284 ) -> Result<EpistemicGpuExecutionResult> {
6285 let mut prepared = self.prepare_epistemic_gpu_execution(executable, capacities)?;
6286 let literal_count = executable.gpu_plan.epistemic_literals.len();
6287 let candidate_count = bounded_candidate_count(literal_count, capacities.max_candidates)?;
6288 let transfer_budget_start = self.provider.host_transfer_stats();
6289 let launch_metadata_transfer_start = self.provider.host_launch_metadata_transfer_stats();
6290 let candidate_generation = self.generate_epistemic_gpu_candidates(
6291 &mut prepared.workspace,
6292 literal_count,
6293 candidate_count,
6294 )?;
6295 let propagation = self.propagate_epistemic_gpu_candidates(
6296 &mut prepared.workspace,
6297 literal_count,
6298 candidate_count,
6299 )?;
6300 let candidate_validation = self.validate_epistemic_gpu_candidates(
6301 &mut prepared.workspace,
6302 literal_count,
6303 candidate_count,
6304 )?;
6305 let counters_before = self.epistemic_gpu_runtime_counters();
6306 let _reduced_return = self.execute_plan(&executable.reduced_runtime_plan)?;
6307 let counters_after = self.epistemic_gpu_runtime_counters();
6308 let trace = EpistemicGpuRuntimeTrace::try_from_preflight_and_counters(
6309 prepared.preflight,
6310 counters_before,
6311 counters_after,
6312 )?;
6313 trace.require_wcoj_certification()?;
6314 let output_relation = executable
6315 .gpu_plan
6316 .reductions
6317 .last()
6318 .ok_or_else(|| XlogError::UnsupportedEpistemicConstruct {
6319 construct: "epistemic GPU reduced output".to_string(),
6320 context: "executable plan has no epistemic reductions".to_string(),
6321 })?
6322 .head_predicate
6323 .as_str();
6324 let output = {
6325 let reduced_output = self.store().get(output_relation).ok_or_else(|| {
6326 XlogError::UnsupportedEpistemicConstruct {
6327 construct: "epistemic GPU reduced output".to_string(),
6328 context: format!(
6329 "missing reduced output relation {output_relation} after production \
6330 runtime dispatch"
6331 ),
6332 }
6333 })?;
6334 self.clone_buffer(reduced_output)?
6335 };
6336 let model_membership = self.populate_epistemic_gpu_model_membership_from_tuple_sources(
6337 &mut prepared.workspace,
6338 &output,
6339 &executable.gpu_plan,
6340 candidate_count,
6341 capacities.max_models_per_reduction,
6342 )?;
6343 model_membership.require_stable_model_tuple_source()?;
6344 let expected_tuple_key_column_reads =
6345 expected_tuple_key_column_reads(&executable.gpu_plan.tuple_membership_bindings)?;
6346 model_membership.require_planned_tuple_key_column_reads(expected_tuple_key_column_reads)?;
6347 let world_view_validation = self.validate_epistemic_gpu_world_views(
6348 &mut prepared.workspace,
6349 &executable.gpu_plan,
6350 candidate_count,
6351 capacities.max_models_per_reduction,
6352 )?;
6353 let constraint_world_view_validation = self.validate_epistemic_gpu_world_view_constraints(
6354 &mut prepared.workspace,
6355 &executable.gpu_plan,
6356 candidate_count,
6357 )?;
6358 let materialization =
6359 self.materialize_epistemic_gpu_candidates(&mut prepared.workspace, candidate_count)?;
6360 let final_result_materialization = self.materialize_epistemic_gpu_final_results(
6361 &mut prepared.workspace,
6362 &output,
6363 candidate_count,
6364 )?;
6365 let head_reductions = epistemic_head_reduction_indices(&executable.gpu_plan);
6370 let is_multi_head = head_reductions.len() > 1;
6371 let primary_head_filter = if is_multi_head {
6372 head_reductions.get(output_relation).cloned()
6373 } else {
6374 None
6375 };
6376 let (final_output, final_tuple_materialization) = self
6377 .materialize_epistemic_gpu_final_tuples_scoped(
6378 &mut prepared.workspace,
6379 &output,
6380 &executable.gpu_plan,
6381 literal_count,
6382 candidate_count,
6383 executable.gpu_plan.reductions.len(),
6384 capacities.max_models_per_reduction,
6385 primary_head_filter.as_ref(),
6386 )?;
6387 let mut additional_head_outputs: Vec<(String, CudaBuffer)> = Vec::new();
6392 if is_multi_head {
6393 for (head, reductions) in &head_reductions {
6394 if head.as_str() == output_relation {
6395 continue;
6396 }
6397 let head_output = {
6398 let reduced_head = self.store().get(head.as_str()).ok_or_else(|| {
6399 XlogError::UnsupportedEpistemicConstruct {
6400 construct: "epistemic GPU reduced output".to_string(),
6401 context: format!(
6402 "missing reduced output relation {head} after production runtime \
6403 dispatch for joint multi-head materialization"
6404 ),
6405 }
6406 })?;
6407 self.clone_buffer(reduced_head)?
6408 };
6409 let (head_final_output, _head_trace) = self
6410 .materialize_epistemic_gpu_final_tuples_scoped(
6411 &mut prepared.workspace,
6412 &head_output,
6413 &executable.gpu_plan,
6414 literal_count,
6415 candidate_count,
6416 executable.gpu_plan.reductions.len(),
6417 capacities.max_models_per_reduction,
6418 Some(reductions),
6419 )?;
6420 additional_head_outputs.push((head.clone(), head_final_output));
6421 }
6422 }
6423 let tuple_evidence_output = if executable.gpu_plan.final_output_columns.is_some() {
6424 let mut evidence_plan = executable.gpu_plan.clone();
6425 evidence_plan.final_output_columns = None;
6426 let (evidence_output, _) = self.materialize_epistemic_gpu_final_tuples(
6427 &mut prepared.workspace,
6428 &output,
6429 &evidence_plan,
6430 literal_count,
6431 candidate_count,
6432 executable.gpu_plan.reductions.len(),
6433 capacities.max_models_per_reduction,
6434 )?;
6435 Some(evidence_output)
6436 } else {
6437 None
6438 };
6439 let transfer_budget_end = self.provider.host_transfer_stats();
6440 let launch_metadata_transfer_end = self.provider.host_launch_metadata_transfer_stats();
6441 let transfer_budget =
6442 EpistemicGpuTransferBudgetTrace::from_host_transfer_stats_with_launch_metadata(
6443 candidate_count,
6444 transfer_budget_start,
6445 transfer_budget_end,
6446 launch_metadata_transfer_start,
6447 launch_metadata_transfer_end,
6448 )?;
6449 let final_result_transfer =
6450 EpistemicGpuFinalResultTransferTrace::from_final_output(&self.provider, &final_output)?;
6451 final_tuple_materialization.require_row_filter_materialization_evidence(
6452 "epistemic GPU final tuple materialization",
6453 final_result_transfer.final_output_rows,
6454 )?;
6455 let constraint_validation = self.validate_epistemic_gpu_reduced_constraints(executable)?;
6456 let semantic_trace = EpistemicGpuSemanticTrace::from_device_rejection_reasons(
6457 &self.provider,
6458 &prepared.workspace,
6459 &candidate_generation,
6460 &propagation,
6461 &model_membership,
6462 &world_view_validation,
6463 )?;
6464
6465 Ok(EpistemicGpuExecutionResult {
6466 provider_identity: EpistemicGpuProviderIdentity::from_provider(&self.provider),
6467 prepared,
6468 candidate_generation,
6469 propagation,
6470 candidate_validation,
6471 model_membership,
6472 world_view_validation,
6473 constraint_world_view_validation,
6474 materialization,
6475 final_result_materialization,
6476 final_tuple_materialization,
6477 transfer_budget,
6478 final_result_transfer,
6479 constraint_validation,
6480 semantic_trace,
6481 tuple_membership_bindings: executable.gpu_plan.tuple_membership_bindings.clone(),
6482 final_output,
6483 additional_head_outputs,
6484 tuple_evidence_output,
6485 output,
6486 trace,
6487 })
6488 }
6489
6490 pub fn execute_epistemic_gpu_execution_batch(
6498 &mut self,
6499 executables: &[&EpistemicExecutablePlan],
6500 capacities: EpistemicGpuWorkspaceCapacities,
6501 ) -> Result<Vec<EpistemicGpuExecutionResult>> {
6502 if executables.is_empty() {
6503 return Err(XlogError::UnsupportedEpistemicConstruct {
6504 construct: "epistemic GPU batch execution".to_string(),
6505 context: "batch execution requires at least one executable component".to_string(),
6506 });
6507 }
6508
6509 let mut results = Vec::with_capacity(executables.len());
6510 for executable in executables {
6511 results.push(self.execute_epistemic_gpu_execution(executable, capacities)?);
6512 }
6513 Ok(results)
6514 }
6515
6516 pub fn execute_epistemic_gpu_execution_batch_with_trace(
6523 &mut self,
6524 executables: &[&EpistemicExecutablePlan],
6525 capacities: EpistemicGpuWorkspaceCapacities,
6526 ) -> Result<EpistemicGpuBatchExecutionResult> {
6527 let results = self.execute_epistemic_gpu_execution_batch(executables, capacities)?;
6528 let trace = EpistemicGpuBatchExecutionTrace::try_from_component_results(&results)?;
6529 Ok(EpistemicGpuBatchExecutionResult { results, trace })
6530 }
6531}
6532
6533#[derive(Default)]
6534struct RuntimeRouteSummary {
6535 multiway_reduction_count: usize,
6536 kclique_wcoj_plan_count: usize,
6537 wcoj_triangle_route_count: usize,
6538 wcoj_4cycle_route_count: usize,
6539 free_join_route_count: usize,
6540 kclique_wcoj_plan_count_by_arity: [usize; 4],
6541 kclique_wcoj_max_arity: u8,
6542 kclique_wcoj_edge_permutation_count: usize,
6543 kclique_stream_groups: BTreeSet<StreamGroupId>,
6544 kclique_skew_scheduled_plan_count: usize,
6545 planned_hash_route_count: usize,
6546 planned_hash_planner_wins_count: usize,
6547 planned_hash_incomplete_stats_count: usize,
6548 planned_hash_cost_evidence_count: usize,
6549 sorted_layout_requirement_count: usize,
6550 helper_split_spec_count: usize,
6551}
6552
6553fn summarize_runtime_routes(node: &RirNode, routes: &mut RuntimeRouteSummary) {
6554 match node {
6555 RirNode::MultiWayJoin { inputs, plan, .. } => {
6556 routes.multiway_reduction_count += 1;
6557 match plan {
6558 Some(MultiwayPlan::WcojWithPlan(order)) => {
6559 routes.kclique_wcoj_plan_count += 1;
6560 if let Some(slot) = usize::from(order.k).checked_sub(5) {
6561 if slot < routes.kclique_wcoj_plan_count_by_arity.len() {
6562 routes.kclique_wcoj_plan_count_by_arity[slot] += 1;
6563 }
6564 }
6565 routes.kclique_wcoj_max_arity = routes.kclique_wcoj_max_arity.max(order.k);
6566 routes.kclique_wcoj_edge_permutation_count += order
6567 .edge_permutation
6568 .iter()
6569 .take_while(|slot| **slot != u8::MAX)
6570 .count();
6571 routes.kclique_stream_groups.insert(order.stream_group);
6572 if !order.helper_split_specs.is_empty() {
6573 routes.kclique_skew_scheduled_plan_count += 1;
6574 }
6575 routes.sorted_layout_requirement_count +=
6576 order.sorted_layout_requirements.edge_slots.len();
6577 routes.helper_split_spec_count += order.helper_split_specs.len();
6578 }
6579 Some(MultiwayPlan::PlannedHashRoute {
6580 reason,
6581 planner_evidence,
6582 }) => {
6583 routes.planned_hash_route_count += 1;
6584 match reason {
6585 PlannedHashReason::PlannerPredictsHashWins => {
6586 routes.planned_hash_planner_wins_count += 1;
6587 if planner_evidence.wcoj_cost.is_finite()
6588 && planner_evidence.hash_cost.is_finite()
6589 && planner_evidence.hash_cost <= planner_evidence.wcoj_cost
6590 {
6591 routes.planned_hash_cost_evidence_count += 1;
6592 }
6593 }
6594 PlannedHashReason::IncompleteStatsSafeDefault => {
6595 routes.planned_hash_incomplete_stats_count += 1;
6596 }
6597 }
6598 }
6599 Some(MultiwayPlan::FreeJoin) => {
6605 routes.free_join_route_count += 1;
6606 }
6607 None => {
6608 if super::wcoj_dispatch::match_multiway_triangle(node).is_some() {
6609 routes.wcoj_triangle_route_count += 1;
6610 } else if super::wcoj_dispatch::match_multiway_4cycle(node).is_some() {
6611 routes.wcoj_4cycle_route_count += 1;
6612 }
6613 }
6614 }
6615
6616 for input in inputs {
6617 summarize_runtime_routes(input, routes);
6618 }
6619 }
6620 RirNode::Filter { input, .. }
6621 | RirNode::Project { input, .. }
6622 | RirNode::Distinct { input, .. }
6623 | RirNode::GroupBy { input, .. } => summarize_runtime_routes(input, routes),
6624 RirNode::Join { left, right, .. } | RirNode::Diff { left, right } => {
6625 summarize_runtime_routes(left, routes);
6626 summarize_runtime_routes(right, routes);
6627 }
6628 RirNode::Union { inputs } => {
6629 for input in inputs {
6630 summarize_runtime_routes(input, routes);
6631 }
6632 }
6633 RirNode::Fixpoint {
6634 base, recursive, ..
6635 } => {
6636 summarize_runtime_routes(base, routes);
6637 summarize_runtime_routes(recursive, routes);
6638 }
6639 RirNode::ChainJoin { left, right, .. } => {
6640 summarize_runtime_routes(left, routes);
6641 summarize_runtime_routes(right, routes);
6642 }
6643 RirNode::TensorMaskedJoin { .. } | RirNode::Scan { .. } | RirNode::Unit => {}
6644 }
6645}
6646
6647fn helper_relation_ids(executable: &EpistemicExecutablePlan) -> BTreeSet<RelId> {
6648 executable
6649 .relation_ids
6650 .iter()
6651 .filter_map(|(name, rel)| name.starts_with("__kclique_helper_").then_some(*rel))
6652 .collect()
6653}
6654
6655fn count_helper_relation_scans(node: &RirNode, helper_relations: &BTreeSet<RelId>) -> usize {
6656 match node {
6657 RirNode::Scan { .. } => 0,
6658 RirNode::MultiWayJoin { plan, inputs, .. } => {
6659 let own_wcoj_inputs = if matches!(plan, Some(MultiwayPlan::WcojWithPlan(_))) {
6660 inputs
6661 .iter()
6662 .map(|input| count_helper_relation_leaf_scans(input, helper_relations))
6663 .sum()
6664 } else {
6665 0
6666 };
6667 own_wcoj_inputs
6668 + inputs
6669 .iter()
6670 .map(|input| count_helper_relation_scans(input, helper_relations))
6671 .sum::<usize>()
6672 }
6673 RirNode::Filter { input, .. }
6674 | RirNode::Project { input, .. }
6675 | RirNode::Distinct { input, .. }
6676 | RirNode::GroupBy { input, .. } => count_helper_relation_scans(input, helper_relations),
6677 RirNode::Join { left, right, .. } | RirNode::Diff { left, right } => {
6678 count_helper_relation_scans(left, helper_relations)
6679 + count_helper_relation_scans(right, helper_relations)
6680 }
6681 RirNode::Union { inputs } => inputs
6682 .iter()
6683 .map(|input| count_helper_relation_scans(input, helper_relations))
6684 .sum(),
6685 RirNode::Fixpoint {
6686 base, recursive, ..
6687 } => {
6688 count_helper_relation_scans(base, helper_relations)
6689 + count_helper_relation_scans(recursive, helper_relations)
6690 }
6691 RirNode::ChainJoin { left, right, .. } => {
6692 count_helper_relation_scans(left, helper_relations)
6693 + count_helper_relation_scans(right, helper_relations)
6694 }
6695 RirNode::TensorMaskedJoin { .. } | RirNode::Unit => 0,
6696 }
6697}
6698
6699fn count_helper_relation_leaf_scans(node: &RirNode, helper_relations: &BTreeSet<RelId>) -> usize {
6700 match node {
6701 RirNode::Scan { rel } => usize::from(helper_relations.contains(rel)),
6702 RirNode::Filter { input, .. }
6703 | RirNode::Project { input, .. }
6704 | RirNode::Distinct { input, .. }
6705 | RirNode::GroupBy { input, .. } => {
6706 count_helper_relation_leaf_scans(input, helper_relations)
6707 }
6708 RirNode::Join { left, right, .. } | RirNode::Diff { left, right } => {
6709 count_helper_relation_leaf_scans(left, helper_relations)
6710 + count_helper_relation_leaf_scans(right, helper_relations)
6711 }
6712 RirNode::Union { inputs } => inputs
6713 .iter()
6714 .map(|input| count_helper_relation_leaf_scans(input, helper_relations))
6715 .sum(),
6716 RirNode::Fixpoint {
6717 base, recursive, ..
6718 } => {
6719 count_helper_relation_leaf_scans(base, helper_relations)
6720 + count_helper_relation_leaf_scans(recursive, helper_relations)
6721 }
6722 RirNode::MultiWayJoin { inputs, .. } => inputs
6723 .iter()
6724 .map(|input| count_helper_relation_leaf_scans(input, helper_relations))
6725 .sum(),
6726 RirNode::ChainJoin { left, right, .. } => {
6727 count_helper_relation_leaf_scans(left, helper_relations)
6728 + count_helper_relation_leaf_scans(right, helper_relations)
6729 }
6730 RirNode::TensorMaskedJoin { .. } | RirNode::Unit => 0,
6731 }
6732}
6733
6734fn require_positive(value: usize, context: &str) -> Result<()> {
6735 if value == 0 {
6736 return Err(XlogError::ResourceExhausted {
6737 context: context.to_string(),
6738 estimated_bytes: 0,
6739 budget_bytes: 1,
6740 });
6741 }
6742 Ok(())
6743}
6744
6745fn checked_u32_dimension(value: usize, context: &str) -> Result<u32> {
6746 u32::try_from(value).map_err(|_| XlogError::ResourceExhausted {
6747 context: context.to_string(),
6748 estimated_bytes: value as u64,
6749 budget_bytes: u32::MAX as u64,
6750 })
6751}
6752
6753fn epistemic_head_reduction_indices(
6759 gpu_plan: &EpistemicGpuPlan,
6760) -> std::collections::BTreeMap<String, BTreeSet<usize>> {
6761 let mut heads: std::collections::BTreeMap<String, BTreeSet<usize>> =
6762 std::collections::BTreeMap::new();
6763 for (reduction_index, reduction) in gpu_plan.reductions.iter().enumerate() {
6764 heads
6765 .entry(reduction.head_predicate.clone())
6766 .or_default()
6767 .insert(reduction_index);
6768 }
6769 heads
6770}
6771
6772fn final_output_columns_for_materialization(
6773 output: &CudaBuffer,
6774 gpu_plan: &EpistemicGpuPlan,
6775 head_reduction_filter: Option<&BTreeSet<usize>>,
6776) -> Result<Vec<usize>> {
6777 if let Some(filter) = head_reduction_filter {
6787 if let Some(public_head_arity) = gpu_plan
6788 .reductions
6789 .iter()
6790 .enumerate()
6791 .filter(|(reduction_index, _)| filter.contains(reduction_index))
6792 .map(|(_, reduction)| reduction.public_head_arity)
6793 .max()
6794 {
6795 if public_head_arity > output.arity() {
6796 return Err(XlogError::UnsupportedEpistemicConstruct {
6797 construct: "epistemic GPU final output projection".to_string(),
6798 context: format!(
6799 "per-head public arity {} exceeds reduced output arity {} for the \
6800 joint multi-head materialization",
6801 public_head_arity,
6802 output.arity()
6803 ),
6804 });
6805 }
6806 return Ok((0..public_head_arity).collect());
6807 }
6808 }
6809
6810 let Some(final_output_columns) = &gpu_plan.final_output_columns else {
6811 return Ok((0..output.arity()).collect());
6812 };
6813
6814 let mut seen = vec![false; output.arity()];
6815 for &column in final_output_columns {
6816 if column >= output.arity() {
6817 return Err(XlogError::UnsupportedEpistemicConstruct {
6818 construct: "epistemic GPU final output projection".to_string(),
6819 context: format!(
6820 "final output column {} exceeds reduced output arity {}",
6821 column,
6822 output.arity()
6823 ),
6824 });
6825 }
6826 if seen[column] {
6827 return Err(XlogError::UnsupportedEpistemicConstruct {
6828 construct: "epistemic GPU final output projection".to_string(),
6829 context: format!("duplicate final output column {}", column),
6830 });
6831 }
6832 seen[column] = true;
6833 }
6834
6835 Ok(final_output_columns.clone())
6836}
6837
6838fn require_u32_launch_bound(value: usize, context: &str) -> Result<()> {
6839 checked_u32_dimension(value, context).map(|_| ())
6840}
6841
6842fn require_u32_launch_dimensions(values: &[usize], context: &str) -> Result<()> {
6843 let max_value = values.iter().copied().max().unwrap_or(0);
6844 require_u32_launch_bound(max_value, context)
6845}
6846
6847fn checked_product(left: usize, right: usize) -> Result<usize> {
6848 left.checked_mul(right).ok_or_else(|| {
6849 XlogError::Kernel(format!(
6850 "epistemic GPU workspace size overflow: {left} * {right}"
6851 ))
6852 })
6853}
6854
6855fn checked_sum(left: usize, right: usize) -> Result<usize> {
6856 left.checked_add(right).ok_or_else(|| {
6857 XlogError::Kernel(format!(
6858 "epistemic GPU workspace size overflow: {left} + {right}"
6859 ))
6860 })
6861}
6862
6863fn require_epistemic_gpu_kernel_phases(gpu_plan: &EpistemicGpuPlan) -> Result<()> {
6864 let required = [
6865 EpistemicGpuHotPathPhase::CandidateGeneration,
6866 EpistemicGpuHotPathPhase::Propagation,
6867 EpistemicGpuHotPathPhase::CandidateValidation,
6868 EpistemicGpuHotPathPhase::ModelMembership,
6869 EpistemicGpuHotPathPhase::WorldViewValidation,
6870 EpistemicGpuHotPathPhase::ResultMaterialization,
6871 EpistemicGpuHotPathPhase::FinalResultMaterialization,
6872 EpistemicGpuHotPathPhase::FinalTupleMaterialization,
6873 ];
6874
6875 for phase in required {
6876 if !gpu_plan.required_kernel_phases.contains(&phase) {
6877 return Err(XlogError::UnsupportedEpistemicConstruct {
6878 construct: "epistemic GPU kernel phase contract".to_string(),
6879 context: format!(
6880 "accepted GPU execution requires kernel phase {:?}, but the plan declared {:?}",
6881 phase, gpu_plan.required_kernel_phases
6882 ),
6883 });
6884 }
6885 }
6886
6887 Ok(())
6888}
6889
6890fn require_epistemic_gpu_buffer_contract(gpu_plan: &EpistemicGpuPlan) -> Result<()> {
6891 let required = [
6892 EpistemicGpuBufferKind::CandidateAssumptions,
6893 EpistemicGpuBufferKind::WorldViews,
6894 EpistemicGpuBufferKind::ModelMembership,
6895 EpistemicGpuBufferKind::RejectionReasons,
6896 ];
6897
6898 for buffer in required {
6899 if !gpu_plan.required_buffers.contains(&buffer) {
6900 return Err(XlogError::UnsupportedEpistemicConstruct {
6901 construct: "epistemic GPU buffer contract".to_string(),
6902 context: format!(
6903 "accepted GPU execution requires buffer {:?}, but the plan declared {:?}",
6904 buffer, gpu_plan.required_buffers
6905 ),
6906 });
6907 }
6908 }
6909
6910 Ok(())
6911}
6912
6913fn expected_tuple_key_column_reads(bindings: &[EpistemicTupleMembershipBinding]) -> Result<usize> {
6914 bindings.iter().try_fold(0usize, |acc, binding| {
6915 checked_sum(acc, binding.key_columns.len())
6916 })
6917}
6918
6919fn world_view_bitset_bytes_per_candidate(literal_count: usize) -> Result<usize> {
6920 Ok(checked_sum(literal_count, 7)? / 8)
6921}
6922
6923fn epistemic_operator_code(op: EirEpistemicOp) -> u8 {
6924 match op {
6925 EirEpistemicOp::Know => 1,
6926 EirEpistemicOp::Possible => 2,
6927 }
6928}
6929
6930fn bounded_candidate_count(literal_count: usize, max_candidates: usize) -> Result<usize> {
6931 require_positive(literal_count, "epistemic GPU execution literals")?;
6932 require_positive(max_candidates, "epistemic GPU execution candidates")?;
6933 if literal_count > 31 {
6934 return Err(XlogError::UnsupportedEpistemicConstruct {
6935 construct: "epistemic GPU execution candidate generation".to_string(),
6936 context: format!("literal count {literal_count} exceeds 31-bit candidate mask"),
6937 });
6938 }
6939 let required_candidates = 1usize << literal_count;
6940 if max_candidates < required_candidates {
6941 return Err(XlogError::ResourceExhausted {
6942 context: "epistemic GPU execution candidate capacity".to_string(),
6943 estimated_bytes: required_candidates as u64,
6944 budget_bytes: max_candidates as u64,
6945 });
6946 }
6947 Ok(required_candidates)
6948}
6949
6950#[cfg(test)]
6951mod tests {
6952 use super::*;
6953 use xlog_core::ScalarType;
6954 use xlog_ir::EirTerm;
6955
6956 #[test]
6957 fn tuple_key_expectation_encodes_ground_integer_for_u32_column() {
6958 let expectation =
6959 TupleKeyExpectation::from_term(&EirTerm::Integer(42), ScalarType::U32).unwrap();
6960
6961 assert_eq!(
6962 expectation,
6963 TupleKeyExpectation {
6964 bits: 42,
6965 type_code: ScalarType::U32.to_code(),
6966 }
6967 );
6968 }
6969
6970 #[test]
6971 fn tuple_key_expectation_encodes_symbol_for_symbol_column() {
6972 let expectation =
6973 TupleKeyExpectation::from_term(&EirTerm::Symbol(7), ScalarType::Symbol).unwrap();
6974
6975 assert_eq!(
6976 expectation,
6977 TupleKeyExpectation {
6978 bits: 7,
6979 type_code: ScalarType::Symbol.to_code(),
6980 }
6981 );
6982 }
6983
6984 #[test]
6985 fn tuple_key_expectation_rejects_variable_as_ground_expectation() {
6986 let err =
6987 TupleKeyExpectation::from_term(&EirTerm::Variable("X".to_string()), ScalarType::U32)
6988 .expect_err("variable tuple keys require bound-output matching");
6989
6990 match err {
6991 XlogError::UnsupportedEpistemicConstruct { construct, context } => {
6992 assert_eq!(construct, "epistemic GPU tuple-key expectation");
6993 assert!(context.contains("cannot be encoded as a ground tuple-key expectation"));
6994 }
6995 other => panic!("expected tuple-key expectation error, got {other:?}"),
6996 }
6997 }
6998}