pub struct GpuXgcf { /* private fields */ }
Implementations
Source§impl GpuXgcf
impl GpuXgcf
pub fn from_device( builder: GpuCircuitBuilder, layout: GpuCircuitLayout, provider: &CudaKernelProvider, ) -> Result<GpuXgcf>
Sourcepub fn smooth_random_vars_device(
&self,
provider: &CudaKernelProvider,
random_var_list: &TrackedCudaSlice<u32>,
random_var_count: u32,
smooth_node_cap: u32,
smooth_edge_cap: u32,
) -> Result<GpuXgcf>
pub fn smooth_random_vars_device( &self, provider: &CudaKernelProvider, random_var_list: &TrackedCudaSlice<u32>, random_var_count: u32, smooth_node_cap: u32, smooth_edge_cap: u32, ) -> Result<GpuXgcf>
GPU-native smoothing pass for random variables.
Returns a new device-resident circuit that is smooth w.r.t. random_var_list.
This method performs no device->host data-plane transfers and traps on capacity overflow.
pub fn upload(provider: &CudaKernelProvider, circuit: &Xgcf) -> Result<Self>
pub fn max_var(&self) -> u32
Sourcepub fn root(&self) -> u32
pub fn root(&self) -> u32
Root node id of the circuit (XGCF requires exactly one root for evaluation/verification).
Sourcepub fn num_levels(&self) -> u32
pub fn num_levels(&self) -> u32
Number of topological levels in the circuit.
Sourcepub fn num_nodes_device(&self) -> &TrackedCudaSlice<u32>
pub fn num_nodes_device(&self) -> &TrackedCudaSlice<u32>
Device-resident actual node count (len = 1).
Sourcepub fn num_edges_device(&self) -> &TrackedCudaSlice<u32>
pub fn num_edges_device(&self) -> &TrackedCudaSlice<u32>
Device-resident actual edge count (len = 1).
Sourcepub fn level_nodes(&self) -> &TrackedCudaSlice<u32>
pub fn level_nodes(&self) -> &TrackedCudaSlice<u32>
Device-resident level -> node index mapping (len = num_nodes).
Sourcepub fn level_offsets(&self) -> &TrackedCudaSlice<u32>
pub fn level_offsets(&self) -> &TrackedCudaSlice<u32>
Device-resident offsets for each level (len = num_levels + 1).
Sourcepub fn node_type(&self) -> &TrackedCudaSlice<u8>
pub fn node_type(&self) -> &TrackedCudaSlice<u8>
Device-resident node type tags (see XgcfNodeType).
Sourcepub fn child_offsets(&self) -> &TrackedCudaSlice<u32>
pub fn child_offsets(&self) -> &TrackedCudaSlice<u32>
Device-resident CSR child offsets for AND/OR nodes (len = num_nodes + 1).
Sourcepub fn child_indices(&self) -> &TrackedCudaSlice<u32>
pub fn child_indices(&self) -> &TrackedCudaSlice<u32>
Device-resident CSR child indices for AND/OR nodes.
Sourcepub fn lit(&self) -> &TrackedCudaSlice<i32>
pub fn lit(&self) -> &TrackedCudaSlice<i32>
Device-resident literals for LIT nodes (signed DIMACS, 1-based var ids).
Sourcepub fn decision_var(&self) -> &TrackedCudaSlice<u32>
pub fn decision_var(&self) -> &TrackedCudaSlice<u32>
Device-resident decision var ids for DECISION nodes (0 for non-decision).
pub fn decision_child_false(&self) -> &TrackedCudaSlice<u32>
pub fn decision_child_true(&self) -> &TrackedCudaSlice<u32>
Sourcepub fn values(&self) -> &TrackedCudaSlice<f64>
pub fn values(&self) -> &TrackedCudaSlice<f64>
Device-resident per-node values buffer (log-space). Written by forward pass.
Sourcepub fn grad_true(&self) -> &TrackedCudaSlice<f64>
pub fn grad_true(&self) -> &TrackedCudaSlice<f64>
Device-resident gradient buffer for ln(true-weight) per CNF variable.
Sourcepub fn grad_false(&self) -> &TrackedCudaSlice<f64>
pub fn grad_false(&self) -> &TrackedCudaSlice<f64>
Device-resident gradient buffer for ln(false-weight) per CNF variable.
Sourcepub fn var_log_true(&self) -> &TrackedCudaSlice<f64>
pub fn var_log_true(&self) -> &TrackedCudaSlice<f64>
Device-resident log(true-weight) table.
Sourcepub fn var_log_false(&self) -> &TrackedCudaSlice<f64>
pub fn var_log_false(&self) -> &TrackedCudaSlice<f64>
Device-resident log(false-weight) table.
Sourcepub fn var_log_weights_mut(
&mut self,
) -> (&mut TrackedCudaSlice<f64>, &mut TrackedCudaSlice<f64>)
pub fn var_log_weights_mut( &mut self, ) -> (&mut TrackedCudaSlice<f64>, &mut TrackedCudaSlice<f64>)
Mutable access to both log-weight tables (true/false) at once.
This is useful when passing both slices to a single CUDA kernel launch, avoiding
overlapping mutable borrows of self.
Sourcepub fn set_free_var_mask_device(
&mut self,
mask: TrackedCudaSlice<u8>,
) -> Result<()>
pub fn set_free_var_mask_device( &mut self, mask: TrackedCudaSlice<u8>, ) -> Result<()>
Attach a device-resident free-variable mask (length = max_var + 1).
Sourcepub fn set_base_weights(
&mut self,
provider: &CudaKernelProvider,
var_log_weights: &[(f64, f64)],
) -> Result<()>
pub fn set_base_weights( &mut self, provider: &CudaKernelProvider, var_log_weights: &[(f64, f64)], ) -> Result<()>
Upload a host weight table into the device-resident var_log_true/var_log_false buffers.
This is intended for one-time initialization of static weights (evidence + non-neural facts). Neural fast-path updates should overwrite only the relevant subset on GPU.
Sourcepub fn eval_log_wmc_device_inplace(
&mut self,
provider: &CudaKernelProvider,
out_log_z: &mut TrackedCudaSlice<f64>,
) -> Result<()>
pub fn eval_log_wmc_device_inplace( &mut self, provider: &CudaKernelProvider, out_log_z: &mut TrackedCudaSlice<f64>, ) -> Result<()>
Evaluate logZ on the device using the currently loaded weights and write it into out_log_z.
This method performs no device->host transfers.
Sourcepub fn eval_log_wmc_device_into(
&mut self,
provider: &CudaKernelProvider,
var_log_weights: &[(f64, f64)],
out_log_z: &mut TrackedCudaSlice<f64>,
) -> Result<()>
pub fn eval_log_wmc_device_into( &mut self, provider: &CudaKernelProvider, var_log_weights: &[(f64, f64)], out_log_z: &mut TrackedCudaSlice<f64>, ) -> Result<()>
Evaluate logZ on the device and write it into out_log_z (uploads weights from host).
Sourcepub fn eval_log_wmc_device(
&mut self,
provider: &CudaKernelProvider,
var_log_weights: &[(f64, f64)],
) -> Result<TrackedCudaSlice<f64>>
pub fn eval_log_wmc_device( &mut self, provider: &CudaKernelProvider, var_log_weights: &[(f64, f64)], ) -> Result<TrackedCudaSlice<f64>>
Evaluate logZ on the device and return a device-resident scalar (uploads weights from host).
Sourcepub fn eval_grads_inplace(
&mut self,
provider: &CudaKernelProvider,
) -> Result<()>
pub fn eval_grads_inplace( &mut self, provider: &CudaKernelProvider, ) -> Result<()>
Evaluate the circuit and populate grad_true/grad_false on the device (no host reads).
Preconditions:
- var_log_true/var_log_false contain the current weights on device.
- Caller may read back results for testing/debugging, but this API performs no dtoh transfers.