Struct GpuXgcf

Source

pub struct GpuXgcf { /* private fields */ }

Implementations

Source

impl GpuXgcf

Source

pub fn from_device( builder: GpuCircuitBuilder, layout: GpuCircuitLayout, provider: &CudaKernelProvider, ) -> Result<GpuXgcf>

Source

pub fn smooth_random_vars_device( &self, provider: &CudaKernelProvider, random_var_list: &TrackedCudaSlice<u32>, random_var_count: u32, smooth_node_cap: u32, smooth_edge_cap: u32, ) -> Result<GpuXgcf>

GPU-native smoothing pass for random variables.

Returns a new device-resident circuit that is smooth with respect to random_var_list. This method performs no device->host data-plane transfers and traps on capacity overflow.

Source

pub fn upload(provider: &CudaKernelProvider, circuit: &Xgcf) -> Result<Self>

Source

pub fn max_var(&self) -> u32

Source

pub fn root(&self) -> u32

Root node id of the circuit (XGCF requires exactly one root for evaluation/verification).

Source

pub fn num_nodes(&self) -> usize

Capacity (upper bound) for XGCF nodes in the circuit buffers.

Source

pub fn num_edges(&self) -> usize

Capacity (upper bound) for XGCF edges in the circuit buffers.

Source

pub fn num_levels(&self) -> u32

Number of topological levels in the circuit.

Source

pub fn num_nodes_device(&self) -> &TrackedCudaSlice<u32>

Device-resident actual node count (len = 1).

Source

pub fn num_edges_device(&self) -> &TrackedCudaSlice<u32>

Device-resident actual edge count (len = 1).

Source

pub fn level_nodes(&self) -> &TrackedCudaSlice<u32>

Device-resident level -> node index mapping (len = num_nodes).

Source

pub fn level_offsets(&self) -> &TrackedCudaSlice<u32>

Device-resident offsets for each level (len = num_levels + 1).

Source

pub fn node_type(&self) -> &TrackedCudaSlice<u8>

Device-resident node type tags (see XgcfNodeType).

Source

pub fn child_offsets(&self) -> &TrackedCudaSlice<u32>

Device-resident CSR child offsets for AND/OR nodes (len = num_nodes + 1).

Source

pub fn child_indices(&self) -> &TrackedCudaSlice<u32>

Device-resident CSR child indices for AND/OR nodes.

Source

pub fn lit(&self) -> &TrackedCudaSlice<i32>

Device-resident literals for LIT nodes (signed DIMACS, 1-based var ids).

Source

pub fn decision_var(&self) -> &TrackedCudaSlice<u32>

Device-resident decision var ids for DECISION nodes (0 for non-decision).

Source

pub fn decision_child_false(&self) -> &TrackedCudaSlice<u32>

Source

pub fn decision_child_true(&self) -> &TrackedCudaSlice<u32>

Source

pub fn values(&self) -> &TrackedCudaSlice<f64>

Device-resident per-node values buffer (log-space). Written by forward pass.

Source

pub fn grad_true(&self) -> &TrackedCudaSlice<f64>

Device-resident gradient buffer for ln(true-weight) per CNF variable.

Source

pub fn grad_false(&self) -> &TrackedCudaSlice<f64>

Device-resident gradient buffer for ln(false-weight) per CNF variable.

Source

pub fn var_log_true(&self) -> &TrackedCudaSlice<f64>

Device-resident log(true-weight) table.

Source

pub fn var_log_false(&self) -> &TrackedCudaSlice<f64>

Device-resident log(false-weight) table.

Source

pub fn var_log_weights_mut( &mut self, ) -> (&mut TrackedCudaSlice<f64>, &mut TrackedCudaSlice<f64>)

Mutable access to both log-weight tables (true/false) at once.

This is useful when passing both slices to a single CUDA kernel launch, avoiding overlapping mutable borrows of self.

Source

pub fn set_free_var_mask_device( &mut self, mask: TrackedCudaSlice<u8>, ) -> Result<()>

Attach a device-resident free-variable mask (len = max_var + 1).

Source

pub fn set_base_weights( &mut self, provider: &CudaKernelProvider, var_log_weights: &[(f64, f64)], ) -> Result<()>

Upload a host weight table into the device-resident var_log_true/var_log_false buffers.

This is intended for one-time initialization of static weights (evidence + non-neural facts). Neural fast-path updates should overwrite only the relevant subset on GPU.

Source

pub fn eval_log_wmc_device_inplace( &mut self, provider: &CudaKernelProvider, out_log_z: &mut TrackedCudaSlice<f64>, ) -> Result<()>

Evaluate logZ on the device using the currently loaded weights and write it into out_log_z.

This method performs no device->host transfers.

Source

pub fn eval_log_wmc_device_into( &mut self, provider: &CudaKernelProvider, var_log_weights: &[(f64, f64)], out_log_z: &mut TrackedCudaSlice<f64>, ) -> Result<()>

Evaluate logZ on the device and write it into out_log_z (uploads weights from host).

Source

pub fn eval_log_wmc_device( &mut self, provider: &CudaKernelProvider, var_log_weights: &[(f64, f64)], ) -> Result<TrackedCudaSlice<f64>>

Evaluate logZ on the device and return a device-resident scalar (uploads weights from host).

Source

pub fn eval_grads_inplace( &mut self, provider: &CudaKernelProvider, ) -> Result<()>

Evaluate the circuit and populate grad_true/grad_false on the device (no host reads).

Preconditions:

- var_log_true/var_log_false contain the current weights on device.
- Caller may read back results for testing/debugging, but this API performs no device->host transfers.

Source