// tessera_ui/renderer/compute/pipeline.rs

//! GPU compute pipeline system for Tessera UI framework.
//!
//! This module provides the infrastructure for GPU compute operations in
//! Tessera, enabling advanced visual effects and post-processing operations
//! that would be inefficient or impossible to achieve with traditional
//! CPU-based approaches.
//!
//! # Architecture Overview
//!
//! The compute pipeline system is designed to work seamlessly with the
//! rendering pipeline, using a ping-pong buffer approach for efficient
//! multi-pass operations. Each compute pipeline processes a specific type of
//! compute command and operates on texture data using GPU compute shaders.
//!
//! ## Key Components
//!
//! - [`ComputablePipeline<C>`]: The main trait for implementing custom compute
//!   pipelines
//! - [`ComputePipelineRegistry`]: Manages and dispatches commands to registered
//!   compute pipelines
//! - [`ComputeResourceManager`]: Manages GPU buffers and resources for compute
//!   operations
//!
//! # Design Philosophy
//!
//! The compute pipeline system embraces WGPU's compute shader capabilities to
//! enable:
//!
//! - **Advanced Post-Processing**: Blur, contrast adjustment, color grading,
//!   and other image effects
//! - **Parallel Processing**: Leverage GPU parallelism for computationally
//!   intensive operations
//! - **Real-Time Effects**: Achieve complex visual effects at interactive frame
//!   rates
//! - **Memory Efficiency**: Use GPU memory directly without CPU roundtrips
//!
//! # Ping-Pong Rendering
//!
//! The system uses a ping-pong approach where:
//!
//! 1. **Input Texture**: Contains the result from previous rendering or compute
//!    pass
//! 2. **Output Texture**: Receives the processed result from the current
//!    compute operation
//! 3. **Format Convention**: All textures use `wgpu::TextureFormat::Rgba8Unorm`
//!    for compatibility
//!
//! This approach enables efficient chaining of multiple compute operations
//! without intermediate CPU involvement.
//!
//! # Implementation Guide
//!
//! ## Creating a Custom Compute Pipeline
//!
//! To create a custom compute pipeline:
//!
//! 1. Define your compute command struct implementing [`ComputeCommand`]
//! 2. Create a pipeline struct implementing [`ComputablePipeline<YourCommand>`]
//! 3. Write a compute shader in WGSL
//! 4. Register the pipeline with [`ComputePipelineRegistry::register`]
//!
//! # Performance Considerations
//!
//! - **Workgroup Size**: Choose workgroup sizes that align with GPU
//!   architecture (typically 8x8 or 16x16)
//! - **Memory Access**: Optimize memory access patterns in shaders for better
//!   cache utilization
//! - **Resource Reuse**: Use the [`ComputeResourceManager`] to reuse buffers
//!   across frames
//! - **Batch Operations**: Combine multiple similar operations when possible
//!
//! # Texture Format Requirements
//!
//! Due to WGPU limitations, compute shaders require specific texture formats:
//!
//! - **Input Textures**: Can be any readable format, typically from render
//!   passes
//! - **Output Textures**: Must use `wgpu::TextureFormat::Rgba8Unorm` for
//!   storage binding
//! - **sRGB Limitation**: sRGB formats cannot be used as storage textures
//!
//! The framework automatically handles format conversions when necessary.

use std::{any::TypeId, collections::HashMap};

use crate::{PxPosition, PxRect, PxSize, compute::resource::ComputeResourceManager};

use super::command::ComputeCommand;
89
/// Type-erased metadata describing a compute command within a batch.
///
/// Used internally when the concrete [`ComputeCommand`] type is not known at
/// compile time; the erased pipeline wrapper downcasts `command` back to the
/// concrete type before dispatch.
pub struct ErasedComputeBatchItem<'a> {
    /// The compute command to execute.
    pub command: &'a dyn ComputeCommand,
    /// The measured size of the target region.
    pub size: PxSize,
    /// The absolute position of the target region.
    pub position: PxPosition,
    /// The rectangle of the content that will be written.
    pub target_area: PxRect,
}
101
/// Strongly typed metadata describing a compute command within a batch.
///
/// The typed counterpart of [`ErasedComputeBatchItem`], handed to
/// [`ComputablePipeline::dispatch`] via [`ComputeContext::items`].
pub struct ComputeBatchItem<'a, C: ComputeCommand> {
    /// The compute command to execute.
    pub command: &'a C,
    /// The measured size of the target region.
    pub size: PxSize,
    /// The absolute position of the target region.
    pub position: PxPosition,
    /// The rectangle of the content that will be written.
    pub target_area: PxRect,
}
113
/// Provides comprehensive context for compute operations within a compute pass.
///
/// This struct bundles essential WGPU resources, configuration, and
/// command-specific data required for a compute pipeline to process its
/// commands.
///
/// # Type Parameters
///
/// * `C` - The specific [`ComputeCommand`] type being processed.
///
/// # Lifetimes
///
/// * `'a` - Borrows of the device, queue, views, and resource manager.
/// * `'b` - Lifetime of the underlying `wgpu::ComputePass`.
/// * `'c` - Lifetime of the batched items slice.
pub struct ComputeContext<'a, 'b, 'c, C: ComputeCommand> {
    /// WGPU device used to create and manage GPU resources.
    pub device: &'a wgpu::Device,
    /// Queue for submitting GPU workloads and writing buffer data.
    pub queue: &'a wgpu::Queue,
    /// Surface configuration describing output formats and dimensions.
    pub config: &'a wgpu::SurfaceConfiguration,
    /// Target texture size for the current compute pass.
    pub target_size: PxSize,
    /// Active compute pass encoder used to record compute commands.
    pub compute_pass: &'a mut wgpu::ComputePass<'b>,
    /// Batch of typed compute items to process.
    pub items: &'c [ComputeBatchItem<'c, C>],
    /// Shared resource manager used to reuse GPU buffers across frames.
    pub resource_manager: &'a mut ComputeResourceManager,
    /// Input texture view sampled by the compute pass.
    pub input_view: &'a wgpu::TextureView,
    /// Output texture view written by the compute pass.
    pub output_view: &'a wgpu::TextureView,
}
159
/// Type-erased context used when dispatching compute pipelines.
///
/// Mirrors [`ComputeContext`] minus the typed `items` slice; the erased
/// pipeline wrapper rebuilds a typed [`ComputeContext`] from these fields
/// after downcasting each command.
pub(crate) struct ErasedDispatchContext<'a, 'b> {
    /// WGPU device used to create and manage GPU resources.
    pub device: &'a wgpu::Device,
    /// Queue for submitting GPU workloads.
    pub queue: &'a wgpu::Queue,
    /// Surface configuration describing output formats and dimensions.
    pub config: &'a wgpu::SurfaceConfiguration,
    /// Target texture size for the current compute pass.
    pub target_size: PxSize,
    /// Active compute pass encoder.
    pub compute_pass: &'a mut wgpu::ComputePass<'b>,
    /// Shared resource manager used to reuse GPU buffers.
    pub resource_manager: &'a mut ComputeResourceManager,
    /// Input texture view read by the compute pass.
    pub input_view: &'a wgpu::TextureView,
    /// Output texture view written by the compute pass.
    pub output_view: &'a wgpu::TextureView,
}
171
/// Core trait for implementing GPU compute pipelines.
///
/// This trait defines the interface for compute pipelines that process specific
/// types of compute commands using GPU compute shaders. Each pipeline is
/// responsible for setting up compute resources, managing shader dispatch, and
/// processing texture data.
///
/// # Type Parameters
///
/// * `C` - The specific [`ComputeCommand`] type this pipeline can handle
///
/// # Design Principles
///
/// - **Single Responsibility**: Each pipeline handles one specific type of
///   compute operation
/// - **Stateless Operation**: Pipelines should not maintain state between
///   dispatch calls
/// - **Resource Efficiency**: Reuse GPU resources when possible through the
///   resource manager
/// - **Thread Safety**: All implementations must be `Send + Sync` for parallel
///   execution
///
/// # Integration with Rendering
///
/// Compute pipelines operate within the broader rendering pipeline, typically:
///
/// 1. **After Rendering**: Process the rendered scene for post-effects
/// 2. **Between Passes**: Transform data between different rendering stages
/// 3. **Before Rendering**: Prepare data or textures for subsequent render
///    operations
pub trait ComputablePipeline<C: ComputeCommand>: Send + Sync + 'static {
    /// Dispatches the compute command within an active compute pass.
    ///
    /// This method receives one or more compute commands of the same type.
    /// Implementations may choose to process the batch collectively (e.g.,
    /// by packing data into a single dispatch) or sequentially iterate over
    /// the items. It should set up the necessary GPU resources, bind them
    /// to the compute pipeline, and dispatch the appropriate number of
    /// workgroups to process the input texture.
    ///
    /// # Parameters
    ///
    /// * `context` - The context for the compute pass.
    ///
    /// # Texture Format Requirements
    ///
    /// Due to WGPU limitations, storage textures have specific format
    /// requirements:
    ///
    /// - **Input Texture**: Can be any readable format, typically from render
    ///   passes
    /// - **Output Texture**: Must use `wgpu::TextureFormat::Rgba8Unorm` format
    /// - **sRGB Limitation**: sRGB formats cannot be used as storage textures
    ///
    /// The framework ensures that `output_view` always uses a compatible format
    /// for storage binding operations.
    ///
    /// # Workgroup Dispatch Guidelines
    ///
    /// When dispatching workgroups, consider:
    ///
    /// - **Workgroup Size**: Match your shader's `@workgroup_size` declaration
    /// - **Coverage**: Ensure all pixels are processed by calculating
    ///   appropriate dispatch dimensions
    /// - **Alignment**: Round up dispatch dimensions to cover the entire
    ///   texture
    ///
    /// # Resource Management
    ///
    /// Use the `resource_manager` to:
    /// - Store persistent buffers that can be reused across frames
    /// - Avoid recreating expensive GPU resources
    /// - Manage buffer lifetimes efficiently
    ///
    /// # Error Handling
    ///
    /// This method should handle errors gracefully:
    /// - Validate command parameters before use
    /// - Ensure texture dimensions are compatible
    /// - Handle resource creation failures appropriately
    fn dispatch(&mut self, context: &mut ComputeContext<C>);
}
254
/// Internal trait for type erasure of computable pipelines.
///
/// This trait enables dynamic dispatch of compute commands to their
/// corresponding pipelines without knowing the specific command type at compile
/// time. It's used internally by the [`ComputePipelineRegistry`] and should not
/// be implemented directly by users.
///
/// The type erasure is undone by downcasting each `&dyn ComputeCommand` back
/// to the concrete command type (via `downcast_ref`) before invoking the
/// strongly typed pipeline.
///
/// # Implementation Note
///
/// This trait is automatically implemented for any type that implements
/// [`ComputablePipeline<C>`] through the [`ComputablePipelineImpl`] wrapper.
pub(crate) trait ErasedComputablePipeline: Send + Sync {
    /// Dispatches a batch of type-erased compute commands.
    ///
    /// All `items` are expected to hold commands of the single concrete type
    /// this pipeline was registered for.
    fn dispatch_erased(
        &mut self,
        context: ErasedDispatchContext<'_, '_>,
        items: &[ErasedComputeBatchItem<'_>],
    );
}
277
/// A wrapper to implement `ErasedComputablePipeline` for any
/// `ComputablePipeline`.
///
/// Stores the strongly typed pipeline and remembers its command type `C`
/// through a zero-sized marker.
struct ComputablePipelineImpl<C: ComputeCommand, P: ComputablePipeline<C>> {
    // The wrapped, strongly typed pipeline instance.
    pipeline: P,
    // Marker tying this wrapper to command type `C` without storing a value.
    _command: std::marker::PhantomData<C>,
}
284
285impl<C: ComputeCommand + 'static, P: ComputablePipeline<C>> ErasedComputablePipeline
286    for ComputablePipelineImpl<C, P>
287{
288    fn dispatch_erased(
289        &mut self,
290        context: ErasedDispatchContext<'_, '_>,
291        items: &[ErasedComputeBatchItem<'_>],
292    ) {
293        if items.is_empty() {
294            return;
295        }
296
297        let mut typed_items: Vec<ComputeBatchItem<'_, C>> = Vec::with_capacity(items.len());
298        for item in items {
299            let command = item
300                .command
301                .downcast_ref::<C>()
302                .expect("Compute batch contained command of unexpected type");
303            typed_items.push(ComputeBatchItem {
304                command,
305                size: item.size,
306                position: item.position,
307                target_area: item.target_area,
308            });
309        }
310
311        self.pipeline.dispatch(&mut ComputeContext {
312            device: context.device,
313            queue: context.queue,
314            config: context.config,
315            target_size: context.target_size,
316            compute_pass: context.compute_pass,
317            items: &typed_items,
318            resource_manager: context.resource_manager,
319            input_view: context.input_view,
320            output_view: context.output_view,
321        });
322    }
323}
324
/// Registry for managing and dispatching compute pipelines.
///
/// The `ComputePipelineRegistry` serves as the central hub for all compute
/// pipelines in the Tessera framework. It maintains a collection of registered
/// pipelines and handles the dispatch of compute commands to their appropriate
/// pipelines.
///
/// # Architecture
///
/// The registry uses type erasure to store pipelines of different types in a
/// single collection keyed by the command's `TypeId`. When a batch of compute
/// commands needs processing, the registry looks up the pipeline registered
/// for that command type and dispatches the batch to it.
///
/// # Usage Pattern
///
/// 1. Create a new registry
/// 2. Register all required compute pipelines during application initialization
/// 3. The renderer uses the registry to dispatch commands during frame
///    rendering
///
/// # Performance Considerations
///
/// - Pipeline lookup is O(1) on average due to HashMap implementation.
///
/// # Thread Safety
///
/// The registry and all registered pipelines must be `Send + Sync` to support
/// parallel execution in the rendering system.
#[derive(Default)]
pub struct ComputePipelineRegistry {
    // Registered pipelines keyed by the `TypeId` of the command type each one
    // handles; at most one pipeline per command type.
    pipelines: HashMap<TypeId, Box<dyn ErasedComputablePipeline>>,
}
358
359impl ComputePipelineRegistry {
360    /// Creates a new empty compute pipeline registry.
361    ///
362    /// # Example
363    ///
364    /// ```
365    /// use tessera_ui::renderer::compute::ComputePipelineRegistry;
366    ///
367    /// let registry = ComputePipelineRegistry::new();
368    /// ```
369    pub fn new() -> Self {
370        Self::default()
371    }
372
373    /// Registers a new compute pipeline for a specific command type.
374    ///
375    /// This method takes ownership of the pipeline and wraps it in a
376    /// type-erased container that can be stored alongside other pipelines
377    /// of different types.
378    ///
379    /// # Type Parameters
380    ///
381    /// * `C` - The [`ComputeCommand`] type this pipeline handles
382    ///
383    /// # Parameters
384    ///
385    /// * `pipeline` - The pipeline instance to register
386    ///
387    /// # Thread Safety
388    ///
389    /// The pipeline must implement `Send + Sync` to be compatible with
390    /// Tessera's parallel rendering architecture.
391    pub fn register<C: ComputeCommand + 'static>(
392        &mut self,
393        pipeline: impl ComputablePipeline<C> + 'static,
394    ) {
395        let erased_pipeline = Box::new(ComputablePipelineImpl {
396            pipeline,
397            _command: std::marker::PhantomData,
398        });
399        self.pipelines.insert(TypeId::of::<C>(), erased_pipeline);
400    }
401
402    /// Dispatches one or more commands to their corresponding registered
403    /// pipeline.
404    pub(crate) fn dispatch_erased(
405        &mut self,
406        context: ErasedDispatchContext<'_, '_>,
407        items: &[ErasedComputeBatchItem<'_>],
408    ) {
409        if items.is_empty() {
410            return;
411        }
412
413        let command_type_id = items[0].command.as_any().type_id();
414        if let Some(pipeline) = self.pipelines.get_mut(&command_type_id) {
415            pipeline.dispatch_erased(context, items);
416        } else {
417            panic!(
418                "No pipeline found for command {:?}",
419                std::any::type_name_of_val(items[0].command)
420            );
421        }
422    }
423}