tessera_ui/renderer/compute/pipeline.rs
1//! GPU compute pipeline system for Tessera UI framework.
2//!
3//! This module provides the infrastructure for GPU compute operations in
4//! Tessera, enabling advanced visual effects and post-processing operations
5//! that would be inefficient or impossible to achieve with traditional
6//! CPU-based approaches.
7//!
8//! # Architecture Overview
9//!
10//! The compute pipeline system is designed to work seamlessly with the
11//! rendering pipeline, using a ping-pong buffer approach for efficient
12//! multi-pass operations. Each compute pipeline processes a specific type of
13//! compute command and operates on texture data using GPU compute shaders.
14//!
15//! ## Key Components
16//!
17//! - [`ComputablePipeline<C>`]: The main trait for implementing custom compute
18//! pipelines
19//! - [`ComputePipelineRegistry`]: Manages and dispatches commands to registered
20//! compute pipelines
21//! - [`ComputeResourceManager`]: Manages GPU buffers and resources for compute
22//! operations
23//!
24//! # Design Philosophy
25//!
26//! The compute pipeline system embraces WGPU's compute shader capabilities to
27//! enable:
28//!
29//! - **Advanced Post-Processing**: Blur, contrast adjustment, color grading,
30//! and other image effects
31//! - **Parallel Processing**: Leverage GPU parallelism for computationally
32//! intensive operations
33//! - **Real-Time Effects**: Achieve complex visual effects at interactive frame
34//! rates
35//! - **Memory Efficiency**: Use GPU memory directly without CPU roundtrips
36//!
37//! # Ping-Pong Rendering
38//!
39//! The system uses a ping-pong approach where:
40//!
41//! 1. **Input Texture**: Contains the result from previous rendering or compute
42//! pass
43//! 2. **Output Texture**: Receives the processed result from the current
44//! compute operation
45//! 3. **Format Convention**: All textures use `wgpu::TextureFormat::Rgba8Unorm`
46//! for compatibility
47//!
48//! This approach enables efficient chaining of multiple compute operations
49//! without intermediate CPU involvement.
50//!
51//! # Implementation Guide
52//!
53//! ## Creating a Custom Compute Pipeline
54//!
55//! To create a custom compute pipeline:
56//!
57//! 1. Define your compute command struct implementing [`ComputeCommand`]
58//! 2. Create a pipeline struct implementing [`ComputablePipeline<YourCommand>`]
59//! 3. Write a compute shader in WGSL
60//! 4. Register the pipeline with [`ComputePipelineRegistry::register`]
61//!
62//! # Performance Considerations
63//!
64//! - **Workgroup Size**: Choose workgroup sizes that align with GPU
65//! architecture (typically 8x8 or 16x16)
66//! - **Memory Access**: Optimize memory access patterns in shaders for better
67//! cache utilization
68//! - **Resource Reuse**: Use the [`ComputeResourceManager`] to reuse buffers
69//! across frames
70//! - **Batch Operations**: Combine multiple similar operations when possible
71//!
72//! # Texture Format Requirements
73//!
74//! Due to WGPU limitations, compute shaders require specific texture formats:
75//!
76//! - **Input Textures**: Can be any readable format, typically from render
77//! passes
78//! - **Output Textures**: Must use `wgpu::TextureFormat::Rgba8Unorm` for
79//! storage binding
80//! - **sRGB Limitation**: sRGB formats cannot be used as storage textures
81//!
82//! The framework automatically handles format conversions when necessary.
83
84use std::{any::TypeId, collections::HashMap};
85
86use crate::{PxPosition, PxRect, PxSize, compute::resource::ComputeResourceManager};
87
88use super::command::ComputeCommand;
89
90/// Type-erased metadata describing a compute command within a batch.
91pub struct ErasedComputeBatchItem<'a> {
92 /// The compute command to execute.
93 pub command: &'a dyn ComputeCommand,
94 /// The measured size of the target region.
95 pub size: PxSize,
96 /// The absolute position of the target region.
97 pub position: PxPosition,
98 /// The rectangle of the content that will be written.
99 pub target_area: PxRect,
100}
101
102/// Strongly typed metadata describing a compute command within a batch.
103pub struct ComputeBatchItem<'a, C: ComputeCommand> {
104 /// The compute command to execute.
105 pub command: &'a C,
106 /// The measured size of the target region.
107 pub size: PxSize,
108 /// The absolute position of the target region.
109 pub position: PxPosition,
110 /// The rectangle of the content that will be written.
111 pub target_area: PxRect,
112}
113
/// Provides comprehensive context for compute operations within a compute pass.
///
/// This struct bundles essential WGPU resources, configuration, and
/// command-specific data required for a compute pipeline to process its
/// commands.
///
/// # Type Parameters
///
/// * `C` - The specific [`ComputeCommand`] type being processed.
///
/// # Fields
///
/// * `device` - The WGPU device, used for creating and managing GPU resources.
/// * `queue` - The WGPU queue, used for submitting command buffers and writing
///   buffer data.
/// * `config` - The current surface configuration, providing information like
///   format and dimensions.
/// * `target_size` - The size of the target texture for the current compute
///   pass.
/// * `compute_pass` - The active `wgpu::ComputePass` encoder, used to record
///   compute commands.
/// * `items` - A slice of [`ComputeBatchItem`]s, each containing a compute
///   command and its metadata.
/// * `resource_manager` - A mutable reference to the
///   [`ComputeResourceManager`], used for managing reusable GPU buffers.
/// * `input_view` - A view of the input texture for the compute operation.
/// * `output_view` - A view of the output texture for the compute operation.
pub struct ComputeContext<'a, 'b, 'c, C: ComputeCommand> {
    /// WGPU device used to create and manage GPU resources.
    pub device: &'a wgpu::Device,
    /// Queue for submitting GPU workloads.
    pub queue: &'a wgpu::Queue,
    /// Surface configuration describing output formats and dimensions.
    pub config: &'a wgpu::SurfaceConfiguration,
    /// Target texture size for the current compute pass.
    pub target_size: PxSize,
    /// Active compute pass encoder.
    pub compute_pass: &'a mut wgpu::ComputePass<'b>,
    /// Batch of typed compute items to process.
    pub items: &'c [ComputeBatchItem<'c, C>],
    /// Shared resource manager used to reuse GPU buffers.
    pub resource_manager: &'a mut ComputeResourceManager,
    /// Input texture view sampled by the compute pass.
    pub input_view: &'a wgpu::TextureView,
    /// Output texture view written by the compute pass.
    pub output_view: &'a wgpu::TextureView,
}
159
/// Type-erased context used when dispatching compute pipelines.
///
/// Mirrors [`ComputeContext`] minus the typed `items` slice; the matching
/// batch items are passed separately to the erased dispatch entry point.
pub(crate) struct ErasedDispatchContext<'a, 'b> {
    /// WGPU device used to create and manage GPU resources.
    pub device: &'a wgpu::Device,
    /// Queue for submitting GPU workloads.
    pub queue: &'a wgpu::Queue,
    /// Surface configuration describing output formats and dimensions.
    pub config: &'a wgpu::SurfaceConfiguration,
    /// Target texture size for the current compute pass.
    pub target_size: PxSize,
    /// Active compute pass encoder.
    pub compute_pass: &'a mut wgpu::ComputePass<'b>,
    /// Shared resource manager used to reuse GPU buffers.
    pub resource_manager: &'a mut ComputeResourceManager,
    /// Input texture view sampled by the compute pass.
    pub input_view: &'a wgpu::TextureView,
    /// Output texture view written by the compute pass.
    pub output_view: &'a wgpu::TextureView,
}
171
/// Core trait for implementing GPU compute pipelines.
///
/// This trait defines the interface for compute pipelines that process specific
/// types of compute commands using GPU compute shaders. Each pipeline is
/// responsible for setting up compute resources, managing shader dispatch, and
/// processing texture data.
///
/// # Type Parameters
///
/// * `C` - The specific [`ComputeCommand`] type this pipeline can handle
///
/// # Design Principles
///
/// - **Single Responsibility**: Each pipeline handles one specific type of
///   compute operation
/// - **Stateless Operation**: Pipelines should not maintain state between
///   dispatch calls
/// - **Resource Efficiency**: Reuse GPU resources when possible through the
///   resource manager
/// - **Thread Safety**: All implementations must be `Send + Sync` for parallel
///   execution
///
/// # Integration with Rendering
///
/// Compute pipelines operate within the broader rendering pipeline, typically:
///
/// 1. **After Rendering**: Process the rendered scene for post-effects
/// 2. **Between Passes**: Transform data between different rendering stages
/// 3. **Before Rendering**: Prepare data or textures for subsequent render
///    operations
pub trait ComputablePipeline<C: ComputeCommand>: Send + Sync + 'static {
    /// Dispatches the compute command within an active compute pass.
    ///
    /// This method receives one or more compute commands of the same type
    /// via `context.items`. The framework skips dispatch entirely for empty
    /// batches, so implementations can assume at least one item is present.
    /// Implementations may choose to process the batch collectively (e.g.,
    /// by packing data into a single dispatch) or sequentially iterate over
    /// the items. It should set up the necessary GPU resources, bind them
    /// to the compute pipeline, and dispatch the appropriate number of
    /// workgroups to process the input texture.
    ///
    /// # Parameters
    ///
    /// * `context` - The context for the compute pass.
    ///
    /// # Texture Format Requirements
    ///
    /// Due to WGPU limitations, storage textures have specific format
    /// requirements:
    ///
    /// - **Input Texture**: Can be any readable format, typically from render
    ///   passes
    /// - **Output Texture**: Must use `wgpu::TextureFormat::Rgba8Unorm` format
    /// - **sRGB Limitation**: sRGB formats cannot be used as storage textures
    ///
    /// The framework ensures that `output_view` always uses a compatible format
    /// for storage binding operations.
    ///
    /// # Workgroup Dispatch Guidelines
    ///
    /// When dispatching workgroups, consider:
    ///
    /// - **Workgroup Size**: Match your shader's `@workgroup_size` declaration
    /// - **Coverage**: Ensure all pixels are processed by calculating
    ///   appropriate dispatch dimensions
    /// - **Alignment**: Round up dispatch dimensions to cover the entire
    ///   texture
    ///
    /// # Resource Management
    ///
    /// Use the `resource_manager` to:
    /// - Store persistent buffers that can be reused across frames
    /// - Avoid recreating expensive GPU resources
    /// - Manage buffer lifetimes efficiently
    ///
    /// # Error Handling
    ///
    /// This method should handle errors gracefully:
    /// - Validate command parameters before use
    /// - Ensure texture dimensions are compatible
    /// - Handle resource creation failures appropriately
    fn dispatch(&mut self, context: &mut ComputeContext<C>);
}
254
/// Internal trait for type erasure of computable pipelines.
///
/// This trait enables dynamic dispatch of compute commands to their
/// corresponding pipelines without knowing the specific command type at compile
/// time. It's used internally by the [`ComputePipelineRegistry`] and should not
/// be implemented directly by users.
///
/// The type erasure is undone at dispatch time by downcasting each
/// `&dyn ComputeCommand` back to the pipeline's concrete command type.
///
/// # Implementation Note
///
/// This trait is automatically implemented for any type that implements
/// [`ComputablePipeline<C>`] through the [`ComputablePipelineImpl`] wrapper.
pub(crate) trait ErasedComputablePipeline: Send + Sync {
    /// Dispatches a batch of type-erased compute commands.
    ///
    /// All `items` must carry commands of the type this pipeline was
    /// registered for; a mismatched item panics during downcasting.
    fn dispatch_erased(
        &mut self,
        context: ErasedDispatchContext<'_, '_>,
        items: &[ErasedComputeBatchItem<'_>],
    );
}
277
/// A wrapper to implement `ErasedComputablePipeline` for any
/// `ComputablePipeline`.
struct ComputablePipelineImpl<C: ComputeCommand, P: ComputablePipeline<C>> {
    /// The concrete, strongly typed pipeline being wrapped.
    pipeline: P,
    /// Marker tying this wrapper to the command type `C` it handles.
    _command: std::marker::PhantomData<C>,
}
284
285impl<C: ComputeCommand + 'static, P: ComputablePipeline<C>> ErasedComputablePipeline
286 for ComputablePipelineImpl<C, P>
287{
288 fn dispatch_erased(
289 &mut self,
290 context: ErasedDispatchContext<'_, '_>,
291 items: &[ErasedComputeBatchItem<'_>],
292 ) {
293 if items.is_empty() {
294 return;
295 }
296
297 let mut typed_items: Vec<ComputeBatchItem<'_, C>> = Vec::with_capacity(items.len());
298 for item in items {
299 let command = item
300 .command
301 .downcast_ref::<C>()
302 .expect("Compute batch contained command of unexpected type");
303 typed_items.push(ComputeBatchItem {
304 command,
305 size: item.size,
306 position: item.position,
307 target_area: item.target_area,
308 });
309 }
310
311 self.pipeline.dispatch(&mut ComputeContext {
312 device: context.device,
313 queue: context.queue,
314 config: context.config,
315 target_size: context.target_size,
316 compute_pass: context.compute_pass,
317 items: &typed_items,
318 resource_manager: context.resource_manager,
319 input_view: context.input_view,
320 output_view: context.output_view,
321 });
322 }
323}
324
/// Registry for managing and dispatching compute pipelines.
///
/// The `ComputePipelineRegistry` serves as the central hub for all compute
/// pipelines in the Tessera framework. It maintains a collection of registered
/// pipelines and handles the dispatch of compute commands to their appropriate
/// pipelines.
///
/// # Architecture
///
/// The registry uses type erasure to store pipelines of different types in a
/// single collection. When a compute command needs to be processed, the
/// registry looks up the pipeline registered for the command's concrete type
/// (via its `TypeId`) and dispatches the batch directly to it; dispatching a
/// command type with no registered pipeline is a panic.
///
/// # Usage Pattern
///
/// 1. Create a new registry
/// 2. Register all required compute pipelines during application initialization
/// 3. The renderer uses the registry to dispatch commands during frame
///    rendering
///
/// # Performance Considerations
///
/// - Pipeline lookup is O(1) on average due to HashMap implementation.
///
/// # Thread Safety
///
/// The registry and all registered pipelines must be `Send + Sync` to support
/// parallel execution in the rendering system.
#[derive(Default)]
pub struct ComputePipelineRegistry {
    /// Registered pipelines, keyed by the `TypeId` of the command type each
    /// one handles.
    pipelines: HashMap<TypeId, Box<dyn ErasedComputablePipeline>>,
}
358
359impl ComputePipelineRegistry {
360 /// Creates a new empty compute pipeline registry.
361 ///
362 /// # Example
363 ///
364 /// ```
365 /// use tessera_ui::renderer::compute::ComputePipelineRegistry;
366 ///
367 /// let registry = ComputePipelineRegistry::new();
368 /// ```
369 pub fn new() -> Self {
370 Self::default()
371 }
372
373 /// Registers a new compute pipeline for a specific command type.
374 ///
375 /// This method takes ownership of the pipeline and wraps it in a
376 /// type-erased container that can be stored alongside other pipelines
377 /// of different types.
378 ///
379 /// # Type Parameters
380 ///
381 /// * `C` - The [`ComputeCommand`] type this pipeline handles
382 ///
383 /// # Parameters
384 ///
385 /// * `pipeline` - The pipeline instance to register
386 ///
387 /// # Thread Safety
388 ///
389 /// The pipeline must implement `Send + Sync` to be compatible with
390 /// Tessera's parallel rendering architecture.
391 pub fn register<C: ComputeCommand + 'static>(
392 &mut self,
393 pipeline: impl ComputablePipeline<C> + 'static,
394 ) {
395 let erased_pipeline = Box::new(ComputablePipelineImpl {
396 pipeline,
397 _command: std::marker::PhantomData,
398 });
399 self.pipelines.insert(TypeId::of::<C>(), erased_pipeline);
400 }
401
402 /// Dispatches one or more commands to their corresponding registered
403 /// pipeline.
404 pub(crate) fn dispatch_erased(
405 &mut self,
406 context: ErasedDispatchContext<'_, '_>,
407 items: &[ErasedComputeBatchItem<'_>],
408 ) {
409 if items.is_empty() {
410 return;
411 }
412
413 let command_type_id = items[0].command.as_any().type_id();
414 if let Some(pipeline) = self.pipelines.get_mut(&command_type_id) {
415 pipeline.dispatch_erased(context, items);
416 } else {
417 panic!(
418 "No pipeline found for command {:?}",
419 std::any::type_name_of_val(items[0].command)
420 );
421 }
422 }
423}