// ranim_render/utils.rs

1use std::{fmt::Debug, marker::PhantomData, ops::Deref};
2
3use tracing::{info, warn};
4use wgpu::util::DeviceExt;
5
6pub mod collections {
7    use slotmap::{Key, SecondaryMap, SlotMap};
8    use std::{
9        any::{Any, TypeId},
10        collections::HashMap,
11    };
12
13    pub struct Graph<K: Key, N> {
14        nodes: SlotMap<K, N>,
15        nexts: SecondaryMap<K, Vec<K>>,
16        prevs: SecondaryMap<K, Vec<K>>,
17    }
18
19    impl<K: Key, N> Default for Graph<K, N> {
20        fn default() -> Self {
21            Self {
22                nodes: SlotMap::default(),
23                nexts: SecondaryMap::default(),
24                prevs: SecondaryMap::default(),
25            }
26        }
27    }
28
29    impl<K: Key, N> Graph<K, N> {
30        pub fn new() -> Self {
31            Self::default()
32        }
33        pub fn insert_node(&mut self, node: N) -> K {
34            let key = self.nodes.insert(node);
35            self.nexts.insert(key, Vec::new());
36            self.prevs.insert(key, Vec::new());
37            key
38        }
39        pub fn insert_edge(&mut self, from: K, to: K) {
40            self.nexts.get_mut(from).unwrap().push(to);
41            self.prevs.get_mut(to).unwrap().push(from);
42        }
43        pub fn iter(&self) -> GraphTopoIter<'_, K, N> {
44            GraphTopoIter::new(self)
45        }
46    }
47
48    pub struct GraphTopoIter<'a, K: Key, N> {
49        graph: &'a Graph<K, N>,
50        in_degrees: SecondaryMap<K, usize>,
51        ready_stack: Vec<K>,
52    }
53
54    impl<'a, K: Key, N> GraphTopoIter<'a, K, N> {
55        fn new(graph: &'a Graph<K, N>) -> Self {
56            let mut in_degrees = SecondaryMap::new();
57            let mut ready_stack = Vec::new();
58
59            for (key, _) in graph.nodes.iter() {
60                let degree = graph.prevs[key].len();
61                in_degrees.insert(key, degree);
62
63                if degree == 0 {
64                    ready_stack.push(key);
65                }
66            }
67
68            Self {
69                graph,
70                in_degrees,
71                ready_stack,
72            }
73        }
74    }
75
76    impl<'a, K: Key, N> Iterator for GraphTopoIter<'a, K, N> {
77        type Item = &'a N;
78
79        fn next(&mut self) -> Option<Self::Item> {
80            let current_key = self.ready_stack.pop()?;
81
82            let next_nodes = self.graph.nexts.get(current_key).unwrap();
83            for &next_key in next_nodes {
84                let degree = self.in_degrees.get_mut(next_key).unwrap();
85                *degree -= 1;
86                if *degree == 0 {
87                    self.ready_stack.push(next_key);
88                }
89            }
90
91            self.graph.nodes.get(current_key)
92        }
93    }
94
95    /// A trait to support calling `clear` on the type erased trait object.
96    pub trait AnyClear: Any + Send + Sync {
97        fn clear(&mut self);
98    }
99
100    impl<T: Any + Send + Sync> AnyClear for Vec<T> {
101        fn clear(&mut self) {
102            self.clear();
103        }
104    }
105
106    /// A type-erased container for render packets.
107    ///
108    /// Basically a HashMap of `TypeId` -> type-erased `Vec<T>`
109    #[derive(Default)]
110    pub struct TypeBinnedVec {
111        inner: HashMap<TypeId, Box<dyn AnyClear>>,
112    }
113
114    impl TypeBinnedVec {
115        fn init_row<T: Send + Sync + 'static>(&mut self) -> &mut Vec<T> {
116            #[allow(clippy::unwrap_or_default)]
117            let entry = self
118                .inner
119                .entry(TypeId::of::<T>())
120                .or_insert(Box::<Vec<T>>::default());
121            (entry.as_mut() as &mut dyn Any)
122                .downcast_mut::<Vec<T>>()
123                .unwrap()
124        }
125        pub fn get_row<T: Send + Sync + 'static>(&self) -> &[T] {
126            self.inner
127                .get(&TypeId::of::<T>())
128                .and_then(|v| (v.as_ref() as &dyn Any).downcast_ref::<Vec<T>>())
129                .map(|v| v.as_ref())
130                .unwrap_or(&[])
131        }
132        pub fn extend<T: Send + Sync + 'static>(&mut self, packets: impl IntoIterator<Item = T>) {
133            self.init_row::<T>().extend(packets);
134        }
135        pub fn push<T: Send + Sync + 'static>(&mut self, packet: T) {
136            self.init_row::<T>().push(packet);
137        }
138        pub fn clear(&mut self) {
139            self.inner.iter_mut().for_each(|(_, v)| {
140                v.clear();
141            });
142        }
143    }
144}
145
/// Wgpu context
///
/// Bundles the instance, adapter, device and queue created by
/// [`WgpuContext::new`]; fields are public so render code can use them directly.
pub struct WgpuContext {
    /// The wgpu instance
    pub instance: wgpu::Instance,
    /// The wgpu adapter
    pub adapter: wgpu::Adapter,
    /// The wgpu device
    pub device: wgpu::Device,
    /// The wgpu queue
    pub queue: wgpu::Queue,
}
157
impl WgpuContext {
    /// Create a new wgpu context
    ///
    /// Requests a high-performance adapter from a default [`wgpu::Instance`],
    /// logs its info, then requests a device/queue pair from it.
    ///
    /// # Panics
    /// Panics if no suitable adapter is found or if device creation fails.
    pub async fn new() -> Self {
        let instance = wgpu::Instance::default();
        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                ..Default::default()
            })
            .await
            .unwrap();
        info!("wgpu adapter info: {:?}", adapter.get_info());

        // With the `profiling` feature, request only the profiler's timer-query
        // features that this adapter actually supports (intersection).
        #[cfg(feature = "profiling")]
        let (device, queue) = adapter
            .request_device(&wgpu::DeviceDescriptor {
                label: None,
                required_features: adapter.features()
                    & wgpu_profiler::GpuProfiler::ALL_WGPU_TIMER_FEATURES,
                required_limits: wgpu::Limits::default(),
                memory_hints: wgpu::MemoryHints::default(),
                trace: wgpu::Trace::Off,
                experimental_features: wgpu::ExperimentalFeatures::disabled(),
            })
            .await
            .unwrap();
        // Without profiling, the default descriptor (no extra features) suffices.
        #[cfg(not(feature = "profiling"))]
        let (device, queue) = adapter
            .request_device(&wgpu::DeviceDescriptor::default())
            .await
            .unwrap();

        Self {
            instance,
            adapter,
            device,
            queue,
        }
    }
}
198
/// A GPU buffer holding a single plain-old-data value `T`, paired with a
/// CPU-side copy of the last value uploaded.
#[allow(unused)]
pub(crate) struct WgpuBuffer<T: bytemuck::Pod + bytemuck::Zeroable + Debug> {
    label: Option<&'static str>,
    buffer: wgpu::Buffer,
    usage: wgpu::BufferUsages,
    // CPU-side mirror of the last value passed to `new_init`/`set`.
    inner: T,
}
206
impl<T: bytemuck::Pod + bytemuck::Zeroable + Debug> AsRef<wgpu::Buffer> for WgpuBuffer<T> {
    /// Borrows the underlying [`wgpu::Buffer`].
    fn as_ref(&self) -> &wgpu::Buffer {
        &self.buffer
    }
}
212
213#[allow(unused)]
214impl<T: bytemuck::Pod + bytemuck::Zeroable + Debug> WgpuBuffer<T> {
215    pub(crate) fn new_init(
216        ctx: &WgpuContext,
217        label: Option<&'static str>,
218        usage: wgpu::BufferUsages,
219        data: T,
220    ) -> Self {
221        assert!(
222            usage.contains(wgpu::BufferUsages::COPY_DST),
223            "Buffer {label:?} does not contains COPY_DST"
224        );
225        // trace!("[WgpuBuffer]: new_init, {} {:?}", data.len(), usage);
226        Self {
227            label,
228            buffer: ctx
229                .device
230                .create_buffer_init(&wgpu::util::BufferInitDescriptor {
231                    label,
232                    contents: bytemuck::bytes_of(&data),
233                    usage,
234                }),
235            usage,
236            inner: data,
237        }
238    }
239
240    pub(crate) fn get(&self) -> &T {
241        &self.inner
242    }
243
244    pub(crate) fn set(&mut self, ctx: &WgpuContext, data: T) {
245        {
246            let mut view = ctx
247                .queue
248                .write_buffer_with(
249                    &self.buffer,
250                    0,
251                    wgpu::BufferSize::new(std::mem::size_of_val(&data) as u64).unwrap(),
252                )
253                .unwrap();
254            view.copy_from_slice(bytemuck::bytes_of(&data));
255        }
256        // ctx.queue.submit([]);
257        self.inner = data;
258    }
259
260    #[allow(unused)]
261    pub(crate) fn read_buffer(&self, ctx: &WgpuContext) -> Vec<u8> {
262        let size = std::mem::size_of::<T>();
263        let staging_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
264            label: Some("Debug Staging Buffer"),
265            size: size as u64,
266            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
267            mapped_at_creation: false,
268        });
269
270        let mut encoder = ctx
271            .device
272            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
273                label: Some("Debug Read Encoder"),
274            });
275
276        encoder.copy_buffer_to_buffer(&self.buffer, 0, &staging_buffer, 0, size as u64);
277        ctx.queue.submit(Some(encoder.finish()));
278
279        let buffer_slice = staging_buffer.slice(..);
280        let (tx, rx) = async_channel::bounded(1);
281        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
282            pollster::block_on(tx.send(result)).unwrap()
283        });
284        ctx.device
285            .poll(wgpu::PollType::wait_indefinitely())
286            .unwrap();
287        pollster::block_on(rx.recv()).unwrap().unwrap();
288
289        buffer_slice.get_mapped_range().to_vec()
290    }
291}
292
/// A GPU buffer holding a slice of plain-old-data values `T`, tracking the
/// element count alongside the allocation.
pub(crate) struct WgpuVecBuffer<T: Default + bytemuck::Pod + bytemuck::Zeroable + Debug> {
    label: Option<&'static str>,
    pub(crate) buffer: wgpu::Buffer,
    usage: wgpu::BufferUsages,
    /// Keep match to the buffer size
    /// (number of `T` elements the buffer is sized for).
    len: usize,
    _phantom: PhantomData<T>,
    // inner: Vec<T>,
}
302
303impl<T: Default + bytemuck::Pod + bytemuck::Zeroable + Debug> WgpuVecBuffer<T> {
304    pub(crate) fn new(
305        ctx: &WgpuContext,
306        label: Option<&'static str>,
307        usage: wgpu::BufferUsages,
308        len: usize,
309    ) -> Self {
310        assert!(
311            usage.contains(wgpu::BufferUsages::COPY_DST),
312            "Buffer {label:?} does not contains COPY_DST"
313        );
314        let size = (std::mem::size_of::<T>() * len) as u64;
315        Self {
316            label,
317            buffer: ctx.device.create_buffer(&wgpu::BufferDescriptor {
318                label,
319                size,
320                usage,
321                mapped_at_creation: false,
322            }),
323            usage,
324            len: 0,
325            _phantom: PhantomData,
326            // inner: vec![],
327        }
328    }
329
330    #[allow(unused)]
331    pub(crate) fn new_init(
332        ctx: &WgpuContext,
333        label: Option<&'static str>,
334        usage: wgpu::BufferUsages,
335        data: &[T],
336    ) -> Self {
337        let mut buffer = Self::new(ctx, label, usage, data.len());
338        buffer.set(ctx, data);
339        buffer
340    }
341
342    #[allow(unused)]
343    pub(crate) fn len(&self) -> usize {
344        self.len
345    }
346    // pub(crate) fn get(&self) -> &[T] {
347    //     self.inner.as_ref()
348    // }
349
350    #[allow(unused)]
351    pub(crate) fn resize(&mut self, ctx: &WgpuContext, len: usize) -> bool {
352        let size = (std::mem::size_of::<T>() * len) as u64;
353        let realloc = self.buffer.size() != size;
354        if realloc {
355            self.len = len;
356            // self.inner.resize(len, T::default());
357            self.buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
358                label: self.label,
359                size,
360                usage: self.usage,
361                mapped_at_creation: false,
362            })
363        }
364        realloc
365    }
366
367    pub(crate) fn set(&mut self, ctx: &WgpuContext, data: &[T]) -> bool {
368        // trace!("{} {}", self.inner.len(), data.len());
369        // self.inner.resize(data.len(), T::default());
370        // self.inner.copy_from_slice(data);
371        self.len = data.len();
372        let realloc = self.buffer.size() != std::mem::size_of_val(data) as u64;
373
374        if realloc {
375            // info!("realloc");
376            // NOTE: create_buffer_init sometimes causes freezing in wasm
377            let buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
378                label: self.label,
379                size: (std::mem::size_of_val(data)) as u64,
380                usage: self.usage,
381                mapped_at_creation: false,
382            });
383            ctx.queue
384                .write_buffer(&buffer, 0, bytemuck::cast_slice(data));
385            // info!("new");
386            self.buffer = buffer;
387        } else {
388            // info!("queue copy");
389            {
390                let mut view = ctx
391                    .queue
392                    .write_buffer_with(
393                        &self.buffer,
394                        0,
395                        wgpu::BufferSize::new((std::mem::size_of_val(data)) as u64).unwrap(),
396                    )
397                    .unwrap();
398                view.copy_from_slice(bytemuck::cast_slice(data));
399            }
400            // ctx.queue.submit([]);
401        }
402        // info!("done");
403        realloc
404    }
405
406    #[allow(unused)]
407    pub(crate) fn read_buffer(&self, ctx: &WgpuContext) -> Option<Vec<u8>> {
408        let size = std::mem::size_of::<T>() * self.len;
409        let staging_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
410            label: Some("Debug Staging Buffer"),
411            size: size as u64,
412            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
413            mapped_at_creation: false,
414        });
415
416        let mut encoder = ctx
417            .device
418            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
419                label: Some("Debug Read Encoder"),
420            });
421
422        encoder.copy_buffer_to_buffer(&self.buffer, 0, &staging_buffer, 0, size as u64);
423        ctx.queue.submit(Some(encoder.finish()));
424
425        let buffer_slice = staging_buffer.slice(..);
426        let (tx, rx) = async_channel::bounded(1);
427        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
428            tx.try_send(result).unwrap()
429        });
430        ctx.device
431            .poll(wgpu::PollType::wait_indefinitely())
432            .unwrap();
433        pollster::block_on(rx.recv()).unwrap().unwrap();
434
435        let x = buffer_slice.get_mapped_range().to_vec();
436        Some(x)
437    }
438}
439
/// A thin wrapper around [`wgpu::Texture`]; derefs to the inner texture.
pub struct WgpuTexture {
    inner: wgpu::Texture,
}
443
444impl WgpuTexture {
445    pub fn new(ctx: &WgpuContext, desc: &wgpu::TextureDescriptor) -> Self {
446        Self {
447            inner: ctx.device.create_texture(desc),
448        }
449    }
450}
451
// Expose all `wgpu::Texture` methods directly on `WgpuTexture`.
impl Deref for WgpuTexture {
    type Target = wgpu::Texture;
    fn deref(&self) -> &Self::Target {
        &self.inner
    }
}
458
/// A [`WgpuTexture`] with [`wgpu::TextureUsages::COPY_SRC`] usage and wrapped with a staging buffer and
/// a cpu side bytes `Vec<T>` buffer to read back from the texture.
pub struct ReadbackWgpuTexture {
    inner: WgpuTexture,
    /// Row stride used in the staging buffer, rounded up to the 256-byte copy alignment.
    aligned_bytes_per_row: usize,
    /// MAP_READ staging buffer the texture is copied into before mapping.
    staging_buffer: wgpu::Buffer,
    /// Tightly packed (no per-row padding) CPU-side copy of the texture contents.
    bytes: Vec<u8>,
    /// Pending async readback receiver. Present when `start_readback` has been called
    /// but `finish_readback` has not yet completed.
    pending_rx: Option<async_channel::Receiver<Result<(), wgpu::BufferAsyncError>>>,
}
470
// Expose the inner `WgpuTexture` (and, transitively, `wgpu::Texture`) methods.
impl Deref for ReadbackWgpuTexture {
    type Target = WgpuTexture;
    fn deref(&self) -> &Self::Target {
        &self.inner
    }
}
477
/// Row-pitch alignment for texture-to-buffer copies; wgpu requires
/// `bytes_per_row` to be a multiple of 256 (`COPY_BYTES_PER_ROW_ALIGNMENT`).
const ALIGNMENT: usize = 256;
impl ReadbackWgpuTexture {
    /// Creates the texture (forcing `COPY_SRC` usage), a row-aligned staging
    /// buffer sized for the whole texture, and a tightly packed CPU-side byte
    /// buffer to receive the data.
    pub fn new(ctx: &WgpuContext, desc: &wgpu::TextureDescriptor) -> Self {
        if !desc.usage.contains(wgpu::TextureUsages::COPY_SRC) {
            warn!(
                "ReadbackWgpuTexture should have COPY_SRC usage, but got {:?}, will auto add this usage",
                desc.usage
            );
        }
        let texture = WgpuTexture::new(
            ctx,
            &wgpu::TextureDescriptor {
                usage: desc.usage | wgpu::TextureUsages::COPY_SRC,
                ..*desc
            },
        );
        let block_size = desc.format.block_copy_size(None).unwrap();
        // Round the tight row size up to the copy alignment; this is the row
        // pitch used for copies into the staging buffer.
        let bytes_per_row =
            (texture.size().width * block_size).div_ceil(ALIGNMENT as u32) as usize * ALIGNMENT;

        let staging_buffer_label = desc.label.map(|s| format!("{s} Staging Buffer"));
        let staging_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
            label: staging_buffer_label.as_deref(),
            size: (bytes_per_row * texture.size().height as usize) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });
        // The CPU-side buffer is tightly packed: width * height * block_size.
        let len = texture.size().width * texture.size().height * block_size;
        let bytes = vec![0u8; len as usize];

        Self {
            inner: texture,
            aligned_bytes_per_row: bytes_per_row,
            staging_buffer,
            bytes,
            pending_rx: None,
        }
    }
    /// Returns the most recently read-back texture contents (tightly packed).
    pub fn texture_data(&self) -> &[u8] {
        &self.bytes
    }

    /// Start an async readback: copy texture to staging buffer, submit, and begin mapping.
    ///
    /// This is non-blocking. Call [`finish_readback`](Self::finish_readback) later to
    /// poll the device and copy the data into the CPU-side buffer.
    ///
    /// NOTE(review): assumes no readback is already pending; starting a second
    /// one before finishing the first would request a mapping on a buffer whose
    /// mapping is still in flight — confirm callers alternate start/finish.
    pub fn start_readback(&mut self, ctx: &WgpuContext) {
        let size = self.size();

        let mut encoder = ctx
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Readback Copy Encoder"),
            });
        encoder.copy_texture_to_buffer(
            wgpu::TexelCopyTextureInfo {
                aspect: wgpu::TextureAspect::All,
                // Deref-coerces through `WgpuTexture` to `&wgpu::Texture`.
                texture: self,
                mip_level: 0,
                origin: wgpu::Origin3d::ZERO,
            },
            wgpu::TexelCopyBufferInfo {
                buffer: &self.staging_buffer,
                layout: wgpu::TexelCopyBufferLayout {
                    offset: 0,
                    bytes_per_row: Some(self.aligned_bytes_per_row as u32),
                    rows_per_image: Some(size.height),
                },
            },
            size,
        );
        ctx.queue.submit(Some(encoder.finish()));

        let buffer_slice = self.staging_buffer.slice(..);
        let (tx, rx) = async_channel::bounded(1);
        // `try_send` is safe inside the callback (bounded(1), single send);
        // errors are ignored because the receiver may have been dropped.
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            let _ = tx.try_send(result);
        });
        self.pending_rx = Some(rx);
    }

    /// Finish a pending async readback: poll the device, copy data from the staging
    /// buffer into the CPU-side buffer, and unmap.
    ///
    /// If no readback is pending, this is a no-op.
    pub fn finish_readback(&mut self, ctx: &WgpuContext) {
        let Some(rx) = self.pending_rx.take() else {
            return;
        };

        // Block until the GPU finishes and the map callback delivers its result.
        ctx.device
            .poll(wgpu::PollType::wait_indefinitely())
            .unwrap();
        pollster::block_on(rx.recv()).unwrap().unwrap();

        self.copy_staging_to_bytes();
    }

    /// Try to finish a pending readback without blocking.
    /// Returns `true` if completed (or nothing was pending), `false` if GPU isn't done yet.
    pub fn try_finish_readback(&mut self, ctx: &WgpuContext) -> bool {
        let Some(rx) = self.pending_rx.as_ref() else {
            return true;
        };

        // Non-blocking poll to nudge the GPU
        let _ = ctx.device.poll(wgpu::PollType::Poll);

        // Check if the mapping callback has fired
        match rx.try_recv() {
            Ok(result) => {
                result.unwrap();
                self.pending_rx = None;
                self.copy_staging_to_bytes();
                true
            }
            Err(async_channel::TryRecvError::Empty) => false,
            Err(async_channel::TryRecvError::Closed) => {
                // Sender dropped without delivering a result. NOTE(review):
                // should be unreachable (the callback always try_sends before
                // dropping `tx`), and the staging buffer is neither copied nor
                // unmapped on this path — verify this cannot occur in practice.
                self.pending_rx = None;
                true
            }
        }
    }

    /// Copies the mapped staging buffer into `self.bytes`, stripping the
    /// per-row alignment padding, then unmaps the staging buffer.
    fn copy_staging_to_bytes(&mut self) {
        let size = self.size();
        let buffer_slice = self.staging_buffer.slice(..);
        let view = buffer_slice.get_mapped_range();
        let block_size = self.inner.format().block_copy_size(None).unwrap();
        let bytes_in_row = (size.width * block_size) as usize;

        // Source rows are `aligned_bytes_per_row` apart; destination rows are
        // tightly packed at `bytes_in_row`.
        for y in 0..size.height as usize {
            let src_row_start = y * self.aligned_bytes_per_row;
            let dst_row_start = y * bytes_in_row;

            self.bytes[dst_row_start..dst_row_start + bytes_in_row]
                .copy_from_slice(&view[src_row_start..src_row_start + bytes_in_row]);
        }
        // The view must be dropped before unmapping the buffer.
        drop(view);
        self.staging_buffer.unmap();
    }

    /// Synchronous readback: start + finish in one call.
    pub fn update_texture_data(&mut self, ctx: &WgpuContext) -> &[u8] {
        self.start_readback(ctx);
        self.finish_readback(ctx);
        &self.bytes
    }
}
627
#[cfg(test)]
mod test {
    // Placeholder test; the commented-out assertion below appears to be a
    // bytemuck byte-layout experiment and is intentionally disabled.
    #[test]
    fn test() {
        // let x = vec![0, 1, 2, 3];
        // assert_eq!(
        //     bytemuck::bytes_of(&[x.as_slice()]),
        //     bytemuck::bytes_of(&x)
        // )
    }
}