1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
use ahash::HashMap;
use parking_lot::Mutex;

use crate::config::WgpuBackendType;

use super::{handle_async_error, wgpu_core_error::WgpuCoreWrappedContextError};

/// Identity of a tracked error, used as the key under which [`ErrorTracker`] de-duplicates errors.
///
/// Two errors that hash and compare equal as a `ContextError` are treated as the same error.
#[derive(Debug, Hash, PartialEq, Eq)]
pub enum ContextError {
    /// An error whose source could be downcast to a wgpu-core context error.
    ///
    /// Equality and hashing are delegated to [`WgpuCoreWrappedContextError`].
    WgpuCoreError(WgpuCoreWrappedContextError),

    /// An error for which no wgpu-core error type was available, identified by its description string.
    ///
    /// Only compiled when `cfg(web)` is set.
    #[cfg(web)]
    WebGpuError(String),
}

/// Bookkeeping that [`ErrorTracker`] stores for each distinct error it has seen.
///
/// Implements [`std::fmt::Debug`] so that tracked entries can be inspected in logs and assertions.
#[derive(Debug)]
pub struct ErrorEntry {
    /// Frame index associated with the most recent occurrence of this error.
    ///
    /// This is refreshed every time the error is recorded again, even if the
    /// repeated occurrence itself is not logged.
    last_occurred_frame_index: u64,

    /// Description of the error.
    // TODO(#4507): Expecting to need this once we use this in views. Also very useful for debugging.
    #[allow(dead_code)]
    description: String,
}

/// Keeps track of wgpu errors and de-duplicates messages across frames.
///
/// On native & webgl, what counts as a duplicate error is a heuristic based on the wgpu-core error type.
///
/// Used to avoid spamming the user with repeating errors.
/// [`crate::RenderContext`] maintains a "top level" error tracker for all otherwise unhandled errors.
///
/// TODO(#4507): Users should be able to create their own scopes feeding into separate trackers.
#[derive(Default)]
pub struct ErrorTracker {
    /// Currently tracked errors, keyed by their de-duplication identity.
    ///
    /// Entries are added by [`ErrorTracker::handle_error`] and pruned by
    /// [`ErrorTracker::on_device_timeline_frame_finished`].
    pub errors: Mutex<HashMap<ContextError, ErrorEntry>>,
}

impl ErrorTracker {
    /// Called by the renderer context when the last error scope of a frame has finished.
    ///
    /// Error scopes live on the device timeline, which may be arbitrarily delayed compared to the content timeline.
    /// See <https://www.w3.org/TR/webgpu/#programming-model-timelines>.
    /// Do *not* call this with the content pipeline's frame index!
    pub fn on_device_timeline_frame_finished(&self, device_timeline_frame_index: u64) {
        let mut errors = self.errors.lock();
        errors.retain(|_error, entry| {
            // If the error was not logged on the just concluded frame, remove it.
            device_timeline_frame_index == entry.last_occurred_frame_index
        });
    }

    /// Handles an async error, calling [`ErrorTracker::handle_error`] as needed.
    ///
    /// `on_last_scope_resolved` is called when the last scope has resolved.
    ///
    /// `frame_index` should be the currently active frame index which is associated with the scope.
    /// (by the time the scope finishes, the active frame index may have changed)
    pub fn handle_error_future(
        self: &std::sync::Arc<Self>,
        backend_type: WgpuBackendType,
        error_scope_result: impl IntoIterator<
            Item = impl std::future::Future<Output = Option<wgpu::Error>> + Send + 'static,
        >,
        frame_index: u64,
        on_last_scope_resolved: impl Fn(&Self, u64) + Send + Sync + 'static,
    ) {
        let mut error_scope_result = error_scope_result.into_iter().peekable();
        while let Some(error_future) = error_scope_result.next() {
            if error_scope_result.peek().is_none() {
                let err_tracker = self.clone();
                handle_async_error(
                    backend_type,
                    move |error| {
                        if let Some(error) = error {
                            err_tracker.handle_error(error, frame_index);
                        }
                        on_last_scope_resolved(&err_tracker, frame_index);
                    },
                    error_future,
                );
                break;
            }

            let err_tracker = self.clone();
            handle_async_error(
                backend_type,
                move |error| {
                    if let Some(error) = error {
                        err_tracker.handle_error(error, frame_index);
                    }
                },
                error_future,
            );
        }
    }

    /// Logs a wgpu error to the tracker.
    ///
    /// If the error happened already already, it will be deduplicated.
    ///
    /// `frame_index` should be the frame index associated with the error scope.
    /// Since errors are reported on the `device timeline`, not the `content timeline`,
    /// this may not be the currently active frame index!
    pub fn handle_error(&self, error: wgpu::Error, frame_index: u64) {
        let is_internal_error = matches!(error, wgpu::Error::Internal { .. });

        match error {
            wgpu::Error::OutOfMemory { source: _ } => {
                re_log::error!("A wgpu operation caused out-of-memory: {error}");
            }
            wgpu::Error::Internal {
                source: _source,
                description,
            }
            | wgpu::Error::Validation {
                source: _source,
                description,
            } => {
                let entry = ErrorEntry {
                    last_occurred_frame_index: frame_index,
                    description: description.clone(),
                };

                let should_log = match _source.downcast::<wgpu_core::error::ContextError>() {
                    Ok(ctx_err) => {
                        if ctx_err
                            .source
                            .downcast_ref::<wgpu_core::command::CommandEncoderError>()
                            .is_some()
                        {
                            // Actual command encoder errors never carry any meaningful
                            // information: ignore them.
                            return;
                        }

                        let ctx_err =
                            ContextError::WgpuCoreError(WgpuCoreWrappedContextError(ctx_err));
                        self.errors.lock().insert(ctx_err, entry).is_none()
                    }

                    #[cfg(not(web))]
                    Err(_) => true,

                    // We might be running with WebGPU on the web and therefore don't have a wgpu_core type.
                    #[cfg(web)]
                    Err(_) => {
                        let ctx_err = ContextError::WebGpuError(description.clone());
                        self.errors.lock().insert(ctx_err, entry).is_none()
                    }
                };

                if should_log {
                    let base_description = if is_internal_error {
                        "Internal wgpu error"
                    } else {
                        "Wgpu validation error"
                    };
                    re_log::error!("{base_description} {frame_index}: {description}");
                }
            }
        }
    }
}