//! Image-related utilities.
use smallvec::{smallvec, SmallVec};
use crate::datatypes::{Blob, ChannelDatatype, TensorBuffer, TensorData, TensorDimension};
// ----------------------------------------------------------------------------
/// The kind of image data, either color, segmentation, or depth image.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ImageKind {
/// A normal grayscale or color image ([`crate::archetypes::Image`]).
Color,
/// A depth map ([`crate::archetypes::DepthImage`]).
Depth,
/// A segmentation image ([`crate::archetypes::SegmentationImage`]).
///
/// The data is a [`crate::components::ClassId`] which should be
/// looked up using the appropriate [`crate::components::AnnotationContext`].
Segmentation,
}
// ----------------------------------------------------------------------------
/// Errors when converting images from the [`image`] crate to an [`crate::archetypes::Image`].
#[cfg(feature = "image")]
#[derive(thiserror::Error, Clone, Debug)]
pub enum ImageConversionError {
/// Unknown color type from the image crate.
///
/// This should only happen if you are using a newer `image` crate than the one Rerun was built for,
/// because `image` can add new color types without it being a breaking change,
/// so we cannot exhaustively match on all color types.
#[error("Unsupported color type: {0:?}. We support 8-bit, 16-bit, and f32 images, and RGB, RGBA, Luminance, and Luminance-Alpha.")]
UnsupportedImageColorType(image::ColorType),
}
/// Errors when loading image files.
#[cfg(feature = "image")]
#[derive(thiserror::Error, Clone, Debug)]
pub enum ImageLoadError {
/// e.g. failed to decode a JPEG file.
#[error(transparent)]
Image(std::sync::Arc<image::ImageError>),
/// e.g. failed to find a file on disk.
#[error("Failed to load file: {0}")]
ReadError(std::sync::Arc<std::io::Error>),
/// Failure to convert the loaded image to a [`crate::archetypes::Image`].
#[error(transparent)]
ImageConversionError(#[from] ImageConversionError),
/// The encountered MIME type is not supported for decoding images.
#[error("MIME type '{0}' is not supported for images")]
UnsupportedMimeType(String),
/// Failed to read the MIME type from inspecting the image data blob.
#[error("Could not detect MIME type from the image contents")]
UnrecognizedMimeType,
}
#[cfg(feature = "image")]
impl From<image::ImageError> for ImageLoadError {
#[inline]
fn from(err: image::ImageError) -> Self {
Self::Image(std::sync::Arc::new(err))
}
}
#[cfg(feature = "image")]
impl From<std::io::Error> for ImageLoadError {
#[inline]
fn from(err: std::io::Error) -> Self {
Self::ReadError(std::sync::Arc::new(err))
}
}
// ----------------------------------------------------------------------------
/// Error returned when trying to interpret a tensor as an image.
#[derive(thiserror::Error, Clone, Debug)]
pub enum ImageConstructionError<T: TryInto<TensorData>>
where
T::Error: std::error::Error,
{
/// Could not convert source to [`TensorData`].
#[error("Could not convert source to TensorData: {0}")]
TensorDataConversion(T::Error),
/// The tensor did not have the right shape for an image (e.g. had too many dimensions).
#[error("Could not create Image from TensorData with shape {0:?}")]
BadImageShape(Vec<TensorDimension>),
/// Happens if you try to cast `NV12` or `YUY2` to a depth image or segmentation image.
#[error("Chroma downsampling is not supported for this image type (e.g. DepthImage or SegmentationImage)")]
ChromaDownsamplingNotSupported,
}
/// Converts a [`TensorBuffer`] into a [`Blob`] and the corresponding [`ChannelDatatype`], as used by the image API.
pub fn blob_and_datatype_from_tensor(tensor_buffer: TensorBuffer) -> (Blob, ChannelDatatype) {
match tensor_buffer {
TensorBuffer::U8(buffer) => (Blob(buffer), ChannelDatatype::U8),
TensorBuffer::U16(buffer) => (Blob(buffer.cast_to_u8()), ChannelDatatype::U16),
TensorBuffer::U32(buffer) => (Blob(buffer.cast_to_u8()), ChannelDatatype::U32),
TensorBuffer::U64(buffer) => (Blob(buffer.cast_to_u8()), ChannelDatatype::U64),
TensorBuffer::I8(buffer) => (Blob(buffer.cast_to_u8()), ChannelDatatype::I8),
TensorBuffer::I16(buffer) => (Blob(buffer.cast_to_u8()), ChannelDatatype::I16),
TensorBuffer::I32(buffer) => (Blob(buffer.cast_to_u8()), ChannelDatatype::I32),
TensorBuffer::I64(buffer) => (Blob(buffer.cast_to_u8()), ChannelDatatype::I64),
TensorBuffer::F16(buffer) => (Blob(buffer.cast_to_u8()), ChannelDatatype::F16),
TensorBuffer::F32(buffer) => (Blob(buffer.cast_to_u8()), ChannelDatatype::F32),
TensorBuffer::F64(buffer) => (Blob(buffer.cast_to_u8()), ChannelDatatype::F64),
}
}
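// A minimal usage sketch for `blob_and_datatype_from_tensor` (added for illustration, not part of the
// original API). It assumes the buffer inside `TensorBuffer::U8` can be constructed from a `Vec<u8>`
// via `.into()`; adjust the construction to whatever conversions `TensorBuffer` actually provides.
#[test]
fn test_blob_and_datatype_from_tensor() {
    // Hypothetical construction of a small u8 tensor buffer:
    let (_blob, datatype) = blob_and_datatype_from_tensor(TensorBuffer::U8(vec![1_u8, 2, 3, 4].into()));
    assert_eq!(datatype, ChannelDatatype::U8);
}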
// ----------------------------------------------------------------------------
/// Types that implement this can be used as image channel types.
///
/// Implemented for `u8, u16, u32, u64, i8, i16, i32, i64, f16, f32, f64`.
pub trait ImageChannelType: bytemuck::Pod {
/// The [`ChannelDatatype`] for this type.
const CHANNEL_TYPE: ChannelDatatype;
}
impl ImageChannelType for u8 {
const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::U8;
}
impl ImageChannelType for u16 {
const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::U16;
}
impl ImageChannelType for u32 {
const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::U32;
}
impl ImageChannelType for u64 {
const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::U64;
}
impl ImageChannelType for i8 {
const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::I8;
}
impl ImageChannelType for i16 {
const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::I16;
}
impl ImageChannelType for i32 {
const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::I32;
}
impl ImageChannelType for i64 {
const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::I64;
}
impl ImageChannelType for half::f16 {
const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::F16;
}
impl ImageChannelType for f32 {
const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::F32;
}
impl ImageChannelType for f64 {
const CHANNEL_TYPE: ChannelDatatype = ChannelDatatype::F64;
}
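// A small illustration (not part of the original API) of how `ImageChannelType` can be used in
// generic code to recover the `ChannelDatatype` of a pixel type; `channel_datatype_of` is a
// hypothetical helper added only for this example.
#[allow(dead_code)]
fn channel_datatype_of<T: ImageChannelType>() -> ChannelDatatype {
    T::CHANNEL_TYPE
}

#[test]
fn test_channel_datatype_of() {
    assert_eq!(channel_datatype_of::<u8>(), ChannelDatatype::U8);
    assert_eq!(channel_datatype_of::<half::f16>(), ChannelDatatype::F16);
}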
// ----------------------------------------------------------------------------
/// Returns the indices of the dimensions that most likely make up the image (height, width, and possibly a channel dimension).
///
/// Ignores leading and trailing 1-sized dimensions.
///
/// For instance: `[1, 480, 640, 3, 1]` would return `[1, 2, 3]`,
/// the indices of the `[480, 640, 3]` dimensions.
pub fn find_non_empty_dim_indices(shape: &[TensorDimension]) -> SmallVec<[usize; 4]> {
match shape.len() {
0 => return smallvec![],
1 => return smallvec![0],
2 => return smallvec![0, 1],
_ => {}
}
// Find a range of non-unit dimensions.
// [1, 1, 1, 480, 640, 3, 1, 1, 1]
// ^---------^ goal range
let mut non_unit_indices =
shape
.iter()
.enumerate()
.filter_map(|(ind, dim)| if dim.size != 1 { Some(ind) } else { None });
// 0 is always a valid index.
let mut min = non_unit_indices.next().unwrap_or(0);
let mut max = non_unit_indices.last().unwrap_or(min);
// Note, these are inclusive ranges.
// First, grow the range towards the end: trailing 1-sized dimensions are more likely to be
// intentional than leading ones.
// Grow to a min-size of 2.
// (1x1x3x1) -> 3x1 mono rather than 1x1x3 RGB
while max == min && max + 1 < shape.len() {
max += 1;
}
// Next, grow the range towards the beginning (leading 1-sized dimensions) if we still need to.
// Grow to a length of 3 if the innermost dimension has size 3 or 4 (color images),
// otherwise only grow to a length of 2.
// (1x1x3) -> 1x1x3 rgb rather than 1x3 mono
let target_len = match shape[max].size {
3 | 4 => 3,
_ => 2,
};
while max - min + 1 < target_len && 0 < min {
min -= 1;
}
(min..=max).collect()
}
#[test]
fn test_find_non_empty_dim_indices() {
fn expect(shape: &[u64], expected: &[usize]) {
let dim: Vec<_> = shape
.iter()
.map(|s| TensorDimension {
size: *s,
name: None,
})
.collect();
let got = find_non_empty_dim_indices(&dim);
assert!(
got.as_slice() == expected,
"Input: {shape:?}, got {got:?}, expected {expected:?}"
);
}
expect(&[], &[]);
expect(&[0], &[0]);
expect(&[1], &[0]);
expect(&[100], &[0]);
expect(&[480, 640], &[0, 1]);
expect(&[480, 640, 1], &[0, 1]);
expect(&[480, 640, 1, 1], &[0, 1]);
expect(&[480, 640, 3], &[0, 1, 2]);
expect(&[1, 480, 640], &[1, 2]);
expect(&[1, 480, 640, 3, 1], &[1, 2, 3]);
expect(&[1, 3, 480, 640, 1], &[1, 2, 3]);
expect(&[1, 1, 480, 640], &[2, 3]);
expect(&[1, 1, 480, 640, 1, 1], &[2, 3]);
expect(&[1, 1, 3], &[0, 1, 2]);
expect(&[1, 1, 3, 1], &[2, 3]);
}
// ----------------------------------------------------------------------------
// TODO(andreas): Expose this in the API?
/// YUV matrix coefficients that determine how a YUV image is meant to be converted to RGB.
///
/// A rigorous definition of the YUV conversion matrix would also require defining
/// the transfer characteristics & color primaries of the resulting RGB space.
/// See [`re_video::decode`]'s documentation.
///
/// However, at this point we generally assume that no further processing is needed after the transform.
/// This is acceptable for most non-HDR content because of the following properties of `Bt709`/`Bt601`/sRGB:
/// * Bt709 & sRGB primaries are practically identical
/// * Bt601 PAL & Bt709 color primaries are the same (with some slight differences for Bt601 NTSC)
/// * Bt709 & sRGB transfer functions are almost identical (and the difference is widely ignored)
/// (sources: <https://en.wikipedia.org/wiki/Rec._709>, <https://en.wikipedia.org/wiki/Rec._601>)
/// …which means that, for the moment, we pretty much only care about the (actually quite different) YUV conversion matrices!
#[derive(Clone, Copy, Debug)]
pub enum YuvMatrixCoefficients {
/// BT.601 (aka. SDTV, aka. Rec.601)
///
/// Wiki: <https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion/>
Bt601,
/// BT.709 (aka. HDTV, aka. Rec.709)
///
/// Wiki: <https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.709_conversion/>
///
/// These are the same primaries we usually assume and use for all of Rerun's rendering
/// since they are the same primaries used by sRGB.
/// <https://en.wikipedia.org/wiki/Rec._709#Relationship_to_sRGB/>
/// The OETF/EOTF function (<https://en.wikipedia.org/wiki/Transfer_functions_in_imaging>) is different,
/// but for all other purposes they are the same.
/// (The only reason for us to convert to optical units ("linear" instead of "gamma") is for
/// lighting computation & tonemapping where we typically start out with sRGB anyways!)
Bt709,
//
// Not yet supported. These vary a lot more from the other two!
//
// /// BT.2020 (aka. PQ, aka. Rec.2020)
// ///
// /// Wiki: <https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion/>
// BT2020_ConstantLuminance,
// BT2020_NonConstantLuminance,
}
/// Returns an sRGB color from a YUV color.
///
/// This conversion mirrors the function of the same name in `yuv_converter.wgsl`.
///
/// Specifying the color standard should be exposed in the future ([#3541](https://github.com/rerun-io/rerun/pull/3541)).
pub fn rgb_from_yuv(
y: u8,
u: u8,
v: u8,
limited_range: bool,
coefficients: YuvMatrixCoefficients,
) -> [u8; 3] {
let (mut y, mut u, mut v) = (y as f32, u as f32, v as f32);
// rescale YUV values
if limited_range {
// Via https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion:
// "The resultant signals range from 16 to 235 for Y′ (Cb and Cr range from 16 to 240);
// the values from 0 to 15 are called footroom, while the values from 236 to 255 are called headroom."
y = (y - 16.0) / 219.0;
u = (u - 128.0) / 224.0;
v = (v - 128.0) / 224.0;
} else {
y /= 255.0;
u = (u - 128.0) / 255.0;
v = (v - 128.0) / 255.0;
}
let r;
let g;
let b;
match coefficients {
YuvMatrixCoefficients::Bt601 => {
// BT.601 (aka. SDTV, aka. Rec.601). wiki: https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
r = y + 1.402 * v;
g = y - 0.344 * u - 0.714 * v;
b = y + 1.772 * u;
}
YuvMatrixCoefficients::Bt709 => {
// BT.709 (aka. HDTV, aka. Rec.709). wiki: https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.709_conversion
r = y + 1.575 * v;
g = y - 0.187 * u - 0.468 * v;
b = y + 1.856 * u;
}
}
[(255.0 * r) as u8, (255.0 * g) as u8, (255.0 * b) as u8]
}
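// A minimal sanity-check sketch for `rgb_from_yuv` (added for illustration, not exhaustive):
// with the chroma channels at their neutral value (128), the output should be pure gray,
// and the limited-range black/white levels (16/235) should map to 0/255.
#[test]
fn test_rgb_from_yuv_gray_points() {
    for coefficients in [YuvMatrixCoefficients::Bt601, YuvMatrixCoefficients::Bt709] {
        // Limited ("studio") range: Y'=16 is black, Y'=235 is white.
        assert_eq!(rgb_from_yuv(16, 128, 128, true, coefficients), [0, 0, 0]);
        assert_eq!(rgb_from_yuv(235, 128, 128, true, coefficients), [255, 255, 255]);
        // Full range: Y'=0 is black, Y'=255 is white.
        assert_eq!(rgb_from_yuv(0, 128, 128, false, coefficients), [0, 0, 0]);
        assert_eq!(rgb_from_yuv(255, 128, 128, false, coefficients), [255, 255, 255]);
    }
}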