rerun_bindings/catalog/
mod.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#![expect(clippy::needless_pass_by_value)] // A lot of arguments to #[pyfunction] need to be by value

mod catalog_client;
mod connection_handle;
mod dataframe_query;
mod dataset;
mod entry;
mod errors;
mod table;

use std::sync::Arc;

use arrow::{
    array::{Float32Array, RecordBatch},
    datatypes::Field,
};
use pyo3::{exceptions::PyRuntimeError, prelude::*, Bound, PyResult};

use crate::catalog::dataframe_query::PyDataframeQueryView;

pub use catalog_client::PyCatalogClient;
pub use connection_handle::ConnectionHandle;
pub use dataset::PyDataset;
pub use entry::{PyEntry, PyEntryId, PyEntryKind};
pub use errors::to_py_err;
pub use table::PyTable;

/// Register the `rerun.catalog` module.
pub(crate) fn register(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<PyCatalogClient>()?;

    m.add_class::<PyEntryId>()?;
    m.add_class::<PyEntryKind>()?;
    m.add_class::<PyEntry>()?;
    m.add_class::<PyDataset>()?;
    m.add_class::<PyTable>()?;

    m.add_class::<PyDataframeQueryView>()?;

    m.add_class::<PyVectorDistanceMetric>()?;

    Ok(())
}

// TODO(ab): when the new query APIs are implemented, move these type next to it (they were salvaged
// from the legacy server API)

/// The type of distance metric to use for vector index and search.
#[pyclass(name = "VectorDistanceMetric", eq, eq_int)]
#[derive(Clone, Debug, PartialEq)]
enum PyVectorDistanceMetric {
    L2,
    Cosine,
    Dot,
    Hamming,
}

impl From<PyVectorDistanceMetric> for re_protos::manifest_registry::v1alpha1::VectorDistanceMetric {
    fn from(metric: PyVectorDistanceMetric) -> Self {
        match metric {
            PyVectorDistanceMetric::L2 => Self::L2,
            PyVectorDistanceMetric::Cosine => Self::Cosine,
            PyVectorDistanceMetric::Dot => Self::Dot,
            PyVectorDistanceMetric::Hamming => Self::Hamming,
        }
    }
}

/// A type alias for either a `VectorDistanceMetric` enum or a string literal.
#[derive(FromPyObject)]
enum VectorDistanceMetricLike {
    #[pyo3(transparent, annotation = "enum")]
    VectorDistanceMetric(PyVectorDistanceMetric),

    #[pyo3(transparent, annotation = "literal")]
    CatchAll(String),
}

impl TryFrom<VectorDistanceMetricLike>
    for re_protos::manifest_registry::v1alpha1::VectorDistanceMetric
{
    type Error = PyErr;

    fn try_from(metric: VectorDistanceMetricLike) -> Result<Self, PyErr> {
        match metric {
            VectorDistanceMetricLike::VectorDistanceMetric(metric) => Ok(metric.into()),
            VectorDistanceMetricLike::CatchAll(metric) => match metric.to_lowercase().as_str() {
                "l2" => Ok(PyVectorDistanceMetric::L2.into()),
                "cosine" => Ok(PyVectorDistanceMetric::Cosine.into()),
                "dot" => Ok(PyVectorDistanceMetric::Dot.into()),
                "hamming" => Ok(PyVectorDistanceMetric::Hamming.into()),
                _ => Err(pyo3::exceptions::PyValueError::new_err(format!(
                    "Unknown vector distance metric: {metric}"
                ))),
            },
        }
    }
}

impl From<PyVectorDistanceMetric> for i32 {
    fn from(metric: PyVectorDistanceMetric) -> Self {
        let proto_typed =
            re_protos::manifest_registry::v1alpha1::VectorDistanceMetric::from(metric);

        proto_typed as Self
    }
}

/// A type alias for a vector (vector search input data).
#[derive(FromPyObject)]
enum VectorLike<'py> {
    NumPy(numpy::PyArrayLike1<'py, f32>),
    Vector(Vec<f32>),
}

impl VectorLike<'_> {
    fn to_record_batch(&self) -> PyResult<RecordBatch> {
        let schema = arrow::datatypes::Schema::new_with_metadata(
            vec![Field::new(
                "items",
                arrow::datatypes::DataType::Float32,
                false,
            )],
            Default::default(),
        );

        match self {
            VectorLike::NumPy(array) => {
                let floats: Vec<f32> = array
                    .as_array()
                    .as_slice()
                    .ok_or_else(|| {
                        PyRuntimeError::new_err("Failed to convert numpy array to slice".to_owned())
                    })?
                    .to_vec();

                RecordBatch::try_new(Arc::new(schema), vec![Arc::new(Float32Array::from(floats))])
                    .map_err(to_py_err)
            }
            VectorLike::Vector(floats) => RecordBatch::try_new(
                Arc::new(schema),
                vec![Arc::new(Float32Array::from(floats.clone()))],
            )
            .map_err(to_py_err),
        }
    }
}