// Source: re_datafusion/datafusion_connector.rs

use std::sync::Arc;

use datafusion::{catalog::TableProvider, error::DataFusionError};

use re_grpc_client::redap::RedapClient;
use re_log_types::{external::re_tuid::Tuid, EntryId};
use re_protos::catalog::v1alpha1::{
    ext::EntryDetails, DatasetEntry, EntryFilter, ReadDatasetEntryRequest,
};

use crate::partition_table::PartitionTableProvider;
use crate::table_entry_provider::TableEntryTableProvider;

/// Builds DataFusion [`TableProvider`]s on top of a [`RedapClient`].
///
/// The connector owns one client and passes clones of it to the table
/// providers it creates.
pub struct DataFusionConnector {
    /// Catalog client used for every request issued by this connector.
    catalog: RedapClient,
}

impl DataFusionConnector {
    /// Creates a connector backed by a catalog client for `origin`.
    ///
    /// # Errors
    ///
    /// Returns an error if `origin` cannot be parsed, or if
    /// `re_grpc_client::redap::client` fails to produce a client.
    pub async fn new(origin: &str) -> anyhow::Result<Self> {
        // The target type of `parse` is inferred from the `client` argument below.
        let endpoint = origin.parse()?;
        let catalog = re_grpc_client::redap::client(endpoint).await?;

        Ok(Self { catalog })
    }
}

impl DataFusionConnector {
    /// Returns a table provider for the catalog's `__entries` table.
    ///
    /// Looks up the entry named `__entries` via `find_entries`, takes the
    /// first result, and builds a [`TableEntryTableProvider`] from its id.
    ///
    /// # Errors
    ///
    /// Returns [`DataFusionError::External`] if the lookup request fails, if
    /// the response contains no entries, or if the returned entry cannot be
    /// converted into [`EntryDetails`]. Errors from constructing the provider
    /// are propagated as-is.
    pub async fn get_entry_list(&mut self) -> Result<Arc<dyn TableProvider>, DataFusionError> {
        // TODO(jleibs): Clean this up with better helpers
        let request = re_protos::catalog::v1alpha1::FindEntriesRequest {
            filter: Some(EntryFilter {
                name: Some("__entries".to_owned()),
                ..Default::default()
            }),
        };

        let entry: EntryDetails = self
            .catalog
            .find_entries(request)
            .await
            .map_err(|err| DataFusionError::External(Box::new(err)))?
            .into_inner()
            .entries
            .into_iter()
            .next()
            // Build the error lazily so the success path does not allocate it.
            .ok_or_else(|| DataFusionError::External("No __entries table found".into()))?
            .try_into()
            .map_err(|err| DataFusionError::External(Box::new(err)))?;

        TableEntryTableProvider::new(self.catalog.clone(), entry.id)
            .into_provider()
            .await
    }

    /// Reads the dataset entry identified by `id` from the catalog.
    ///
    /// Returns the `dataset` field of the response unchanged, so the result is
    /// `Ok(None)` when the response carries no dataset.
    ///
    /// # Errors
    ///
    /// Returns the [`tonic::Status`] reported by the `read_dataset_entry` call.
    pub async fn get_dataset_entry(
        &mut self,
        id: Tuid,
    ) -> Result<Option<DatasetEntry>, tonic::Status> {
        let request = ReadDatasetEntryRequest {
            id: Some(id.into()),
        };
        let response = self.catalog.read_dataset_entry(request).await?;

        Ok(response.into_inner().dataset)
    }

    /// Returns a table provider for the partition table of `dataset_id`.
    ///
    /// The provider is given its own clone of the catalog client, so this
    /// method only needs `&self`.
    ///
    /// # Errors
    ///
    /// Propagates any [`DataFusionError`] raised while building the provider.
    pub async fn get_partition_table(
        &self,
        dataset_id: EntryId,
    ) -> Result<Arc<dyn TableProvider>, DataFusionError> {
        let provider = PartitionTableProvider::new(self.catalog.clone(), dataset_id);
        provider.into_provider().await
    }
}