1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
use arrow::{
    array::{make_array, RecordBatch},
    datatypes::{Field, Schema},
    error::ArrowError,
};

use crate::TransportChunk;

impl TransportChunk {
    /// Create an arrow-rs [`RecordBatch`] containing the data from this [`TransportChunk`].
    ///
    /// This is a "fairly" cheap operation, as it does not copy the underlying arrow data,
    /// but does incur overhead of generating an alternative representation of the arrow-
    /// related rust structures that refer to those data buffers.
    pub fn try_to_arrow_record_batch(&self) -> Result<RecordBatch, ArrowError> {
        let fields: Vec<Field> = self
            .schema
            .fields
            .iter()
            .map(|f| f.clone().into())
            .collect();

        let metadata = self.schema.metadata.clone().into_iter().collect();

        let schema = Schema::new(fields).with_metadata(metadata);

        let columns: Vec<_> = self
            .data
            .columns()
            .iter()
            .map(|arr2_array| {
                let data = arrow2::array::to_data(arr2_array.as_ref());
                make_array(data)
            })
            .collect();

        RecordBatch::try_new(std::sync::Arc::new(schema), columns)
    }

    /// Create a [`TransportChunk`] from an arrow-rs [`RecordBatch`].
    ///
    /// This is a "fairly" cheap operation, as it does not copy the underlying arrow data,
    /// but does incur overhead of generating an alternative representation of the arrow-
    /// related rust structures that refer to those data buffers.
    pub fn from_arrow_record_batch(batch: &RecordBatch) -> Self {
        let fields: Vec<arrow2::datatypes::Field> = batch
            .schema()
            .fields
            .iter()
            .map(|f| f.clone().into())
            .collect();

        let metadata = batch.schema().metadata.clone().into_iter().collect();

        let schema = arrow2::datatypes::Schema::from(fields).with_metadata(metadata);

        let columns: Vec<_> = batch
            .columns()
            .iter()
            .map(|array| arrow2::array::from_data(&array.to_data()))
            .collect();

        let data = arrow2::chunk::Chunk::new(columns);

        Self { schema, data }
    }
}