Autonomy Software C++ 24.5.1
Welcome to the Autonomy Software repository of the Mars Rover Design Team (MRDT) at Missouri University of Science and Technology (Missouri S&T)! This API reference contains the source code and other resources for the development of the autonomy software for our Mars rover. The Autonomy Software project aims to compete in the University Rover Challenge (URC) by demonstrating advanced autonomous capabilities and robust navigation algorithms.
Loading...
Searching...
No Matches
duckdb::PhysicalArrowCollector Class Reference
Inheritance diagram for duckdb::PhysicalArrowCollector:
Collaboration diagram for duckdb::PhysicalArrowCollector:

Public Member Functions

 PhysicalArrowCollector (PhysicalPlan &physical_plan, PreparedStatementData &data, bool parallel, idx_t batch_size)
 
SinkResultType Sink (ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const override
 
SinkCombineResultType Combine (ExecutionContext &context, OperatorSinkCombineInput &input) const override
 
unique_ptr< QueryResult > GetResult (GlobalSinkState &state) const override
 The final method used to fetch the query result from this operator.
 
unique_ptr< GlobalSinkState > GetGlobalSinkState (ClientContext &context) const override
 
unique_ptr< LocalSinkState > GetLocalSinkState (ExecutionContext &context) const override
 
SinkFinalizeType Finalize (Pipeline &pipeline, Event &event, ClientContext &context, OperatorSinkFinalizeInput &input) const override
 
bool ParallelSink () const override
 
bool SinkOrderDependent () const override
 
- Public Member Functions inherited from duckdb::PhysicalResultCollector
 PhysicalResultCollector (PhysicalPlan &physical_plan, PreparedStatementData &data)
 
bool IsSink () const override
 
vector< const_reference< PhysicalOperator > > GetChildren () const override
 
void BuildPipelines (Pipeline &current, MetaPipeline &meta_pipeline) override
 
bool IsSource () const override
 
virtual bool IsStreaming () const
 Whether this is a streaming result collector.
 
- Public Member Functions inherited from duckdb::PhysicalOperator
 PhysicalOperator (PhysicalPlan &physical_plan, PhysicalOperatorType type, vector< LogicalType > types, idx_t estimated_cardinality)
 
 PhysicalOperator (const PhysicalOperator &other)=delete
 Deleted copy constructors.
 
PhysicalOperator & operator= (const PhysicalOperator &)=delete
 
virtual string GetName () const
 
virtual InsertionOrderPreservingMap< string > ParamsToString () const
 
virtual string ToString (ExplainFormat format=ExplainFormat::DEFAULT) const
 
void Print () const
 
const vector< LogicalType > & GetTypes () const
 Return a vector of the types that will be returned by this operator.
 
virtual bool Equals (const PhysicalOperator &other) const
 
idx_t EstimatedThreadCount () const
 Functions to help decide how to set up pipeline dependencies.
 
bool CanSaturateThreads (ClientContext &context) const
 
virtual void Verify ()
 
virtual unique_ptr< OperatorState > GetOperatorState (ExecutionContext &context) const
 
virtual unique_ptr< GlobalOperatorState > GetGlobalOperatorState (ClientContext &context) const
 
virtual OperatorResultType Execute (ExecutionContext &context, DataChunk &input, DataChunk &chunk, GlobalOperatorState &gstate, OperatorState &state) const
 
virtual OperatorFinalizeResultType FinalExecute (ExecutionContext &context, DataChunk &chunk, GlobalOperatorState &gstate, OperatorState &state) const
 
virtual OperatorFinalResultType OperatorFinalize (Pipeline &pipeline, Event &event, ClientContext &context, OperatorFinalizeInput &input) const
 
virtual bool ParallelOperator () const
 
virtual bool RequiresFinalExecute () const
 
virtual bool RequiresOperatorFinalize () const
 
virtual OrderPreservationType OperatorOrder () const
 The influence the operator has on order (insertion order means no influence)
 
virtual unique_ptr< LocalSourceState > GetLocalSourceState (ExecutionContext &context, GlobalSourceState &gstate) const
 
virtual unique_ptr< GlobalSourceState > GetGlobalSourceState (ClientContext &context) const
 
SourceResultType GetData (ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const
 
virtual OperatorPartitionData GetPartitionData (ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate, LocalSourceState &lstate, const OperatorPartitionInfo &partition_info) const
 
virtual bool ParallelSource () const
 
virtual bool SupportsPartitioning (const OperatorPartitionInfo &partition_info) const
 
virtual OrderPreservationType SourceOrder () const
 The type of order emitted by the operator (as a source)
 
virtual ProgressData GetProgress (ClientContext &context, GlobalSourceState &gstate) const
 Returns the current progress percentage, or a negative value if progress bars are not supported.
 
virtual ProgressData GetSinkProgress (ClientContext &context, GlobalSinkState &gstate, const ProgressData source_progress) const
 Returns the current progress percentage, or a negative value if progress bars are not supported.
 
virtual InsertionOrderPreservingMap< string > ExtraSourceParams (GlobalSourceState &gstate, LocalSourceState &lstate) const
 
virtual void PrepareFinalize (ClientContext &context, GlobalSinkState &sink_state) const
 
virtual SinkNextBatchType NextBatch (ExecutionContext &context, OperatorSinkNextBatchInput &input) const
 
virtual OperatorPartitionInfo RequiredPartitionInfo () const
 
virtual vector< const_reference< PhysicalOperator > > GetSources () const
 
bool AllSourcesSupportBatchIndex () const
 
template<class TARGET >
TARGET & Cast ()
 
template<class TARGET >
const TARGET & Cast () const
 

Static Public Member Functions

static unique_ptr< PhysicalOperator > Create (ClientContext &context, PreparedStatementData &data, idx_t batch_size)
 
- Static Public Member Functions inherited from duckdb::PhysicalResultCollector
static unique_ptr< PhysicalOperator > GetResultCollector (ClientContext &context, PreparedStatementData &data)
 
- Static Public Member Functions inherited from duckdb::PhysicalOperator
static void SetEstimatedCardinality (InsertionOrderPreservingMap< string > &result, idx_t estimated_cardinality)
 
static idx_t GetMaxThreadMemory (ClientContext &context)
 The maximum amount of memory the operator should use per thread.
 
static OperatorCachingMode SelectOperatorCachingMode (ExecutionContext &context)
 Whether operator caching is allowed in the current execution context.
 

Public Attributes

idx_t record_batch_size
 User provided batch size.
 
bool parallel
 
- Public Attributes inherited from duckdb::PhysicalResultCollector
StatementType statement_type
 
StatementProperties properties
 
QueryResultMemoryType memory_type
 
PhysicalOperator & plan
 
vector< string > names
 
- Public Attributes inherited from duckdb::PhysicalOperator
ArenaLinkedList< reference< PhysicalOperator > > children
 The child operators.
 
PhysicalOperatorType type
 The physical operator type.
 
vector< LogicalType > types
 The return types.
 
idx_t estimated_cardinality
 The estimated cardinality.
 
unique_ptr< GlobalSinkState > sink_state
 The global sink state.
 
unique_ptr< GlobalOperatorState > op_state
 The global operator state.
 
mutex lock
 Lock for (re)setting any of the operator states.
 

Additional Inherited Members

- Static Public Attributes inherited from duckdb::PhysicalResultCollector
static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::RESULT_COLLECTOR
 
- Static Public Attributes inherited from duckdb::PhysicalOperator
static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::INVALID
 
- Protected Member Functions inherited from duckdb::PhysicalResultCollector
unique_ptr< ColumnDataCollection > CreateCollection (ClientContext &context) const
 
- Protected Member Functions inherited from duckdb::PhysicalOperator
virtual SourceResultType GetDataInternal (ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const
 

Constructor & Destructor Documentation

◆ PhysicalArrowCollector()

duckdb::PhysicalArrowCollector::PhysicalArrowCollector ( PhysicalPlan & physical_plan,
PreparedStatementData & data,
bool  parallel,
idx_t  batch_size 
)
inline
46071 : PhysicalResultCollector(physical_plan, data), record_batch_size(batch_size), parallel(parallel) {
46072 }
idx_t record_batch_size
User provided batch size.
Definition duckdb.cpp:46088

Member Function Documentation

◆ Create()

unique_ptr< PhysicalOperator > duckdb::PhysicalArrowCollector::Create ( ClientContext & context,
PreparedStatementData & data,
idx_t  batch_size 
)
static
46136 {
46137 auto &physical_plan = *data.physical_plan;
46138 auto &root = physical_plan.Root();
46139
46140 if (!PhysicalPlanGenerator::PreserveInsertionOrder(context, root)) {
46141 // Not an order-preserving plan: use the parallel materialized collector.
46142 return make_uniq<PhysicalArrowCollector>(physical_plan, data, true, batch_size);
46143 }
46144
46145 if (!PhysicalPlanGenerator::UseBatchIndex(context, root)) {
46146 // Order-preserving plan, and we cannot use the batch index: use single-threaded result collector.
46147 return make_uniq<PhysicalArrowCollector>(physical_plan, data, false, batch_size);
46148 }
46149
46150 // Order-preserving plan, and we can use the batch index: use a batch collector.
46151 return make_uniq<PhysicalArrowBatchCollector>(physical_plan, data, batch_size);
46152}
static bool PreserveInsertionOrder(ClientContext &context, PhysicalOperator &plan)
Whether or not we should preserve insertion order for executing the given sink.
static bool UseBatchIndex(ClientContext &context, PhysicalOperator &plan)
Whether or not we can (or should) use a batch-index based operator for executing the given sink.

◆ Sink()

SinkResultType duckdb::PhysicalArrowCollector::Sink ( ExecutionContext & context,
DataChunk & chunk,
OperatorSinkInput & input 
) const
overridevirtual

The sink method is called constantly with new input, as long as new input is available. Note that this method CAN be called in parallel; proper locking is needed when accessing data inside the GlobalSinkState.

Reimplemented from duckdb::PhysicalOperator.

46155 {
46156 auto &lstate = input.local_state.Cast<ArrowCollectorLocalState>();
46157 // Append to the appender, up to chunk size
46158
46159 auto count = chunk.size();
46160 auto &appender = lstate.appender;
46161 D_ASSERT(count != 0);
46162
46163 idx_t processed = 0;
46164 do {
46165 if (!appender) {
46166 // Create the appender if we haven't started this chunk yet
46167 auto properties = context.client.GetClientProperties();
46168 D_ASSERT(processed < count);
46169 auto initial_capacity = MinValue(record_batch_size, count - processed);
46170 appender = make_uniq<ArrowAppender>(types, initial_capacity, properties,
46171 ArrowTypeExtensionData::GetExtensionTypes(context.client, types));
46172 }
46173
46174 // Figure out how much we can still append to this chunk
46175 auto row_count = appender->RowCount();
46176 D_ASSERT(record_batch_size > row_count);
46177 auto to_append = MinValue(record_batch_size - row_count, count - processed);
46178
46179 // Append and check if the chunk is finished
46180 appender->Append(chunk, processed, processed + to_append, count);
46181 processed += to_append;
46182 row_count = appender->RowCount();
46183 if (row_count >= record_batch_size) {
46184 lstate.FinishArray();
46185 }
46186 } while (processed < count);
46187 return SinkResultType::NEED_MORE_INPUT;
46188}
static unordered_map< idx_t, const shared_ptr< ArrowTypeExtensionData > > GetExtensionTypes(ClientContext &context, const vector< LogicalType > &duckdb_types)
This function returns possible extension types to given DuckDB types.
vector< LogicalType > types
The return types.
Definition duckdb.hpp:22152
Here is the call graph for this function:

◆ Combine()

SinkCombineResultType duckdb::PhysicalArrowCollector::Combine ( ExecutionContext & context,
OperatorSinkCombineInput & input 
) const
overridevirtual

The combine is called when a single thread has completed execution of its part of the pipeline; it is the final time that a specific LocalSinkState is accessible. This method can be called in parallel while other Sink() or Combine() calls are active on the same GlobalSinkState.

Reimplemented from duckdb::PhysicalOperator.

46191 {
46192 auto &gstate = input.global_state.Cast<ArrowCollectorGlobalState>();
46193 auto &lstate = input.local_state.Cast<ArrowCollectorLocalState>();
46194 auto &last_appender = lstate.appender;
46195 auto &arrays = lstate.finished_arrays;
46196 if (arrays.empty() && !last_appender) {
46197 // Nothing to do
46198 return SinkCombineResultType::FINISHED;
46199 }
46200 if (last_appender) {
46201 // FIXME: we could set these aside and merge them in a finalize event in an effort to create more balanced
46202 // chunks out of these remnants
46203 lstate.FinishArray();
46204 }
46205 // Collect all the finished arrays
46206 lock_guard<mutex> l(gstate.glock);
46207 // Move the arrays from our local state into the global state
46208 gstate.chunks.insert(gstate.chunks.end(), std::make_move_iterator(arrays.begin()),
46209 std::make_move_iterator(arrays.end()));
46210 arrays.clear();
46211 gstate.tuple_count += lstate.tuple_count;
46212 return SinkCombineResultType::FINISHED;
46213}

◆ GetResult()

unique_ptr< QueryResult > duckdb::PhysicalArrowCollector::GetResult ( GlobalSinkState & state) const
overridevirtual

The final method used to fetch the query result from this operator.

Implements duckdb::PhysicalResultCollector.

46215 {
46216 auto &gstate = state_p.Cast<ArrowCollectorGlobalState>();
46217 return std::move(gstate.result);
46218}

◆ GetGlobalSinkState()

unique_ptr< GlobalSinkState > duckdb::PhysicalArrowCollector::GetGlobalSinkState ( ClientContext & context) const
overridevirtual

Reimplemented from duckdb::PhysicalOperator.

46220 {
46221 return make_uniq<ArrowCollectorGlobalState>();
46222}

◆ GetLocalSinkState()

unique_ptr< LocalSinkState > duckdb::PhysicalArrowCollector::GetLocalSinkState ( ExecutionContext & context) const
overridevirtual

Reimplemented from duckdb::PhysicalOperator.

46224 {
46225 return make_uniq<ArrowCollectorLocalState>();
46226}

◆ Finalize()

SinkFinalizeType duckdb::PhysicalArrowCollector::Finalize ( Pipeline & pipeline,
Event & event,
ClientContext & context,
OperatorSinkFinalizeInput & input 
) const
overridevirtual

The finalize is called when ALL threads have finished execution. It is called only once per pipeline, and is entirely single threaded. If Finalize returns SinkResultType::Finished, the sink is marked as finished.

Reimplemented from duckdb::PhysicalOperator.

46229 {
46230 auto &gstate = input.global_state.Cast<ArrowCollectorGlobalState>();
46231
46232 if (gstate.chunks.empty()) {
46233 if (gstate.tuple_count != 0) {
46234 throw InternalException(
46235 "PhysicalArrowCollector Finalize contains no chunks, but tuple_count is non-zero (%d)",
46236 gstate.tuple_count);
46237 }
46238 gstate.result = make_uniq<ArrowQueryResult>(statement_type, properties, names, types,
46239 context.GetClientProperties(), record_batch_size);
46240 return SinkFinalizeType::READY;
46241 }
46242
46243 gstate.result = make_uniq<ArrowQueryResult>(statement_type, properties, names, types, context.GetClientProperties(),
46244 record_batch_size);
46245 auto &arrow_result = gstate.result->Cast<ArrowQueryResult>();
46246 arrow_result.SetArrowData(std::move(gstate.chunks));
46247
46248 return SinkFinalizeType::READY;
46249}

◆ ParallelSink()

bool duckdb::PhysicalArrowCollector::ParallelSink ( ) const
overridevirtual

Reimplemented from duckdb::PhysicalOperator.

46251 {
46252 return parallel;
46253}

◆ SinkOrderDependent()

bool duckdb::PhysicalArrowCollector::SinkOrderDependent ( ) const
overridevirtual

Whether or not the sink operator depends on the order of the input chunks. If this is set to true, we cannot do things like caching intermediate vectors.

Reimplemented from duckdb::PhysicalOperator.

46255 {
46256 return true;
46257}

The documentation for this class was generated from the following file: