Autonomy Software C++ 24.5.1
Welcome to the Autonomy Software repository of the Mars Rover Design Team (MRDT) at Missouri University of Science and Technology (Missouri S&T)! This API reference covers the source code and other resources for the development of the autonomy software for our Mars rover. The Autonomy Software project aims to compete in the University Rover Challenge (URC) by demonstrating advanced autonomous capabilities and robust navigation algorithms.
Loading...
Searching...
No Matches
duckdb::UncompressedStringStorage Struct Reference

Static Public Member Functions

static unique_ptr< AnalyzeState > StringInitAnalyze (ColumnData &col_data, PhysicalType type)
 
static bool StringAnalyze (AnalyzeState &state_p, Vector &input, idx_t count)
 
static idx_t StringFinalAnalyze (AnalyzeState &state_p)
 
static unique_ptr< SegmentScanState > StringInitScan (const QueryContext &context, ColumnSegment &segment)
 
static void StringScanPartial (ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result, idx_t result_offset)
 
static void StringScan (ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result)
 
static void Select (ColumnSegment &segment, ColumnScanState &state, idx_t vector_count, Vector &result, const SelectionVector &sel, idx_t sel_count)
 
static void StringFetchRow (ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx)
 
static unique_ptr< CompressedSegmentState > StringInitSegment (ColumnSegment &segment, block_id_t block_id, optional_ptr< ColumnSegmentState > segment_state)
 
static unique_ptr< CompressionAppendState > StringInitAppend (ColumnSegment &segment)
 
static idx_t StringAppend (CompressionAppendState &append_state, ColumnSegment &segment, SegmentStatistics &stats, UnifiedVectorFormat &data, idx_t offset, idx_t count)
 
static idx_t StringAppendBase (ColumnSegment &segment, SegmentStatistics &stats, UnifiedVectorFormat &data, idx_t offset, idx_t count)
 
static idx_t StringAppendBase (BufferHandle &handle, ColumnSegment &segment, SegmentStatistics &stats, UnifiedVectorFormat &data, idx_t offset, idx_t count)
 
static void StringRevertAppend (ColumnSegment &segment, idx_t new_count)
 
static idx_t FinalizeAppend (ColumnSegment &segment, SegmentStatistics &stats)
 
static void UpdateStringStats (SegmentStatistics &stats, const string_t &new_value)
 
static void SetDictionary (ColumnSegment &segment, BufferHandle &handle, StringDictionaryContainer dict)
 
static StringDictionaryContainer GetDictionary (ColumnSegment &segment, BufferHandle &handle)
 
static uint32_t GetDictionaryEnd (ColumnSegment &segment, BufferHandle &handle)
 
static idx_t RemainingSpace (ColumnSegment &segment, BufferHandle &handle)
 
static void WriteString (ColumnSegment &segment, string_t string, block_id_t &result_block, int32_t &result_offset)
 
static void WriteStringMemory (ColumnSegment &segment, string_t string, block_id_t &result_block, int32_t &result_offset)
 
static string_t ReadOverflowString (ColumnSegment &segment, Vector &result, block_id_t block, int32_t offset)
 
static string_t ReadString (data_ptr_t target, int32_t offset, uint32_t string_length)
 
static string_t ReadStringWithLength (data_ptr_t target, int32_t offset)
 
static void WriteStringMarker (data_ptr_t target, block_id_t block_id, int32_t offset)
 
static void ReadStringMarker (data_ptr_t target, block_id_t &block_id, int32_t &offset)
 
static string_t FetchStringFromDict (ColumnSegment &segment, uint32_t dict_end_offset, Vector &result, data_ptr_t base_ptr, int32_t dict_offset, uint32_t string_length)
 
static unique_ptr< ColumnSegmentState > SerializeState (ColumnSegment &segment)
 
static unique_ptr< ColumnSegmentState > DeserializeState (Deserializer &deserializer)
 
static void VisitBlockIds (const ColumnSegment &segment, BlockIdVisitor &visitor)
 

Static Public Attributes

static constexpr uint16_t DICTIONARY_HEADER_SIZE = sizeof(uint32_t) + sizeof(uint32_t)
 Dictionary header size at the beginning of the string segment (offset + length)
 
static constexpr uint16_t BIG_STRING_MARKER = (uint16_t)-1
 Marker used in length field to indicate the presence of a big string.
 
static constexpr idx_t BIG_STRING_MARKER_BASE_SIZE = sizeof(block_id_t) + sizeof(int32_t)
 Base size of big string marker (block id + offset)
 
static constexpr idx_t BIG_STRING_MARKER_SIZE = BIG_STRING_MARKER_BASE_SIZE
 The marker size of the big string.
 

Member Function Documentation

◆ StringInitAppend()

static unique_ptr< CompressionAppendState > duckdb::UncompressedStringStorage::StringInitAppend ( ColumnSegment &segment )
inline static
// Pins the segment's block through the buffer manager obtained from segment.db and returns a new
// CompressionAppendState constructed from the (moved) buffer handle.
75237 {
75238 auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
75239 // This block was initialized in StringInitSegment
75240 auto handle = buffer_manager.Pin(segment.block);
75241 return make_uniq<CompressionAppendState>(std::move(handle));
75242 }

◆ StringAppend()

static idx_t duckdb::UncompressedStringStorage::StringAppend ( CompressionAppendState &append_state,
ColumnSegment &segment,
SegmentStatistics &stats,
UnifiedVectorFormat &data,
idx_t offset,
idx_t count
)
inline static
// Thin wrapper: forwards to the BufferHandle overload of StringAppendBase using the handle stored in
// append_state, and returns whatever that overload returns.
75245 {
75246 return StringAppendBase(append_state.handle, segment, stats, data, offset, count);
75247 }

◆ StringAppendBase() [1/2]

static idx_t duckdb::UncompressedStringStorage::StringAppendBase ( ColumnSegment &segment,
SegmentStatistics &stats,
UnifiedVectorFormat &data,
idx_t offset,
idx_t count
)
inline static
// Convenience overload for callers that do not already hold a buffer handle: pins segment.block itself
// and delegates to the BufferHandle overload of StringAppendBase, returning its result.
75250 {
75251 auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
75252 auto handle = buffer_manager.Pin(segment.block);
75253 return StringAppendBase(handle, segment, stats, data, offset, count);
75254 }

◆ StringAppendBase() [2/2]

static idx_t duckdb::UncompressedStringStorage::StringAppendBase ( BufferHandle &handle,
ColumnSegment &segment,
SegmentStatistics &stats,
UnifiedVectorFormat &data,
idx_t offset,
idx_t count
)
inline static
// Appends up to `count` strings from `data` (starting at `offset`) into the pinned block behind `handle`.
// Returns the number of rows actually appended and adds that same number to segment.count. The loop
// stops early as soon as either the per-row index entry or the string payload no longer fits in the
// space reported by RemainingSpace().
75257 {
75258 D_ASSERT(segment.GetBlockOffset() == 0);
75259 auto handle_ptr = handle.Ptr();
75260 auto source_data = UnifiedVectorFormat::GetData<string_t>(data);
// Block layout as accessed below: a uint32_t dictionary size at the very start of the block, a uint32_t
// dictionary end offset directly after it, then one int32_t entry per row beginning at
// DICTIONARY_HEADER_SIZE.
75261 auto result_data = reinterpret_cast<int32_t *>(handle_ptr + DICTIONARY_HEADER_SIZE);
75262 auto dictionary_size = reinterpret_cast<uint32_t *>(handle_ptr);
75263 auto dictionary_end = reinterpret_cast<uint32_t *>(handle_ptr + sizeof(uint32_t));
75264
// remaining_space is tracked locally from here on and decremented as entries and strings are written
75265 idx_t remaining_space = RemainingSpace(segment, handle);
75266 auto base_count = segment.count.load();
75267 for (idx_t i = 0; i < count; i++) {
75268 auto source_idx = data.sel->get_index(offset + i);
75269 auto target_idx = base_count + i;
// every row - NULL or not - consumes one int32_t index entry, so that space is checked and reserved
// before the validity check
75270 if (remaining_space < sizeof(int32_t)) {
75271 // string index does not fit in the block at all
75272 segment.count += i;
75273 return i;
75274 }
75275 remaining_space -= sizeof(int32_t);
75276 const bool is_null = !data.validity.RowIsValid(source_idx);
75277 if (is_null) {
75278 stats.statistics.SetHasNullFast();
75279 // null value is stored as a copy of the last value, this is done to be able to efficiently do the
75280 // string_length calculation
// the very first entry of the segment has no predecessor and is stored as 0 instead
75281 if (target_idx > 0) {
75282 result_data[target_idx] = result_data[target_idx - 1];
75283 } else {
75284 result_data[target_idx] = 0;
75285 }
75286 continue;
75287 }
// `end` is the address at the dictionary end offset; payloads are placed at `end - *dictionary_size`
// after the size has been increased, i.e. the dictionary grows towards lower addresses
75288 auto end = handle.Ptr() + *dictionary_end;
75289
75290#ifdef DEBUG
75291 GetDictionary(segment, handle).Verify(segment.GetBlockSize());
75292#endif
// NOTE(review): the "Unknown string, continue" comment below does not correspond to any lookup or
// `continue` visible in this function - possibly a leftover; confirm against upstream.
75293 // Unknown string, continue
75294 // non-null value, check if we can fit it within the block
75295 idx_t string_length = source_data[source_idx].GetSize();
75296
75297 // determine whether or not we have space in the block for this string
75298 bool use_overflow_block = false;
75299 idx_t required_space = string_length;
75300 if (DUCKDB_UNLIKELY(required_space >= StringUncompressed::GetStringBlockLimit(segment.GetBlockSize()))) {
75301 // string exceeds block limit, store in overflow block and only write a marker here
75302 required_space = BIG_STRING_MARKER_SIZE;
75303 use_overflow_block = true;
75304 }
75305 if (DUCKDB_UNLIKELY(required_space > remaining_space)) {
75306 // no space remaining: return how many tuples we ended up writing
// nothing has been written for row i at this point; only the i rows before it are counted
75307 segment.count += i;
75308 return i;
75309 }
75310
75311 // we have space: write the string
// statistics are updated only for non-null rows that are actually going to be stored
75312 UpdateStringStats(stats, source_data[source_idx]);
75313
75314 if (DUCKDB_UNLIKELY(use_overflow_block)) {
75315 // write to overflow blocks
// `block` and `current_offset` are output parameters filled in by WriteString
75316 block_id_t block;
75317 int32_t current_offset;
75318 // write the string into the current string block
75319 WriteString(segment, source_data[source_idx], block, current_offset);
75320 *dictionary_size += BIG_STRING_MARKER_SIZE;
75321 remaining_space -= BIG_STRING_MARKER_SIZE;
75322 auto dict_pos = end - *dictionary_size;
75323
75324 // write a big string marker into the dictionary
75325 WriteStringMarker(dict_pos, block, current_offset);
75326
75327 // place the dictionary offset into the set of vectors
75328 // note: for overflow strings we write negative value
75329
75330 // dictionary_size is an uint32_t value, so we can cast up.
75331 D_ASSERT(NumericCast<idx_t>(*dictionary_size) <= segment.GetBlockSize());
75332 result_data[target_idx] = -NumericCast<int32_t>((*dictionary_size));
75333 } else {
75334 // string fits in block, append to dictionary and increment dictionary position
75335 D_ASSERT(string_length < NumericLimits<uint16_t>::Maximum());
75336 *dictionary_size += required_space;
75337 remaining_space -= required_space;
75338 auto dict_pos = end - *dictionary_size;
75339 // now write the actual string data into the dictionary
75340 memcpy(dict_pos, source_data[source_idx].GetData(), string_length);
75341
75342 // dictionary_size is an uint32_t value, so we can cast up.
75343 D_ASSERT(NumericCast<idx_t>(*dictionary_size) <= segment.GetBlockSize());
75344 // Place the dictionary offset into the set of vectors.
// the stored entry is the cumulative dictionary size after this string was added
75345 result_data[target_idx] = NumericCast<int32_t>(*dictionary_size);
75346 }
75347 D_ASSERT(RemainingSpace(segment, handle) <= segment.GetBlockSize());
75348#ifdef DEBUG
75349 GetDictionary(segment, handle).Verify(segment.GetBlockSize());
75350#endif
75351 }
// all `count` rows fit
75352 segment.count += count;
75353 return count;
75354 }
::uint32_t uint32_t
::int32_t int32_t
static constexpr idx_t BIG_STRING_MARKER_SIZE
The marker size of the big string.
Definition duckdb.cpp:75219
static constexpr uint16_t DICTIONARY_HEADER_SIZE
Dictionary header size at the beginning of the string segment (offset + length)
Definition duckdb.cpp:75213

◆ StringRevertAppend()

static void duckdb::UncompressedStringStorage::StringRevertAppend ( ColumnSegment &segment,
idx_t new_count
)
inline static
// Shrinks the dictionary size stored in the block header so that it matches the first `new_count`
// rows. Does nothing when new_count >= segment.count. Only the dictionary size header is rewritten
// here; segment.count and the per-row entries are not modified by this function.
75356 {
75357 if (new_count >= segment.count) {
75358 return;
75359 }
75360 // we need to decrement the dictionary size by all of the strings we are erasing
75361 auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
75362 auto handle = buffer_manager.Pin(segment.block);
75363 auto handle_ptr = handle.Ptr();
// same header layout as used by StringAppendBase: dictionary size first, row entries after the header
75364 auto result_data = reinterpret_cast<int32_t *>(handle_ptr + DICTIONARY_HEADER_SIZE);
75365 auto dictionary_size = reinterpret_cast<uint32_t *>(handle_ptr);
75366 uint32_t new_dictionary_size;
75367 if (new_count == 0) {
// no rows are kept, so the dictionary becomes empty
75368 new_dictionary_size = 0;
75369 } else {
// the entry of the last surviving row holds the dictionary size as of that row (negated for
// overflow strings), so its absolute value is the size to restore
75370 auto entry_offset = result_data[new_count - 1];
75371 if (entry_offset < 0) {
75372 // overflow strings store the dict offset negatively - invert size
75373 new_dictionary_size = -entry_offset;
75374 } else {
75375 new_dictionary_size = entry_offset;
75376 }
75377 }
75378 *dictionary_size = new_dictionary_size;
75379 }

◆ UpdateStringStats()

static void duckdb::UncompressedStringStorage::UpdateStringStats ( SegmentStatistics &stats,
const string_t &new_value
)
inline static
// Folds one newly stored value into the segment statistics: calls SetHasNoNullFast() on the statistics
// and then forwards `new_value` to the updater that matches the statistics type.
75384 {
75385 stats.statistics.SetHasNoNullFast();
// geometry statistics have their own updater; every other statistics type goes through StringStats
75386 if (stats.statistics.GetStatsType() == StatisticsType::GEOMETRY_STATS) {
75387 GeometryStats::Update(stats.statistics, new_value);
75388 } else {
75389 StringStats::Update(stats.statistics, new_value);
75390 }
75391 }

◆ FetchStringFromDict()

static string_t duckdb::UncompressedStringStorage::FetchStringFromDict ( ColumnSegment &segment,
uint32_t dict_end_offset,
Vector &result,
data_ptr_t base_ptr,
int32_t dict_offset,
uint32_t string_length
)
inline static
// Resolves a per-row dictionary entry to a string_t.
// - dict_offset >= 0: the string starts at base_ptr + dict_end_offset - dict_offset and has
//   `string_length` bytes; `segment` and `result` are not used beyond the assertion in this branch.
// - dict_offset < 0: a marker is read at base_ptr + dict_end_offset - |dict_offset| and the string is
//   fetched via ReadOverflowString; `string_length` is not used in this branch.
75407 {
75408 D_ASSERT(dict_offset <= NumericCast<int32_t>(segment.GetBlockSize()));
75409 if (DUCKDB_LIKELY(dict_offset >= 0)) {
75410 // regular string - fetch from dictionary
75411 auto dict_end = base_ptr + dict_end_offset;
75412 auto dict_pos = dict_end - dict_offset;
75413
// the returned string_t is built directly from the pointer into the dictionary
75414 auto str_ptr = char_ptr_cast(dict_pos);
75415 return string_t(str_ptr, string_length);
75416 } else {
75417 // read overflow string
// block_id and offset are filled in by ReadStringMarker from the marker stored in the dictionary
75418 block_id_t block_id;
75419 int32_t offset;
75420 ReadStringMarker(base_ptr + dict_end_offset - AbsValue<int32_t>(dict_offset), block_id, offset);
75421
75422 return ReadOverflowString(segment, result, block_id, offset);
75423 }
75424 }

The documentation for this struct was generated from the following file: