75257 {
75258 D_ASSERT(segment.GetBlockOffset() == 0);
75259 auto handle_ptr = handle.Ptr();
75260 auto source_data = UnifiedVectorFormat::GetData<string_t>(data);
75262 auto dictionary_size =
reinterpret_cast<uint32_t *
>(handle_ptr);
75263 auto dictionary_end =
reinterpret_cast<uint32_t *
>(handle_ptr +
sizeof(
uint32_t));
75264
75265 idx_t remaining_space = RemainingSpace(segment, handle);
75266 auto base_count = segment.count.load();
75267 for (idx_t i = 0; i < count; i++) {
75268 auto source_idx = data.sel->get_index(offset + i);
75269 auto target_idx = base_count + i;
75270 if (remaining_space <
sizeof(
int32_t)) {
75271
75272 segment.count += i;
75273 return i;
75274 }
75275 remaining_space -=
sizeof(
int32_t);
75276 const bool is_null = !data.validity.RowIsValid(source_idx);
75277 if (is_null) {
75278 stats.statistics.SetHasNullFast();
75279
75280
75281 if (target_idx > 0) {
75282 result_data[target_idx] = result_data[target_idx - 1];
75283 } else {
75284 result_data[target_idx] = 0;
75285 }
75286 continue;
75287 }
75288 auto end = handle.Ptr() + *dictionary_end;
75289
75290#ifdef DEBUG
75291 GetDictionary(segment, handle).Verify(segment.GetBlockSize());
75292#endif
75293
75294
75295 idx_t string_length = source_data[source_idx].GetSize();
75296
75297
75298 bool use_overflow_block = false;
75299 idx_t required_space = string_length;
75300 if (DUCKDB_UNLIKELY(required_space >= StringUncompressed::GetStringBlockLimit(segment.GetBlockSize()))) {
75301
75303 use_overflow_block = true;
75304 }
75305 if (DUCKDB_UNLIKELY(required_space > remaining_space)) {
75306
75307 segment.count += i;
75308 return i;
75309 }
75310
75311
75312 UpdateStringStats(stats, source_data[source_idx]);
75313
75314 if (DUCKDB_UNLIKELY(use_overflow_block)) {
75315
75316 block_id_t block;
75318
75319 WriteString(segment, source_data[source_idx], block, current_offset);
75322 auto dict_pos = end - *dictionary_size;
75323
75324
75325 WriteStringMarker(dict_pos, block, current_offset);
75326
75327
75328
75329
75330
75331 D_ASSERT(NumericCast<idx_t>(*dictionary_size) <= segment.GetBlockSize());
75332 result_data[target_idx] = -NumericCast<int32_t>((*dictionary_size));
75333 } else {
75334
75335 D_ASSERT(string_length < NumericLimits<uint16_t>::Maximum());
75336 *dictionary_size += required_space;
75337 remaining_space -= required_space;
75338 auto dict_pos = end - *dictionary_size;
75339
75340 memcpy(dict_pos, source_data[source_idx].GetData(), string_length);
75341
75342
75343 D_ASSERT(NumericCast<idx_t>(*dictionary_size) <= segment.GetBlockSize());
75344
75345 result_data[target_idx] = NumericCast<int32_t>(*dictionary_size);
75346 }
75347 D_ASSERT(RemainingSpace(segment, handle) <= segment.GetBlockSize());
75348#ifdef DEBUG
75349 GetDictionary(segment, handle).Verify(segment.GetBlockSize());
75350#endif
75351 }
75352 segment.count += count;
75353 return count;
75354 }
static constexpr idx_t BIG_STRING_MARKER_SIZE
The size of the marker stored for a big string.
Definition duckdb.cpp:75219
static constexpr uint16_t DICTIONARY_HEADER_SIZE
Dictionary header size at the beginning of the string segment (offset + length)
Definition duckdb.cpp:75213