Autonomy Software C++ 24.5.1
Welcome to the Autonomy Software repository of the Mars Rover Design Team (MRDT) at Missouri University of Science and Technology (Missouri S&T)! This API reference contains the source code and other resources for the development of the autonomy software for our Mars rover. The Autonomy Software project aims to compete in the University Rover Challenge (URC) by demonstrating advanced autonomous capabilities and robust navigation algorithms.
Loading...
Searching...
No Matches
duckdb::CSVWriter Class Reference
Collaboration diagram for duckdb::CSVWriter:

Public Member Functions

 CSVWriter (WriteStream &stream, vector< string > name_list, bool shared=true)
 Create a CSVWriter that writes to a (non-owned) WriteStream.
 
 CSVWriter (CSVReaderOptions &options, FileSystem &fs, const string &file_path, FileCompressionType compression, bool shared=true)
 Create a CSVWriter that writes to a file.
 
void Initialize (bool force=false)
 Writes header and prefix if necessary.
 
void WriteRawString (const string &data)
 Writes the raw string directly into the output stream.
 
void WriteHeader ()
 Writes the header directly into the output stream.
 
void WriteRawString (const string &prefix, CSVWriterState &local_state)
 Write the Raw String, using the local_state.
 
void WriteChunk (DataChunk &input, CSVWriterState &local_state)
 Write a chunk of VARCHAR vectors to the CSV file (any casts are the responsibility of caller)
 
void WriteChunk (DataChunk &input)
 (Non-shared only) variant of WriteChunk
 
void Flush (CSVWriterState &local_state)
 Flushes all data in the local write state.
 
void Flush ()
 (Non-shared only) variant of Flush
 
void Reset (optional_ptr< CSVWriterState > local_state)
 Resets the state of the writer. Warning: the file_writer is not reset.
 
void Close ()
 Closes the writer, optionally writes a postfix.
 
idx_t BytesWritten ()
 
idx_t FileSize ()
 BytesWritten + OriginalSize.
 
bool WrittenAnything ()
 
void SetWrittenAnything (bool val)
 

Public Attributes

vector< unique_ptr< Expression > > string_casts
 
CSVReaderOptions options
 
CSVWriterOptions writer_options
 

Protected Member Functions

void FlushInternal (CSVWriterState &local_state)
 
void ResetInternal (optional_ptr< CSVWriterState > local_state)
 

Static Protected Member Functions

static void WriteQuoteOrEscape (WriteStream &writer, char quote_or_escape)
 
static string AddEscapes (char to_be_escaped, char escape, const string &val)
 
static bool RequiresQuotes (const char *str, idx_t len, const string &null_str, const vector< bool > &requires_quotes)
 
static void WriteQuotedString (WriteStream &writer, const char *str, idx_t len, bool force_quote, const string &null_str, const vector< bool > &requires_quotes, char quote, char escape)
 
static void WriteQuotedString (WriteStream &writer, const char *str, idx_t len, idx_t col_idx, CSVReaderOptions &options, CSVWriterOptions &writer_options)
 
static void WriteChunk (DataChunk &input, MemoryStream &writer, CSVReaderOptions &options, bool &written_anything, CSVWriterOptions &writer_options)
 
static void WriteHeader (MemoryStream &stream, CSVReaderOptions &options, CSVWriterOptions &writer_options)
 

Protected Attributes

bool written_anything = false
 If we've written any rows yet, allows us to prevent a trailing comma when writing JSON ARRAY.
 
unique_ptr< BufferedFileWriter > file_writer
 (optional) The owned file writer of this CSVWriter
 
WriteStream & write_stream
 The WriteStream to write the CSV data to.
 
idx_t bytes_written = 0
 
bool should_initialize
 
mutex lock
 
bool shared
 
unique_ptr< CSVWriterState > global_write_state
 

Constructor & Destructor Documentation

◆ CSVWriter() [1/2]

duckdb::CSVWriter::CSVWriter ( WriteStream &  stream,
vector< string >  name_list,
bool  shared = true 
)

Create a CSVWriter that writes to a (non-owned) WriteStream.

51010 : writer_options(options.dialect_options.state_machine_options.delimiter.GetValue(),
51011 options.dialect_options.state_machine_options.quote.GetValue(), options.write_newline),
51012 write_stream(stream), should_initialize(true), shared(shared) {
51013 auto size = name_list.size();
51014 options.name_list = std::move(name_list);
51015 options.force_quote.resize(size, false);
51016 options.force_quote.resize(size, false);
51017
51018 if (!shared) {
51019 global_write_state = make_uniq<CSVWriterState>();
51020 }
51021}
WriteStream & write_stream
The WriteStream to write the CSV data to.
Definition duckdb.cpp:50934
GOpaque< Size > size(const GMat &src)
const T & GetValue() const
Returns CSV Option value.
Definition duckdb.hpp:46291
DialectOptions dialect_options
See struct above.
Definition duckdb.hpp:51726
vector< bool > force_quote
True, if column with that index must be quoted.
Definition duckdb.hpp:51820
vector< string > name_list
User-defined name list.
Definition duckdb.hpp:51759
CSVOption< string > delimiter
Delimiter to separate columns within each line.
Definition duckdb.hpp:46401
CSVOption< char > quote
Quote used for columns that contain reserved characters, e.g '.
Definition duckdb.hpp:46403
Here is the call graph for this function:

◆ CSVWriter() [2/2]

duckdb::CSVWriter::CSVWriter ( CSVReaderOptions &  options,
FileSystem &  fs,
const string &  file_path,
FileCompressionType  compression,
bool  shared = true 
)

Create a CSVWriter that writes to a file.

51025 : options(options_p),
51026 writer_options(options.dialect_options.state_machine_options.delimiter.GetValue(),
51027 options.dialect_options.state_machine_options.quote.GetValue(), options.write_newline),
51028 file_writer(make_uniq<BufferedFileWriter>(fs, file_path,
51029 FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW |
51030 FileLockType::WRITE_LOCK | compression)),
51031 write_stream(*file_writer), should_initialize(true), shared(shared) {
51032 if (!shared) {
51033 global_write_state = make_uniq<CSVWriterState>();
51034 }
51035}
unique_ptr< BufferedFileWriter > file_writer
(optional) The owned file writer of this CSVWriter
Definition duckdb.cpp:50931
static constexpr FileOpenFlags FILE_FLAGS_FILE_CREATE_NEW
Always create a new file. If a file exists, the file is truncated. Cannot be used together with CREAT...
Definition duckdb.hpp:7858
static constexpr FileOpenFlags FILE_FLAGS_WRITE
Open file with write access.
Definition duckdb.hpp:7852

Member Function Documentation

◆ Initialize()

void duckdb::CSVWriter::Initialize ( bool  force = false)

Writes header and prefix if necessary.

51037 {
51038 if (!force && !should_initialize) {
51039 return;
51040 }
51041
51042 if (!options.prefix.empty()) {
51043 WriteRawString(options.prefix);
51044 }
51045
51046 if (!(options.dialect_options.header.IsSetByUser() && !options.dialect_options.header.GetValue())) {
51047 WriteHeader();
51048 }
51049
51050 should_initialize = false;
51051}
void WriteRawString(const string &data)
Writes the raw string directly into the output stream.
Definition duckdb.cpp:51068
void WriteHeader()
Writes the header directly into the output stream.
Definition duckdb.cpp:51087
string prefix
Prefix/suffix/custom newline the entire file once (enables writing of files as JSON arrays)
Definition duckdb.hpp:51822
CSVOption< bool > header
Whether the file has a header line.
Definition duckdb.hpp:51709
Here is the call graph for this function:

◆ WriteRawString() [1/2]

void duckdb::CSVWriter::WriteRawString ( const string &  data)

Writes the raw string directly into the output stream.

51068 {
51069 if (shared) {
51070 lock_guard<mutex> flock(lock);
51071 bytes_written += raw_string.size();
51072 write_stream.WriteData(const_data_ptr_cast(raw_string.c_str()), raw_string.size());
51073 } else {
51074 bytes_written += raw_string.size();
51075 write_stream.WriteData(const_data_ptr_cast(raw_string.c_str()), raw_string.size());
51076 }
51077}
Here is the caller graph for this function:

◆ WriteHeader() [1/2]

void duckdb::CSVWriter::WriteHeader ( )

Writes the header directly into the output stream.

51087 {
51088 CSVWriterState state;
51089 WriteHeader(*state.stream, options, writer_options);
51090 state.written_anything = true;
51091 Flush(state);
51092}
void Flush()
(Non-shared only) variant of Flush
Definition duckdb.cpp:51103
Here is the call graph for this function:
Here is the caller graph for this function:

◆ WriteRawString() [2/2]

void duckdb::CSVWriter::WriteRawString ( const string &  prefix,
CSVWriterState &  local_state 
)

Write the Raw String, using the local_state.

51079 {
51080 local_state.stream->WriteData(const_data_ptr_cast(prefix.c_str()), prefix.size());
51081
51082 if (!local_state.require_manual_flush && local_state.stream->GetPosition() >= writer_options.flush_size) {
51083 Flush(local_state);
51084 }
51085}
idx_t flush_size
The size of the CSV file (in bytes) that we buffer before we flush it to disk.
Definition duckdb.cpp:50842
Here is the call graph for this function:

◆ WriteChunk() [1/3]

void duckdb::CSVWriter::WriteChunk ( DataChunk &  input,
CSVWriterState &  local_state 
)

Write a chunk of VARCHAR vectors to the CSV file (any casts are the responsibility of caller)

51053 {
51054 WriteChunk(input, *local_state.stream, options, local_state.written_anything, writer_options);
51055
51056 if (!local_state.require_manual_flush && local_state.stream->GetPosition() >= local_state.flush_size) {
51057 Flush(local_state);
51058 }
51059}
void WriteChunk(DataChunk &input, CSVWriterState &local_state)
Write a chunk of VARCHAR vectors to the CSV file (any casts are the responsibility of caller)
Definition duckdb.cpp:51053
Here is the call graph for this function:
Here is the caller graph for this function:

◆ WriteChunk() [2/3]

void duckdb::CSVWriter::WriteChunk ( DataChunk &  input)

(Non-shared only) variant of WriteChunk

51061 {
51062 // Method intended for non-shared use only
51063 D_ASSERT(!shared);
51064
51065 WriteChunk(input, *global_write_state);
51066}
Here is the call graph for this function:

◆ Flush() [1/2]

void duckdb::CSVWriter::Flush ( CSVWriterState &  local_state)

Flushes all data in the local write state.

51094 {
51095 if (shared) {
51096 lock_guard<mutex> flock(lock);
51097 FlushInternal(local_state);
51098 } else {
51099 FlushInternal(local_state);
51100 }
51101}

◆ Flush() [2/2]

void duckdb::CSVWriter::Flush ( )

(Non-shared only) variant of Flush

51103 {
51104 // Method intended for non-shared use only
51105 D_ASSERT(!shared);
51106 FlushInternal(*global_write_state);
51107}
Here is the caller graph for this function:

◆ Reset()

void duckdb::CSVWriter::Reset ( optional_ptr< CSVWriterState >  local_state)

Resets the state of the writer. Warning: the file_writer is not reset.

51109 {
51110 if (shared) {
51111 lock_guard<mutex> flock(lock);
51112 ResetInternal(local_state);
51113 } else {
51114 ResetInternal(local_state);
51115 }
51116}

◆ Close()

void duckdb::CSVWriter::Close ( )

Closes the writer, optionally writes a postfix.

51118 {
51119 if (shared) {
51120 lock_guard<mutex> flock(lock);
51121 if (file_writer) {
51122 file_writer->Close();
51123 }
51124 } else {
51125 if (file_writer) {
51126 file_writer->Close();
51127 }
51128 }
51129}

◆ BytesWritten()

idx_t duckdb::CSVWriter::BytesWritten ( )
51158 {
51159 if (shared) {
51160 lock_guard<mutex> flock(lock);
51161 return bytes_written;
51162 }
51163 return bytes_written;
51164}

◆ FileSize()

idx_t duckdb::CSVWriter::FileSize ( )

BytesWritten + OriginalSize.

51173 {
51174 if (shared) {
51175 lock_guard<mutex> flock(lock);
51176 return GetFileSize(file_writer, bytes_written);
51177 }
51178 return GetFileSize(file_writer, bytes_written);
51179}

◆ WrittenAnything()

bool duckdb::CSVWriter::WrittenAnything ( )
inline
50908 {
50909 return written_anything;
50910 }
bool written_anything
If we've written any rows yet, allows us to prevent a trailing comma when writing JSON ARRAY.
Definition duckdb.cpp:50928

◆ SetWrittenAnything()

void duckdb::CSVWriter::SetWrittenAnything ( bool  val)
inline
50911 {
50912 if (shared) {
50913 lock_guard<mutex> guard(lock);
50914 written_anything = val;
50915 } else {
50916 written_anything = val;
50917 }
50918 }

◆ FlushInternal()

void duckdb::CSVWriter::FlushInternal ( CSVWriterState &  local_state)
protected
51131 {
51132 if (!local_state.written_anything) {
51133 return;
51134 }
51135
51136 if (!written_anything) {
51137 written_anything = true;
51138 } else if (writer_options.newline_writing_mode == CSVNewLineMode::WRITE_BEFORE) {
51139 write_stream.WriteData(const_data_ptr_cast(writer_options.newline.c_str()), writer_options.newline.size());
51140 }
51141
51142 written_anything = true;
51143 bytes_written += local_state.stream->GetPosition();
51144 write_stream.WriteData(local_state.stream->GetData(), local_state.stream->GetPosition());
51145
51146 local_state.Reset();
51147}
string newline
The newline string to write.
Definition duckdb.cpp:50840
CSVNewLineMode newline_writing_mode
How to write newlines.
Definition duckdb.cpp:50846

◆ ResetInternal()

void duckdb::CSVWriter::ResetInternal ( optional_ptr< CSVWriterState >  local_state)
protected
51149 {
51150 if (local_state) {
51151 local_state->Reset();
51152 }
51153
51154 written_anything = false;
51155 bytes_written = 0;
51156}

◆ WriteQuoteOrEscape()

void duckdb::CSVWriter::WriteQuoteOrEscape ( WriteStream &  writer,
char  quote_or_escape 
)
staticprotected
51181 {
51182 if (quote_or_escape != '\0') {
51183 writer.Write(quote_or_escape);
51184 }
51185}

◆ AddEscapes()

string duckdb::CSVWriter::AddEscapes ( char  to_be_escaped,
char  escape,
const string &  val 
)
staticprotected
51187 {
51188 if (escape == '\0') {
51189 return val;
51190 }
51191 idx_t i = 0;
51192 string new_val = "";
51193 idx_t found = val.find(to_be_escaped);
51194
51195 while (found != string::npos) {
51196 while (i < found) {
51197 new_val += val[i];
51198 i++;
51199 }
51200 new_val += escape;
51201 found = val.find(to_be_escaped, found + 1);
51202 }
51203 while (i < val.length()) {
51204 new_val += val[i];
51205 i++;
51206 }
51207 return new_val;
51208}

◆ RequiresQuotes()

bool duckdb::CSVWriter::RequiresQuotes ( const char *  str,
idx_t  len,
const string &  null_str,
const vector< bool > &  requires_quotes 
)
staticprotected
51211 {
51212 // check if the string is equal to the null string
51213 if (len == null_str.size() && memcmp(str, null_str.c_str(), len) == 0) {
51214 return true;
51215 }
51216 auto str_data = const_data_ptr_cast(str);
51217 for (idx_t i = 0; i < len; i++) {
51218 if (requires_quotes[str_data[i]]) {
51219 // this byte requires quotes - write a quoted string
51220 return true;
51221 }
51222 }
51223 // no newline, quote or delimiter in the string
51224 // no quoting or escaping necessary
51225 return false;
51226}

◆ WriteQuotedString() [1/2]

void duckdb::CSVWriter::WriteQuotedString ( WriteStream &  writer,
const char *  str,
idx_t  len,
bool  force_quote,
const string &  null_str,
const vector< bool > &  requires_quotes,
char  quote,
char  escape 
)
staticprotected
51237 {
51238 if (!force_quote) {
51239 // force quote is disabled: check if we need to add quotes anyway
51240 force_quote = RequiresQuotes(str, len, null_str, requires_quotes);
51241 }
51242 // If a quote is set to none (i.e., null-terminator) we skip the quotation
51243 if (force_quote && quote != '\0') {
51244 // quoting is enabled: we might need to escape things in the string
51245 bool requires_escape = false;
51246 // simple CSV
51247 // do a single loop to check for a quote or escape value
51248 for (idx_t i = 0; i < len; i++) {
51249 if (str[i] == quote || str[i] == escape) {
51250 requires_escape = true;
51251 break;
51252 }
51253 }
51254
51255 if (!requires_escape) {
51256 // fast path: no need to escape anything
51257 WriteQuoteOrEscape(writer, quote);
51258 writer.WriteData(const_data_ptr_cast(str), len);
51259 WriteQuoteOrEscape(writer, quote);
51260 return;
51261 }
51262
51263 // slow path: need to add escapes
51264 string new_val(str, len);
51265 new_val = AddEscapes(escape, escape, new_val);
51266 if (escape != quote) {
51267 // need to escape quotes separately
51268 new_val = AddEscapes(quote, escape, new_val);
51269 }
51270 WriteQuoteOrEscape(writer, quote);
51271 writer.WriteData(const_data_ptr_cast(new_val.c_str()), new_val.size());
51272 WriteQuoteOrEscape(writer, quote);
51273 } else {
51274 writer.WriteData(const_data_ptr_cast(str), len);
51275 }
51276}

◆ WriteQuotedString() [2/2]

void duckdb::CSVWriter::WriteQuotedString ( WriteStream &  writer,
const char *  str,
idx_t  len,
idx_t  col_idx,
CSVReaderOptions &  options,
CSVWriterOptions &  writer_options 
)
staticprotected
51229 {
51230 WriteQuotedString(writer, str, len, options.force_quote[col_idx], options.null_str[0],
51231 writer_options.requires_quotes, options.dialect_options.state_machine_options.quote.GetValue(),
51232 options.dialect_options.state_machine_options.escape.GetValue());
51233}
vector< string > null_str
Specifies the strings that represents a null value.
Definition duckdb.hpp:51740
CSVOption< char > escape
Escape character to escape quote character.
Definition duckdb.hpp:46405
vector< bool > requires_quotes
For each byte whether the CSV file requires quotes when containing the byte.
Definition duckdb.cpp:50844

◆ WriteChunk() [3/3]

void duckdb::CSVWriter::WriteChunk ( DataChunk &  input,
MemoryStream &  writer,
CSVReaderOptions &  options,
bool &  written_anything,
CSVWriterOptions &  writer_options 
)
staticprotected
51280 {
51281 // now loop over the vectors and output the values
51282 for (idx_t row_idx = 0; row_idx < input.size(); row_idx++) {
51283 if (row_idx == 0 && !written_anything) {
51284 written_anything = true;
51285 } else if (writer_options.newline_writing_mode == CSVNewLineMode::WRITE_BEFORE) {
51286 writer.WriteData(const_data_ptr_cast(writer_options.newline.c_str()), writer_options.newline.size());
51287 }
51288 // write values
51289 D_ASSERT(options.null_str.size() == 1);
51290 for (idx_t col_idx = 0; col_idx < input.ColumnCount(); col_idx++) {
51291 if (col_idx != 0) {
51292 CSVWriter::WriteQuoteOrEscape(writer,
51293 options.dialect_options.state_machine_options.delimiter.GetValue()[0]);
51294 }
51295 if (FlatVector::IsNull(input.data[col_idx], row_idx)) {
51296 // write null value
51297 writer.WriteData(const_data_ptr_cast(options.null_str[0].c_str()), options.null_str[0].size());
51298 continue;
51299 }
51300
51301 // non-null value, fetch the string value from the cast chunk
51302 auto str_data = FlatVector::GetData<string_t>(input.data[col_idx]);
51303 // FIXME: we could gain some performance here by checking for certain types if they ever require quotes
51304 // (e.g. integers only require quotes if the delimiter is a number, decimals only require quotes if the
51305 // delimiter is a number or "." character)
51306
51307 WriteQuotedString(writer, str_data[row_idx].GetData(), str_data[row_idx].GetSize(), col_idx, options,
51308 writer_options);
51309 }
51310 if (writer_options.newline_writing_mode == CSVNewLineMode::WRITE_AFTER) {
51311 writer.WriteData(const_data_ptr_cast(writer_options.newline.c_str()), writer_options.newline.size());
51312 }
51313 }
51314}

◆ WriteHeader() [2/2]

void duckdb::CSVWriter::WriteHeader ( MemoryStream &  stream,
CSVReaderOptions &  options,
CSVWriterOptions &  writer_options 
)
staticprotected
51316 {
51317 for (idx_t i = 0; i < options.name_list.size(); i++) {
51318 if (i != 0) {
51319 WriteQuoteOrEscape(stream, options.dialect_options.state_machine_options.delimiter.GetValue()[0]);
51320 }
51321
51322 WriteQuotedString(stream, options.name_list[i].c_str(), options.name_list[i].size(), i, options,
51323 writer_options);
51324 }
51325}

The documentation for this class was generated from the following file: