Autonomy Software C++ 24.5.1
Welcome to the Autonomy Software repository of the Mars Rover Design Team (MRDT) at Missouri University of Science and Technology (Missouri S&T)! This API reference contains the source code and other resources for the development of the autonomy software for our Mars rover. The Autonomy Software project aims to compete in the University Rover Challenge (URC) by demonstrating advanced autonomous capabilities and robust navigation algorithms.
Loading...
Searching...
No Matches
duckdb::HivePartitioning Class Reference

Static Public Member Functions

static DUCKDB_API std::map< string, string > Parse (const string &filename)
 Parse a filename that follows the hive partitioning scheme.
 
static DUCKDB_API void ApplyFiltersToFileList (ClientContext &context, vector< OpenFileInfo > &files, vector< unique_ptr< Expression > > &filters, const HivePartitioningFilterInfo &filter_info, MultiFilePushdownInfo &info)
 
static DUCKDB_API Value GetValue (ClientContext &context, const string &key, const string &value, const LogicalType &type)
 
static DUCKDB_API string Escape (const string &input)
 Escape a hive partition key or value using URL encoding.
 
static DUCKDB_API string Unescape (const string &input)
 Unescape a hive partition key or value encoded using URL encoding.
 
static DUCKDB_API bool IsNull (const string &input)
 Whether the column is "NULL" (case-insensitive) or "__HIVE_DEFAULT_PARTITION__".
 

Member Function Documentation

◆ Parse()

std::map< string, string > duckdb::HivePartitioning::Parse ( const string &  filename)
static

Parse a filename that follows the hive partitioning scheme.

77861 {
77862 idx_t partition_start = 0; // index of the first character of the current path segment
77863 idx_t equality_sign = 0; // index of the most recent '=' encountered
77864 bool candidate_partition = true; // cleared when the current segment cannot be a key=value pair
77865 std::map<string, string> result;
77866 for (idx_t c = 0; c < filename.size(); c++) {
77867 if (filename[c] == '?' || filename[c] == '\n') {
77868 // GET parameter ('?') or newline - the current segment is not a partition
77869 candidate_partition = false;
77870 }
77871 if (filename[c] == '\\' || filename[c] == '/') {
77872 // path separator (either slash style) - the current segment ends here
77873 if (candidate_partition && equality_sign > partition_start) {
77874 // we found a partition with an equality sign: key is the text before '=', value the text after it
77875 string key = filename.substr(partition_start, equality_sign - partition_start);
77876 string value = filename.substr(equality_sign + 1, c - equality_sign - 1);
77877 result.insert(make_pair(std::move(key), std::move(value))); // insert() keeps the first value when a key repeats
77878 }
77879 partition_start = c + 1;
77880 candidate_partition = true;
77881 } else if (filename[c] == '=') {
77882 if (equality_sign > partition_start) {
77883 // multiple equality signs in the same segment - not a partition
77884 candidate_partition = false;
77885 }
77886 equality_sign = c;
77887 }
77888 }
77889 return result; // the segment after the last separator is never added: entries are only inserted on a separator
77890}

◆ ApplyFiltersToFileList()

void duckdb::HivePartitioning::ApplyFiltersToFileList ( ClientContext &  context,
vector< OpenFileInfo > &  files,
vector< unique_ptr< Expression > > &  filters,
const HivePartitioningFilterInfo &  filter_info,
MultiFilePushdownInfo &  info 
)
static

Prunes a list of filenames based on a set of filters. It can be used by TableFunctions in the pushdown_complex_filter function to skip files using filename-based filters. It also removes the filters that always evaluate to true.

77921 {
77922 vector<OpenFileInfo> pruned_files; // files that survive the filters
77923 vector<bool> have_preserved_filter(filters.size(), false); // ensures each kept filter is copied only once
77924 vector<unique_ptr<Expression>> pruned_filters; // filters that are kept because they could not be evaluated here
77925 unordered_set<idx_t> filters_applied_to_files; // indices of filters already recorded in the extra info
77926 auto table_index = info.table_index;
77927
77928 if ((!filter_info.filename_enabled && !filter_info.hive_enabled) || filters.empty()) {
77929 return; // nothing to do: neither filename nor hive filtering is enabled, or there are no filters
77930 }
77931
77932 for (idx_t i = 0; i < files.size(); i++) {
77933 auto &file = files[i];
77934 bool should_prune_file = false;
77935 auto known_values = GetKnownColumnValues(file.path, filter_info);
77936
77937 for (idx_t j = 0; j < filters.size(); j++) {
77938 auto &filter = filters[j];
77939 unique_ptr<Expression> filter_copy = filter->Copy();
77940 ConvertKnownColRefToConstants(context, filter_copy, known_values, table_index);
77941 // Try to evaluate the filter; if it can be fully evaluated here, it does not need to be preserved
77942 Value result_value;
77943
77944 if (!filter_copy->IsScalar() || !filter_copy->IsFoldable() ||
77945 !ExpressionExecutor::TryEvaluateScalar(context, *filter_copy, result_value)) {
77946 // cannot be evaluated from the known filename/hive values alone - keep this filter (copied once)
77947 if (!have_preserved_filter[j]) {
77948 pruned_filters.emplace_back(filter->Copy());
77949 have_preserved_filter[j] = true;
77950 }
77951 } else if (result_value.IsNull() || !result_value.GetValue<bool>()) {
77952 // filter evaluates to NULL or false for this file - the file is skipped
77953 should_prune_file = true;
77954 // record the filter's text in the extra info, once per filter
77955 if (filters_applied_to_files.find(j) == filters_applied_to_files.end()) {
77956 info.extra_info.file_filters += filter->ToString();
77957 filters_applied_to_files.insert(j);
77958 }
77959 }
77960 }
77961
77962 if (!should_prune_file) {
77963 pruned_files.push_back(file);
77964 }
77965 }
77966
77967 D_ASSERT(filters.size() >= pruned_filters.size());
77968
77969 info.extra_info.total_files = files.size();
77970 info.extra_info.filtered_files = pruned_files.size(); // number of files remaining after pruning
77971
77972 filters = std::move(pruned_filters); // replace the inputs in place with the kept filters/files
77973 files = std::move(pruned_files);
77974}
static DUCKDB_API bool TryEvaluateScalar(ClientContext &context, const Expression &expr, Value &result)
Try to evaluate a scalar expression and fold it into a single value, returns false if an exception is...
void filter(InputArray image, InputArray kernel, OutputArray output)
Here is the call graph for this function:

◆ GetValue()

Value duckdb::HivePartitioning::GetValue ( ClientContext &  context,
const string &  key,
const string &  value,
const LogicalType &  type 
)
static
77893 {
77894 // Handle nulls: return a NULL Value of the requested type
77895 // NOTE(review): str_val is presumably the definition's name for the parameter rendered as `value` in the signature - confirm
77896 if (IsNull(str_val)) {
77897 return Value(type);
77898 }
77899 if (type.id() == LogicalTypeId::VARCHAR) {
77900 // for string values we can directly return the unescaped value, no cast needed
77901 return Value(Unescape(str_val));
77902 }
77903 if (str_val.empty()) {
77904 // empty strings are NULL for non-string types
77905 return Value(type);
77906 }
77907
77908 // cast the unescaped string to the target type
77909 Value value(Unescape(str_val));
77910 if (!value.TryCastAs(context, type)) {
77911 throw InvalidInputException("Unable to cast '%s' (from hive partition column '%s') to: '%s'", value.ToString(),
77912 StringUtil::Upper(key), type.ToString());
77913 }
77914 return value;
77915}
static DUCKDB_API string Unescape(const string &input)
Unescape a hive partition key or value encoded using URL encoding.
Definition duckdb.cpp:77849
static DUCKDB_API bool IsNull(const string &input)
Whether the column is "NULL" (case-insensitive) or "__HIVE_DEFAULT_PARTITION__".
Definition duckdb.cpp:77853
static DUCKDB_API string Upper(const string &str)
Convert a string to UPPERCASE.

◆ Escape()

string duckdb::HivePartitioning::Escape ( const string &  input)
static

Escape a hive partition key or value using URL encoding.

77845 {
77846 return StringUtil::URLEncode(input); // encode_slash is left at its default (true)
77847}
static DUCKDB_API string URLEncode(const string &str, bool encode_slash=true)
Encode special URL characters in a string.
Here is the call graph for this function:

◆ Unescape()

string duckdb::HivePartitioning::Unescape ( const string &  input)
static

Unescape a hive partition key or value encoded using URL encoding.

77849 {
77850 return StringUtil::URLDecode(input); // plus_to_space is left at its default (false)
77851}
static DUCKDB_API string URLDecode(const string &str, bool plus_to_space=false)
Decode URL escape sequences (e.g. %20) in a string.
Here is the call graph for this function:

◆ IsNull()

bool duckdb::HivePartitioning::IsNull ( const string &  input)
static

Whether the column is "NULL" (case-insensitive) or "__HIVE_DEFAULT_PARTITION__".

77853 {
77854 return StringUtil::CIEquals(input, "NULL") || input == "__HIVE_DEFAULT_PARTITION__"; // "NULL" matches case-insensitively; the default-partition marker must match exactly
77855}
static DUCKDB_API bool CIEquals(const string &l1, const string &l2)
Case insensitive equals.
Here is the call graph for this function:

The documentation for this class was generated from the following files: