Autonomy Software C++ 24.5.1
Welcome to the Autonomy Software repository of the Mars Rover Design Team (MRDT) at Missouri University of Science and Technology (Missouri S&T)! API reference contains the source code and other resources for the development of the autonomy software for our Mars rover. The Autonomy Software project aims to compete in the University Rover Challenge (URC) by demonstrating advanced autonomous capabilities and robust navigation algorithms.
Loading...
Searching...
No Matches
duckdb::StringUtil Class Reference

#include <duckdb.hpp>

Classes

struct  EnumStringLiteral
 

Static Public Member Functions

static string GenerateRandomName (idx_t length=16)
 
static uint8_t GetHexValue (char c)
 
static uint8_t GetBinaryValue (char c)
 
static bool CharacterIsSpace (char c)
 
static bool CharacterIsNewline (char c)
 
static bool CharacterIsDigit (char c)
 
static bool CharacterIsHex (char c)
 
static char CharacterToUpper (char c)
 
static char CharacterToLower (char c)
 
static bool CharacterIsAlpha (char c)
 
static bool CharacterIsAlphaNumeric (char c)
 
static bool CharacterIsOperator (char c)
 
template<class TO >
static vector< TO > ConvertStrings (const vector< string > &strings)
 
static vector< SQLIdentifier > ConvertToSQLIdentifiers (const vector< string > &strings)
 
static vector< SQLString > ConvertToSQLStrings (const vector< string > &strings)
 
static DUCKDB_API bool Contains (const string &haystack, const string &needle)
 Returns true if the needle string exists in the haystack.
 
static DUCKDB_API bool Contains (const string &haystack, const char &needle_char)
 
static DUCKDB_API optional_idx Find (const string &haystack, const string &needle)
 Returns the position of needle string within the haystack.
 
static DUCKDB_API bool StartsWith (string str, string prefix)
 Returns true if the target string starts with the given prefix.
 
static DUCKDB_API bool EndsWith (const string &str, const string &suffix)
 Returns true if the target string ends with the given suffix.
 
static DUCKDB_API string Repeat (const string &str, const idx_t n)
 Repeat a string multiple times.
 
static DUCKDB_API vector< string > Split (const string &str, char delimiter)
 Split the input string based on the given delimiter character.
 
static DUCKDB_API vector< string > SplitWithParentheses (const string &str, char delimiter=',', char par_open='(', char par_close=')')
 Split the input string, ignore delimiters within parentheses. Note: leading/trailing spaces are NOT stripped.
 
static DUCKDB_API vector< string > SplitWithQuote (const string &str, char delimiter=',', char quote='"')
 Split the input string along a quote. Note that any escaping is NOT supported.
 
static DUCKDB_API string Join (const vector< string > &input, const string &separator)
 Join multiple strings into one string. Components are concatenated by the given separator.
 
static DUCKDB_API string Join (const set< string > &input, const string &separator)
 
static DUCKDB_API string URLEncode (const string &str, bool encode_slash=true)
 Encode special URL characters in a string.
 
static DUCKDB_API idx_t URLEncodeSize (const char *input, idx_t input_size, bool encode_slash=true)
 
static DUCKDB_API void URLEncodeBuffer (const char *input, idx_t input_size, char *output, bool encode_slash=true)
 
static DUCKDB_API string URLDecode (const string &str, bool plus_to_space=false)
 Decode URL escape sequences (e.g. %20) in a string.
 
static DUCKDB_API idx_t URLDecodeSize (const char *input, idx_t input_size, bool plus_to_space=false)
 
static DUCKDB_API void URLDecodeBuffer (const char *input, idx_t input_size, char *output, bool plus_to_space=false)
 
static DUCKDB_API void SkipBOM (const char *buffer_ptr, const idx_t &buffer_size, idx_t &buffer_pos)
 BOM skipping (https://en.wikipedia.org/wiki/Byte_order_mark)
 
static DUCKDB_API idx_t ToUnsigned (const string &str)
 
static DUCKDB_API int64_t ToSigned (const string &str)
 
static DUCKDB_API double ToDouble (const string &str)
 
template<class T >
static string ToString (const vector< T > &input, const string &separator)
 
template<typename C , typename S , typename FUNC >
static string Join (const C &input, S count, const string &separator, FUNC f)
 
static DUCKDB_API string BytesToHumanReadableString (idx_t bytes, idx_t multiplier=1024)
 Return a string that formats the given number of bytes.
 
static DUCKDB_API string TryParseFormattedBytes (const string &arg, idx_t &result)
 
static DUCKDB_API idx_t ParseFormattedBytes (const string &arg)
 
static DUCKDB_API string Upper (const string &str)
 Convert a string to UPPERCASE.
 
static DUCKDB_API string Lower (const string &str)
 Convert a string to lowercase.
 
static DUCKDB_API string Title (const string &str)
 Convert a string to Title Case.
 
static DUCKDB_API bool IsLower (const string &str)
 
static DUCKDB_API bool IsUpper (const string &str)
 
static DUCKDB_API uint64_t CIHash (const string &str)
 Case insensitive hash.
 
static DUCKDB_API uint64_t CIHash (const char *str, idx_t size)
 
static DUCKDB_API bool CIEquals (const string &l1, const string &l2)
 Case insensitive equals.
 
static DUCKDB_API bool CIEquals (const char *l1, idx_t l1_size, const char *l2, idx_t l2_size)
 Case insensitive equals (null-terminated strings)
 
static DUCKDB_API bool CIStartsWith (const string &str, const string &prefix)
 Case insensitive starts-with.
 
static DUCKDB_API bool CILessThan (const string &l1, const string &l2)
 Case insensitive compare.
 
static DUCKDB_API idx_t CIFind (vector< string > &vec, const string &str)
 Case insensitive find, returns DConstants::INVALID_INDEX if not found.
 
template<typename... ARGS>
static string Format (const string fmt_str, ARGS... params)
 Format a string using printf semantics.
 
static DUCKDB_API vector< string > Split (const string &input, const string &split)
 Split the input string into a vector of strings based on the split string.
 
static DUCKDB_API void LTrim (string &str)
 Remove the whitespace char in the left end of the string.
 
static DUCKDB_API void RTrim (string &str)
 Remove the whitespace char in the right end of the string.
 
static DUCKDB_API void RTrim (string &str, const string &chars_to_trim)
 Remove all characters contained in chars_to_trim from the right end of the string.
 
static DUCKDB_API void Trim (string &str)
 Remove the whitespace char in the left and right end of the string.
 
static DUCKDB_API string Replace (string source, const string &from, const string &to)
 
static DUCKDB_API idx_t LevenshteinDistance (const string &s1, const string &s2, idx_t not_equal_penalty=1)
 
static DUCKDB_API idx_t SimilarityScore (const string &s1, const string &s2)
 Returns the similarity score between two strings (edit distance metric - lower is more similar)
 
static DUCKDB_API double SimilarityRating (const string &s1, const string &s2)
 Returns a normalized similarity rating between 0.0 - 1.0 (higher is more similar)
 
static DUCKDB_API vector< string > TopNStrings (vector< pair< string, double > > scores, idx_t n=5, double threshold=0.5)
 
static DUCKDB_API vector< string > TopNStrings (const vector< pair< string, idx_t > > &scores, idx_t n=5, idx_t threshold=5)
 
static DUCKDB_API vector< string > TopNLevenshtein (const vector< string > &strings, const string &target, idx_t n=5, idx_t threshold=5)
 
static DUCKDB_API vector< string > TopNJaroWinkler (const vector< string > &strings, const string &target, idx_t n=5, double threshold=0.5)
 
static DUCKDB_API string CandidatesMessage (const vector< string > &candidates, const string &candidate="Candidate bindings")
 
static DUCKDB_API string CandidatesErrorMessage (const vector< string > &strings, const string &target, const string &message_prefix, idx_t n=5)
 
static bool Equals (const char *s1, const char *s2)
 
static bool Equals (const string &s1, const char *s2)
 
static bool Equals (const char *s1, const string &s2)
 
static bool Equals (const string &s1, const string &s2)
 
static bool Equals (const string_t &s1, const char *s2)
 
static bool Equals (const char *s1, const string_t &s2)
 
static DUCKDB_API unique_ptr< ComplexJSON > ParseJSONMap (const string &json, bool ignore_errors=false)
 
static DUCKDB_API string ExceptionToJSONMap (ExceptionType type, const string &message, const unordered_map< string, string > &map)
 
static DUCKDB_API string ToJSONMap (const unordered_map< string, string > &map)
 Transforms an unordered map to a JSON string.
 
static DUCKDB_API string ToComplexJSONMap (const ComplexJSON &complex_json)
 Transforms a complex JSON to a JSON string.
 
static DUCKDB_API string ValidateJSON (const char *data, const idx_t &len)
 
static DUCKDB_API string GetFileName (const string &file_path)
 
static DUCKDB_API string GetFileExtension (const string &file_name)
 
static DUCKDB_API string GetFileStem (const string &file_name)
 
static DUCKDB_API string GetFilePath (const string &file_path)
 
static DUCKDB_API uint32_t StringToEnum (const EnumStringLiteral enum_list[], idx_t enum_count, const char *enum_name, const char *str_value)
 
static DUCKDB_API const char * EnumToString (const EnumStringLiteral enum_list[], idx_t enum_count, const char *enum_name, uint32_t enum_value)
 

Static Public Attributes

static DUCKDB_API const uint8_t ASCII_TO_LOWER_MAP []
 
static DUCKDB_API const uint8_t ASCII_TO_UPPER_MAP []
 

Detailed Description

String utility functions. Note that these are not the most efficient implementations (i.e., they copy memory) and therefore they should only be used for debug messages and other such things.

Member Function Documentation

◆ GetHexValue()

static uint8_t duckdb::StringUtil::GetHexValue ( char  c)
inlinestatic
3800 {
3801 if (c >= '0' && c <= '9') {
3802 return UnsafeNumericCast<uint8_t>(c - '0');
3803 }
3804 if (c >= 'a' && c <= 'f') {
3805 return UnsafeNumericCast<uint8_t>(c - 'a' + 10);
3806 }
3807 if (c >= 'A' && c <= 'F') {
3808 return UnsafeNumericCast<uint8_t>(c - 'A' + 10);
3809 }
3810 throw InvalidInputException("Invalid input for hex digit: %s", string(1, c));
3811 }

◆ GetBinaryValue()

static uint8_t duckdb::StringUtil::GetBinaryValue ( char  c)
inlinestatic
3812 {
3813 if (c >= '0' && c <= '1') {
3814 return UnsafeNumericCast<uint8_t>(c - '0');
3815 }
3816 throw InvalidInputException("Invalid input for binary digit: %s", string(1, c));
3817 }

◆ CharacterIsSpace()

static bool duckdb::StringUtil::CharacterIsSpace ( char  c)
inlinestatic
3819 {
3820 return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
3821 }

◆ CharacterIsNewline()

static bool duckdb::StringUtil::CharacterIsNewline ( char  c)
inlinestatic
3822 {
3823 return c == '\n' || c == '\r';
3824 }

◆ CharacterIsDigit()

static bool duckdb::StringUtil::CharacterIsDigit ( char  c)
inlinestatic
3825 {
3826 return c >= '0' && c <= '9';
3827 }

◆ CharacterIsHex()

static bool duckdb::StringUtil::CharacterIsHex ( char  c)
inlinestatic
3828 {
3829 return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
3830 }

◆ CharacterToUpper()

static char duckdb::StringUtil::CharacterToUpper ( char  c)
inlinestatic
3831 {
3832 if (c >= 'a' && c <= 'z') {
3833 return UnsafeNumericCast<char>(c - ('a' - 'A'));
3834 }
3835 return c;
3836 }

◆ CharacterToLower()

static char duckdb::StringUtil::CharacterToLower ( char  c)
inlinestatic
3837 {
3838 if (c >= 'A' && c <= 'Z') {
3839 return UnsafeNumericCast<char>(c + ('a' - 'A'));
3840 }
3841 return c;
3842 }

◆ CharacterIsAlpha()

static bool duckdb::StringUtil::CharacterIsAlpha ( char  c)
inlinestatic
3843 {
3844 return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
3845 }

◆ CharacterIsAlphaNumeric()

static bool duckdb::StringUtil::CharacterIsAlphaNumeric ( char  c)
inlinestatic
3846 {
3847 return CharacterIsAlpha(c) || CharacterIsDigit(c);
3848 }

◆ CharacterIsOperator()

static bool duckdb::StringUtil::CharacterIsOperator ( char  c)
inlinestatic
3849 {
3850 if (c == '_') {
3851 return false;
3852 }
3853 if (c >= '!' && c <= '/') {
3854 return true;
3855 }
3856 if (c >= ':' && c <= '@') {
3857 return true;
3858 }
3859 if (c >= '[' && c <= '`') {
3860 return true;
3861 }
3862 if (c >= '{' && c <= '~') {
3863 return true;
3864 }
3865 return false;
3866 }

◆ ConvertStrings()

template<class TO >
static vector< TO > duckdb::StringUtil::ConvertStrings ( const vector< string > &  strings)
inlinestatic
3869 {
3870 vector<TO> result;
3871 for (auto &string : strings) {
3872 result.emplace_back(string);
3873 }
3874 return result;
3875 }

◆ ConvertToSQLIdentifiers()

static vector< SQLIdentifier > duckdb::StringUtil::ConvertToSQLIdentifiers ( const vector< string > &  strings)
inlinestatic
3877 {
3878 return ConvertStrings<SQLIdentifier>(strings);
3879 }

◆ ConvertToSQLStrings()

static vector< SQLString > duckdb::StringUtil::ConvertToSQLStrings ( const vector< string > &  strings)
inlinestatic
3881 {
3882 return ConvertStrings<SQLString>(strings);
3883 }

◆ ToString()

template<class T >
static string duckdb::StringUtil::ToString ( const vector< T > &  input,
const string &  separator 
)
inlinestatic
3933 {
3934 vector<string> input_list;
3935 for (auto &i : input) {
3936 input_list.push_back(i.ToString());
3937 }
3938 return StringUtil::Join(input_list, separator);
3939 }
static DUCKDB_API string Join(const vector< string > &input, const string &separator)
Join multiple strings into one string. Components are concatenated by the given separator.

◆ Join()

template<typename C , typename S , typename FUNC >
static string duckdb::StringUtil::Join ( const C &  input,
S  count,
const string &  separator,
FUNC  f 
)
inlinestatic

Join multiple items of a container with the given size, each transformed to a string using the given function, into one string using the given separator.

3944 {
3945 // The result
3946 std::string result;
3947
3948 // If the input isn't empty, append the first element. We do this so we
3949 // don't need to introduce an if into the loop.
3950 if (count > 0) {
3951 result += f(input[0]);
3952 }
3953
3954 // Append the remaining input components, after the first
3955 for (size_t i = 1; i < count; i++) {
3956 result += separator + f(input[i]);
3957 }
3958
3959 return result;
3960 }

◆ Format()

template<typename... ARGS>
static string duckdb::StringUtil::Format ( const string  fmt_str,
ARGS...  params 
)
inlinestatic

Format a string using printf semantics.

4002 {
4003 return Exception::ConstructMessage(fmt_str, params...);
4004 }
PyParams params(const std::string &tag, const std::string &model, const std::string &weights, const std::string &device)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ LevenshteinDistance()

static DUCKDB_API idx_t duckdb::StringUtil::LevenshteinDistance ( const string &  s1,
const string &  s2,
idx_t  not_equal_penalty = 1 
)
static

Get the Levenshtein distance between two strings. The not_equal_penalty is the penalty given when two characters in a string are not equal. The regular Levenshtein distance has a not-equal penalty of 1, which means changing a character is as expensive as adding or removing one. For similarity searches we often want to give extra weight to changing a character. For example: with a not-equal penalty of 1, "pg_am" is closer to "depdelay" than "depdelay_minutes"; with a not-equal penalty of 3, "depdelay_minutes" is closer to "depdelay" than to "pg_am".

◆ TopNStrings() [1/2]

static DUCKDB_API vector< string > duckdb::StringUtil::TopNStrings ( vector< pair< string, double > >  scores,
idx_t  n = 5,
double  threshold = 0.5 
)
static

Get the top-n strings (sorted by the given score distance) from a set of scores. The scores should be normalized between 0.0 and 1.0, where 1.0 is the highest score. At least one entry is returned (if there is one). Strings are only returned if they have a score higher than the threshold.

◆ TopNStrings() [2/2]

static DUCKDB_API vector< string > duckdb::StringUtil::TopNStrings ( const vector< pair< string, idx_t > > &  scores,
idx_t  n = 5,
idx_t  threshold = 5 
)
static

DEPRECATED: old TopNStrings method that uses the levenshtein distance metric instead of the normalized 0.0 - 1.0 rating

◆ TopNLevenshtein()

static DUCKDB_API vector< string > duckdb::StringUtil::TopNLevenshtein ( const vector< string > &  strings,
const string &  target,
idx_t  n = 5,
idx_t  threshold = 5 
)
static

Computes the levenshtein distance of each string in strings, and compares it to target, then returns TopNStrings with the given params.

◆ TopNJaroWinkler()

static DUCKDB_API vector< string > duckdb::StringUtil::TopNJaroWinkler ( const vector< string > &  strings,
const string &  target,
idx_t  n = 5,
double  threshold = 0.5 
)
static

Computes the jaro winkler distance of each string in strings, and compares it to target, then returns TopNStrings with the given params.

◆ CandidatesErrorMessage()

static DUCKDB_API string duckdb::StringUtil::CandidatesErrorMessage ( const vector< string > &  strings,
const string &  target,
const string &  message_prefix,
idx_t  n = 5 
)
static

Generate an error message in the form of "{message_prefix}: nearest_string, nearest_string2, ...". Equivalent to calling TopNLevenshtein followed by CandidatesMessage.

Here is the caller graph for this function:

◆ Equals() [1/4]

static bool duckdb::StringUtil::Equals ( const char * s1,
const char * s2 
)
inlinestatic

Returns true if two null-terminated strings are equal or point to the same address. Returns false if only one of the strings is nullptr

4060 {
4061 if (s1 == s2) {
4062 return true;
4063 }
4064 if (s1 == nullptr || s2 == nullptr) {
4065 return false;
4066 }
4067 return strcmp(s1, s2) == 0;
4068 }

◆ Equals() [2/4]

static bool duckdb::StringUtil::Equals ( const string &  s1,
const char * s2 
)
inlinestatic
4069 {
4070 return Equals(s1.c_str(), s2);
4071 }
static bool Equals(const char *s1, const char *s2)
Definition duckdb.hpp:4060

◆ Equals() [3/4]

static bool duckdb::StringUtil::Equals ( const char * s1,
const string &  s2 
)
inlinestatic
4072 {
4073 return Equals(s1, s2.c_str());
4074 }

◆ Equals() [4/4]

static bool duckdb::StringUtil::Equals ( const string &  s1,
const string &  s2 
)
inlinestatic
4075 {
4076 return s1 == s2;
4077 }

◆ ParseJSONMap()

static DUCKDB_API unique_ptr< ComplexJSON > duckdb::StringUtil::ParseJSONMap ( const string &  json,
bool  ignore_errors = false 
)
static

JSON method that parses a { string: value } JSON blob. NOTE: this method is not efficient. NOTE: this method is used in Exception construction - as such it does NOT throw on invalid JSON; instead an empty map is returned. Parses complex (i.e., nested) JSON maps; it also parses invalid JSON as a pure string.

Here is the caller graph for this function:

◆ ExceptionToJSONMap()

static DUCKDB_API string duckdb::StringUtil::ExceptionToJSONMap ( ExceptionType  type,
const string &  message,
const unordered_map< string, string > &  map 
)
static

JSON method that constructs a { string: value } JSON map. This is the inverse of ParseJSONMap. NOTE: this method is not efficient.


The documentation for this class was generated from the following file: