String
Usage
auto str = String{u8"text"}; // unchecked, assuming valid UTF-8
// ...
std::cout << str.toCharString();
void processData(std::string_view view) {
    auto str = String::fromCharString(view); // checked, throws on encoding errors
    // ...
}
Interface
-
class String
Thin wrapper around std::u8string.
The class mirrors the API of std::u8string closely and adds a few convenience functions. It is primarily intended to ease the integration of the parser into applications that use std::string for text processing.
- Tested:
StringTest, StringUtf8Test
String Conversion
-
inline std::string toCharString() const noexcept
Convert the wrapped string into a char based std::string.
This helper performs the required conversion from char8_t to char and is primarily meant for interoperability with APIs that expect a regular std::string.
- Returns:
A char based string.
Public Types
-
using ConstByteSpan = std::span<const std::byte>
A span of bytes referencing the underlying data of the string.
Public Functions
-
template<std::size_t N>
inline constexpr String(const char8_t (&literal)[N]) noexcept Construct from a UTF-8 string literal.
- Template Parameters:
N – The length of the literal including the null terminator.
- Parameters:
literal – The UTF-8 literal to copy.
-
inline explicit constexpr String(const char8_t *str, std::size_t size) noexcept
Construct from a UTF-8 character pointer and size.
- Parameters:
str – Pointer to UTF-8 characters.
size – Number of characters to read.
-
inline explicit constexpr String(const std::u8string_view str) noexcept
Construct from a UTF-8 string view.
- Parameters:
str – The UTF-8 string view to copy.
-
inline String(const char *str) noexcept
Construct from a null-terminated narrow string.
- Parameters:
str – The null-terminated narrow character string.
-
inline String(const char *str, std::size_t size) noexcept
Construct from a narrow character pointer and size.
- Parameters:
str – Pointer to narrow characters.
size – Number of characters to read.
-
inline explicit constexpr String(const WrappedString &str) noexcept
Copy construct from the underlying UTF-8 string.
- Parameters:
str – The string to wrap.
-
inline constexpr String(WrappedString &&str) noexcept
Move construct from the underlying UTF-8 string.
- Parameters:
str – The string to move from.
-
inline constexpr String(std::size_t count, value_type c) noexcept
Construct a string with a repeated character.
- Parameters:
count – Number of characters.
c – The character to repeat.
-
template<typename InputIt>
inline constexpr String(InputIt begin, InputIt end) noexcept Construct a string from a character range.
- Template Parameters:
InputIt – Input iterator type.
- Parameters:
begin – Iterator to the first character.
end – Iterator to one-past-last character.
-
template<std::size_t N>
inline constexpr String(const char (&literal)[N]) noexcept Construct from a narrow literal string.
- Template Parameters:
N – The length of the literal including the null terminator.
- Parameters:
literal – The narrow literal string.
-
inline explicit String(const std::string &str) noexcept
Construct from a standard narrow string.
- Parameters:
str – The std::string to convert.
-
inline constexpr String(std::size_t count, char c) noexcept
Construct a string with a repeated narrow character.
- Parameters:
count – Number of characters.
c – The character to repeat.
-
String() = default
Default constructor.
-
~String() = default
Default destructor.
-
inline String operator+(const String &other) const noexcept
Concatenate two String objects.
- Parameters:
other – The String to append.
- Returns:
A new String with the contents appended.
-
inline String &operator+=(const String &other) noexcept
Append another string to this string.
- Parameters:
other – The string to append.
- Returns:
Reference to this string.
-
inline String &operator+=(value_type c) noexcept
Append a character to this String.
- Parameters:
c – The character to append.
- Returns:
Reference to this String.
-
template<std::size_t N>
inline bool operator==(const char8_t (&literal)[N]) const noexcept Compare this String to a UTF-8 literal for equality.
- Template Parameters:
N – The size of the literal including null terminator.
- Parameters:
literal – The UTF-8 literal to compare against.
- Returns:
true
if the literal matches exactly.
-
template<std::size_t N>
inline bool operator!=(const char8_t (&literal)[N]) const noexcept Compare this String to a UTF-8 literal for inequality.
- Template Parameters:
N – The size of the literal including null terminator.
- Parameters:
literal – The UTF-8 literal to compare against.
- Returns:
true
if the literal does not match.
-
template<std::size_t N>
inline String operator+(const char8_t (&literal)[N]) const noexcept Concatenate a UTF-8 literal to this String.
- Template Parameters:
N – The size of the literal including null terminator.
- Parameters:
literal – The UTF-8 literal to append.
- Returns:
A new String with the literal appended.
-
inline String operator+(const std::u8string &other) const noexcept
Concatenate a std::u8string to this String.
- Parameters:
other – The u8string to append.
- Returns:
A new String with the contents appended.
-
template<std::size_t N>
inline String &operator+=(const char8_t (&literal)[N]) noexcept Append a UTF-8 literal to this String.
- Template Parameters:
N – The size of the literal including null terminator.
- Parameters:
literal – The UTF-8 literal to append.
- Returns:
Reference to this String.
-
inline String &operator+=(const std::u8string &other) noexcept
Append a std::u8string to this String.
- Parameters:
other – The u8string to append.
- Returns:
Reference to this String.
-
template<std::size_t N>
inline bool operator==(const char (&literal)[N]) const noexcept Compare this String to a narrow literal for equality.
- Template Parameters:
N – The size of the literal including null terminator.
- Parameters:
literal – The narrow literal to compare.
- Returns:
true
if the literal matches exactly.
-
template<std::size_t N>
inline bool operator!=(const char (&literal)[N]) const noexcept Compare this String to a narrow literal for inequality.
- Template Parameters:
N – The size of the literal including null terminator.
- Parameters:
literal – The narrow literal to compare.
- Returns:
true
if the literal does not match.
-
template<std::size_t N>
inline String operator+(const char (&literal)[N]) const noexcept Concatenate a narrow literal to this String.
- Template Parameters:
N – The size of the literal including null terminator.
- Parameters:
literal – The narrow literal to append.
- Returns:
A new String with the literal appended.
-
inline String operator+(const std::string &other) const noexcept
Concatenate a std::string to this String.
- Parameters:
other – The std::string to append.
- Returns:
A new String with the contents appended.
-
template<std::size_t N>
inline String &operator+=(const char (&literal)[N]) noexcept Append a narrow literal to this String.
- Template Parameters:
N – The size of the literal including null terminator.
- Parameters:
literal – The narrow literal to append.
- Returns:
Reference to this String.
-
inline String &operator+=(const std::string &other) noexcept
Append a std::string to this String.
- Parameters:
other – The std::string to append.
- Returns:
Reference to this String.
-
inline String &operator+=(const impl::Char unicodeChar) noexcept
Append a single Unicode character to this String.
- Parameters:
unicodeChar – The character to append.
- Returns:
Reference to this String.
-
inline constexpr size_type length() const noexcept
Get the number of characters in this String.
- Returns:
The length of the string.
-
inline constexpr size_type max_size() const noexcept
Get the maximum number of characters this String can hold.
- Returns:
The maximum possible size.
-
inline void reserve(size_type size) noexcept
Reserve storage to at least the specified capacity.
- Parameters:
size – The minimum capacity to reserve.
-
inline void shrink_to_fit() noexcept
Reduce memory usage to fit the current size.
-
inline constexpr size_type capacity() const noexcept
Get the current capacity of the String.
- Returns:
The allocated storage size.
-
inline void append(const String &other) noexcept
Append another String to this one.
- Parameters:
other – The String to append.
-
inline void append(const value_type character) noexcept
Append a character to this String.
- Parameters:
character – The character to append.
-
template<std::size_t N>
inline void append(const char8_t (&literal)[N]) noexcept Append a UTF-8 literal to this String.
- Template Parameters:
N – The literal length including null terminator.
- Parameters:
literal – The UTF-8 literal to append.
-
inline void append(const std::u8string &str) noexcept
Append a std::u8string to this String.
- Parameters:
str – The u8string to append.
-
inline void append(const std::u8string_view str) noexcept
Append a UTF-8 string view to this String.
- Parameters:
str – The u8string_view to append.
-
inline void append(const std::string &str) noexcept
Append a std::string to this String.
- Parameters:
str – The std::string to append.
-
inline void append(const std::string_view str) noexcept
Append a std::string_view to this String.
- Parameters:
str – The string_view to append.
-
inline void append(const impl::Char unicodeChar) noexcept
Append a Unicode character to this String.
- Parameters:
unicodeChar – The unicode character to append.
-
inline String substr(size_type pos = 0, size_type count = npos) const
Extract a substring from this String.
- Parameters:
pos – The starting index.
count – The number of characters.
- Returns:
The extracted substring.
-
inline String &erase(size_type index = 0, size_type count = npos) noexcept
Erase a substring from the string.
- Parameters:
index – The starting index to begin erasure.
count – The number of characters to erase.
- Returns:
Reference to this string after erasure.
-
inline iterator erase(iterator position) noexcept
Erase the character at the specified position.
- Parameters:
position – Iterator to the character to remove.
- Returns:
Iterator following the removed character.
-
inline iterator erase(const_iterator position) noexcept
Erase the character at the specified position.
- Parameters:
position – Iterator to the character to remove.
- Returns:
Iterator following the removed character.
-
inline iterator erase(iterator first, iterator last) noexcept
Erase a range of characters from the string.
- Parameters:
first – Iterator to the first character to remove.
last – Iterator past the last character to remove.
- Returns:
Iterator following the last removed character.
-
inline iterator erase(const_iterator first, const_iterator last) noexcept
Erase a range of characters from the string.
- Parameters:
first – Iterator to the first character to remove.
last – Iterator past the last character to remove.
- Returns:
Iterator following the last removed character.
-
template<typename FindStr>
inline size_type find(FindStr s, size_type pos, size_type count) const Find the first occurrence of a substring in the string.
- Template Parameters:
FindStr – Type of the search string.
- Parameters:
s – The substring to search for.
pos – The starting position of the search.
count – The number of characters of the substring.
- Returns:
The index of the first occurrence, or npos if not found.
-
template<typename FindStr>
inline size_type find(FindStr s, size_type pos = 0) const This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
-
template<typename FindStr>
inline size_type rfind(FindStr s, size_type pos, size_type count) const Find the last occurrence of a substring in the string.
- Template Parameters:
FindStr – Type of the search string.
- Parameters:
s – The substring to search for.
pos – The starting position of the search.
count – The number of characters of the substring.
- Returns:
The index of the last occurrence, or npos if not found.
-
template<typename FindStr>
inline size_type rfind(FindStr s, size_type pos = npos) const This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
-
template<typename FindStr>
inline size_type find_first_of(FindStr s, size_type pos, size_type count) const Find the first occurrence of any character from a set.
- Template Parameters:
FindStr – Type of the search set string.
- Parameters:
s – The set of characters to search for.
pos – The starting position of the search.
count – The number of characters in the set.
- Returns:
The index of the first matching character, or npos if not found.
-
template<typename FindStr>
inline size_type find_first_of(FindStr s, size_type pos = 0) const This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
-
template<typename FindStr>
inline size_type find_first_not_of(FindStr s, size_type pos, size_type count) const Find the first character not in a set.
- Template Parameters:
FindStr – Type of the search set string.
- Parameters:
s – The set of characters to exclude.
pos – The starting position of the search.
count – The number of characters in the set.
- Returns:
The index of the first non-matching character, or npos if none.
-
template<typename FindStr>
inline size_type find_first_not_of(FindStr s, size_type pos = 0) const This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
-
template<typename FindStr>
inline size_type find_last_of(FindStr s, size_type pos, size_type count) const Find the last occurrence of any character from a set.
- Template Parameters:
FindStr – Type of the search set string.
- Parameters:
s – The set of characters to search for.
pos – The starting position of the search.
count – The number of characters in the set.
- Returns:
The index of the last matching character, or npos if not found.
-
template<typename FindStr>
inline size_type find_last_of(FindStr s, size_type pos = npos) const This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
-
template<typename FindStr>
inline size_type find_last_not_of(FindStr s, size_type pos, size_type count) const Find the last character not in a set.
- Template Parameters:
FindStr – Type of the search set string.
- Parameters:
s – The set of characters to exclude.
pos – The starting position of the search.
count – The number of characters in the set.
- Returns:
The index of the last non-matching character, or npos if none.
-
template<typename FindStr>
inline size_type find_last_not_of(FindStr s, size_type pos = npos) const This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
-
template<typename FindStr>
inline bool starts_with(FindStr s) const noexcept Check if the string starts with a given prefix.
- Template Parameters:
FindStr – Type of the prefix string.
- Parameters:
s – The prefix to check.
- Returns:
true if the string starts with the prefix, false otherwise.
-
template<typename FindStr>
inline bool ends_with(FindStr s) const noexcept Check if the string ends with a given suffix.
- Template Parameters:
FindStr – Type of the suffix string.
- Parameters:
s – The suffix to check.
- Returns:
true if the string ends with the suffix, false otherwise.
-
template<typename FindStr>
inline bool contains(FindStr s) const noexcept Check if the string contains a given substring.
- Template Parameters:
FindStr – Type of the search string.
- Parameters:
s – The substring to search for.
- Returns:
true if the substring is found, false otherwise.
-
std::size_t escapedSize(EscapeMode mode) const noexcept
Get the byte size of the escaped string.
Use this function to calculate the size requirements of an escaped string, without the actual conversion.
- Returns:
The byte size of the escaped text (without trailing zero end byte).
-
String toEscaped(EscapeMode mode) const noexcept
Create an escaped version of this string.
- Parameters:
mode – The escape mode to use for escaping.
Public Static Attributes
-
static constexpr auto npos = WrappedString::npos
Constant representing an invalid or not-found position.
-
enum class erbsland::conf::EscapeMode : uint8_t
Escaping modes.
- Not Tested:
Tested via Char and String.
Values:
-
enumerator Text
Escaping for double-quoted text.
See reference documentation, chapter "Text". Even though it is allowed, the tab character is escaped as well.
- Escape characters U+0000-U+001F, `\`, `"`, U+007F.
- Use short formats for `\\`, `\"`, `\n`, `\r`, `\t`.
- Everything else as `\u{x}`.
-
enumerator FullTextName
Full text name escaping.
See reference documentation, chapter "Parser-Specific Usage of Text Names". Also mentioned in the specification for test adapters.
- Escape characters U+0000-U+001F, `\`, `"`, `.`, `=`, U+007F-...
- Escape all characters in `\u{X}` format.
-
enumerator FullTestAdapter
Full test adapter escaping.
-
enumerator ErrorText
Escape for error output and log messages.
- Escapes all Unicode code points that may disrupt the display or have unexpected side effects.
- Escapes all control codes.
- Escapes backslash and double-quote.
- Use short formats for `\\`, `\"`, `\n`, `\r`, `\t`.
- Everything else as `\u{x}`.