Background
A string class is a container that stores and manipulates sequences of characters. The characters are stored contiguously in memory.
The book-keeping node for such a data-structure stores meta-information and is typically implemented as a triple (pointer-to T, size, capacity). sizeof(std::string) on a 64-bit machine is 24-bytes.
Recall, that C-style strings are just char[] arrays. We rely on the null termination character \0 to detect the end of the string. So,strlen() has running-time.
In contrast, our string class has an m_size field to track the size of the character sequence.
To maintain backward compatibility with C-style strings, the string class stores any piece of text e.g. Bjarne Stroustrup as Bjarne Stroustrup\0.
+=======================+
| dev:string |
+-----------------------+
| - char* m_data ---|-----------> Pointer to buffer on the Heap
| - size_t m_size |
| - size_t m_capacity |
+=======================+
In case of very small strings (smaller than 24-bytes), it might be optimal to store the string in the book-keeping node itself, so it is allocated on the instead of living on the heap. This is known as Small Buffer Optimization(SBO).
We will define two layouts : string_long and string_short, and the actual string type would be the union of these two layouts.
For long strings (length >= 24 bytes), the m_capacity will always be an integral multiple of 2. Hence, the LSB of Byte-24 for long strings is always 0.
For small strings upto length 23 bytes, we use the first 7-bits of Byte-24 to store the actual string length (this suffices, because ). We set the the last bit to 1 to indicate, this is a small string.
Based on the above suggestion, use the LSB(Least Significant Bit) of Byte 24 (last byte), as a bit-flag in the API to distinguish if we are in SBO-mode (short string) or otherwise (long string).
/**
* @brief: A string class
* The capacity is usually a multiple of 2, so the
* least significant bit of the m_capacity field
* is set to 0, to indicate string_long format. And
* if set to 1, it implies SSO.
*/
struct string_long{
char* m_buffer_ptr; // 8 bytes
std::size_t m_size; // 8 bytes
std::size_t m_capacity; // 8 bytes
};
struct string_short{
static constexpr size_t capacity{23uz};
char m_buffer[capacity]; // 23 bytes
unsigned char m_size; // 1 byte
};
class string{
union{
string_short s;
string_long l;
} m_data;
};Implementing a basic string class
#include <cassert>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <utility>
#include <cassert>
#include <cstring>
#include <gtest/gtest.h>
/**
* @brief: A string class
* The capacity is usually a multiple of 2, so the
* least significant bit of the m_capacity field
* is set to 0, to indicate string_long format. And
* if set to 1, it implies SSO.
*/
namespace dev {
struct string_long {
char *m_buffer_ptr; // 8 bytes
size_t m_size; // 8 bytes
size_t m_capacity; // 8 bytes
};
struct string_short {
static constexpr size_t capacity{23uz};
char m_buffer[capacity]; // 23 bytes
unsigned char m_size; // 1 byte
};
class string {
using iterator = char*;
using const_iterator = const char*;
private:
union {
string_short s;
string_long l;
} m_data;
static constexpr size_t string_short_mask = 0x01;
static constexpr size_t growth_factor = 2;
bool is_short_string() { return m_data.s.m_size & string_short_mask; }
bool is_short_string() const { return m_data.s.m_size & string_short_mask; }
bool is_long_string() { return !is_short_string(); }
bool is_long_string() const { return !is_short_string(); }
size_t get_short_size() { return m_data.s.m_size >> 1; }
size_t get_short_size() const { return m_data.s.m_size >> 1; }
void set_short_size(size_t size) { m_data.s.m_size = (size << 1) | string_short_mask; }
size_t get_long_size() { return m_data.l.m_size; }
size_t get_long_size() const { return m_data.l.m_size; }
void set_long_size(size_t size) { m_data.l.m_size = size; }
void set_size(size_t size){
if(is_short_string())
set_short_size(size);
else
set_long_size(size);
}
void set_sso_flag() { m_data.s.m_size |= string_short_mask; }
void turn_off_sso_flag() {
m_data.l.m_capacity &= size_t(~string_short_mask);
}
size_t get_long_capacity() { return m_data.l.m_capacity; }
size_t get_short_capacity() { return string_short::capacity; }
void set_long_capacity(size_t capacity) {
assert(!(capacity & 0x01)); // capacity in the long string format must be an
// integral multiple of 2
m_data.l.m_capacity = capacity;
}
char *allocate_helper(size_t new_capacity) {
assert(!(new_capacity & 0x01)); // new_capacity must be a multiple of 2
return static_cast<char *>(operator new(new_capacity));
}
void deallocate_helper(char *buffer_ptr) { operator delete(buffer_ptr); }
// helper function for allocating on the heap
void construct_slow_path(const char *chars_array, size_t len) {
size_t bytes_to_alloc =
((len + 1) & 0x01) ? len + 2 : len + 1; // An extra byte for '\0'
m_data.l.m_buffer_ptr = allocate_helper(bytes_to_alloc);
for(auto i{0uz}; i < len; ++i)
m_data.l.m_buffer_ptr[i] = chars_array[i];
m_data.l.m_buffer_ptr[len] = '\0';
set_long_size(len);
turn_off_sso_flag();
set_long_capacity(bytes_to_alloc);
}
// helper function for copying string to the buffer on the stack
void construct_fast_path(const char *chars_array, size_t len) {
for(auto i{0uz}; i < len; ++i)
m_data.s.m_buffer[i] = chars_array[i];
m_data.s.m_buffer[len] = '\0';
set_short_size(len);
set_sso_flag();
}
bool is_full() { return size() == capacity(); }
public:
// ---------------------------- constructors -------------------------
// default constructor
string(){
m_data.s.m_buffer[0] = '\0';
set_short_size(0);
set_sso_flag();
}
// from literal constructor
string(const char *chars_array) : string() {
size_t len = strlen(chars_array);
if (len > 0 && len < string_short::capacity) {
construct_fast_path(chars_array, len);
} else {
construct_slow_path(chars_array, len);
}
}
// copy constructor
string(const string &other){
if (other.is_short_string()) {
size_t len = other.size();
for(auto i{0uz}; i<len; ++i)
m_data.s.m_buffer[i] = other[i];
set_short_size(len);
m_data.s.m_buffer[len] = '\0';
} else {
m_data.l.m_buffer_ptr = allocate_helper(other.m_data.l.m_capacity);
size_t len = other.get_long_size();
for(auto i{0uz}; i<len; ++i)
m_data.l.m_buffer_ptr[i] = other[i];
m_data.l.m_size = len;
m_data.l.m_capacity = other.m_data.l.m_capacity;
m_data.l.m_buffer_ptr[len] = '\0';
}
}
// move constructor
string(string &&other) {
if (other.is_short_string()) {
size_t len = other.size();
for(auto i{0uz}; i<len; ++i)
m_data.s.m_buffer[i] = other[i];
set_short_size(len);
m_data.s.m_buffer[len] = '\0';
other.m_data.s.m_buffer[0] = '\0';
other.set_short_size(0);
} else {
m_data.l.m_buffer_ptr =
std::exchange(other.m_data.l.m_buffer_ptr, nullptr);
size_t len = other.get_long_size();
m_data.l.m_size = std::exchange(other.m_data.l.m_size, 0);
m_data.l.m_capacity = std::exchange(other.m_data.l.m_capacity, 0);
m_data.l.m_buffer_ptr[len] = '\0';
}
}
// swap
friend void swap(string &lhs, string &rhs) {
// The lhs and the rhs could have different active members.
// You cannot swap two unions with different active members.
// In such cases, you have to use a temporary.
auto temp = std::move(lhs.m_data);
lhs.m_data = std::move(rhs.m_data);
rhs.m_data = std::move(temp);
}
void swap(string &other) {
using std::swap;
swap(*this, other);
}
// copy assignment
string &operator=(const string &other) {
if(this == &other)
return *this;
string(other).swap(*this);
return *this;
}
// move assignment
string &operator=(string &&other) {
string(std::move(other)).swap(*this);
return *this;
}
// construct a string from an arbitrary range [first, last)
template<typename InputIt>
string(InputIt first, InputIt last) : string(){
for(auto it{first}; it!=last; ++it)
{
push_back(*it);
}
}
// construct a string from an initializer list
string(std::initializer_list<char> list)
: string{ list.begin(), list.end() }
{}
// constructs a string with count copies of the character ch
string(size_t count, char ch) : string(){
for(auto i{0uz}; i < count; ++i)
{
push_back(ch);
}
}
// destructor
~string() {
if (is_long_string()) {
deallocate_helper(m_data.l.m_buffer_ptr);
m_data.l.m_buffer_ptr = nullptr;
m_data.l.m_size = 0;
}
}
// ---------------------- capacity functions -------------------------
// size of the string
size_t size() {
if (is_short_string())
return get_short_size();
else
return get_long_size();
}
// const version
size_t size() const {
if (is_short_string())
return get_short_size();
else
return get_long_size();
}
// capacity
size_t capacity() {
if (is_short_string())
return get_short_capacity() - 1;
else
return get_long_capacity() - 1;
}
// checks if string is empty
bool empty() { return size() == 0; }
// allocate new storage
void reserve(size_t new_capacity) {
// All existing iterators and references will be invalidated.
size_t current_capacity = capacity();
size_t current_size = size();
if (new_capacity <= current_capacity ||
new_capacity < string_short::capacity)
return;
// new_capacity request must be atleast 23 bytes and must be larger than
// current_capacity.
// Book an extra byte for null termination character '\0'
++new_capacity;
// new_capacity must be an integral multiple of 2
new_capacity = !(new_capacity & 0x01) ? new_capacity : new_capacity + 1;
char* mem = allocate_helper(new_capacity);
char* buffer_ptr = data();
for(auto i{0uz}; i < current_size; ++i)
mem[i] = buffer_ptr[i];
if(is_long_string())
deallocate_helper(buffer_ptr);
m_data.l.m_buffer_ptr = mem;
m_data.l.m_capacity = new_capacity;
set_long_size(current_size);
turn_off_sso_flag();
}
// ---------------------------- Element Access ------------------------
// data() return the pointer to the first character of the string
char *data() {
return is_short_string() ? &m_data.s.m_buffer[0] : m_data.l.m_buffer_ptr;
}
// const version
const char *data() const {
return is_short_string() ? &m_data.s.m_buffer[0] : m_data.l.m_buffer_ptr;
}
// Array subscript operator[]
char &operator[](size_t pos) {
char *buffer_ptr = data();
return buffer_ptr[pos];
}
// Array subscript operator[]
const char &operator[](size_t pos) const{
const char *buffer_ptr = data();
return buffer_ptr[pos];
}
// indexed access with bounds checking
char &at(size_t pos) {
if (pos < 0 || pos > size())
throw std::out_of_range("Index out of bounds!");
char *buffer_ptr = data();
return buffer_ptr[pos];
}
// Returns a reference to the first character of the string
char &front() {
char *buffer_ptr = data();
return buffer_ptr[0];
}
// Returns a reference to the last character of the string
char &back() {
char *buffer_ptr = data();
return buffer_ptr[size() - 1];
}
// ---------------------------------- Iterators -----------------------------
char *begin() { return data(); }
const char *begin() const { return data(); }
char *end() { return data() + size(); }
const char *end() const { return data() + size(); }
// ------------------------------- Modifiers -----------------------------
// Appends the given character ch to the end of the string
void push_back(char ch) {
size_t current_size = size();
if (is_full()) {
size_t new_capacity = capacity() * growth_factor;
reserve(new_capacity);
}
char* buffer_ptr = data();
buffer_ptr[current_size] = ch;
buffer_ptr[current_size + 1] = '\0';
set_size(current_size + 1);
}
// removes a character from the end of the string
void pop_back(){
size_t new_size = size() - 1;
set_size(new_size);
data()[new_size] = '\0';
}
// inserts the characters from the range [first, last)
// before the element (if any) pointed to by pos
template<typename InputIt>
void insert(const_iterator pos, InputIt first, InputIt last){
auto pos_ = const_cast<iterator>(pos);
auto first_ = first;
auto last_ = last;
if(first < last){
size_t offset = std::distance(begin(), pos_);
size_t n = std::distance(first, last);
size_t num_elems_to_shift = std::distance(pos_, end());
size_t remaining_capacity = capacity() - size();
// handle self-referential insertion
string temp;
if(first_ >= begin() && first_ < end()
&& last_ > begin() && last_ <= end())
{
for(auto it{first_}; it!=last_; ++it)
temp.push_back(*it);
first_ = temp.begin();
last_ = temp.end();
}
if(n > remaining_capacity)
{
size_t excess_capacity_reqd = std::max(n - remaining_capacity, 0uz);
reserve(capacity() + excess_capacity_reqd);
pos_ = std::next(begin(), offset);
}
// objects to displace from [begin(), end()) sequence
if(n < num_elems_to_shift)
{
std::copy(end() - n, end(), end());
std::copy_backward(pos_, end() - n, end());
}
else{
std::copy(pos_, end(), pos_ + n);
}
std::copy(first_, last_, pos_);
set_size(size() + n);
char* buffer_ptr = data();
buffer_ptr[size()] = '\0';
}
}
// Removes the characters in the range [first, last)
iterator erase(const_iterator first, const_iterator last){
iterator first_ = const_cast<iterator>(first);
iterator last_ = const_cast<iterator>(last);
size_t offset = std::distance(begin(), first_);
size_t num_chars_to_shift_left = std::distance(last_, end());
last_ = std::min(last_, end());
string chars_to_remove(first_, last_);
size_t num_chars_erased = std::distance(first_, last_);
char* buffer_ptr = begin();
for(auto i{0uz}; i < num_chars_to_shift_left; ++i)
{
buffer_ptr[offset + i] = buffer_ptr[offset + i + num_chars_erased];
}
set_size(size() - num_chars_erased);
buffer_ptr[size()] = '\0';
for(auto k{0uz}; k < num_chars_erased; ++k){
buffer_ptr[size() + 1 + k] = chars_to_remove[k];
}
return buffer_ptr + offset;
}
};
} // namespace dev
// ----------------------------------------------------------------
// default constructor
// ----------------------------------------------------------------
TEST(DefaultConstructor, IsEmpty) {
dev::string s;
EXPECT_EQ(s.size(), 0);
EXPECT_TRUE(s.empty());
EXPECT_GT(s.capacity(), 0);
}
// ----------------------------------------------------------------
// from literal constructor
// ----------------------------------------------------------------
TEST(FromLiteralConstructor, ShortString) {
dev::string s("hello");
EXPECT_FALSE(s.empty());
EXPECT_EQ(s.size(), 5);
EXPECT_GT(s.capacity(), 0);
}
TEST(FromLiteralConstructor, LongString) {
dev::string s("C++ is a general-purpose programming language");
EXPECT_FALSE(s.empty());
EXPECT_EQ(s.size(), 45);
EXPECT_GT(s.capacity(), 0);
}
TEST(FromLiteralConstructor, EmbeddedNull) {
dev::string s("hello\0world");
EXPECT_FALSE(s.empty());
EXPECT_EQ(s.size(), 5);
EXPECT_GT(s.capacity(), 0);
}
// ----------------------------------------------------------------
// copy constructor
// ----------------------------------------------------------------
TEST(CopyConstructor, ShortString) {
dev::string s1("Short");
dev::string c1(s1);
EXPECT_EQ(c1.size(), 5);
}
TEST(CopyConstructor, LongString) {
dev::string s2("Long string for copying over the heap");
dev::string c2(s2);
EXPECT_EQ(c2.size(), s2.size());
}
TEST(CopyConstructor, EmbeddedNull) {
char data[] = {'a', '\0', 'b'};
dev::string s3(data, data + 3);
dev::string c3(s3);
EXPECT_EQ(c3.size(), 3);
EXPECT_EQ(c3[1], '\0');
}
// ----------------------------------------------------------------
// copy assignment
// ----------------------------------------------------------------
TEST(CopyAssignment, SSOToSSO) {
dev::string s1("A");
dev::string s2("B");
s1 = s2;
EXPECT_EQ(s1.size(), 1);
EXPECT_EQ(s1[0], 'B');
EXPECT_NE(s1.data(), s2.data());
}
TEST(CopyAssignment, LongToLong) {
dev::string s1("This is a very long string that currently lives on the heap.");
dev::string s2("And this is another long string, also on the heap, but different.");
const char* old_ptr = s1.data();
s1 = s2;
EXPECT_EQ(s1.size(), s2.size());
EXPECT_EQ(std::strcmp(s1.data(), s2.data()), 0);
EXPECT_NE(s1.data(), old_ptr);
}
TEST(CopyAssignment, LongToShort) {
dev::string s1("Long strings take up heap memory.");
dev::string s2("Short");
s1 = s2;
EXPECT_EQ(s1.size(), 5);
}
TEST(CopyAssignment, ShortToLong) {
dev::string s1("Short");
dev::string s2("Long strings take up heap memory.");
s1 = s2;
EXPECT_EQ(s1.size(), s2.size());
}
TEST(CopyAssignment, EmbeddedNull) {
char data[] = {'x', '\0', 'y', 'z'};
dev::string s1(data, data + 4);
dev::string s2("Placeholder");
s2 = s1;
EXPECT_EQ(s2.size(), 4);
EXPECT_EQ(s2[1], '\0');
EXPECT_EQ(s2[2], 'y');
}
TEST(CopyAssignment, SelfAssignment) {
dev::string s1("Self Assignment Test");
dev::string* ptr = &s1;
s1 = *ptr;
EXPECT_EQ(s1.size(), 20);
EXPECT_EQ(std::strcmp(s1.data(), "Self Assignment Test"), 0);
}
// ----------------------------------------------------------------
// move constructor
// ----------------------------------------------------------------
TEST(MoveConstructor, ShortSSO) {
dev::string source("Short SSO");
const char* original_data_addr = source.data();
dev::string destination(std::move(source));
EXPECT_EQ(destination.size(), 9);
EXPECT_EQ(std::strcmp(destination.data(), "Short SSO"), 0);
EXPECT_NE(destination.data(), original_data_addr);
EXPECT_EQ(source.size(), 0);
}
TEST(MoveConstructor, LongHeap) {
dev::string source("This is a very long string that lives on the heap.");
size_t original_size = source.size();
const char* original_heap_ptr = source.data();
dev::string destination(std::move(source));
EXPECT_EQ(destination.size(), original_size);
EXPECT_EQ(destination.data(), original_heap_ptr);
EXPECT_TRUE(source.data() == nullptr || source.size() == 0);
}
TEST(MoveConstructor, EmbeddedNull) {
char null_data[] = {'m', '\0', 'v', 'e'};
dev::string source(null_data, null_data + 4);
dev::string destination(std::move(source));
EXPECT_EQ(destination.size(), 4);
EXPECT_EQ(destination[1], '\0');
EXPECT_EQ(std::memcmp(destination.data(), null_data, 4), 0);
}
// ----------------------------------------------------------------
// move assignment
// ----------------------------------------------------------------
TEST(MoveAssignment, SSOToSSO) {
dev::string s1("Original");
dev::string s2("New");
s1 = std::move(s2);
EXPECT_EQ(s1.size(), 3);
EXPECT_EQ(std::strcmp(s1.data(), "New"), 0);
EXPECT_EQ(s2.size(), 0);
}
TEST(MoveAssignment, LongToLong) {
dev::string s1("This is a very long string currently occupying the heap memory.");
dev::string s2("Another extremely long string that we will move into the first one.");
const char* s2_heap_ptr = s2.data();
size_t s2_size = s2.size();
s1 = std::move(s2);
EXPECT_EQ(s1.size(), s2_size);
EXPECT_EQ(s1.data(), s2_heap_ptr);
EXPECT_TRUE(s2.data() == nullptr || s2.size() == 0);
}
TEST(MoveAssignment, LongToShort) {
dev::string s1("A long string that will be overwritten by a short moved string.");
dev::string s2("Short");
s1 = std::move(s2);
EXPECT_EQ(s1.size(), 5);
}
TEST(MoveAssignment, EmbeddedNull) {
char data[] = {'m', '\0', 'v', 'e'};
dev::string s1(data, data + 4);
dev::string s2("Temporary");
s2 = std::move(s1);
EXPECT_EQ(s2.size(), 4);
EXPECT_EQ(s2[1], '\0');
}
TEST(MoveAssignment, SelfMove) {
dev::string s1("Don't break me");
dev::string* ptr = &s1;
s1 = std::move(*ptr);
EXPECT_EQ(s1.size(), 14);
EXPECT_EQ(std::strcmp(s1.data(), "Don't break me"), 0);
}
// ----------------------------------------------------------------
// reserve
// ----------------------------------------------------------------
TEST(Reserve, ShortNoOp) {
dev::string s("A");
s.reserve(10);
EXPECT_EQ(s.capacity(), 22);
}
TEST(Reserve, LongGrowth) {
dev::string s("A");
s.reserve(50);
EXPECT_GE(s.capacity(), 50);
EXPECT_FALSE(s.empty());
EXPECT_EQ(s.size(), 1);
EXPECT_EQ(s[0], 'A');
}
TEST(Reserve, EmbeddedNull) {
char data[] = {'x', '\0', 'y'};
dev::string s(data, data + 3);
s.reserve(100);
EXPECT_EQ(s.size(), 3);
EXPECT_EQ(s[1], '\0');
}
// ----------------------------------------------------------------
// push_back
// ----------------------------------------------------------------
TEST(PushBack, ShortSSO) {
dev::string s;
s.push_back('A');
s.push_back('B');
s.push_back('C');
EXPECT_EQ(s.size(), 3);
EXPECT_EQ(s[0], 'A');
EXPECT_EQ(s[2], 'C');
EXPECT_EQ(s.data()[3], '\0');
}
TEST(PushBack, SSOToHeapTransition) {
dev::string s;
for (int i = 0; i < 23; ++i)
s.push_back('x');
EXPECT_EQ(s.size(), 23);
s.push_back('!');
EXPECT_EQ(s.size(), 24);
EXPECT_GT(s.capacity(), 23);
EXPECT_EQ(s[0], 'x');
EXPECT_EQ(s[23], '!');
EXPECT_EQ(s.data()[24], '\0');
}
TEST(PushBack, EmbeddedNull) {
dev::string s("Start");
s.push_back('\0');
s.push_back('Z');
EXPECT_EQ(s.size(), 7);
EXPECT_EQ(s[5], '\0');
EXPECT_EQ(s[6], 'Z');
EXPECT_EQ(s.data()[7], '\0');
}
// ----------------------------------------------------------------
// pop_back
// ----------------------------------------------------------------
TEST(PopBack, ShortSSO) {
dev::string s("ABC");
s.pop_back();
EXPECT_EQ(s.size(), 2);
EXPECT_EQ(s[0], 'A');
EXPECT_EQ(s[1], 'B');
EXPECT_EQ(s.data()[2], '\0');
}
TEST(PopBack, LongHeap) {
dev::string s("123456789012345678901234");
size_t original_size = s.size();
size_t original_capacity = s.capacity();
for (auto i{1uz}; i <= 23; ++i) {
s.pop_back();
EXPECT_EQ(s.size(), original_size - i);
EXPECT_EQ(s.capacity(), original_capacity);
EXPECT_EQ(s.data()[s.size()], '\0');
}
}
TEST(PopBack, EmbeddedNull) {
char null_data[] = {'a', 'b', '\0', 'c'};
dev::string s(null_data, null_data + 4);
s.pop_back();
EXPECT_EQ(s.size(), 3);
EXPECT_EQ(s.back(), '\0');
EXPECT_EQ(s.data()[3], '\0');
}
// ----------------------------------------------------------------
// insert
// ----------------------------------------------------------------
TEST(Insert, ShortSSOExternalRange) {
dev::string s("AC");
char b = 'B';
s.insert(s.begin() + 1, &b, &b + 1);
EXPECT_EQ(s.size(), 3);
EXPECT_EQ(std::strcmp(s.data(), "ABC"), 0);
}
TEST(Insert, LongHeapExternalRange) {
dev::string s("This is a long string.");
const char* extra = " Indeed!";
s.insert(s.end() - 1, extra, extra + std::strlen(extra));
EXPECT_EQ(s.size(), 22 + 8);
EXPECT_TRUE(std::strstr(s.data(), "Indeed!."));
}
TEST(Insert, EmbeddedNull) {
dev::string s("a");
char nulls[] = {'\0', 'b'};
s.insert(s.end(), nulls, nulls + 2);
EXPECT_EQ(s.size(), 3);
EXPECT_EQ(s[1], '\0');
EXPECT_EQ(s[2], 'b');
}
TEST(Insert, SSOToHeapTransition) {
dev::string s("Small");
dev::string long_val(50, 'z');
s.insert(s.begin() + 1, long_val.begin(), long_val.end());
EXPECT_EQ(s.size(), 55);
EXPECT_EQ(s[0], 'S');
for (auto i{1uz}; i <= 50; ++i)
EXPECT_EQ(s[i], 'z');
EXPECT_EQ(s[51], 'm');
EXPECT_EQ(s[52], 'a');
EXPECT_EQ(s[53], 'l');
EXPECT_EQ(s[54], 'l');
EXPECT_EQ(s[55], '\0');
}
TEST(Insert, SelfReferential) {
dev::string s("Hello");
s.insert(s.begin(), s.begin(), s.end());
EXPECT_EQ(s.size(), 10);
EXPECT_EQ(std::strcmp(s.data(), "HelloHello"), 0);
}
TEST(Insert, SelfReferentialWithReallocation) {
dev::string s("Trigger");
dev::string padding(20, '!');
s.insert(s.end(), padding.begin(), padding.end());
s.insert(s.begin(), s.begin(), s.end());
EXPECT_EQ(s.size(), 54);
}
// ----------------------------------------------------------------
// erase
// ----------------------------------------------------------------
TEST(Erase, ShortSSOMiddle) {
dev::string s("ABXCD");
auto it = s.erase(s.begin() + 2, s.begin() + 3);
EXPECT_EQ(s.size(), 4);
EXPECT_EQ(std::strcmp(s.data(), "ABCD"), 0);
EXPECT_EQ(*it, 'C');
EXPECT_EQ(s.data()[4], '\0');
}
TEST(Erase, LongHeapEraseToEnd) {
dev::string s("This is a long string that we will truncate.");
s.erase(s.begin() + 26, s.end());
EXPECT_EQ(s.size(), 26);
EXPECT_EQ(std::strcmp(s.data(), "This is a long string that"), 0);
EXPECT_EQ(s.data()[26], '\0');
}
TEST(Erase, EmbeddedNull) {
char data[] = {'a', 'b', '\0', 'c', 'd'};
dev::string s(data, data + 5);
s.erase(s.begin() + 2, s.begin() + 3);
EXPECT_EQ(s.size(), 4);
EXPECT_EQ(s[2], 'c');
EXPECT_EQ(std::memcmp(s.data(), "abcd", 4), 0);
}
TEST(Erase, FullErase) {
dev::string s("Clear me");
s.erase(s.begin(), s.end());
EXPECT_EQ(s.size(), 0);
EXPECT_TRUE(s.empty());
EXPECT_EQ(s.data()[0], '\0');
}
// ----------------------------------------------------------------
// operator[]
// ----------------------------------------------------------------
TEST(ElementAccess, ShortSSO) {
dev::string s("Short");
EXPECT_EQ(s[0], 'S');
EXPECT_EQ(s[4], 't');
s[0] = 's';
EXPECT_EQ(s[0], 's');
}
TEST(ElementAccess, LongHeap) {
dev::string s("This is a very long string for indexing.");
EXPECT_EQ(s[0], 'T');
EXPECT_EQ(s[10], 'v');
s[10] = 'V';
EXPECT_EQ(s[10], 'V');
}
TEST(ElementAccess, EmbeddedNull) {
char data[] = {'\0', 'x', '\0', 'y'};
dev::string s(data, data + 4);
EXPECT_EQ(s[0], '\0');
EXPECT_EQ(s[1], 'x');
EXPECT_EQ(s[2], '\0');
}
// ----------------------------------------------------------------
// front and back
// ----------------------------------------------------------------
TEST(FrontAndBack, ShortSSO) {
dev::string s("Hi");
EXPECT_EQ(s.front(), 'H');
EXPECT_EQ(s.back(), 'i');
}
TEST(FrontAndBack, LongHeap) {
dev::string s("Long strings need front/back too");
EXPECT_EQ(s.front(), 'L');
EXPECT_EQ(s.back(), 'o');
}
TEST(FrontAndBack, EmbeddedNull) {
char data[] = {'\0', 'a', '\0'};
dev::string s(data, data + 3);
EXPECT_EQ(s.front(), '\0');
EXPECT_EQ(s.back(), '\0');
}
// ----------------------------------------------------------------
// data()
// ----------------------------------------------------------------
TEST(DataFunction, ShortSSO) {
dev::string s("SSO");
const char* ptr = s.data();
EXPECT_EQ(ptr[0], 'S');
EXPECT_EQ(ptr[3], '\0');
}
TEST(DataFunction, LongHeap) {
dev::string s("A long string to check pointer consistency");
const char* ptr = s.data();
EXPECT_EQ(ptr[0], 'A');
EXPECT_NE(ptr, reinterpret_cast<const char*>(&s));
}
TEST(DataFunction, EmbeddedNull) {
char data[] = {'\0', 'A'};
dev::string s(data, data + 2);
EXPECT_EQ(s.data()[0], '\0');
EXPECT_EQ(s.data()[1], 'A');
}
// ----------------------------------------------------------------
// size()
// ----------------------------------------------------------------
TEST(SizeFunction, ZeroSize) {
dev::string s("");
EXPECT_EQ(s.size(), 0);
}
TEST(SizeFunction, MaxSSOSize) {
dev::string s("1234567890123456789012");
EXPECT_EQ(s.size(), 22);
EXPECT_EQ(s.capacity(), 22);
}
TEST(SizeFunction, SSOToHeapTransition) {
dev::string s("1234567890123456789012");
s.push_back('3');
EXPECT_EQ(s.size(), 23);
EXPECT_GE(s.capacity(), 24);
}
TEST(SizeFunction, EmbeddedNull) {
char data[] = {'H', 'i', '\0', '!', '\0'};
dev::string s(data, data + 5);
EXPECT_EQ(s.size(), 5);
}
// ----------------------------------------------------------------
// capacity()
// ----------------------------------------------------------------
TEST(CapacityFunction, SSOCapacity) {
dev::string s("Short");
EXPECT_EQ(s.capacity(), 22);
}
TEST(CapacityFunction, LongCapacity) {
dev::string s("This string is long enough to be on the heap.");
EXPECT_GE(s.capacity(), s.size());
}
TEST(CapacityFunction, GrowthOnPushBack) {
dev::string s("12345678901234567890123");
size_t cap_before = s.capacity();
s.push_back('!');
EXPECT_GT(s.capacity(), cap_before);
}
// ----------------------------------------------------------------
// empty()
// ----------------------------------------------------------------
TEST(EmptyFunction, TrulyEmpty) {
dev::string s;
EXPECT_TRUE(s.empty());
}
TEST(EmptyFunction, NotEmptySSO) {
dev::string s("a");
EXPECT_FALSE(s.empty());
}
TEST(EmptyFunction, NotEmptyLong) {
dev::string s("This is a very long string");
EXPECT_FALSE(s.empty());
}
TEST(EmptyFunction, SingleEmbeddedNull) {
char null_char = '\0';
dev::string s(&null_char, &null_char + 1);
EXPECT_EQ(s.size(), 1);
EXPECT_FALSE(s.empty());
}
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}