diff --git a/benchmark/sonic.hpp b/benchmark/sonic.hpp index fb36e859..76baa062 100644 --- a/benchmark/sonic.hpp +++ b/benchmark/sonic.hpp @@ -22,9 +22,7 @@ template class SonicStringResult : public StringResult> { public: - std::string_view str_impl() const { - return const_cast(wb).ToString(); - } + std::string_view str_impl() const { return wb.ToString(); } sonic_json::WriteBuffer wb; }; diff --git a/docs/usage.md b/docs/usage.md index 1e74916b..7b970b5b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -45,6 +45,10 @@ Document is the manager of Nodes. Sonic-Cpp organizes JSON value as a tree. Document also the root of JSON value tree. There is an allocator in Document, which you should use to allocate memory for Node and Document. +> **Note:** Re-parsing a `Document` discards the previous tree. Any raw +> pointers, iterators, or `DNode*` obtained from an earlier `Parse()` become +> invalid and must be re-acquired after each parse. + ### Query in object There are two ways to find members: `operator[]` or `FindMember`. We recommend using `FindMember`. @@ -209,6 +213,23 @@ using MyDoc = sonic_json::GenericDocument; Sonic uses rapidjson's allocator, you can define your own allocator follow [rapidjson allocaotr](http://rapidjson.org/md_doc_internals.html#InternalAllocator) +### Detecting OOM on Post-Parse Mutations + +DNode mutations like `PushBack`, `AddMember`, and `Reserve` do not return a +status code. When you use `MemoryPoolAllocator`, you can check +`HadOom()` / `ClearOom()` around these operations if you need to detect an +allocation failure: + +```c++ +auto& alloc = doc.GetAllocator(); +alloc.ClearOom(); +doc.PushBack(v, alloc); +if (alloc.HadOom()) { /* handle OOM */ } +``` + +The flag is sticky until cleared. This is a `MemoryPoolAllocator` feature, not +part of the abstract allocator concept. + ### JSON Pointer Sonic provides a JsonPointer class but doesn't support resolving the JSON pointer syntax of [RFC 6901](https://www.rfc-editor.org/rfc/rfc6901). We will support diff --git a/include/sonic/allocator.h b/include/sonic/allocator.h index 52958acc..6885502d 100644 --- a/include/sonic/allocator.h +++ b/include/sonic/allocator.h @@ -190,6 +190,9 @@ class MemoryPoolAllocator { ownBaseAllocator; //!< base allocator created by this object. size_t refcount; bool ownBuffer; + //!< Sticky OOM flag shared across refcounted copies. Atomic because + //!< the per-instance SpinLock does not synchronize different copies. + std::atomic hadOom; }; static const size_t SIZEOF_SHARED_DATA = SONIC_ALIGN(sizeof(SharedData)); @@ -226,6 +229,7 @@ class MemoryPoolAllocator { : 0)) { sonic_assert(baseAllocator_ != 0); sonic_assert(shared_ != 0); + new (&shared_->hadOom) std::atomic(false); if (baseAllocator) { shared_->ownBaseAllocator = 0; } else { @@ -258,12 +262,13 @@ class MemoryPoolAllocator { baseAllocator_(baseAllocator ? baseAllocator : new BaseAllocator()), shared_(static_cast(AlignBuffer(buffer, size))) { sonic_assert(size >= SIZEOF_SHARED_DATA + SIZEOF_CHUNK_HEADER); + new (&shared_->hadOom) std::atomic(false); shared_->chunkHead = GetChunkHead(shared_); shared_->chunkHead->capacity = size - SIZEOF_SHARED_DATA - SIZEOF_CHUNK_HEADER; shared_->chunkHead->size = 0; shared_->chunkHead->next = 0; - shared_->ownBaseAllocator = 0; + shared_->ownBaseAllocator = baseAllocator ? 0 : baseAllocator_; shared_->ownBuffer = false; shared_->refcount = 1; } @@ -312,6 +317,8 @@ class MemoryPoolAllocator { } Clear(); BaseAllocator* a = shared_->ownBaseAllocator; + using AtomicBool = std::atomic; + shared_->hadOom.~AtomicBool(); if (shared_->ownBuffer) { baseAllocator_->Free(shared_); } @@ -371,7 +378,10 @@ class MemoryPoolAllocator { LOCK_GUARD; if (sonic_unlikely(shared_->chunkHead->size + size > shared_->chunkHead->capacity)) { - if (!AddChunk(cp_.ChunkSize(size))) return NULL; + if (!AddChunk(cp_.ChunkSize(size))) { + shared_->hadOom.store(true, std::memory_order_release); + return NULL; + } } void* buffer = GetChunkBuffer(shared_) + shared_->chunkHead->size; @@ -412,9 +422,22 @@ class MemoryPoolAllocator { if (originalSize) std::memcpy(newBuffer, originalPtr, originalSize); return newBuffer; } + // Mark OOM even on the Malloc-copy fallback so the flag is set + // regardless of which internal path actually failed. + shared_->hadOom.store(true, std::memory_order_release); return nullptr; } + // Lets callers distinguish an OOM from a logical null (e.g. Malloc(0)). + bool HadOom() const { + sonic_assert(shared_->refcount > 0); + return shared_->hadOom.load(std::memory_order_acquire); + } + void ClearOom() { + sonic_assert(shared_->refcount > 0); + shared_->hadOom.store(false, std::memory_order_release); + } + //! Frees a memory block (concept Allocator) static void Free(void* ptr) noexcept { (void)ptr; } // Do nothing @@ -486,7 +509,8 @@ class MapAllocator { MapAllocator(const MapAllocator& rhs) : alloc_(rhs.alloc_) {} pointer allocate(size_type n, const void* = nullptr) { - return (T*)alloc_->Malloc(n * sizeof(T)); + if (alloc_ == nullptr || n == 0) return nullptr; + return static_cast(alloc_->Malloc(n * sizeof(T))); } void deallocate(void* p, size_type) { alloc_->Free(p); } diff --git a/include/sonic/dom/dynamicnode.h b/include/sonic/dom/dynamicnode.h index 74cc99aa..26786567 100644 --- a/include/sonic/dom/dynamicnode.h +++ b/include/sonic/dom/dynamicnode.h @@ -32,6 +32,9 @@ namespace sonic_json { +// OOM invariant: mutating operations (Reserve, AddMember, PushBack, ...) +// leave the node unchanged on allocation failure rather than propagating an +// error. Callers that need to detect OOM should use Allocator::HadOom(). template class DNode : public GenericNode> { public: @@ -82,6 +85,11 @@ class DNode : public GenericNode> { this->o.len = rhs.getTypeAndLen(); // Copy size and type. if (count > 0) { void* mem = containerMalloc(count, alloc); + if (sonic_unlikely(mem == nullptr)) { + this->setLength(0, kObject); + setChildren(nullptr); + break; + } rhsNodeType* rn = rhs.getObjChildrenFirst(); DNode* ln = (DNode*)((char*)mem + sizeof(MetaNode)); for (size_t i = 0; i < count * 2; i += 2) { @@ -98,8 +106,13 @@ class DNode : public GenericNode> { size_t a_size = rhs.Size(); this->a.len = rhs.getTypeAndLen(); // Copy size and type. if (a_size > 0) { - rhsNodeType* rn = rhs.getArrChildrenFirst(); void* mem = containerMalloc(a_size, alloc); + if (sonic_unlikely(mem == nullptr)) { + this->setLength(0, kArray); + setChildren(nullptr); + break; + } + rhsNodeType* rn = rhs.getArrChildrenFirst(); DNode* ln = (DNode*)((char*)mem + sizeof(MetaNode)); for (size_t i = 0; i < a_size; ++i) { new (ln + i) DNode(*(rn + i), alloc, copyString); @@ -302,14 +315,17 @@ class DNode : public GenericNode> { bool CreateMap(Allocator& alloc) { sonic_assert(this->IsObject()); sonic_assert(this->Capacity() >= this->Size()); - // if (this->Size() == 0) return false; + // Empty object: reserve meta storage first so children() is non-null. + // If the reserve OOMs, children() stays null and we bail instead of + // dereferencing it via getMapUnsafe() / setMap. if (nullptr == children()) { this->memberReserveImpl(16, alloc); + if (nullptr == children()) return false; } - if (getMapUnsfe()) return true; + if (getMapUnsafe()) return true; map_type* map = static_cast(alloc.Malloc(sizeof(map_type))); + if (nullptr == map) return false; new (map) map_type(MAType(&alloc)); - // SetMap(map); MemberNode* m = (MemberNode*)getObjChildrenFirstUnsafe(); for (size_t i = 0; i < this->Size(); ++i) { map->emplace(std::make_pair((m + i)->name.GetStringView(), i)); @@ -514,8 +530,9 @@ class DNode : public GenericNode> { DNode& reserveImpl(size_t new_cap, Allocator& alloc) { if (new_cap > this->Capacity()) { - setChildren(containerRealloc(children(), this->Capacity(), new_cap, - alloc)); + void* mem = + containerRealloc(children(), this->Capacity(), new_cap, alloc); + if (sonic_likely(mem != nullptr)) setChildren(mem); } return *this; } @@ -611,10 +628,13 @@ class DNode : public GenericNode> { if (new_cap > this->Capacity()) { void* old_ptr = children(); size_t old_cap = this->Capacity(); - setChildren( - containerRealloc(old_ptr, old_cap, new_cap, alloc)); - if (old_cap == 0) { - setMap(nullptr); // Set map as nullptr when first alloc memory. + void* mem = + containerRealloc(old_ptr, old_cap, new_cap, alloc); + if (sonic_likely(mem != nullptr)) { + setChildren(mem); + if (old_cap == 0) { + setMap(nullptr); // Set map as nullptr when first alloc memory. + } } } return *this; @@ -650,10 +670,9 @@ class DNode : public GenericNode> { sonic_force_inline void* containerMalloc(size_t cap, Allocator& alloc) { size_t alloc_size = cap * sizeof(T) + sizeof(MetaNode); void* mem = alloc.Malloc(alloc_size); - // init Metanode - MetaNode* meta = static_cast(mem); - new (meta) MetaNode(cap); - + if (sonic_likely(mem != nullptr)) { + new (static_cast(mem)) MetaNode(cap); + } return mem; } @@ -663,10 +682,9 @@ class DNode : public GenericNode> { size_t old_size = old_cap * sizeof(T) + sizeof(MetaNode); size_t new_size = new_cap * sizeof(T) + sizeof(MetaNode); void* mem = alloc.Realloc(old_ptr, old_size, new_size); - // init Metanode - MetaNode* meta = static_cast(mem); - meta->SetMetaCap(new_cap); - + if (sonic_likely(mem != nullptr)) { + static_cast(mem)->SetMetaCap(new_cap); + } return mem; } @@ -740,7 +758,7 @@ class DNode : public GenericNode> { return ((MetaNode*)(this->o.next.children))->map; } - sonic_force_inline map_type* getMapUnsfe() const { + sonic_force_inline map_type* getMapUnsafe() const { sonic_assert(this->IsObject()); return ((MetaNode*)(this->o.next.children))->map; } @@ -810,13 +828,17 @@ class DNode : public GenericNode> { size_t count = this->Size(); if (count >= this->Capacity()) { if (this->Capacity() == 0) { - setChildren(containerMalloc(k_default_obj_cap, alloc)); + void* mem = containerMalloc(k_default_obj_cap, alloc); + if (sonic_unlikely(mem == nullptr)) return this->MemberEnd(); + setChildren(mem); } else { size_t cap = this->Capacity(); cap += (cap + 1) / 2; // grow by factor 1.5 void* old_ptr = children(); - setChildren(containerRealloc(old_ptr, this->Capacity(), cap, - alloc)); + void* mem = + containerRealloc(old_ptr, this->Capacity(), cap, alloc); + if (sonic_unlikely(mem == nullptr)) return this->MemberEnd(); + setChildren(mem); } } @@ -824,6 +846,7 @@ class DNode : public GenericNode> { DNode name; if (copyKey) { name.SetString(key, alloc); + if (sonic_unlikely(name.IsNull())) return this->MemberEnd(); } else { name.SetString(key); } @@ -845,11 +868,11 @@ class DNode : public GenericNode> { if (nullptr == children()) { goto not_find; } - if (getMapUnsfe()) { - auto it = getMapUnsfe()->find(MSType(key.data(), key.size())); - if (it != getMapUnsfe()->end()) { + if (getMapUnsafe()) { + auto it = getMapUnsafe()->find(MSType(key.data(), key.size())); + if (it != getMapUnsafe()->end()) { m = memberBeginUnsafe() + it->second; - getMapUnsfe()->erase(it); + getMapUnsafe()->erase(it); goto find; } @@ -927,9 +950,9 @@ class DNode : public GenericNode> { if (this->Size() >= cap) { size_t new_cap = cap ? cap + (cap + 1) / 2 : k_default_array_cap; void* old_ptr = this->a.next.children; - DNode* new_child = - (DNode*)containerRealloc(old_ptr, cap, new_cap, alloc); - this->a.next.children = new_child; + void* new_ptr = containerRealloc(old_ptr, cap, new_cap, alloc); + if (sonic_unlikely(new_ptr == nullptr)) return *this; + this->a.next.children = new_ptr; } // add value to the last pos DNode& last = *(this->End()); diff --git a/include/sonic/dom/generic_document.h b/include/sonic/dom/generic_document.h index 2794344c..d3a0d22b 100644 --- a/include/sonic/dom/generic_document.h +++ b/include/sonic/dom/generic_document.h @@ -17,6 +17,8 @@ #pragma once #include +#include +#include #include "sonic/dom/dynamicnode.h" #include "sonic/dom/json_pointer.h" @@ -24,6 +26,14 @@ namespace sonic_json { +namespace internal { +template +struct has_clear : std::false_type {}; +template +struct has_clear().Clear())>> + : std::true_type {}; +} // namespace internal + template class Parser; template @@ -195,8 +205,15 @@ class GenericDocument : public NodeType { } void destroyDom() { - if (!Allocator::kNeedFree) { + if constexpr (!Allocator::kNeedFree) { this->setType(kNull); + if (own_alloc_) { + if constexpr (internal::has_clear::value) { + alloc_->Clear(); + } + str_ = nullptr; + schema_str_ = nullptr; + } return; } // NOTE: must free dynamic nodes at first @@ -213,15 +230,19 @@ class GenericDocument : public NodeType { GenericDocument& parseImpl(const char* json, size_t len) { Parser p; SAXHandler sax(*alloc_); + if (!sax.SetUp(StringView(json, len))) { + parse_result_ = kErrorNoMem; + return *this; + } parse_result_ = allocateStringBuffer(json, len); if (sonic_unlikely(HasParseError())) { return *this; } - if (!sax.SetUp(StringView(json, len))) { + parse_result_ = p.Parse(str_, len, sax); + if (sonic_unlikely(sax.oom_)) { parse_result_ = kErrorNoMem; return *this; } - parse_result_ = p.Parse(str_, len, sax); if (sonic_unlikely(HasParseError())) { return *this; } @@ -233,15 +254,22 @@ class GenericDocument : public NodeType { GenericDocument& parseSchemaImpl(const char* json, size_t len) { Parser p; SchemaHandler sax(this, *alloc_); + if (!sax.SetUp(StringView(json, len))) { + parse_result_ = kErrorNoMem; + return *this; + } parse_result_ = allocateSchemaStringBuffer(json, len); if (sonic_unlikely(HasParseError())) { return *this; } - if (!sax.SetUp(StringView(json, len))) { + parse_result_ = p.Parse(schema_str_, len, sax); + if (sonic_unlikely(sax.oom_)) { parse_result_ = kErrorNoMem; return *this; } - parse_result_ = p.Parse(schema_str_, len, sax); + if (sonic_unlikely(HasParseError())) { + return *this; + } return *this; } diff --git a/include/sonic/dom/handler.h b/include/sonic/dom/handler.h index c7475d37..d3d95544 100644 --- a/include/sonic/dom/handler.h +++ b/include/sonic/dom/handler.h @@ -34,13 +34,16 @@ class SAXHandler { using Allocator = typename NodeType::AllocatorType; using MemberType = typename NodeType::MemberNode; + bool oom_{false}; + SAXHandler() = default; SAXHandler(Allocator &alloc) : alloc_(&alloc) {} SAXHandler(const SAXHandler &) = delete; SAXHandler &operator=(const SAXHandler &rhs) = delete; SAXHandler(SAXHandler &&rhs) - : st_(rhs.st_), + : oom_(rhs.oom_), + st_(rhs.st_), np_(rhs.np_), cap_(rhs.cap_), parent_(rhs.parent_), @@ -49,6 +52,7 @@ class SAXHandler { rhs.cap_ = 0; rhs.np_ = 0; rhs.alloc_ = 0; + rhs.oom_ = false; } SAXHandler &operator=(SAXHandler &&rhs) { @@ -58,12 +62,14 @@ class SAXHandler { cap_ = rhs.cap_; parent_ = rhs.parent_; alloc_ = rhs.alloc_; + oom_ = rhs.oom_; rhs.st_ = nullptr; rhs.np_ = 0; rhs.cap_ = 0; rhs.parent_ = 0; rhs.alloc_ = 0; + rhs.oom_ = false; return *this; } @@ -74,9 +80,10 @@ class SAXHandler { size_t cap = len / 2 + 2; if (cap < 16) cap = 16; if (!st_ || cap_ < cap) { - st_ = static_cast( + NodeType *new_st = static_cast( std::realloc((void *)(st_), sizeof(NodeType) * cap)); - if (!st_) return false; + if (!new_st) return false; + st_ = new_st; cap_ = cap; } return true; @@ -170,9 +177,17 @@ class SAXHandler { obj.setLength(pairs, kObject); if (pairs) { void *mem = obj.template containerMalloc(pairs, *alloc_); - obj.setChildren(mem); - internal::Xmemcpy( - (void *)obj.getObjChildrenFirstUnsafe(), (void *)(&obj + 1), pairs); + if (sonic_unlikely(mem == nullptr)) { + NodeType *children = &obj + 1; + for (size_t i = 0; i < size_t(pairs) * 2; i++) children[i].~NodeType(); + obj.setLength(0, kObject); + obj.setChildren(nullptr); + oom_ = true; + } else { + obj.setChildren(mem); + internal::Xmemcpy( + (void *)obj.getObjChildrenFirstUnsafe(), (void *)(&obj + 1), pairs); + } } else { obj.setChildren(nullptr); } @@ -186,9 +201,18 @@ class SAXHandler { size_t old = arr.o.next.ofs; arr.setLength(count, kArray); if (count) { - arr.setChildren(arr.template containerMalloc(count, *alloc_)); - internal::Xmemcpy( - (void *)arr.getArrChildrenFirstUnsafe(), (void *)(&arr + 1), count); + void *mem = arr.template containerMalloc(count, *alloc_); + if (sonic_unlikely(mem == nullptr)) { + NodeType *children = &arr + 1; + for (size_t i = 0; i < count; i++) children[i].~NodeType(); + arr.setLength(0, kArray); + arr.setChildren(nullptr); + oom_ = true; + } else { + arr.setChildren(mem); + internal::Xmemcpy( + (void *)arr.getArrChildrenFirstUnsafe(), (void *)(&arr + 1), count); + } } else { arr.setChildren(nullptr); } @@ -214,14 +238,18 @@ class SAXHandler { if (sonic_likely(np_ < cap_)) { np_++; return true; - } else { - cap_ += cap_; - st_ = static_cast( - std::realloc((void *)(st_), sizeof(NodeType) * cap_)); - if (!st_) return false; - np_++; - return true; } + size_t new_cap = cap_ * 2; + NodeType *new_st = static_cast( + std::realloc((void *)(st_), sizeof(NodeType) * new_cap)); + if (!new_st) { + oom_ = true; + return false; + } + st_ = new_st; + cap_ = new_cap; + np_++; + return true; } NodeType *st_{nullptr}; @@ -262,10 +290,20 @@ class LazySAXHandler { NodeType &arr = *stack_.template Begin(); arr.setLength(count, kArray); if (count) { - arr.setChildren(arr.template containerMalloc(count, *alloc_)); - internal::Xmemcpy( - (void *)arr.getArrChildrenFirstUnsafe(), (void *)(&arr + 1), count); - stack_.Pop(count); + void *mem = arr.template containerMalloc(count, *alloc_); + if (sonic_unlikely(mem == nullptr)) { + NodeType *children = &arr + 1; + for (size_t i = 0; i < count; i++) children[i].~NodeType(); + stack_.Pop(count); + arr.setLength(0, kArray); + arr.setChildren(nullptr); + oom_ = true; + } else { + arr.setChildren(mem); + internal::Xmemcpy( + (void *)arr.getArrChildrenFirstUnsafe(), (void *)(&arr + 1), count); + stack_.Pop(count); + } } else { arr.setChildren(nullptr); } @@ -277,10 +315,19 @@ class LazySAXHandler { obj.setLength(pairs, kObject); if (pairs) { void *mem = obj.template containerMalloc(pairs, *alloc_); - obj.setChildren(mem); - internal::Xmemcpy( - (void *)obj.getObjChildrenFirstUnsafe(), (void *)(&obj + 1), pairs); - stack_.Pop(pairs); + if (sonic_unlikely(mem == nullptr)) { + NodeType *children = &obj + 1; + for (size_t i = 0; i < size_t(pairs) * 2; i++) children[i].~NodeType(); + stack_.Pop(pairs); + obj.setLength(0, kObject); + obj.setChildren(nullptr); + oom_ = true; + } else { + obj.setChildren(mem); + internal::Xmemcpy( + (void *)obj.getObjChildrenFirstUnsafe(), (void *)(&obj + 1), pairs); + stack_.Pop(pairs); + } } else { obj.setChildren(nullptr); } @@ -307,6 +354,7 @@ class LazySAXHandler { // allocator for node stack and string buffers Allocator *alloc_{nullptr}; internal::Stack stack_{}; + bool oom_{false}; }; } // namespace sonic_json diff --git a/include/sonic/dom/parser.h b/include/sonic/dom/parser.h index c1c7458b..3521e6c2 100644 --- a/include/sonic/dom/parser.h +++ b/include/sonic/dom/parser.h @@ -140,12 +140,16 @@ class Parser { template sonic_force_inline bool parseStrInPlace(SAX &sax) { - return sax.String(parseStringHelper()); + StringView sv = parseStringHelper(); + if (sonic_unlikely(err_ != kErrorNone)) return true; + return sax.String(sv); } template sonic_force_inline bool parseKeyInPlace(SAX &sax) { - return sax.Key(parseStringHelper()); + StringView sv = parseStringHelper(); + if (sonic_unlikely(err_ != kErrorNone)) return true; + return sax.Key(sv); } sonic_force_inline bool carry_one(char c, uint64_t &sum) const { @@ -185,7 +189,6 @@ class Parser { d /= internal::kPow10Tab[-exp10]; return true; } - return false; } SonicError parseFloatEiselLemire64(double &dbl, int exp10, uint64_t man, @@ -245,33 +248,33 @@ class Parser { } \ } while (0) -#define SET_INT_AND_RETURN(int_val) \ - do { \ - if (!sax.Int(int_val)) RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ +#define SET_INT_AND_RETURN(int_val) \ + do { \ + if (!sax.Int(int_val)) RETURN_SET_ERROR_CODE(kSaxTermination); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) -#define SET_UINT_AND_RETURN(int_val) \ - do { \ - if (!sax.Uint(int_val)) RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ +#define SET_UINT_AND_RETURN(int_val) \ + do { \ + if (!sax.Uint(int_val)) RETURN_SET_ERROR_CODE(kSaxTermination); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) -#define SET_DOUBLE_AND_RETURN(dbl) \ - do { \ - if (!sax.Double(dbl)) RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ +#define SET_DOUBLE_AND_RETURN(dbl) \ + do { \ + if (!sax.Double(dbl)) RETURN_SET_ERROR_CODE(kSaxTermination); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) -#define SET_U64_AS_DOUBLE_AND_RETURN(int_val) \ - do { \ - union { \ - double d; \ - uint64_t u; \ - } du; \ - du.u = int_val; \ - if (!sax.Double(du.d)) RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ +#define SET_U64_AS_DOUBLE_AND_RETURN(int_val) \ + do { \ + union { \ + double d; \ + uint64_t u; \ + } du; \ + du.u = int_val; \ + if (!sax.Double(du.d)) RETURN_SET_ERROR_CODE(kSaxTermination); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) static constexpr uint64_t kUint64Max = 0xFFFFFFFFFFFFFFFF; @@ -326,7 +329,7 @@ class Parser { // Zero Integer if constexpr (parseFlags & ParseFlags::kParseIntegerAsRaw) { if (!sax.Raw(s + start_idx, i - start_idx)) - RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); + RETURN_SET_ERROR_CODE(kSaxTermination); RETURN_SET_ERROR_CODE(kErrorNone); } SET_UINT_AND_RETURN(0); @@ -367,7 +370,7 @@ class Parser { // Integer if constexpr (parseFlags & ParseFlags::kParseIntegerAsRaw) { if (!sax.Raw(s + start_idx, i - start_idx)) - RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); + RETURN_SET_ERROR_CODE(kSaxTermination); RETURN_SET_ERROR_CODE(kErrorNone); } @@ -516,7 +519,7 @@ class Parser { return parseNumberAsString(sax); } } - if (!sax.Double(d)) RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); + if (!sax.Double(d)) RETURN_SET_ERROR_CODE(kSaxTermination); RETURN_SET_ERROR_CODE(error_code); } @@ -556,22 +559,22 @@ class Parser { } \ } while (0) -#define SET_INT_AND_RETURN(int_val) \ - do { \ - if (!sax.Int(int_val)) RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ +#define SET_INT_AND_RETURN(int_val) \ + do { \ + if (!sax.Int(int_val)) RETURN_SET_ERROR_CODE(kSaxTermination); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) -#define SET_UINT_AND_RETURN(int_val) \ - do { \ - if (!sax.Uint(int_val)) RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ +#define SET_UINT_AND_RETURN(int_val) \ + do { \ + if (!sax.Uint(int_val)) RETURN_SET_ERROR_CODE(kSaxTermination); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) -#define SET_DOUBLE_AND_RETURN(dbl) \ - do { \ - if (!sax.Double(dbl)) RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ +#define SET_DOUBLE_AND_RETURN(dbl) \ + do { \ + if (!sax.Double(dbl)) RETURN_SET_ERROR_CODE(kSaxTermination); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) size_t i = pos_ - 1; @@ -689,7 +692,7 @@ class Parser { double_string_fast: // parse floating number as json string value if (!sax.NumStr(StringView(const_cast(s + start), i - start))) { - RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); + RETURN_SET_ERROR_CODE(kSaxTermination); } RETURN_SET_ERROR_CODE(kErrorNone); @@ -702,6 +705,7 @@ class Parser { template void parsePrimitives(SAX &sax) { + bool ok = true; switch (json_buf_[pos_ - 1]) { case '0': case '1': @@ -715,26 +719,31 @@ class Parser { case '9': case '-': parseNumber(sax); - break; + return; case '"': - parseStrInPlace(sax); + ok = parseStrInPlace(sax); // only need check length when parsing string primitives, because the // padding "x\"x" makes parsing other invalid JSON always failed if (pos_ > len_) { setParseError(kParseErrorInvalidChar); + return; } break; case 'f': - parseFalse(sax); + ok = parseFalse(sax); break; case 't': - parseTrue(sax); + ok = parseTrue(sax); break; case 'n': - parseNull(sax); + ok = parseNull(sax); break; default: setParseError(kParseErrorInvalidChar); + return; + } + if (sonic_unlikely(!ok) && err_ == kErrorNone) { + err_ = kSaxTermination; } } @@ -753,6 +762,13 @@ class Parser { goto err_invalid_char; \ } \ } while (0) +#define sonic_sax_check(expr) \ + do { \ + if (sonic_unlikely(!(expr))) { \ + if (err_ == kErrorNone) err_ = kSaxTermination; \ + return; \ + } \ + } while (0) using namespace sonic_json::internal; // TODO (liuq19): vector is a temporary choice, will optimize in future. @@ -764,21 +780,21 @@ class Parser { uint8_t c = scan.SkipSpace(json_buf_, pos_); switch (c) { case '[': { - sax.StartArray(); + sonic_sax_check(sax.StartArray()); depth.push_back(kArrMask); c = scan.SkipSpace(json_buf_, pos_); if (c == ']') { - sax.EndArray(0); + sonic_sax_check(sax.EndArray(0)); goto scope_end; } goto arr_val; } case '{': { - sax.StartObject(); + sonic_sax_check(sax.StartObject()); depth.push_back(kObjMask); c = scan.SkipSpace(json_buf_, pos_); if (c == '}') { - sax.EndObject(0); + sonic_sax_check(sax.EndObject(0)); goto scope_end; } goto obj_key; @@ -807,30 +823,35 @@ class Parser { } if (c == '}') { pos_++; - sax.EndObject(depth.back()); + sonic_sax_check(sax.EndObject(depth.back())); goto scope_end; } goto err_invalid_char; } + } else if (sonic_unlikely(!found)) { + // Without CheckKeyReturn, `false` from Key() is a handler rejection + // (e.g. OOM), not a skip signal. + if (err_ == kErrorNone) err_ = kSaxTermination; + return; } c = scan.SkipSpace(json_buf_, pos_); switch (c) { case '{': { - sax.StartObject(); + sonic_sax_check(sax.StartObject()); depth.push_back(kObjMask); c = scan.SkipSpace(json_buf_, pos_); if (c == '}') { - sax.EndObject(0); + sonic_sax_check(sax.EndObject(0)); goto scope_end; } goto obj_key; } case '[': { - sax.StartArray(); + sonic_sax_check(sax.StartArray()); depth.push_back(kArrMask); c = scan.SkipSpace(json_buf_, pos_); if (c == ']') { - sax.EndArray(0); + sonic_sax_check(sax.EndArray(0)); goto scope_end; } goto arr_val; @@ -849,20 +870,35 @@ class Parser { parseNumber(sax); sonic_check_err(); break; - case 't': - parseTrue(sax); + case 't': { + bool ok = parseTrue(sax); sonic_check_err(); + if (sonic_unlikely(!ok)) { + err_ = kSaxTermination; + return; + } break; - case 'f': - parseFalse(sax); + } + case 'f': { + bool ok = parseFalse(sax); sonic_check_err(); + if (sonic_unlikely(!ok)) { + err_ = kSaxTermination; + return; + } break; - case 'n': - parseNull(sax); + } + case 'n': { + bool ok = parseNull(sax); sonic_check_err(); + if (sonic_unlikely(!ok)) { + err_ = kSaxTermination; + return; + } break; + } case '"': - parseStrInPlace(sax); + sonic_sax_check(parseStrInPlace(sax)); sonic_check_err(); break; default: @@ -879,7 +915,7 @@ class Parser { if (sonic_unlikely(c != '}')) { goto err_invalid_char; } - sax.EndObject(depth.back()); + sonic_sax_check(sax.EndObject(depth.back())); scope_end: sonic_check_err(); @@ -896,21 +932,21 @@ class Parser { arr_val: switch (c) { case '{': { - sax.StartObject(); + sonic_sax_check(sax.StartObject()); depth.push_back(kObjMask); c = scan.SkipSpace(json_buf_, pos_); if (c == '}') { - sax.EndObject(0); + sonic_sax_check(sax.EndObject(0)); goto scope_end; } goto obj_key; } case '[': { - sax.StartArray(); + sonic_sax_check(sax.StartArray()); depth.push_back(kArrMask); c = scan.SkipSpace(json_buf_, pos_); if (c == ']') { - sax.EndArray(0); + sonic_sax_check(sax.EndArray(0)); goto scope_end; } goto arr_val; @@ -929,20 +965,35 @@ class Parser { parseNumber(sax); sonic_check_err(); break; - case 't': - parseTrue(sax); + case 't': { + bool ok = parseTrue(sax); sonic_check_err(); + if (sonic_unlikely(!ok)) { + err_ = kSaxTermination; + return; + } break; - case 'f': - parseFalse(sax); + } + case 'f': { + bool ok = parseFalse(sax); sonic_check_err(); + if (sonic_unlikely(!ok)) { + err_ = kSaxTermination; + return; + } break; - case 'n': - parseNull(sax); + } + case 'n': { + bool ok = parseNull(sax); sonic_check_err(); + if (sonic_unlikely(!ok)) { + err_ = kSaxTermination; + return; + } break; + } case '"': - parseStrInPlace(sax); + sonic_sax_check(parseStrInPlace(sax)); sonic_check_err(); break; default: @@ -957,7 +1008,7 @@ class Parser { goto arr_val; } if (sonic_likely(c == ']')) { - sax.EndArray(depth.back() & (kArrMask - 1)); + sonic_sax_check(sax.EndArray(depth.back() & (kArrMask - 1))); goto scope_end; } goto err_invalid_char; @@ -965,8 +1016,10 @@ class Parser { doc_end: return; err_invalid_char: - err_ = kParseErrorInvalidChar; + if (err_ == kErrorNone) err_ = kParseErrorInvalidChar; return; +#undef sonic_sax_check +#undef sonic_check_err } // parseLazyImpl only mark the json positions, and not parse any more, even @@ -987,22 +1040,29 @@ class Parser { size_t sn = 0; const uint8_t *src, *sdst; +#define sonic_lazy_sax_check(expr) \ + do { \ + if (sonic_unlikely(!(expr))) { \ + return ParseResult(kSaxTermination, pos); \ + } \ + } while (0) + switch (c) { case '[': { - sax.StartArray(); + sonic_lazy_sax_check(sax.StartArray()); c = scan.SkipSpaceSafe(data, pos, len); if (c == ']') { - sax.EndArray(0); + sonic_lazy_sax_check(sax.EndArray(0)); return kErrorNone; } pos--; goto arr_val; } case '{': { - sax.StartObject(); + sonic_lazy_sax_check(sax.StartObject()); c = scan.SkipSpaceSafe(data, pos, len); if (c == '}') { - sax.EndObject(0); + sonic_lazy_sax_check(sax.EndObject(0)); return kErrorNone; } goto obj_key; @@ -1012,7 +1072,8 @@ class Parser { pos--; start = scan.SkipOne(data, pos, len); if (start < 0) goto skip_error; - sax.Raw(reinterpret_cast(data + start), pos - start); + sonic_lazy_sax_check( + sax.Raw(reinterpret_cast(data + start), pos - start)); return kErrorNone; } } @@ -1033,6 +1094,9 @@ class Parser { if (skips == 2) { // parse escaped strings uint8_t *dst = (uint8_t *)alloc.Malloc(sn + 32); + if (sonic_unlikely(dst == nullptr)) { + return ParseResult(kErrorNoMem, pos); + } sdst = dst; std::memcpy(dst, src, sn); sn = internal::parseStringInplace(dst, err); @@ -1046,7 +1110,8 @@ class Parser { } key = StringView(reinterpret_cast(sdst), sn); if (!sax.Key(key.data(), key.size(), allocated)) { - goto err_invalid_char; + if (allocated) Allocator::Free((void *)(sdst)); + return ParseResult(kSaxTermination, pos); } c = scan.SkipSpaceSafe(data, pos, len); if (sonic_unlikely(c != ':')) { @@ -1054,7 +1119,8 @@ class Parser { } start = scan.SkipOne(data, pos, len); if (start < 0) goto skip_error; - sax.Raw(reinterpret_cast(data + start), pos - start); + sonic_lazy_sax_check( + sax.Raw(reinterpret_cast(data + start), pos - start)); cnt++; c = scan.SkipSpaceSafe(data, pos, len); if (c == ',') { @@ -1064,13 +1130,14 @@ class Parser { if (sonic_unlikely(c != '}')) { goto err_invalid_char; } - sax.EndObject(cnt); + sonic_lazy_sax_check(sax.EndObject(cnt)); return kErrorNone; arr_val: start = scan.SkipOne(data, pos, len); if (start < 0) goto skip_error; - sax.Raw(reinterpret_cast(data + start), pos - start); + sonic_lazy_sax_check( + sax.Raw(reinterpret_cast(data + start), pos - start)); cnt++; c = scan.SkipSpaceSafe(data, pos, len); if (c == ',') { @@ -1079,17 +1146,16 @@ class Parser { if (sonic_unlikely(c != ']')) { goto err_invalid_char; } - sax.EndArray(cnt); + sonic_lazy_sax_check(sax.EndArray(cnt)); return kErrorNone; err_invalid_char: return ParseResult(kParseErrorInvalidChar, pos - 1); skip_error: return ParseResult(SonicError(-start), pos - 1); +#undef sonic_lazy_sax_check } -#undef sonic_check_err - private: sonic_force_inline void reset() { pos_ = 0; diff --git a/include/sonic/dom/schema_handler.h b/include/sonic/dom/schema_handler.h index 2b7f5dde..8ad0385c 100644 --- a/include/sonic/dom/schema_handler.h +++ b/include/sonic/dom/schema_handler.h @@ -34,6 +34,8 @@ class SchemaHandler { using Allocator = typename NodeType::AllocatorType; using MemberType = typename NodeType::MemberNode; + bool oom_{false}; + SchemaHandler() = default; SchemaHandler(NodeType *root, Allocator &alloc) : parent_node_(root), cur_node_(root), alloc_(&alloc) {} @@ -41,7 +43,8 @@ class SchemaHandler { SchemaHandler(const SchemaHandler &) = delete; SchemaHandler &operator=(const SchemaHandler &rhs) = delete; SchemaHandler(SchemaHandler &&rhs) - : st_(rhs.st_), + : oom_(rhs.oom_), + st_(rhs.st_), parent_node_(rhs.parent_node_), cur_node_(rhs.cur_node_), np_(rhs.np_), @@ -56,6 +59,7 @@ class SchemaHandler { rhs.np_ = 0; rhs.alloc_ = nullptr; rhs.found_node_count_ = 0; + rhs.oom_ = false; parent_st_ = std::move(rhs.parent_st_); found_count_st_ = std::move(rhs.found_count_st_); } @@ -70,6 +74,7 @@ class SchemaHandler { parent_ = rhs.parent_; found_node_count_ = rhs.found_node_count_; alloc_ = rhs.alloc_; + oom_ = rhs.oom_; rhs.st_ = nullptr; rhs.parent_node_ = nullptr; @@ -79,6 +84,7 @@ class SchemaHandler { rhs.parent_ = 0; rhs.alloc_ = nullptr; rhs.found_node_count_ = 0; + rhs.oom_ = false; parent_st_ = std::move(rhs.parent_st_); found_count_st_ = std::move(rhs.found_count_st_); return *this; @@ -91,9 +97,10 @@ class SchemaHandler { size_t cap = len / 2 + 2; if (cap < 16) cap = 16; if (!st_ || cap_ < cap) { - st_ = static_cast( + NodeType *new_st = static_cast( std::realloc((void *)(st_), sizeof(NodeType) * cap)); - if (!st_) return false; + if (!new_st) return false; + st_ = new_st; cap_ = cap; } return true; @@ -287,9 +294,17 @@ class SchemaHandler { obj.setLength(pairs, kObject); if (pairs) { void *mem = obj.template containerMalloc(pairs, *alloc_); - obj.setChildren(mem); - internal::Xmemcpy( - (void *)obj.getObjChildrenFirstUnsafe(), obj_member_ptr, pairs); + if (sonic_unlikely(mem == nullptr)) { + NodeType *children = static_cast(obj_member_ptr); + for (size_t i = 0; i < size_t(pairs) * 2; i++) children[i].~NodeType(); + obj.setLength(0, kObject); + obj.setChildren(nullptr); + oom_ = true; + } else { + obj.setChildren(mem); + internal::Xmemcpy( + (void *)obj.getObjChildrenFirstUnsafe(), obj_member_ptr, pairs); + } } else { obj.setChildren(nullptr); } @@ -317,9 +332,18 @@ class SchemaHandler { NodeType &arr = *arr_ptr; arr.setLength(count, kArray); if (count) { - arr.setChildren(arr.template containerMalloc(count, *alloc_)); - internal::Xmemcpy( - (void *)arr.getArrChildrenFirstUnsafe(), arr_element_ptr, count); + void *mem = arr.template containerMalloc(count, *alloc_); + if (sonic_unlikely(mem == nullptr)) { + NodeType *children = static_cast(arr_element_ptr); + for (size_t i = 0; i < count; i++) children[i].~NodeType(); + arr.setLength(0, kArray); + arr.setChildren(nullptr); + oom_ = true; + } else { + arr.setChildren(mem); + internal::Xmemcpy( + (void *)arr.getArrChildrenFirstUnsafe(), arr_element_ptr, count); + } } else { arr.setChildren(nullptr); } diff --git a/include/sonic/experiment/lazy_update.h b/include/sonic/experiment/lazy_update.h index 591b8fa4..4f46f96b 100644 --- a/include/sonic/experiment/lazy_update.h +++ b/include/sonic/experiment/lazy_update.h @@ -35,6 +35,9 @@ static inline ParseResult ParseLazy(NodeType &node, StringView json, if (ret.Error()) { return ret; } + if (sonic_unlikely(sax.oom_)) { + return ParseResult(kErrorNoMem, json.size()); + } NodeType *root = sax.stack_.template Begin(); node = std::move(*root); return ret; @@ -43,19 +46,18 @@ static inline ParseResult ParseLazy(NodeType &node, StringView json, template static inline SonicError UpdateNodeLazy(NodeType &target, NodeType &source, Allocator &alloc) { - ParseResult ret; SonicError err = kErrorNone; - // check the raw type - if (target.IsRaw() && *target.GetRaw().data() == '{') { - ret = ParseLazy(target, target.GetRaw(), - alloc); - } - if (source.IsRaw() && *source.GetRaw().data() == '{') { - ret = ParseLazy(source, source.GetRaw(), - alloc); + if (target.IsRaw() && !target.GetRaw().empty() && + *target.GetRaw().data() == '{') { + ParseResult ret = ParseLazy( + target, target.GetRaw(), alloc); + if (ret.Error()) return ret.Error(); } - if (ret.Error()) { - return ret.Error(); + if (source.IsRaw() && !source.GetRaw().empty() && + *source.GetRaw().data() == '{') { + ParseResult ret = ParseLazy( + source, source.GetRaw(), alloc); + if (ret.Error()) return ret.Error(); } // update the object type if (!target.IsObject() || !source.IsObject() || target.Empty()) { diff --git a/include/sonic/internal/arch/common/unicode_common.h b/include/sonic/internal/arch/common/unicode_common.h index 6c488fb7..104d6d0b 100644 --- a/include/sonic/internal/arch/common/unicode_common.h +++ b/include/sonic/internal/arch/common/unicode_common.h @@ -242,6 +242,10 @@ sonic_force_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, // inside the multilingual plane check uint32_t code_point = hex_to_u32_nocheck(*src_ptr + 2); *src_ptr += 6; + // Reject lone low surrogates: they are not valid Unicode scalar values. + if (code_point >= 0xdc00 && code_point <= 0xdfff) { + return false; + } // check for low surrogate for characters outside the Basic // Multilingual Plane. if (code_point >= 0xd800 && code_point < 0xdc00) { @@ -258,6 +262,11 @@ sonic_force_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, if ((code_point | code_point_2) >> 16) { return false; } + // The second escape must be a low surrogate; otherwise the subtraction + // below would wrap and produce a garbage code point. + if (code_point_2 < 0xdc00 || code_point_2 > 0xdfff) { + return false; + } code_point = (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; diff --git a/include/sonic/internal/stack.h b/include/sonic/internal/stack.h index add3c0a6..fa483065 100644 --- a/include/sonic/internal/stack.h +++ b/include/sonic/internal/stack.h @@ -27,7 +27,7 @@ namespace internal { class Stack { public: - Stack(size_t cap = defaultCapcity()) : cap_(cap) { + Stack(size_t cap = defaultCapcity()) : cap_(0) { buf_ = nullptr; top_ = nullptr; Reserve(cap); @@ -62,10 +62,10 @@ class Stack { size_t align_cap = SONIC_ALIGN(new_cap); size_t old_size = Size(); char* tmp = static_cast(std::realloc(buf_, align_cap)); + if (sonic_unlikely(tmp == nullptr)) return; top_ = tmp + old_size; buf_ = tmp; - sonic_assert(buf_ != NULL); - cap_ = buf_ ? new_cap : 0; + cap_ = new_cap; } /** @@ -169,8 +169,8 @@ class Stack { sonic_force_inline char* Grow(size_t cnt) { if (sonic_unlikely(top_ + cnt >= buf_ + cap_)) { if (sonic_unlikely((top_ + cnt) > buf_ + 2 * cap_)) { - cap_ = top_ - buf_ + cnt; - Reserve(cap_ + cap_ / 2); + size_t needed = (top_ - buf_) + cnt; + Reserve(needed + needed / 2); } else { Reserve(cap_ * 2); } diff --git a/include/sonic/sonic.h b/include/sonic/sonic.h index aeb803c1..c52faf19 100644 --- a/include/sonic/sonic.h +++ b/include/sonic/sonic.h @@ -24,8 +24,10 @@ #define SONIC_MAJOR_VERSION 1 #define SONIC_MINOR_VERSION 0 #define SONIC_PATCH_VERSION 2 -#define SONIC_STRS(s) #s -#define SONIC_VERSION_STRING \ - SONIC_STRS(SONIC_MAJOR_VERSION.SONIC_MINOR_VERSION.SONIC_PATCH.VERSION) +#define SONIC_STRS_IMPL(s) #s +#define SONIC_STRS(s) SONIC_STRS_IMPL(s) +#define SONIC_VERSION_STRING \ + SONIC_STRS(SONIC_MAJOR_VERSION) \ + "." SONIC_STRS(SONIC_MINOR_VERSION) "." SONIC_STRS(SONIC_PATCH_VERSION) namespace sonic_json {} // namespace sonic_json diff --git a/include/sonic/writebuffer.h b/include/sonic/writebuffer.h index 55631539..95a8053b 100644 --- a/include/sonic/writebuffer.h +++ b/include/sonic/writebuffer.h @@ -37,15 +37,18 @@ class WriteBuffer { } /** - * @brief Return the context in the buffer. - * @return a null-terminate string. - * @note a '\0' will be added in the ending, so, this function is not - * thread-safe. + * @brief Return the buffer contents as a null-terminated C string. + * @return pointer to the buffer, terminated with an appended '\0'. + * @note Not thread-safe. */ sonic_force_inline const char* ToString() const { + if (sonic_likely(stack_.Size() < stack_.Capacity())) { + *(stack_.template End()) = '\0'; + return stack_.template Begin(); + } stack_.Grow(1); *(stack_.template End()) = '\0'; - return stack_.Begin(); + return stack_.template Begin(); } sonic_force_inline StringView ToStringView() const { diff --git a/tests/allocator_test.cpp b/tests/allocator_test.cpp index 71740adb..5a44e5f4 100644 --- a/tests/allocator_test.cpp +++ b/tests/allocator_test.cpp @@ -16,7 +16,17 @@ #include "sonic/allocator.h" +#include +#include + #include "gtest/gtest.h" +#include "sonic/internal/stack.h" + +// Let huge-allocation OOM tests return null under ASAN instead of aborting. +// Dead code in non-ASAN builds; ASAN_OPTIONS still overrides it. +extern "C" __attribute__((used)) const char *__asan_default_options() { + return "allocator_may_return_null=1"; +} namespace { @@ -90,4 +100,136 @@ TEST(Allocator, MemoryPoolAllocatorMoveAndMapAllocator) { } } +TEST(Stack, ReservePreservesContents) { + // Verify that Reserve correctly grows the buffer and preserves existing + // data, and that Reserve(smaller) is a no-op. + sonic_json::internal::Stack s(8); + + s.Push('A'); + s.Push('B'); + ASSERT_EQ(2u, s.Size()); + ASSERT_EQ('B', *s.Top()); + + // Reserve with current cap: must be a no-op. + size_t old_cap = s.Capacity(); + s.Reserve(old_cap); + EXPECT_EQ(old_cap, s.Capacity()) << "Reserve(<=cap) must not change Capacity"; + EXPECT_EQ('B', *s.Top()) << "Reserve no-op must not touch Top"; + + // Reserve with larger cap: must grow and preserve data. + s.Reserve(old_cap * 4); + EXPECT_GE(s.Capacity(), old_cap * 4) + << "Reserve must grow to at least new_cap"; + EXPECT_EQ(2u, s.Size()) << "Reserve must not change Size"; + EXPECT_EQ('B', *s.Top()) << "Reserve must preserve existing data"; +} + +// Use a large-but-bounded request that fails via allocator bookkeeping +// instead of asking ASan's malloc interceptor for absurd virtual sizes. +// This still exceeds the default pool chunk capacity and exercises the +// "failed allocation sets hadOom" path without polluting test output. +struct FailAfterFirstChunkAllocator { + bool allow_ctor = true; + void *Malloc(size_t n) { + if (allow_ctor) { + allow_ctor = false; + return std::malloc(n); + } + return nullptr; + } + void *Realloc(void *, size_t, size_t) { return nullptr; } + static void Free(void *p) { std::free(p); } +}; + +TEST(Allocator, MemoryPoolAllocatorHadOomSignalsFailedMalloc) { + FailAfterFirstChunkAllocator base; + MemoryPoolAllocator pool(8, &base); + EXPECT_FALSE(pool.HadOom()); + void *p = pool.Malloc(16); + EXPECT_EQ(p, nullptr); + EXPECT_TRUE(pool.HadOom()); + pool.ClearOom(); + EXPECT_FALSE(pool.HadOom()); +} + +// Both MemoryPoolAllocator ctors place SharedData (incl. atomic +// hadOom) into raw storage. Assert the flag reads false on a freshly +// constructed allocator before any Malloc call — guards against a +// regression where hadOom is left in an indeterminate state by ctor init. +TEST(Allocator, MemoryPoolAllocatorHadOomStartsFalseOnConstruction) { + { + MemoryPoolAllocator<> pool; + EXPECT_FALSE(pool.HadOom()); + } + { + // Buffer ctor path: user-supplied storage, AlignBuffer instead of Malloc. + alignas(alignof(std::max_align_t)) unsigned char buf[4096]; + MemoryPoolAllocator<> pool(buf, sizeof(buf)); + EXPECT_FALSE(pool.HadOom()); + } +} + +TEST(Allocator, MemoryPoolAllocatorHadOomSharedAcrossCopies) { + // Flag lives on SharedData so refcounted copies see coherent state. + FailAfterFirstChunkAllocator base; + MemoryPoolAllocator a(8, &base); + MemoryPoolAllocator b( + a); // shares SharedData with a + EXPECT_FALSE(a.HadOom()); + EXPECT_FALSE(b.HadOom()); + (void)b.Malloc(16); + EXPECT_TRUE(a.HadOom()); // a sees b's failure + EXPECT_TRUE(b.HadOom()); + a.ClearOom(); + EXPECT_FALSE(b.HadOom()); +} + +// Writer on one refcounted copy, reader on another. The per-instance +// SpinLock does not synchronize different copies, so hadOom must be +// atomic for this to be race-free. +TEST(Allocator, MemoryPoolAllocatorHadOomIsThreadSafeAcrossCopies) { + FailAfterFirstChunkAllocator base; + MemoryPoolAllocator a(8, &base); + MemoryPoolAllocator b(a); // shares SharedData + + std::atomic stop{false}; + std::thread writer([&] { + for (int i = 0; i < 200 && !stop.load(); ++i) { + (void)a.Malloc(16); // sets hadOom + } + }); + std::thread reader([&] { + for (int i = 0; i < 200 && !stop.load(); ++i) { + (void)b.HadOom(); // observes hadOom + } + }); + writer.join(); + reader.join(); + stop.store(true); + + EXPECT_TRUE(b.HadOom()); + a.ClearOom(); + EXPECT_FALSE(b.HadOom()); +} + +TEST(Stack, ConstructorOomLeavesConsistentState) { + // If the ctor's initial Reserve() fails, cap_ must not lie about the + // (absent) buffer. Otherwise Grow()'s guard `top_+cnt >= buf_+cap_` reads + // as `1 >= cap_ + 0` and skips the re-allocation entirely, letting a + // subsequent Push() dereference a null top_. + constexpr size_t kHuge = (size_t{1} << 62); + sonic_json::internal::Stack s(kHuge); + + if (s.Begin() == nullptr) { + EXPECT_EQ(0u, s.Capacity()) + << "Capacity must be 0 when the ctor could not allocate a buffer"; + } + + // And a subsequent Push() must still work — Grow() re-allocates on demand. + s.Push('X'); + ASSERT_NE(s.Begin(), nullptr); + EXPECT_EQ(1u, s.Size()); + EXPECT_EQ('X', *s.Top()); +} + } // namespace diff --git a/tests/document_test.cpp b/tests/document_test.cpp index 3aed1c0e..5db2398b 100644 --- a/tests/document_test.cpp +++ b/tests/document_test.cpp @@ -577,8 +577,6 @@ TYPED_TEST(DocumentTest, SerializeOK) { } } -TYPED_TEST(DocumentTest, SerializeSort) {} - TYPED_TEST(DocumentTest, SonicErrorInvalidKey) { using DNode = typename TypeParam::NodeType; auto iter = this->doc_.MemberBegin(); diff --git a/tests/exp_update_test.cpp b/tests/exp_update_test.cpp index 3de8f1bb..76da60b7 100644 --- a/tests/exp_update_test.cpp +++ b/tests/exp_update_test.cpp @@ -148,4 +148,27 @@ TEST(UpdateLazy, InvalidJson) { } } +TEST(UpdateLazy, NestedInvalidTargetPropagates) { + // Nested invalid target merged with nested valid source: + // the error from the target-side lazy parse must be propagated and + // the update must fail ("{}"); it must NOT be silently overwritten + // by the source's successful parse. + { + std::string target = R"({"a":{"foo":}})"; // nested {"foo":} is invalid + std::string source = R"({"a":{"bar":5}})"; + auto ret = + sonic_json::UpdateLazy(target, source); + EXPECT_STREQ(ret.c_str(), "{}") + << "invalid nested target must propagate as update failure"; + } + { + std::string target = R"({"a":{"foo": @}})"; // invalid token inside nested + std::string source = R"({"a":{"bar":5}})"; + auto ret = + sonic_json::UpdateLazy(target, source); + EXPECT_STREQ(ret.c_str(), "{}") + << "invalid nested target must propagate as update failure"; + } +} + } // namespace diff --git a/tests/parser_oom_test.cpp b/tests/parser_oom_test.cpp new file mode 100644 index 00000000..3b455eb8 --- /dev/null +++ b/tests/parser_oom_test.cpp @@ -0,0 +1,650 @@ +/* + * Copyright ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include + +#include "sonic/dom/handler.h" +#include "sonic/dom/parser.h" +#include "sonic/sonic.h" + +namespace { + +using namespace sonic_json; + +TEST(Document, VersionStringExpandsMacros) { + const std::string v = SONIC_VERSION_STRING; + EXPECT_EQ(std::string::npos, v.find("SONIC_MAJOR_VERSION")); + EXPECT_EQ(std::string::npos, v.find("SONIC_PATCH")); + EXPECT_EQ(2u, std::count(v.begin(), v.end(), '.')); + for (char c : v) { + EXPECT_TRUE(c == '.' || (c >= '0' && c <= '9')); + } +} + +TEST(Document, RejectInvalidSurrogate) { + { + Document doc; + std::string json = "\"\\uDC00\""; + doc.Parse(json); + EXPECT_TRUE(doc.HasParseError()); + } + { + Document doc; + std::string json = "\"\\uD800\\u0041\""; + doc.Parse(json); + EXPECT_TRUE(doc.HasParseError()); + } + { + Document doc; + std::string json = "\"\\uDFFF\""; + doc.Parse(json); + EXPECT_TRUE(doc.HasParseError()); + } + { + Document doc; + std::string json = "\"\\uD83D\\uDE0A\""; + doc.Parse(json); + EXPECT_FALSE(doc.HasParseError()); + } +} + +TEST(Document, ReparseDoesNotLeakPoolMemory) { + Document doc; + doc.Parse(R"({"key":"value","num":42})"); + ASSERT_FALSE(doc.HasParseError()); + size_t size_after_first = doc.GetAllocator().Size(); + + doc.Parse(R"({"key":"value","num":42})"); + ASSERT_FALSE(doc.HasParseError()); + size_t size_after_second = doc.GetAllocator().Size(); + + EXPECT_EQ(size_after_first, size_after_second); +} + +struct AlwaysOomAllocator { + void* Malloc(size_t) { return nullptr; } + void* Realloc(void*, size_t, size_t) { return nullptr; } + static void Free(void*) {} + static constexpr bool kNeedFree = false; +}; + +static std::vector pad_json_bytes(const char* json, size_t len) { + std::vector buf(len + 64, 0); + std::memcpy(buf.data(), json, len); + buf[len] = 'x'; + buf[len + 1] = '"'; + buf[len + 2] = 'x'; + return buf; +} + +TEST(Document, OomDoesNotCrashPushBack) { + AlwaysOomAllocator alloc; + DNode arr; + arr.SetArray(); + DNode val; + val.SetInt64(42); + arr.PushBack(std::move(val), alloc); + EXPECT_EQ(0u, arr.Size()); +} + +TEST(Document, NoFreeAllocatorWithoutClearCompilesAndRuns) { + GenericDocument> doc; + doc.Parse("{}"); + EXPECT_TRUE(doc.HasParseError()); +} + +TEST(Document, CreateMapOomFromEmptyObjectReturnsFalse) { + AlwaysOomAllocator alloc; + DNode obj; + obj.SetObject(); + EXPECT_FALSE(obj.CreateMap(alloc)); +} + +TEST(Document, OomDoesNotCrashCopyObject) { + Document src; + src.Parse(R"({"a":1,"b":2,"c":3})"); + ASSERT_FALSE(src.HasParseError()); + + AlwaysOomAllocator alloc; + DNode dst(src, alloc); + EXPECT_TRUE(dst.IsObject()); + EXPECT_EQ(0u, dst.Size()); +} + +TEST(Document, OomDoesNotCrashCopyArray) { + Document src; + src.Parse(R"([1, 2, 3])"); + ASSERT_FALSE(src.HasParseError()); + + AlwaysOomAllocator alloc; + DNode dst(src, alloc); + EXPECT_TRUE(dst.IsArray()); + EXPECT_EQ(0u, dst.Size()); +} + +struct OomAfterNthAllocator { + size_t remaining = 0; + OomAfterNthAllocator() = default; + explicit OomAfterNthAllocator(size_t n) : remaining(n) {} + void* Malloc(size_t n) { + if (remaining == 0) return nullptr; + --remaining; + return std::malloc(n); + } + void* Realloc(void* p, size_t, size_t new_size) { + if (new_size == 0) { + std::free(p); + return nullptr; + } + if (remaining == 0) return nullptr; + if (p == nullptr) --remaining; + return std::realloc(p, new_size); + } + static void Free(void* p) { std::free(p); } + static constexpr bool kNeedFree = true; +}; + +TEST(Document, CreateMapOomForMapStorageReturnsFalse) { + OomAfterNthAllocator alloc(1); + DNode obj; + obj.SetObject(); + EXPECT_FALSE(obj.CreateMap(alloc)); +} + +TEST(Document, OomDoesNotCrashParseObject) { + OomAfterNthAllocator alloc(1); + GenericDocument> doc(&alloc); + doc.Parse(R"({"a":1,"b":2,"c":3})"); + EXPECT_TRUE(doc.HasParseError()); +} + +TEST(Document, OomDoesNotCrashParseArray) { + OomAfterNthAllocator alloc(1); + GenericDocument> doc(&alloc); + doc.Parse("[1,2,3]"); + EXPECT_TRUE(doc.HasParseError()); +} + +TEST(Document, ParseImplHandlesRepeatedOomCleanly) { + OomAfterNthAllocator alloc(0); + GenericDocument> doc(&alloc); + doc.Parse("{}"); + EXPECT_TRUE(doc.HasParseError()); + EXPECT_EQ(doc.GetParseError(), kErrorNoMem); + doc.Parse("[]"); + EXPECT_TRUE(doc.HasParseError()); + EXPECT_EQ(doc.GetParseError(), kErrorNoMem); +} + +struct SentinelTrackingAllocator { + static int balance; + void* Malloc(size_t n) { + ++balance; + void* p = std::malloc(n); + if (p) std::memset(p, '"', n); + return p; + } + void* Realloc(void* p, size_t, size_t n) { return std::realloc(p, n); } + static void Free(void* p) { + if (p) --balance; + std::free(p); + } +}; +int SentinelTrackingAllocator::balance = 0; + +struct RejectKeyLazySAX { + using Allocator = SentinelTrackingAllocator; + SentinelTrackingAllocator alloc; + bool key_called = false; + Allocator& GetAllocator() { return alloc; } + bool StartObject() { return true; } + bool EndObject(size_t) { return true; } + bool StartArray() { return true; } + bool EndArray(size_t) { return true; } + bool Key(const char*, size_t, size_t) { + key_called = true; + return false; + } + bool Raw(const char*, size_t) { return true; } +}; + +TEST(Document, ParseLazyEscapedKeyOomReportsNoMem) { + struct OomLazySAX { + using Allocator = OomAfterNthAllocator; + OomAfterNthAllocator alloc; + OomLazySAX() : alloc(0) {} + Allocator& GetAllocator() { return alloc; } + bool StartObject() { return true; } + bool EndObject(size_t) { return true; } + bool StartArray() { return true; } + bool EndArray(size_t) { return true; } + bool Key(const char*, size_t, size_t) { return true; } + bool Raw(const char*, size_t) { return true; } + }; + OomLazySAX sax; + Parser p; + const char* json = R"({"\n": 1})"; + auto buf = pad_json_bytes(json, std::strlen(json)); + auto res = p.ParseLazy(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kErrorNoMem, res.Error()); +} + +struct TrackingNthOomAllocator { + static int balance; + static size_t remaining; + void* Malloc(size_t n) { + if (remaining == 0) return nullptr; + --remaining; + ++balance; + return std::malloc(n); + } + void* Realloc(void* p, size_t, size_t n) { return std::realloc(p, n); } + static void Free(void* p) { + if (p) --balance; + std::free(p); + } + static constexpr bool kNeedFree = true; +}; +int TrackingNthOomAllocator::balance = 0; +size_t TrackingNthOomAllocator::remaining = 0; + +TEST(Document, AddMemberWithoutMapOnOomLeavesObjectEmpty) { + OomAfterNthAllocator alloc(0); + DNode obj; + obj.SetObject(); + DNode val; + val.SetInt64(1); + obj.AddMember("k", std::move(val), alloc); + EXPECT_EQ(0u, obj.Size()); +} + +TEST(LazySAXHandler, EndObjectOomLeavesStackMatchingSuccessArm) { + TrackingNthOomAllocator::balance = 0; + TrackingNthOomAllocator::remaining = 1; + TrackingNthOomAllocator alloc; + + using Node = DNode; + LazySAXHandler sax(alloc); + + ASSERT_TRUE(sax.StartObject()); + constexpr char kKey[] = "key"; + void* buf = alloc.Malloc(sizeof(kKey)); + ASSERT_NE(nullptr, buf); + std::memcpy(buf, kKey, sizeof(kKey)); + ASSERT_TRUE(sax.Key(static_cast(buf), sizeof(kKey) - 1, 1)); + ASSERT_TRUE(sax.Raw("1", 1)); + ASSERT_TRUE(sax.EndObject(1)); + EXPECT_TRUE(sax.oom_); + EXPECT_EQ(sizeof(Node), sax.stack_.Size()); +} + +TEST(Document, ParseLazyFreesEscapedKeyOnKeyFailure) { + SentinelTrackingAllocator::balance = 0; + + RejectKeyLazySAX sax; + Parser p; + const char* json = R"({"\n": 1})"; + auto buf = pad_json_bytes(json, std::strlen(json)); + p.ParseLazy(buf.data(), std::strlen(json), sax); + + ASSERT_TRUE(sax.key_called); + EXPECT_EQ(0, SentinelTrackingAllocator::balance); +} + +struct RejectingSAX { + bool reject_start_array = false; + bool reject_start_object = false; + bool reject_end_array = false; + bool reject_end_object = false; + bool reject_key = false; + bool reject_string = false; + bool reject_int = false; + bool reject_uint = false; + bool reject_double = false; + bool reject_numstr = false; + bool reject_raw = false; + bool reject_null = false; + bool reject_bool = false; + + bool Null() { return !reject_null; } + bool Bool(bool) { return !reject_bool; } + bool Int(int64_t) { return !reject_int; } + bool Uint(uint64_t) { return !reject_uint; } + bool Double(double) { return !reject_double; } + bool NumStr(StringView) { return !reject_numstr; } + bool Raw(const char*, size_t) { return !reject_raw; } + bool Key(StringView) { return !reject_key; } + bool String(StringView) { return !reject_string; } + bool StartArray() { return !reject_start_array; } + bool EndArray(uint32_t) { return !reject_end_array; } + bool StartObject() { return !reject_start_object; } + bool EndObject(uint32_t) { return !reject_end_object; } +}; + +static std::vector pad_json_for_parser(const char* json, size_t len) { + std::vector buf(len + 64, 0); + std::memcpy(buf.data(), json, len); + buf[len] = 'x'; + buf[len + 1] = '"'; + buf[len + 2] = 'x'; + return buf; +} + +TEST(Parser, StartArrayFalseAbortsParse) { + const char* json = "[1,2,3]"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_start_array = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, StartObjectFalseAbortsParse) { + const char* json = R"({"a":1})"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_start_object = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, EndArrayFalseAbortsParse) { + const char* json = "[1,2,3]"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_end_array = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, EndObjectFalseAbortsParse) { + const char* json = R"({"a":1})"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_end_object = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, KeyFalseAbortsParseWhenNotCheckKeyReturn) { + const char* json = R"({"a":1})"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_key = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, StringFalseAbortsParse) { + const char* json = R"(["x"])"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_string = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, NumberIntRejectionReportsSaxTermination) { + const char* json = "1"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_uint = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, NumberNegativeIntRejectionReportsSaxTermination) { + const char* json = "-1"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_int = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, NumberDoubleRejectionReportsSaxTermination) { + const char* json = "1.5"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_double = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, NumberIntegerAsRawRejectionReportsSaxTermination) { + const char* json = "123"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_raw = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, NestedNumberRejectionReportsSaxTermination) { + struct Case { + const char* json; + bool reject_double; + } cases[] = { + {R"({"a":1})", false}, + {R"([1])", false}, + {R"({"a":1.5})", true}, + }; + for (const auto& c : cases) { + auto buf = pad_json_for_parser(c.json, std::strlen(c.json)); + RejectingSAX sax; + if (c.reject_double) { + sax.reject_double = true; + } else { + sax.reject_uint = true; + } + Parser p; + auto res = p.Parse(buf.data(), std::strlen(c.json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); + } +} + +TEST(Parser, NumberOverflowAsNumStrRejectionReportsSaxTermination) { + const char* json = "18446744073709551616"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_numstr = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, NullRejectionReportsSaxTermination) { + const char* json = "null"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_null = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, BoolRejectionReportsSaxTermination) { + for (const char* json : {"true", "false"}) { + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_bool = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); + } +} + +TEST(Parser, PrimitiveRootStringRejectionReportsSaxTermination) { + const char* json = R"("x")"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_string = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +struct SkippingKeyCheckReturnSAX { + static constexpr bool check_key_return = true; + int keys_seen = 0; + bool Null() { return true; } + bool Bool(bool) { return true; } + bool Int(int64_t) { return true; } + bool Uint(uint64_t) { return true; } + bool Double(double) { return true; } + bool NumStr(StringView) { return true; } + bool Key(StringView) { + ++keys_seen; + return false; + } + bool String(StringView) { return true; } + bool StartArray() { return true; } + bool EndArray(uint32_t) { return true; } + bool StartObject() { return true; } + bool EndObject(uint32_t) { return true; } +}; + +TEST(Parser, KeyFalsePreservesSkipSemanticsUnderCheckKeyReturn) { + const char* json = R"({"a":1,"b":2})"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + SkippingKeyCheckReturnSAX sax; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kErrorNone, res.Error()); + EXPECT_EQ(2, sax.keys_seen); +} + +struct RejectAllLazySax { + using Allocator = SONIC_DEFAULT_ALLOCATOR; + Allocator alloc_; + Allocator& GetAllocator() { return alloc_; } + bool StartArray() { return false; } + bool EndArray(size_t) { return false; } + bool StartObject() { return false; } + bool EndObject(size_t) { return false; } + bool Key(const char*, size_t, size_t) { return false; } + bool Raw(const char*, size_t) { return false; } +}; + +TEST(ParseLazy, RawRejectionReportsSaxTermination) { + RejectAllLazySax sax; + Parser p; + const char* j = "42"; + auto buf = pad_json_bytes(j, 2); + auto r = p.ParseLazy(buf.data(), 2, sax); + EXPECT_EQ(r.Error(), kSaxTermination); +} + +TEST(ParseLazy, StartArrayRejectionReportsSaxTermination) { + RejectAllLazySax sax; + Parser p; + const char* j = "[1,2,3]"; + auto buf = pad_json_bytes(j, 7); + auto r = p.ParseLazy(buf.data(), 7, sax); + EXPECT_EQ(r.Error(), kSaxTermination); +} + +TEST(ParseLazy, StartObjectRejectionReportsSaxTermination) { + RejectAllLazySax sax; + Parser p; + const char* j = R"({"k":1})"; + auto buf = pad_json_bytes(j, 7); + auto r = p.ParseLazy(buf.data(), 7, sax); + EXPECT_EQ(r.Error(), kSaxTermination); +} + +struct AcceptAllLazySax { + using Allocator = SONIC_DEFAULT_ALLOCATOR; + Allocator alloc_; + Allocator& GetAllocator() { return alloc_; } + bool StartArray() { return true; } + bool EndArray(size_t) { return true; } + bool StartObject() { return true; } + bool EndObject(size_t) { return true; } + bool Key(const char*, size_t, size_t) { return true; } + bool Raw(const char*, size_t) { return true; } +}; + +TEST(ParseLazy, AcceptAllStillCompletesCleanly) { + AcceptAllLazySax sax; + Parser p; + const char* j = R"({"a":1,"b":[2,3]})"; + auto buf = pad_json_bytes(j, std::strlen(j)); + auto r = p.ParseLazy(buf.data(), std::strlen(j), sax); + EXPECT_EQ(r.Error(), kErrorNone); +} + +struct StringKeyCountingSAX { + int string_calls = 0; + int key_calls = 0; + bool Null() { return true; } + bool Bool(bool) { return true; } + bool Int(int64_t) { return true; } + bool Uint(uint64_t) { return true; } + bool Double(double) { return true; } + bool NumStr(StringView) { return true; } + bool Raw(const char*, size_t) { return true; } + bool Key(StringView) { + ++key_calls; + return true; + } + bool String(StringView) { + ++string_calls; + return true; + } + bool StartArray() { return true; } + bool EndArray(uint32_t) { return true; } + bool StartObject() { return true; } + bool EndObject(uint32_t) { return true; } +}; + +TEST(Parser, InvalidSurrogateInValueDoesNotInvokeStringCallback) { + const char* json = R"(["\uDC00"])"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + StringKeyCountingSAX sax; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_TRUE(res.Error() != kErrorNone); + EXPECT_EQ(0, sax.string_calls); +} + +TEST(Parser, InvalidSurrogateInKeyDoesNotInvokeKeyCallback) { + const char* json = R"({"\uDC00":1})"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + StringKeyCountingSAX sax; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_TRUE(res.Error() != kErrorNone); + EXPECT_EQ(0, sax.key_calls); +} + +} // namespace diff --git a/tests/writebuffer_test.cpp b/tests/writebuffer_test.cpp index 84c9129f..072ab5bb 100644 --- a/tests/writebuffer_test.cpp +++ b/tests/writebuffer_test.cpp @@ -18,6 +18,8 @@ #include +#include "sonic/internal/stack.h" + namespace { using namespace sonic_json; @@ -78,6 +80,17 @@ TEST(WriteBuffer, ToString) { wb.Push('c'); EXPECT_STREQ(wb.ToString(), "c"); } + // ToString()/ToStringView() must be callable on a const WriteBuffer&: + // they write the terminator into pre-reserved slack via a `mutable` + // stack, so the logical observable state does not change. This + // preserves v1.x source compatibility for callers that hold a const + // reference (e.g. CRTP str_impl() methods). + static_assert( + std::is_invocable_v, + "ToString() must be callable on a const WriteBuffer"); + static_assert(std::is_invocable_v, + "ToStringView() must be callable on a const WriteBuffer"); { const WriteBuffer cwb; EXPECT_STREQ(cwb.ToString(), ""); @@ -90,6 +103,36 @@ TEST(WriteBuffer, ToString) { } } +// Reserve must not update cap_ when realloc fails: callers check +// Size() < Capacity() to decide whether writes fit in the backing buffer. +TEST(Stack, ReservePreservesCapOnOom) { + internal::Stack s(256); + ASSERT_EQ(256u, s.Capacity()); + constexpr size_t kHuge = static_cast(1) << 60; + s.Reserve(kHuge); + EXPECT_EQ(256u, s.Capacity()); +} + +// ToString's fast path must not reallocate when one slack byte already +// covers the terminator: Grow(1) would otherwise fire at Size == Cap - 1 +// and invalidate any pointer a prior ToString()/Begin() handed out. +TEST(WriteBuffer, ToStringIsIdempotentWhenCapacityHasSlack) { + WriteBuffer wb(16); + const size_t cap_before = wb.Capacity(); + ASSERT_EQ(cap_before, 16u); + // Per-char pushes avoid Push(s, n)'s Grow(n+1), landing on Size == Cap - 1. + const char* text = "abcdefghijklmno"; + for (size_t i = 0; i < 15; ++i) wb.Push(text[i]); + ASSERT_EQ(wb.Size(), 15u); + ASSERT_EQ(wb.Capacity(), cap_before); + + const char* p1 = wb.ToString(); + EXPECT_STREQ(p1, "abcdefghijklmno"); + EXPECT_EQ(wb.Capacity(), cap_before); + const char* p2 = wb.ToString(); + EXPECT_EQ(p1, p2); +} + TEST(WriteBuffer, StringSize) { { WriteBuffer wb;