From 5dbf83cb6f5275e5f3b833d6ae024e4a35661d02 Mon Sep 17 00:00:00 2001 From: yangzhengguo Date: Wed, 22 Apr 2026 11:45:48 +0800 Subject: [PATCH] fix: handle OOM correctly in parser and DOM code fix OOM handling in parser, SAX, and DOM paths avoid null dereferences during mutation and parsing add tests for parser, lazy parse, and DOM OOM cases --- benchmark/sonic.hpp | 4 +- docs/usage.md | 21 + include/sonic/allocator.h | 30 +- include/sonic/dom/dynamicnode.h | 81 ++- include/sonic/dom/generic_document.h | 38 +- include/sonic/dom/handler.h | 96 ++- include/sonic/dom/parser.h | 240 ++++--- include/sonic/dom/schema_handler.h | 42 +- include/sonic/experiment/lazy_update.h | 24 +- .../internal/arch/common/unicode_common.h | 9 + include/sonic/internal/stack.h | 10 +- include/sonic/sonic.h | 8 +- include/sonic/writebuffer.h | 13 +- tests/allocator_test.cpp | 142 ++++ tests/document_test.cpp | 2 - tests/exp_update_test.cpp | 23 + tests/parser_oom_test.cpp | 650 ++++++++++++++++++ tests/writebuffer_test.cpp | 43 ++ 18 files changed, 1290 insertions(+), 186 deletions(-) create mode 100644 tests/parser_oom_test.cpp diff --git a/benchmark/sonic.hpp b/benchmark/sonic.hpp index fb36e859..76baa062 100644 --- a/benchmark/sonic.hpp +++ b/benchmark/sonic.hpp @@ -22,9 +22,7 @@ template class SonicStringResult : public StringResult> { public: - std::string_view str_impl() const { - return const_cast(wb).ToString(); - } + std::string_view str_impl() const { return wb.ToString(); } sonic_json::WriteBuffer wb; }; diff --git a/docs/usage.md b/docs/usage.md index 1e74916b..7b970b5b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -45,6 +45,10 @@ Document is the manager of Nodes. Sonic-Cpp organizes JSON value as a tree. Document also the root of JSON value tree. There is an allocator in Document, which you should use to allocate memory for Node and Document. +> **Note:** Re-parsing a `Document` discards the previous tree. Any raw +> pointers, iterators, or `DNode*` obtained from an earlier `Parse()` become +> invalid and must be re-acquired after each parse. + ### Query in object There are two ways to find members: `operator[]` or `FindMember`. We recommend using `FindMember`. @@ -209,6 +213,23 @@ using MyDoc = sonic_json::GenericDocument; Sonic uses rapidjson's allocator, you can define your own allocator follow [rapidjson allocaotr](http://rapidjson.org/md_doc_internals.html#InternalAllocator) +### Detecting OOM on Post-Parse Mutations + +DNode mutations like `PushBack`, `AddMember`, and `Reserve` do not return a +status code. When you use `MemoryPoolAllocator`, you can check +`HadOom()` / `ClearOom()` around these operations if you need to detect an +allocation failure: + +```c++ +auto& alloc = doc.GetAllocator(); +alloc.ClearOom(); +doc.PushBack(v, alloc); +if (alloc.HadOom()) { /* handle OOM */ } +``` + +The flag is sticky until cleared. This is a `MemoryPoolAllocator` feature, not +part of the abstract allocator concept. + ### JSON Pointer Sonic provides a JsonPointer class but doesn't support resolving the JSON pointer syntax of [RFC 6901](https://www.rfc-editor.org/rfc/rfc6901). We will support diff --git a/include/sonic/allocator.h b/include/sonic/allocator.h index 52958acc..6885502d 100644 --- a/include/sonic/allocator.h +++ b/include/sonic/allocator.h @@ -190,6 +190,9 @@ class MemoryPoolAllocator { ownBaseAllocator; //!< base allocator created by this object. size_t refcount; bool ownBuffer; + //!< Sticky OOM flag shared across refcounted copies. Atomic because + //!< the per-instance SpinLock does not synchronize different copies. + std::atomic hadOom; }; static const size_t SIZEOF_SHARED_DATA = SONIC_ALIGN(sizeof(SharedData)); @@ -226,6 +229,7 @@ class MemoryPoolAllocator { : 0)) { sonic_assert(baseAllocator_ != 0); sonic_assert(shared_ != 0); + new (&shared_->hadOom) std::atomic(false); if (baseAllocator) { shared_->ownBaseAllocator = 0; } else { @@ -258,12 +262,13 @@ class MemoryPoolAllocator { baseAllocator_(baseAllocator ? baseAllocator : new BaseAllocator()), shared_(static_cast(AlignBuffer(buffer, size))) { sonic_assert(size >= SIZEOF_SHARED_DATA + SIZEOF_CHUNK_HEADER); + new (&shared_->hadOom) std::atomic(false); shared_->chunkHead = GetChunkHead(shared_); shared_->chunkHead->capacity = size - SIZEOF_SHARED_DATA - SIZEOF_CHUNK_HEADER; shared_->chunkHead->size = 0; shared_->chunkHead->next = 0; - shared_->ownBaseAllocator = 0; + shared_->ownBaseAllocator = baseAllocator ? 0 : baseAllocator_; shared_->ownBuffer = false; shared_->refcount = 1; } @@ -312,6 +317,8 @@ class MemoryPoolAllocator { } Clear(); BaseAllocator* a = shared_->ownBaseAllocator; + using AtomicBool = std::atomic; + shared_->hadOom.~AtomicBool(); if (shared_->ownBuffer) { baseAllocator_->Free(shared_); } @@ -371,7 +378,10 @@ class MemoryPoolAllocator { LOCK_GUARD; if (sonic_unlikely(shared_->chunkHead->size + size > shared_->chunkHead->capacity)) { - if (!AddChunk(cp_.ChunkSize(size))) return NULL; + if (!AddChunk(cp_.ChunkSize(size))) { + shared_->hadOom.store(true, std::memory_order_release); + return NULL; + } } void* buffer = GetChunkBuffer(shared_) + shared_->chunkHead->size; @@ -412,9 +422,22 @@ class MemoryPoolAllocator { if (originalSize) std::memcpy(newBuffer, originalPtr, originalSize); return newBuffer; } + // Mark OOM even on the Malloc-copy fallback so the flag is set + // regardless of which internal path actually failed. + shared_->hadOom.store(true, std::memory_order_release); return nullptr; } + // Lets callers distinguish an OOM from a logical null (e.g. Malloc(0)). + bool HadOom() const { + sonic_assert(shared_->refcount > 0); + return shared_->hadOom.load(std::memory_order_acquire); + } + void ClearOom() { + sonic_assert(shared_->refcount > 0); + shared_->hadOom.store(false, std::memory_order_release); + } + //! Frees a memory block (concept Allocator) static void Free(void* ptr) noexcept { (void)ptr; } // Do nothing @@ -486,7 +509,8 @@ class MapAllocator { MapAllocator(const MapAllocator& rhs) : alloc_(rhs.alloc_) {} pointer allocate(size_type n, const void* = nullptr) { - return (T*)alloc_->Malloc(n * sizeof(T)); + if (alloc_ == nullptr || n == 0) return nullptr; + return static_cast(alloc_->Malloc(n * sizeof(T))); } void deallocate(void* p, size_type) { alloc_->Free(p); } diff --git a/include/sonic/dom/dynamicnode.h b/include/sonic/dom/dynamicnode.h index 74cc99aa..26786567 100644 --- a/include/sonic/dom/dynamicnode.h +++ b/include/sonic/dom/dynamicnode.h @@ -32,6 +32,9 @@ namespace sonic_json { +// OOM invariant: mutating operations (Reserve, AddMember, PushBack, ...) +// leave the node unchanged on allocation failure rather than propagating an +// error. Callers that need to detect OOM should use Allocator::HadOom(). template class DNode : public GenericNode> { public: @@ -82,6 +85,11 @@ class DNode : public GenericNode> { this->o.len = rhs.getTypeAndLen(); // Copy size and type. if (count > 0) { void* mem = containerMalloc(count, alloc); + if (sonic_unlikely(mem == nullptr)) { + this->setLength(0, kObject); + setChildren(nullptr); + break; + } rhsNodeType* rn = rhs.getObjChildrenFirst(); DNode* ln = (DNode*)((char*)mem + sizeof(MetaNode)); for (size_t i = 0; i < count * 2; i += 2) { @@ -98,8 +106,13 @@ class DNode : public GenericNode> { size_t a_size = rhs.Size(); this->a.len = rhs.getTypeAndLen(); // Copy size and type. if (a_size > 0) { - rhsNodeType* rn = rhs.getArrChildrenFirst(); void* mem = containerMalloc(a_size, alloc); + if (sonic_unlikely(mem == nullptr)) { + this->setLength(0, kArray); + setChildren(nullptr); + break; + } + rhsNodeType* rn = rhs.getArrChildrenFirst(); DNode* ln = (DNode*)((char*)mem + sizeof(MetaNode)); for (size_t i = 0; i < a_size; ++i) { new (ln + i) DNode(*(rn + i), alloc, copyString); @@ -302,14 +315,17 @@ class DNode : public GenericNode> { bool CreateMap(Allocator& alloc) { sonic_assert(this->IsObject()); sonic_assert(this->Capacity() >= this->Size()); - // if (this->Size() == 0) return false; + // Empty object: reserve meta storage first so children() is non-null. + // If the reserve OOMs, children() stays null and we bail instead of + // dereferencing it via getMapUnsafe() / setMap. if (nullptr == children()) { this->memberReserveImpl(16, alloc); + if (nullptr == children()) return false; } - if (getMapUnsfe()) return true; + if (getMapUnsafe()) return true; map_type* map = static_cast(alloc.Malloc(sizeof(map_type))); + if (nullptr == map) return false; new (map) map_type(MAType(&alloc)); - // SetMap(map); MemberNode* m = (MemberNode*)getObjChildrenFirstUnsafe(); for (size_t i = 0; i < this->Size(); ++i) { map->emplace(std::make_pair((m + i)->name.GetStringView(), i)); @@ -514,8 +530,9 @@ class DNode : public GenericNode> { DNode& reserveImpl(size_t new_cap, Allocator& alloc) { if (new_cap > this->Capacity()) { - setChildren(containerRealloc(children(), this->Capacity(), new_cap, - alloc)); + void* mem = + containerRealloc(children(), this->Capacity(), new_cap, alloc); + if (sonic_likely(mem != nullptr)) setChildren(mem); } return *this; } @@ -611,10 +628,13 @@ class DNode : public GenericNode> { if (new_cap > this->Capacity()) { void* old_ptr = children(); size_t old_cap = this->Capacity(); - setChildren( - containerRealloc(old_ptr, old_cap, new_cap, alloc)); - if (old_cap == 0) { - setMap(nullptr); // Set map as nullptr when first alloc memory. + void* mem = + containerRealloc(old_ptr, old_cap, new_cap, alloc); + if (sonic_likely(mem != nullptr)) { + setChildren(mem); + if (old_cap == 0) { + setMap(nullptr); // Set map as nullptr when first alloc memory. + } } } return *this; @@ -650,10 +670,9 @@ class DNode : public GenericNode> { sonic_force_inline void* containerMalloc(size_t cap, Allocator& alloc) { size_t alloc_size = cap * sizeof(T) + sizeof(MetaNode); void* mem = alloc.Malloc(alloc_size); - // init Metanode - MetaNode* meta = static_cast(mem); - new (meta) MetaNode(cap); - + if (sonic_likely(mem != nullptr)) { + new (static_cast(mem)) MetaNode(cap); + } return mem; } @@ -663,10 +682,9 @@ class DNode : public GenericNode> { size_t old_size = old_cap * sizeof(T) + sizeof(MetaNode); size_t new_size = new_cap * sizeof(T) + sizeof(MetaNode); void* mem = alloc.Realloc(old_ptr, old_size, new_size); - // init Metanode - MetaNode* meta = static_cast(mem); - meta->SetMetaCap(new_cap); - + if (sonic_likely(mem != nullptr)) { + static_cast(mem)->SetMetaCap(new_cap); + } return mem; } @@ -740,7 +758,7 @@ class DNode : public GenericNode> { return ((MetaNode*)(this->o.next.children))->map; } - sonic_force_inline map_type* getMapUnsfe() const { + sonic_force_inline map_type* getMapUnsafe() const { sonic_assert(this->IsObject()); return ((MetaNode*)(this->o.next.children))->map; } @@ -810,13 +828,17 @@ class DNode : public GenericNode> { size_t count = this->Size(); if (count >= this->Capacity()) { if (this->Capacity() == 0) { - setChildren(containerMalloc(k_default_obj_cap, alloc)); + void* mem = containerMalloc(k_default_obj_cap, alloc); + if (sonic_unlikely(mem == nullptr)) return this->MemberEnd(); + setChildren(mem); } else { size_t cap = this->Capacity(); cap += (cap + 1) / 2; // grow by factor 1.5 void* old_ptr = children(); - setChildren(containerRealloc(old_ptr, this->Capacity(), cap, - alloc)); + void* mem = + containerRealloc(old_ptr, this->Capacity(), cap, alloc); + if (sonic_unlikely(mem == nullptr)) return this->MemberEnd(); + setChildren(mem); } } @@ -824,6 +846,7 @@ class DNode : public GenericNode> { DNode name; if (copyKey) { name.SetString(key, alloc); + if (sonic_unlikely(name.IsNull())) return this->MemberEnd(); } else { name.SetString(key); } @@ -845,11 +868,11 @@ class DNode : public GenericNode> { if (nullptr == children()) { goto not_find; } - if (getMapUnsfe()) { - auto it = getMapUnsfe()->find(MSType(key.data(), key.size())); - if (it != getMapUnsfe()->end()) { + if (getMapUnsafe()) { + auto it = getMapUnsafe()->find(MSType(key.data(), key.size())); + if (it != getMapUnsafe()->end()) { m = memberBeginUnsafe() + it->second; - getMapUnsfe()->erase(it); + getMapUnsafe()->erase(it); goto find; } @@ -927,9 +950,9 @@ class DNode : public GenericNode> { if (this->Size() >= cap) { size_t new_cap = cap ? cap + (cap + 1) / 2 : k_default_array_cap; void* old_ptr = this->a.next.children; - DNode* new_child = - (DNode*)containerRealloc(old_ptr, cap, new_cap, alloc); - this->a.next.children = new_child; + void* new_ptr = containerRealloc(old_ptr, cap, new_cap, alloc); + if (sonic_unlikely(new_ptr == nullptr)) return *this; + this->a.next.children = new_ptr; } // add value to the last pos DNode& last = *(this->End()); diff --git a/include/sonic/dom/generic_document.h b/include/sonic/dom/generic_document.h index 2794344c..d3a0d22b 100644 --- a/include/sonic/dom/generic_document.h +++ b/include/sonic/dom/generic_document.h @@ -17,6 +17,8 @@ #pragma once #include +#include +#include #include "sonic/dom/dynamicnode.h" #include "sonic/dom/json_pointer.h" @@ -24,6 +26,14 @@ namespace sonic_json { +namespace internal { +template +struct has_clear : std::false_type {}; +template +struct has_clear().Clear())>> + : std::true_type {}; +} // namespace internal + template class Parser; template @@ -195,8 +205,15 @@ class GenericDocument : public NodeType { } void destroyDom() { - if (!Allocator::kNeedFree) { + if constexpr (!Allocator::kNeedFree) { this->setType(kNull); + if (own_alloc_) { + if constexpr (internal::has_clear::value) { + alloc_->Clear(); + } + str_ = nullptr; + schema_str_ = nullptr; + } return; } // NOTE: must free dynamic nodes at first @@ -213,15 +230,19 @@ class GenericDocument : public NodeType { GenericDocument& parseImpl(const char* json, size_t len) { Parser p; SAXHandler sax(*alloc_); + if (!sax.SetUp(StringView(json, len))) { + parse_result_ = kErrorNoMem; + return *this; + } parse_result_ = allocateStringBuffer(json, len); if (sonic_unlikely(HasParseError())) { return *this; } - if (!sax.SetUp(StringView(json, len))) { + parse_result_ = p.Parse(str_, len, sax); + if (sonic_unlikely(sax.oom_)) { parse_result_ = kErrorNoMem; return *this; } - parse_result_ = p.Parse(str_, len, sax); if (sonic_unlikely(HasParseError())) { return *this; } @@ -233,15 +254,22 @@ class GenericDocument : public NodeType { GenericDocument& parseSchemaImpl(const char* json, size_t len) { Parser p; SchemaHandler sax(this, *alloc_); + if (!sax.SetUp(StringView(json, len))) { + parse_result_ = kErrorNoMem; + return *this; + } parse_result_ = allocateSchemaStringBuffer(json, len); if (sonic_unlikely(HasParseError())) { return *this; } - if (!sax.SetUp(StringView(json, len))) { + parse_result_ = p.Parse(schema_str_, len, sax); + if (sonic_unlikely(sax.oom_)) { parse_result_ = kErrorNoMem; return *this; } - parse_result_ = p.Parse(schema_str_, len, sax); + if (sonic_unlikely(HasParseError())) { + return *this; + } return *this; } diff --git a/include/sonic/dom/handler.h b/include/sonic/dom/handler.h index c7475d37..d3d95544 100644 --- a/include/sonic/dom/handler.h +++ b/include/sonic/dom/handler.h @@ -34,13 +34,16 @@ class SAXHandler { using Allocator = typename NodeType::AllocatorType; using MemberType = typename NodeType::MemberNode; + bool oom_{false}; + SAXHandler() = default; SAXHandler(Allocator &alloc) : alloc_(&alloc) {} SAXHandler(const SAXHandler &) = delete; SAXHandler &operator=(const SAXHandler &rhs) = delete; SAXHandler(SAXHandler &&rhs) - : st_(rhs.st_), + : oom_(rhs.oom_), + st_(rhs.st_), np_(rhs.np_), cap_(rhs.cap_), parent_(rhs.parent_), @@ -49,6 +52,7 @@ class SAXHandler { rhs.cap_ = 0; rhs.np_ = 0; rhs.alloc_ = 0; + rhs.oom_ = false; } SAXHandler &operator=(SAXHandler &&rhs) { @@ -58,12 +62,14 @@ class SAXHandler { cap_ = rhs.cap_; parent_ = rhs.parent_; alloc_ = rhs.alloc_; + oom_ = rhs.oom_; rhs.st_ = nullptr; rhs.np_ = 0; rhs.cap_ = 0; rhs.parent_ = 0; rhs.alloc_ = 0; + rhs.oom_ = false; return *this; } @@ -74,9 +80,10 @@ class SAXHandler { size_t cap = len / 2 + 2; if (cap < 16) cap = 16; if (!st_ || cap_ < cap) { - st_ = static_cast( + NodeType *new_st = static_cast( std::realloc((void *)(st_), sizeof(NodeType) * cap)); - if (!st_) return false; + if (!new_st) return false; + st_ = new_st; cap_ = cap; } return true; @@ -170,9 +177,17 @@ class SAXHandler { obj.setLength(pairs, kObject); if (pairs) { void *mem = obj.template containerMalloc(pairs, *alloc_); - obj.setChildren(mem); - internal::Xmemcpy( - (void *)obj.getObjChildrenFirstUnsafe(), (void *)(&obj + 1), pairs); + if (sonic_unlikely(mem == nullptr)) { + NodeType *children = &obj + 1; + for (size_t i = 0; i < size_t(pairs) * 2; i++) children[i].~NodeType(); + obj.setLength(0, kObject); + obj.setChildren(nullptr); + oom_ = true; + } else { + obj.setChildren(mem); + internal::Xmemcpy( + (void *)obj.getObjChildrenFirstUnsafe(), (void *)(&obj + 1), pairs); + } } else { obj.setChildren(nullptr); } @@ -186,9 +201,18 @@ class SAXHandler { size_t old = arr.o.next.ofs; arr.setLength(count, kArray); if (count) { - arr.setChildren(arr.template containerMalloc(count, *alloc_)); - internal::Xmemcpy( - (void *)arr.getArrChildrenFirstUnsafe(), (void *)(&arr + 1), count); + void *mem = arr.template containerMalloc(count, *alloc_); + if (sonic_unlikely(mem == nullptr)) { + NodeType *children = &arr + 1; + for (size_t i = 0; i < count; i++) children[i].~NodeType(); + arr.setLength(0, kArray); + arr.setChildren(nullptr); + oom_ = true; + } else { + arr.setChildren(mem); + internal::Xmemcpy( + (void *)arr.getArrChildrenFirstUnsafe(), (void *)(&arr + 1), count); + } } else { arr.setChildren(nullptr); } @@ -214,14 +238,18 @@ class SAXHandler { if (sonic_likely(np_ < cap_)) { np_++; return true; - } else { - cap_ += cap_; - st_ = static_cast( - std::realloc((void *)(st_), sizeof(NodeType) * cap_)); - if (!st_) return false; - np_++; - return true; } + size_t new_cap = cap_ * 2; + NodeType *new_st = static_cast( + std::realloc((void *)(st_), sizeof(NodeType) * new_cap)); + if (!new_st) { + oom_ = true; + return false; + } + st_ = new_st; + cap_ = new_cap; + np_++; + return true; } NodeType *st_{nullptr}; @@ -262,10 +290,20 @@ class LazySAXHandler { NodeType &arr = *stack_.template Begin(); arr.setLength(count, kArray); if (count) { - arr.setChildren(arr.template containerMalloc(count, *alloc_)); - internal::Xmemcpy( - (void *)arr.getArrChildrenFirstUnsafe(), (void *)(&arr + 1), count); - stack_.Pop(count); + void *mem = arr.template containerMalloc(count, *alloc_); + if (sonic_unlikely(mem == nullptr)) { + NodeType *children = &arr + 1; + for (size_t i = 0; i < count; i++) children[i].~NodeType(); + stack_.Pop(count); + arr.setLength(0, kArray); + arr.setChildren(nullptr); + oom_ = true; + } else { + arr.setChildren(mem); + internal::Xmemcpy( + (void *)arr.getArrChildrenFirstUnsafe(), (void *)(&arr + 1), count); + stack_.Pop(count); + } } else { arr.setChildren(nullptr); } @@ -277,10 +315,19 @@ class LazySAXHandler { obj.setLength(pairs, kObject); if (pairs) { void *mem = obj.template containerMalloc(pairs, *alloc_); - obj.setChildren(mem); - internal::Xmemcpy( - (void *)obj.getObjChildrenFirstUnsafe(), (void *)(&obj + 1), pairs); - stack_.Pop(pairs); + if (sonic_unlikely(mem == nullptr)) { + NodeType *children = &obj + 1; + for (size_t i = 0; i < size_t(pairs) * 2; i++) children[i].~NodeType(); + stack_.Pop(pairs); + obj.setLength(0, kObject); + obj.setChildren(nullptr); + oom_ = true; + } else { + obj.setChildren(mem); + internal::Xmemcpy( + (void *)obj.getObjChildrenFirstUnsafe(), (void *)(&obj + 1), pairs); + stack_.Pop(pairs); + } } else { obj.setChildren(nullptr); } @@ -307,6 +354,7 @@ class LazySAXHandler { // allocator for node stack and string buffers Allocator *alloc_{nullptr}; internal::Stack stack_{}; + bool oom_{false}; }; } // namespace sonic_json diff --git a/include/sonic/dom/parser.h b/include/sonic/dom/parser.h index c1c7458b..3521e6c2 100644 --- a/include/sonic/dom/parser.h +++ b/include/sonic/dom/parser.h @@ -140,12 +140,16 @@ class Parser { template sonic_force_inline bool parseStrInPlace(SAX &sax) { - return sax.String(parseStringHelper()); + StringView sv = parseStringHelper(); + if (sonic_unlikely(err_ != kErrorNone)) return true; + return sax.String(sv); } template sonic_force_inline bool parseKeyInPlace(SAX &sax) { - return sax.Key(parseStringHelper()); + StringView sv = parseStringHelper(); + if (sonic_unlikely(err_ != kErrorNone)) return true; + return sax.Key(sv); } sonic_force_inline bool carry_one(char c, uint64_t &sum) const { @@ -185,7 +189,6 @@ class Parser { d /= internal::kPow10Tab[-exp10]; return true; } - return false; } SonicError parseFloatEiselLemire64(double &dbl, int exp10, uint64_t man, @@ -245,33 +248,33 @@ class Parser { } \ } while (0) -#define SET_INT_AND_RETURN(int_val) \ - do { \ - if (!sax.Int(int_val)) RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ +#define SET_INT_AND_RETURN(int_val) \ + do { \ + if (!sax.Int(int_val)) RETURN_SET_ERROR_CODE(kSaxTermination); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) -#define SET_UINT_AND_RETURN(int_val) \ - do { \ - if (!sax.Uint(int_val)) RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ +#define SET_UINT_AND_RETURN(int_val) \ + do { \ + if (!sax.Uint(int_val)) RETURN_SET_ERROR_CODE(kSaxTermination); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) -#define SET_DOUBLE_AND_RETURN(dbl) \ - do { \ - if (!sax.Double(dbl)) RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ +#define SET_DOUBLE_AND_RETURN(dbl) \ + do { \ + if (!sax.Double(dbl)) RETURN_SET_ERROR_CODE(kSaxTermination); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) -#define SET_U64_AS_DOUBLE_AND_RETURN(int_val) \ - do { \ - union { \ - double d; \ - uint64_t u; \ - } du; \ - du.u = int_val; \ - if (!sax.Double(du.d)) RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ +#define SET_U64_AS_DOUBLE_AND_RETURN(int_val) \ + do { \ + union { \ + double d; \ + uint64_t u; \ + } du; \ + du.u = int_val; \ + if (!sax.Double(du.d)) RETURN_SET_ERROR_CODE(kSaxTermination); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) static constexpr uint64_t kUint64Max = 0xFFFFFFFFFFFFFFFF; @@ -326,7 +329,7 @@ class Parser { // Zero Integer if constexpr (parseFlags & ParseFlags::kParseIntegerAsRaw) { if (!sax.Raw(s + start_idx, i - start_idx)) - RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); + RETURN_SET_ERROR_CODE(kSaxTermination); RETURN_SET_ERROR_CODE(kErrorNone); } SET_UINT_AND_RETURN(0); @@ -367,7 +370,7 @@ class Parser { // Integer if constexpr (parseFlags & ParseFlags::kParseIntegerAsRaw) { if (!sax.Raw(s + start_idx, i - start_idx)) - RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); + RETURN_SET_ERROR_CODE(kSaxTermination); RETURN_SET_ERROR_CODE(kErrorNone); } @@ -516,7 +519,7 @@ class Parser { return parseNumberAsString(sax); } } - if (!sax.Double(d)) RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); + if (!sax.Double(d)) RETURN_SET_ERROR_CODE(kSaxTermination); RETURN_SET_ERROR_CODE(error_code); } @@ -556,22 +559,22 @@ class Parser { } \ } while (0) -#define SET_INT_AND_RETURN(int_val) \ - do { \ - if (!sax.Int(int_val)) RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ +#define SET_INT_AND_RETURN(int_val) \ + do { \ + if (!sax.Int(int_val)) RETURN_SET_ERROR_CODE(kSaxTermination); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) -#define SET_UINT_AND_RETURN(int_val) \ - do { \ - if (!sax.Uint(int_val)) RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ +#define SET_UINT_AND_RETURN(int_val) \ + do { \ + if (!sax.Uint(int_val)) RETURN_SET_ERROR_CODE(kSaxTermination); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) -#define SET_DOUBLE_AND_RETURN(dbl) \ - do { \ - if (!sax.Double(dbl)) RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); \ - RETURN_SET_ERROR_CODE(kErrorNone); \ +#define SET_DOUBLE_AND_RETURN(dbl) \ + do { \ + if (!sax.Double(dbl)) RETURN_SET_ERROR_CODE(kSaxTermination); \ + RETURN_SET_ERROR_CODE(kErrorNone); \ } while (0) size_t i = pos_ - 1; @@ -689,7 +692,7 @@ class Parser { double_string_fast: // parse floating number as json string value if (!sax.NumStr(StringView(const_cast(s + start), i - start))) { - RETURN_SET_ERROR_CODE(kParseErrorInvalidChar); + RETURN_SET_ERROR_CODE(kSaxTermination); } RETURN_SET_ERROR_CODE(kErrorNone); @@ -702,6 +705,7 @@ class Parser { template void parsePrimitives(SAX &sax) { + bool ok = true; switch (json_buf_[pos_ - 1]) { case '0': case '1': @@ -715,26 +719,31 @@ class Parser { case '9': case '-': parseNumber(sax); - break; + return; case '"': - parseStrInPlace(sax); + ok = parseStrInPlace(sax); // only need check length when parsing string primitives, because the // padding "x\"x" makes parsing other invalid JSON always failed if (pos_ > len_) { setParseError(kParseErrorInvalidChar); + return; } break; case 'f': - parseFalse(sax); + ok = parseFalse(sax); break; case 't': - parseTrue(sax); + ok = parseTrue(sax); break; case 'n': - parseNull(sax); + ok = parseNull(sax); break; default: setParseError(kParseErrorInvalidChar); + return; + } + if (sonic_unlikely(!ok) && err_ == kErrorNone) { + err_ = kSaxTermination; } } @@ -753,6 +762,13 @@ class Parser { goto err_invalid_char; \ } \ } while (0) +#define sonic_sax_check(expr) \ + do { \ + if (sonic_unlikely(!(expr))) { \ + if (err_ == kErrorNone) err_ = kSaxTermination; \ + return; \ + } \ + } while (0) using namespace sonic_json::internal; // TODO (liuq19): vector is a temporary choice, will optimize in future. @@ -764,21 +780,21 @@ class Parser { uint8_t c = scan.SkipSpace(json_buf_, pos_); switch (c) { case '[': { - sax.StartArray(); + sonic_sax_check(sax.StartArray()); depth.push_back(kArrMask); c = scan.SkipSpace(json_buf_, pos_); if (c == ']') { - sax.EndArray(0); + sonic_sax_check(sax.EndArray(0)); goto scope_end; } goto arr_val; } case '{': { - sax.StartObject(); + sonic_sax_check(sax.StartObject()); depth.push_back(kObjMask); c = scan.SkipSpace(json_buf_, pos_); if (c == '}') { - sax.EndObject(0); + sonic_sax_check(sax.EndObject(0)); goto scope_end; } goto obj_key; @@ -807,30 +823,35 @@ class Parser { } if (c == '}') { pos_++; - sax.EndObject(depth.back()); + sonic_sax_check(sax.EndObject(depth.back())); goto scope_end; } goto err_invalid_char; } + } else if (sonic_unlikely(!found)) { + // Without CheckKeyReturn, `false` from Key() is a handler rejection + // (e.g. OOM), not a skip signal. + if (err_ == kErrorNone) err_ = kSaxTermination; + return; } c = scan.SkipSpace(json_buf_, pos_); switch (c) { case '{': { - sax.StartObject(); + sonic_sax_check(sax.StartObject()); depth.push_back(kObjMask); c = scan.SkipSpace(json_buf_, pos_); if (c == '}') { - sax.EndObject(0); + sonic_sax_check(sax.EndObject(0)); goto scope_end; } goto obj_key; } case '[': { - sax.StartArray(); + sonic_sax_check(sax.StartArray()); depth.push_back(kArrMask); c = scan.SkipSpace(json_buf_, pos_); if (c == ']') { - sax.EndArray(0); + sonic_sax_check(sax.EndArray(0)); goto scope_end; } goto arr_val; @@ -849,20 +870,35 @@ class Parser { parseNumber(sax); sonic_check_err(); break; - case 't': - parseTrue(sax); + case 't': { + bool ok = parseTrue(sax); sonic_check_err(); + if (sonic_unlikely(!ok)) { + err_ = kSaxTermination; + return; + } break; - case 'f': - parseFalse(sax); + } + case 'f': { + bool ok = parseFalse(sax); sonic_check_err(); + if (sonic_unlikely(!ok)) { + err_ = kSaxTermination; + return; + } break; - case 'n': - parseNull(sax); + } + case 'n': { + bool ok = parseNull(sax); sonic_check_err(); + if (sonic_unlikely(!ok)) { + err_ = kSaxTermination; + return; + } break; + } case '"': - parseStrInPlace(sax); + sonic_sax_check(parseStrInPlace(sax)); sonic_check_err(); break; default: @@ -879,7 +915,7 @@ class Parser { if (sonic_unlikely(c != '}')) { goto err_invalid_char; } - sax.EndObject(depth.back()); + sonic_sax_check(sax.EndObject(depth.back())); scope_end: sonic_check_err(); @@ -896,21 +932,21 @@ class Parser { arr_val: switch (c) { case '{': { - sax.StartObject(); + sonic_sax_check(sax.StartObject()); depth.push_back(kObjMask); c = scan.SkipSpace(json_buf_, pos_); if (c == '}') { - sax.EndObject(0); + sonic_sax_check(sax.EndObject(0)); goto scope_end; } goto obj_key; } case '[': { - sax.StartArray(); + sonic_sax_check(sax.StartArray()); depth.push_back(kArrMask); c = scan.SkipSpace(json_buf_, pos_); if (c == ']') { - sax.EndArray(0); + sonic_sax_check(sax.EndArray(0)); goto scope_end; } goto arr_val; @@ -929,20 +965,35 @@ class Parser { parseNumber(sax); sonic_check_err(); break; - case 't': - parseTrue(sax); + case 't': { + bool ok = parseTrue(sax); sonic_check_err(); + if (sonic_unlikely(!ok)) { + err_ = kSaxTermination; + return; + } break; - case 'f': - parseFalse(sax); + } + case 'f': { + bool ok = parseFalse(sax); sonic_check_err(); + if (sonic_unlikely(!ok)) { + err_ = kSaxTermination; + return; + } break; - case 'n': - parseNull(sax); + } + case 'n': { + bool ok = parseNull(sax); sonic_check_err(); + if (sonic_unlikely(!ok)) { + err_ = kSaxTermination; + return; + } break; + } case '"': - parseStrInPlace(sax); + sonic_sax_check(parseStrInPlace(sax)); sonic_check_err(); break; default: @@ -957,7 +1008,7 @@ class Parser { goto arr_val; } if (sonic_likely(c == ']')) { - sax.EndArray(depth.back() & (kArrMask - 1)); + sonic_sax_check(sax.EndArray(depth.back() & (kArrMask - 1))); goto scope_end; } goto err_invalid_char; @@ -965,8 +1016,10 @@ class Parser { doc_end: return; err_invalid_char: - err_ = kParseErrorInvalidChar; + if (err_ == kErrorNone) err_ = kParseErrorInvalidChar; return; +#undef sonic_sax_check +#undef sonic_check_err } // parseLazyImpl only mark the json positions, and not parse any more, even @@ -987,22 +1040,29 @@ class Parser { size_t sn = 0; const uint8_t *src, *sdst; +#define sonic_lazy_sax_check(expr) \ + do { \ + if (sonic_unlikely(!(expr))) { \ + return ParseResult(kSaxTermination, pos); \ + } \ + } while (0) + switch (c) { case '[': { - sax.StartArray(); + sonic_lazy_sax_check(sax.StartArray()); c = scan.SkipSpaceSafe(data, pos, len); if (c == ']') { - sax.EndArray(0); + sonic_lazy_sax_check(sax.EndArray(0)); return kErrorNone; } pos--; goto arr_val; } case '{': { - sax.StartObject(); + sonic_lazy_sax_check(sax.StartObject()); c = scan.SkipSpaceSafe(data, pos, len); if (c == '}') { - sax.EndObject(0); + sonic_lazy_sax_check(sax.EndObject(0)); return kErrorNone; } goto obj_key; @@ -1012,7 +1072,8 @@ class Parser { pos--; start = scan.SkipOne(data, pos, len); if (start < 0) goto skip_error; - sax.Raw(reinterpret_cast(data + start), pos - start); + sonic_lazy_sax_check( + sax.Raw(reinterpret_cast(data + start), pos - start)); return kErrorNone; } } @@ -1033,6 +1094,9 @@ class Parser { if (skips == 2) { // parse escaped strings uint8_t *dst = (uint8_t *)alloc.Malloc(sn + 32); + if (sonic_unlikely(dst == nullptr)) { + return ParseResult(kErrorNoMem, pos); + } sdst = dst; std::memcpy(dst, src, sn); sn = internal::parseStringInplace(dst, err); @@ -1046,7 +1110,8 @@ class Parser { } key = StringView(reinterpret_cast(sdst), sn); if (!sax.Key(key.data(), key.size(), allocated)) { - goto err_invalid_char; + if (allocated) Allocator::Free((void *)(sdst)); + return ParseResult(kSaxTermination, pos); } c = scan.SkipSpaceSafe(data, pos, len); if (sonic_unlikely(c != ':')) { @@ -1054,7 +1119,8 @@ class Parser { } start = scan.SkipOne(data, pos, len); if (start < 0) goto skip_error; - sax.Raw(reinterpret_cast(data + start), pos - start); + sonic_lazy_sax_check( + sax.Raw(reinterpret_cast(data + start), pos - start)); cnt++; c = scan.SkipSpaceSafe(data, pos, len); if (c == ',') { @@ -1064,13 +1130,14 @@ class Parser { if (sonic_unlikely(c != '}')) { goto err_invalid_char; } - sax.EndObject(cnt); + sonic_lazy_sax_check(sax.EndObject(cnt)); return kErrorNone; arr_val: start = scan.SkipOne(data, pos, len); if (start < 0) goto skip_error; - sax.Raw(reinterpret_cast(data + start), pos - start); + sonic_lazy_sax_check( + sax.Raw(reinterpret_cast(data + start), pos - start)); cnt++; c = scan.SkipSpaceSafe(data, pos, len); if (c == ',') { @@ -1079,17 +1146,16 @@ class Parser { if (sonic_unlikely(c != ']')) { goto err_invalid_char; } - sax.EndArray(cnt); + sonic_lazy_sax_check(sax.EndArray(cnt)); return kErrorNone; err_invalid_char: return ParseResult(kParseErrorInvalidChar, pos - 1); skip_error: return ParseResult(SonicError(-start), pos - 1); +#undef sonic_lazy_sax_check } -#undef sonic_check_err - private: sonic_force_inline void reset() { pos_ = 0; diff --git a/include/sonic/dom/schema_handler.h b/include/sonic/dom/schema_handler.h index 2b7f5dde..8ad0385c 100644 --- a/include/sonic/dom/schema_handler.h +++ b/include/sonic/dom/schema_handler.h @@ -34,6 +34,8 @@ class SchemaHandler { using Allocator = typename NodeType::AllocatorType; using MemberType = typename NodeType::MemberNode; + bool oom_{false}; + SchemaHandler() = default; SchemaHandler(NodeType *root, Allocator &alloc) : parent_node_(root), cur_node_(root), alloc_(&alloc) {} @@ -41,7 +43,8 @@ class SchemaHandler { SchemaHandler(const SchemaHandler &) = delete; SchemaHandler &operator=(const SchemaHandler &rhs) = delete; SchemaHandler(SchemaHandler &&rhs) - : st_(rhs.st_), + : oom_(rhs.oom_), + st_(rhs.st_), parent_node_(rhs.parent_node_), cur_node_(rhs.cur_node_), np_(rhs.np_), @@ -56,6 +59,7 @@ class SchemaHandler { rhs.np_ = 0; rhs.alloc_ = nullptr; rhs.found_node_count_ = 0; + rhs.oom_ = false; parent_st_ = std::move(rhs.parent_st_); found_count_st_ = std::move(rhs.found_count_st_); } @@ -70,6 +74,7 @@ class SchemaHandler { parent_ = rhs.parent_; found_node_count_ = rhs.found_node_count_; alloc_ = rhs.alloc_; + oom_ = rhs.oom_; rhs.st_ = nullptr; rhs.parent_node_ = nullptr; @@ -79,6 +84,7 @@ class SchemaHandler { rhs.parent_ = 0; rhs.alloc_ = nullptr; rhs.found_node_count_ = 0; + rhs.oom_ = false; parent_st_ = std::move(rhs.parent_st_); found_count_st_ = std::move(rhs.found_count_st_); return *this; @@ -91,9 +97,10 @@ class SchemaHandler { size_t cap = len / 2 + 2; if (cap < 16) cap = 16; if (!st_ || cap_ < cap) { - st_ = static_cast( + NodeType *new_st = static_cast( std::realloc((void *)(st_), sizeof(NodeType) * cap)); - if (!st_) return false; + if (!new_st) return false; + st_ = new_st; cap_ = cap; } return true; @@ -287,9 +294,17 @@ class SchemaHandler { obj.setLength(pairs, kObject); if (pairs) { void *mem = obj.template containerMalloc(pairs, *alloc_); - obj.setChildren(mem); - internal::Xmemcpy( - (void *)obj.getObjChildrenFirstUnsafe(), obj_member_ptr, pairs); + if (sonic_unlikely(mem == nullptr)) { + NodeType *children = static_cast(obj_member_ptr); + for (size_t i = 0; i < size_t(pairs) * 2; i++) children[i].~NodeType(); + obj.setLength(0, kObject); + obj.setChildren(nullptr); + oom_ = true; + } else { + obj.setChildren(mem); + internal::Xmemcpy( + (void *)obj.getObjChildrenFirstUnsafe(), obj_member_ptr, pairs); + } } else { obj.setChildren(nullptr); } @@ -317,9 +332,18 @@ class SchemaHandler { NodeType &arr = *arr_ptr; arr.setLength(count, kArray); if (count) { - arr.setChildren(arr.template containerMalloc(count, *alloc_)); - internal::Xmemcpy( - (void *)arr.getArrChildrenFirstUnsafe(), arr_element_ptr, count); + void *mem = arr.template containerMalloc(count, *alloc_); + if (sonic_unlikely(mem == nullptr)) { + NodeType *children = static_cast(arr_element_ptr); + for (size_t i = 0; i < count; i++) children[i].~NodeType(); + arr.setLength(0, kArray); + arr.setChildren(nullptr); + oom_ = true; + } else { + arr.setChildren(mem); + internal::Xmemcpy( + (void *)arr.getArrChildrenFirstUnsafe(), arr_element_ptr, count); + } } else { arr.setChildren(nullptr); } diff --git a/include/sonic/experiment/lazy_update.h b/include/sonic/experiment/lazy_update.h index 591b8fa4..4f46f96b 100644 --- a/include/sonic/experiment/lazy_update.h +++ b/include/sonic/experiment/lazy_update.h @@ -35,6 +35,9 @@ static inline ParseResult ParseLazy(NodeType &node, StringView json, if (ret.Error()) { return ret; } + if (sonic_unlikely(sax.oom_)) { + return ParseResult(kErrorNoMem, json.size()); + } NodeType *root = sax.stack_.template Begin(); node = std::move(*root); return ret; @@ -43,19 +46,18 @@ static inline ParseResult ParseLazy(NodeType &node, StringView json, template static inline SonicError UpdateNodeLazy(NodeType &target, NodeType &source, Allocator &alloc) { - ParseResult ret; SonicError err = kErrorNone; - // check the raw type - if (target.IsRaw() && *target.GetRaw().data() == '{') { - ret = ParseLazy(target, target.GetRaw(), - alloc); - } - if (source.IsRaw() && *source.GetRaw().data() == '{') { - ret = ParseLazy(source, source.GetRaw(), - alloc); + if (target.IsRaw() && !target.GetRaw().empty() && + *target.GetRaw().data() == '{') { + ParseResult ret = ParseLazy( + target, target.GetRaw(), alloc); + if (ret.Error()) return ret.Error(); } - if (ret.Error()) { - return ret.Error(); + if (source.IsRaw() && !source.GetRaw().empty() && + *source.GetRaw().data() == '{') { + ParseResult ret = ParseLazy( + source, source.GetRaw(), alloc); + if (ret.Error()) return ret.Error(); } // update the object type if (!target.IsObject() || !source.IsObject() || target.Empty()) { diff --git a/include/sonic/internal/arch/common/unicode_common.h b/include/sonic/internal/arch/common/unicode_common.h index 6c488fb7..104d6d0b 100644 --- a/include/sonic/internal/arch/common/unicode_common.h +++ b/include/sonic/internal/arch/common/unicode_common.h @@ -242,6 +242,10 @@ sonic_force_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, // inside the multilingual plane check uint32_t code_point = hex_to_u32_nocheck(*src_ptr + 2); *src_ptr += 6; + // Reject lone low surrogates: they are not valid Unicode scalar values. + if (code_point >= 0xdc00 && code_point <= 0xdfff) { + return false; + } // check for low surrogate for characters outside the Basic // Multilingual Plane. if (code_point >= 0xd800 && code_point < 0xdc00) { @@ -258,6 +262,11 @@ sonic_force_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, if ((code_point | code_point_2) >> 16) { return false; } + // The second escape must be a low surrogate; otherwise the subtraction + // below would wrap and produce a garbage code point. + if (code_point_2 < 0xdc00 || code_point_2 > 0xdfff) { + return false; + } code_point = (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; diff --git a/include/sonic/internal/stack.h b/include/sonic/internal/stack.h index add3c0a6..fa483065 100644 --- a/include/sonic/internal/stack.h +++ b/include/sonic/internal/stack.h @@ -27,7 +27,7 @@ namespace internal { class Stack { public: - Stack(size_t cap = defaultCapcity()) : cap_(cap) { + Stack(size_t cap = defaultCapcity()) : cap_(0) { buf_ = nullptr; top_ = nullptr; Reserve(cap); @@ -62,10 +62,10 @@ class Stack { size_t align_cap = SONIC_ALIGN(new_cap); size_t old_size = Size(); char* tmp = static_cast(std::realloc(buf_, align_cap)); + if (sonic_unlikely(tmp == nullptr)) return; top_ = tmp + old_size; buf_ = tmp; - sonic_assert(buf_ != NULL); - cap_ = buf_ ? new_cap : 0; + cap_ = new_cap; } /** @@ -169,8 +169,8 @@ class Stack { sonic_force_inline char* Grow(size_t cnt) { if (sonic_unlikely(top_ + cnt >= buf_ + cap_)) { if (sonic_unlikely((top_ + cnt) > buf_ + 2 * cap_)) { - cap_ = top_ - buf_ + cnt; - Reserve(cap_ + cap_ / 2); + size_t needed = (top_ - buf_) + cnt; + Reserve(needed + needed / 2); } else { Reserve(cap_ * 2); } diff --git a/include/sonic/sonic.h b/include/sonic/sonic.h index aeb803c1..c52faf19 100644 --- a/include/sonic/sonic.h +++ b/include/sonic/sonic.h @@ -24,8 +24,10 @@ #define SONIC_MAJOR_VERSION 1 #define SONIC_MINOR_VERSION 0 #define SONIC_PATCH_VERSION 2 -#define SONIC_STRS(s) #s -#define SONIC_VERSION_STRING \ - SONIC_STRS(SONIC_MAJOR_VERSION.SONIC_MINOR_VERSION.SONIC_PATCH.VERSION) +#define SONIC_STRS_IMPL(s) #s +#define SONIC_STRS(s) SONIC_STRS_IMPL(s) +#define SONIC_VERSION_STRING \ + SONIC_STRS(SONIC_MAJOR_VERSION) \ + "." SONIC_STRS(SONIC_MINOR_VERSION) "." SONIC_STRS(SONIC_PATCH_VERSION) namespace sonic_json {} // namespace sonic_json diff --git a/include/sonic/writebuffer.h b/include/sonic/writebuffer.h index 55631539..95a8053b 100644 --- a/include/sonic/writebuffer.h +++ b/include/sonic/writebuffer.h @@ -37,15 +37,18 @@ class WriteBuffer { } /** - * @brief Return the context in the buffer. - * @return a null-terminate string. - * @note a '\0' will be added in the ending, so, this function is not - * thread-safe. + * @brief Return the buffer contents as a null-terminated C string. + * @return pointer to the buffer, terminated with an appended '\0'. + * @note Not thread-safe. */ sonic_force_inline const char* ToString() const { + if (sonic_likely(stack_.Size() < stack_.Capacity())) { + *(stack_.template End()) = '\0'; + return stack_.template Begin(); + } stack_.Grow(1); *(stack_.template End()) = '\0'; - return stack_.Begin(); + return stack_.template Begin(); } sonic_force_inline StringView ToStringView() const { diff --git a/tests/allocator_test.cpp b/tests/allocator_test.cpp index 71740adb..5a44e5f4 100644 --- a/tests/allocator_test.cpp +++ b/tests/allocator_test.cpp @@ -16,7 +16,17 @@ #include "sonic/allocator.h" +#include +#include + #include "gtest/gtest.h" +#include "sonic/internal/stack.h" + +// Let huge-allocation OOM tests return null under ASAN instead of aborting. +// Dead code in non-ASAN builds; ASAN_OPTIONS still overrides it. +extern "C" __attribute__((used)) const char *__asan_default_options() { + return "allocator_may_return_null=1"; +} namespace { @@ -90,4 +100,136 @@ TEST(Allocator, MemoryPoolAllocatorMoveAndMapAllocator) { } } +TEST(Stack, ReservePreservesContents) { + // Verify that Reserve correctly grows the buffer and preserves existing + // data, and that Reserve(smaller) is a no-op. + sonic_json::internal::Stack s(8); + + s.Push('A'); + s.Push('B'); + ASSERT_EQ(2u, s.Size()); + ASSERT_EQ('B', *s.Top()); + + // Reserve with current cap: must be a no-op. + size_t old_cap = s.Capacity(); + s.Reserve(old_cap); + EXPECT_EQ(old_cap, s.Capacity()) << "Reserve(<=cap) must not change Capacity"; + EXPECT_EQ('B', *s.Top()) << "Reserve no-op must not touch Top"; + + // Reserve with larger cap: must grow and preserve data. + s.Reserve(old_cap * 4); + EXPECT_GE(s.Capacity(), old_cap * 4) + << "Reserve must grow to at least new_cap"; + EXPECT_EQ(2u, s.Size()) << "Reserve must not change Size"; + EXPECT_EQ('B', *s.Top()) << "Reserve must preserve existing data"; +} + +// Use a large-but-bounded request that fails via allocator bookkeeping +// instead of asking ASan's malloc interceptor for absurd virtual sizes. +// This still exceeds the default pool chunk capacity and exercises the +// "failed allocation sets hadOom" path without polluting test output. +struct FailAfterFirstChunkAllocator { + bool allow_ctor = true; + void *Malloc(size_t n) { + if (allow_ctor) { + allow_ctor = false; + return std::malloc(n); + } + return nullptr; + } + void *Realloc(void *, size_t, size_t) { return nullptr; } + static void Free(void *p) { std::free(p); } +}; + +TEST(Allocator, MemoryPoolAllocatorHadOomSignalsFailedMalloc) { + FailAfterFirstChunkAllocator base; + MemoryPoolAllocator pool(8, &base); + EXPECT_FALSE(pool.HadOom()); + void *p = pool.Malloc(16); + EXPECT_EQ(p, nullptr); + EXPECT_TRUE(pool.HadOom()); + pool.ClearOom(); + EXPECT_FALSE(pool.HadOom()); +} + +// Both MemoryPoolAllocator ctors place SharedData (incl. atomic +// hadOom) into raw storage. Assert the flag reads false on a freshly +// constructed allocator before any Malloc call — guards against a +// regression where hadOom is left in an indeterminate state by ctor init. +TEST(Allocator, MemoryPoolAllocatorHadOomStartsFalseOnConstruction) { + { + MemoryPoolAllocator<> pool; + EXPECT_FALSE(pool.HadOom()); + } + { + // Buffer ctor path: user-supplied storage, AlignBuffer instead of Malloc. + alignas(alignof(std::max_align_t)) unsigned char buf[4096]; + MemoryPoolAllocator<> pool(buf, sizeof(buf)); + EXPECT_FALSE(pool.HadOom()); + } +} + +TEST(Allocator, MemoryPoolAllocatorHadOomSharedAcrossCopies) { + // Flag lives on SharedData so refcounted copies see coherent state. + FailAfterFirstChunkAllocator base; + MemoryPoolAllocator a(8, &base); + MemoryPoolAllocator b( + a); // shares SharedData with a + EXPECT_FALSE(a.HadOom()); + EXPECT_FALSE(b.HadOom()); + (void)b.Malloc(16); + EXPECT_TRUE(a.HadOom()); // a sees b's failure + EXPECT_TRUE(b.HadOom()); + a.ClearOom(); + EXPECT_FALSE(b.HadOom()); +} + +// Writer on one refcounted copy, reader on another. The per-instance +// SpinLock does not synchronize different copies, so hadOom must be +// atomic for this to be race-free. +TEST(Allocator, MemoryPoolAllocatorHadOomIsThreadSafeAcrossCopies) { + FailAfterFirstChunkAllocator base; + MemoryPoolAllocator a(8, &base); + MemoryPoolAllocator b(a); // shares SharedData + + std::atomic stop{false}; + std::thread writer([&] { + for (int i = 0; i < 200 && !stop.load(); ++i) { + (void)a.Malloc(16); // sets hadOom + } + }); + std::thread reader([&] { + for (int i = 0; i < 200 && !stop.load(); ++i) { + (void)b.HadOom(); // observes hadOom + } + }); + writer.join(); + reader.join(); + stop.store(true); + + EXPECT_TRUE(b.HadOom()); + a.ClearOom(); + EXPECT_FALSE(b.HadOom()); +} + +TEST(Stack, ConstructorOomLeavesConsistentState) { + // If the ctor's initial Reserve() fails, cap_ must not lie about the + // (absent) buffer. Otherwise Grow()'s guard `top_+cnt >= buf_+cap_` reads + // as `1 >= cap_ + 0` and skips the re-allocation entirely, letting a + // subsequent Push() dereference a null top_. + constexpr size_t kHuge = (size_t{1} << 62); + sonic_json::internal::Stack s(kHuge); + + if (s.Begin() == nullptr) { + EXPECT_EQ(0u, s.Capacity()) + << "Capacity must be 0 when the ctor could not allocate a buffer"; + } + + // And a subsequent Push() must still work — Grow() re-allocates on demand. + s.Push('X'); + ASSERT_NE(s.Begin(), nullptr); + EXPECT_EQ(1u, s.Size()); + EXPECT_EQ('X', *s.Top()); +} + } // namespace diff --git a/tests/document_test.cpp b/tests/document_test.cpp index 3aed1c0e..5db2398b 100644 --- a/tests/document_test.cpp +++ b/tests/document_test.cpp @@ -577,8 +577,6 @@ TYPED_TEST(DocumentTest, SerializeOK) { } } -TYPED_TEST(DocumentTest, SerializeSort) {} - TYPED_TEST(DocumentTest, SonicErrorInvalidKey) { using DNode = typename TypeParam::NodeType; auto iter = this->doc_.MemberBegin(); diff --git a/tests/exp_update_test.cpp b/tests/exp_update_test.cpp index 3de8f1bb..76da60b7 100644 --- a/tests/exp_update_test.cpp +++ b/tests/exp_update_test.cpp @@ -148,4 +148,27 @@ TEST(UpdateLazy, InvalidJson) { } } +TEST(UpdateLazy, NestedInvalidTargetPropagates) { + // Nested invalid target merged with nested valid source: + // the error from the target-side lazy parse must be propagated and + // the update must fail ("{}"); it must NOT be silently overwritten + // by the source's successful parse. + { + std::string target = R"({"a":{"foo":}})"; // nested {"foo":} is invalid + std::string source = R"({"a":{"bar":5}})"; + auto ret = + sonic_json::UpdateLazy(target, source); + EXPECT_STREQ(ret.c_str(), "{}") + << "invalid nested target must propagate as update failure"; + } + { + std::string target = R"({"a":{"foo": @}})"; // invalid token inside nested + std::string source = R"({"a":{"bar":5}})"; + auto ret = + sonic_json::UpdateLazy(target, source); + EXPECT_STREQ(ret.c_str(), "{}") + << "invalid nested target must propagate as update failure"; + } +} + } // namespace diff --git a/tests/parser_oom_test.cpp b/tests/parser_oom_test.cpp new file mode 100644 index 00000000..3b455eb8 --- /dev/null +++ b/tests/parser_oom_test.cpp @@ -0,0 +1,650 @@ +/* + * Copyright ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include + +#include "sonic/dom/handler.h" +#include "sonic/dom/parser.h" +#include "sonic/sonic.h" + +namespace { + +using namespace sonic_json; + +TEST(Document, VersionStringExpandsMacros) { + const std::string v = SONIC_VERSION_STRING; + EXPECT_EQ(std::string::npos, v.find("SONIC_MAJOR_VERSION")); + EXPECT_EQ(std::string::npos, v.find("SONIC_PATCH")); + EXPECT_EQ(2u, std::count(v.begin(), v.end(), '.')); + for (char c : v) { + EXPECT_TRUE(c == '.' || (c >= '0' && c <= '9')); + } +} + +TEST(Document, RejectInvalidSurrogate) { + { + Document doc; + std::string json = "\"\\uDC00\""; + doc.Parse(json); + EXPECT_TRUE(doc.HasParseError()); + } + { + Document doc; + std::string json = "\"\\uD800\\u0041\""; + doc.Parse(json); + EXPECT_TRUE(doc.HasParseError()); + } + { + Document doc; + std::string json = "\"\\uDFFF\""; + doc.Parse(json); + EXPECT_TRUE(doc.HasParseError()); + } + { + Document doc; + std::string json = "\"\\uD83D\\uDE0A\""; + doc.Parse(json); + EXPECT_FALSE(doc.HasParseError()); + } +} + +TEST(Document, ReparseDoesNotLeakPoolMemory) { + Document doc; + doc.Parse(R"({"key":"value","num":42})"); + ASSERT_FALSE(doc.HasParseError()); + size_t size_after_first = doc.GetAllocator().Size(); + + doc.Parse(R"({"key":"value","num":42})"); + ASSERT_FALSE(doc.HasParseError()); + size_t size_after_second = doc.GetAllocator().Size(); + + EXPECT_EQ(size_after_first, size_after_second); +} + +struct AlwaysOomAllocator { + void* Malloc(size_t) { return nullptr; } + void* Realloc(void*, size_t, size_t) { return nullptr; } + static void Free(void*) {} + static constexpr bool kNeedFree = false; +}; + +static std::vector pad_json_bytes(const char* json, size_t len) { + std::vector buf(len + 64, 0); + std::memcpy(buf.data(), json, len); + buf[len] = 'x'; + buf[len + 1] = '"'; + buf[len + 2] = 'x'; + return buf; +} + +TEST(Document, OomDoesNotCrashPushBack) { + AlwaysOomAllocator alloc; + DNode arr; + arr.SetArray(); + DNode val; + val.SetInt64(42); + arr.PushBack(std::move(val), alloc); + EXPECT_EQ(0u, arr.Size()); +} + +TEST(Document, NoFreeAllocatorWithoutClearCompilesAndRuns) { + GenericDocument> doc; + doc.Parse("{}"); + EXPECT_TRUE(doc.HasParseError()); +} + +TEST(Document, CreateMapOomFromEmptyObjectReturnsFalse) { + AlwaysOomAllocator alloc; + DNode obj; + obj.SetObject(); + EXPECT_FALSE(obj.CreateMap(alloc)); +} + +TEST(Document, OomDoesNotCrashCopyObject) { + Document src; + src.Parse(R"({"a":1,"b":2,"c":3})"); + ASSERT_FALSE(src.HasParseError()); + + AlwaysOomAllocator alloc; + DNode dst(src, alloc); + EXPECT_TRUE(dst.IsObject()); + EXPECT_EQ(0u, dst.Size()); +} + +TEST(Document, OomDoesNotCrashCopyArray) { + Document src; + src.Parse(R"([1, 2, 3])"); + ASSERT_FALSE(src.HasParseError()); + + AlwaysOomAllocator alloc; + DNode dst(src, alloc); + EXPECT_TRUE(dst.IsArray()); + EXPECT_EQ(0u, dst.Size()); +} + +struct OomAfterNthAllocator { + size_t remaining = 0; + OomAfterNthAllocator() = default; + explicit OomAfterNthAllocator(size_t n) : remaining(n) {} + void* Malloc(size_t n) { + if (remaining == 0) return nullptr; + --remaining; + return std::malloc(n); + } + void* Realloc(void* p, size_t, size_t new_size) { + if (new_size == 0) { + std::free(p); + return nullptr; + } + if (remaining == 0) return nullptr; + if (p == nullptr) --remaining; + return std::realloc(p, new_size); + } + static void Free(void* p) { std::free(p); } + static constexpr bool kNeedFree = true; +}; + +TEST(Document, CreateMapOomForMapStorageReturnsFalse) { + OomAfterNthAllocator alloc(1); + DNode obj; + obj.SetObject(); + EXPECT_FALSE(obj.CreateMap(alloc)); +} + +TEST(Document, OomDoesNotCrashParseObject) { + OomAfterNthAllocator alloc(1); + GenericDocument> doc(&alloc); + doc.Parse(R"({"a":1,"b":2,"c":3})"); + EXPECT_TRUE(doc.HasParseError()); +} + +TEST(Document, OomDoesNotCrashParseArray) { + OomAfterNthAllocator alloc(1); + GenericDocument> doc(&alloc); + doc.Parse("[1,2,3]"); + EXPECT_TRUE(doc.HasParseError()); +} + +TEST(Document, ParseImplHandlesRepeatedOomCleanly) { + OomAfterNthAllocator alloc(0); + GenericDocument> doc(&alloc); + doc.Parse("{}"); + EXPECT_TRUE(doc.HasParseError()); + EXPECT_EQ(doc.GetParseError(), kErrorNoMem); + doc.Parse("[]"); + EXPECT_TRUE(doc.HasParseError()); + EXPECT_EQ(doc.GetParseError(), kErrorNoMem); +} + +struct SentinelTrackingAllocator { + static int balance; + void* Malloc(size_t n) { + ++balance; + void* p = std::malloc(n); + if (p) std::memset(p, '"', n); + return p; + } + void* Realloc(void* p, size_t, size_t n) { return std::realloc(p, n); } + static void Free(void* p) { + if (p) --balance; + std::free(p); + } +}; +int SentinelTrackingAllocator::balance = 0; + +struct RejectKeyLazySAX { + using Allocator = SentinelTrackingAllocator; + SentinelTrackingAllocator alloc; + bool key_called = false; + Allocator& GetAllocator() { return alloc; } + bool StartObject() { return true; } + bool EndObject(size_t) { return true; } + bool StartArray() { return true; } + bool EndArray(size_t) { return true; } + bool Key(const char*, size_t, size_t) { + key_called = true; + return false; + } + bool Raw(const char*, size_t) { return true; } +}; + +TEST(Document, ParseLazyEscapedKeyOomReportsNoMem) { + struct OomLazySAX { + using Allocator = OomAfterNthAllocator; + OomAfterNthAllocator alloc; + OomLazySAX() : alloc(0) {} + Allocator& GetAllocator() { return alloc; } + bool StartObject() { return true; } + bool EndObject(size_t) { return true; } + bool StartArray() { return true; } + bool EndArray(size_t) { return true; } + bool Key(const char*, size_t, size_t) { return true; } + bool Raw(const char*, size_t) { return true; } + }; + OomLazySAX sax; + Parser p; + const char* json = R"({"\n": 1})"; + auto buf = pad_json_bytes(json, std::strlen(json)); + auto res = p.ParseLazy(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kErrorNoMem, res.Error()); +} + +struct TrackingNthOomAllocator { + static int balance; + static size_t remaining; + void* Malloc(size_t n) { + if (remaining == 0) return nullptr; + --remaining; + ++balance; + return std::malloc(n); + } + void* Realloc(void* p, size_t, size_t n) { return std::realloc(p, n); } + static void Free(void* p) { + if (p) --balance; + std::free(p); + } + static constexpr bool kNeedFree = true; +}; +int TrackingNthOomAllocator::balance = 0; +size_t TrackingNthOomAllocator::remaining = 0; + +TEST(Document, AddMemberWithoutMapOnOomLeavesObjectEmpty) { + OomAfterNthAllocator alloc(0); + DNode obj; + obj.SetObject(); + DNode val; + val.SetInt64(1); + obj.AddMember("k", std::move(val), alloc); + EXPECT_EQ(0u, obj.Size()); +} + +TEST(LazySAXHandler, EndObjectOomLeavesStackMatchingSuccessArm) { + TrackingNthOomAllocator::balance = 0; + TrackingNthOomAllocator::remaining = 1; + TrackingNthOomAllocator alloc; + + using Node = DNode; + LazySAXHandler sax(alloc); + + ASSERT_TRUE(sax.StartObject()); + constexpr char kKey[] = "key"; + void* buf = alloc.Malloc(sizeof(kKey)); + ASSERT_NE(nullptr, buf); + std::memcpy(buf, kKey, sizeof(kKey)); + ASSERT_TRUE(sax.Key(static_cast(buf), sizeof(kKey) - 1, 1)); + ASSERT_TRUE(sax.Raw("1", 1)); + ASSERT_TRUE(sax.EndObject(1)); + EXPECT_TRUE(sax.oom_); + EXPECT_EQ(sizeof(Node), sax.stack_.Size()); +} + +TEST(Document, ParseLazyFreesEscapedKeyOnKeyFailure) { + SentinelTrackingAllocator::balance = 0; + + RejectKeyLazySAX sax; + Parser p; + const char* json = R"({"\n": 1})"; + auto buf = pad_json_bytes(json, std::strlen(json)); + p.ParseLazy(buf.data(), std::strlen(json), sax); + + ASSERT_TRUE(sax.key_called); + EXPECT_EQ(0, SentinelTrackingAllocator::balance); +} + +struct RejectingSAX { + bool reject_start_array = false; + bool reject_start_object = false; + bool reject_end_array = false; + bool reject_end_object = false; + bool reject_key = false; + bool reject_string = false; + bool reject_int = false; + bool reject_uint = false; + bool reject_double = false; + bool reject_numstr = false; + bool reject_raw = false; + bool reject_null = false; + bool reject_bool = false; + + bool Null() { return !reject_null; } + bool Bool(bool) { return !reject_bool; } + bool Int(int64_t) { return !reject_int; } + bool Uint(uint64_t) { return !reject_uint; } + bool Double(double) { return !reject_double; } + bool NumStr(StringView) { return !reject_numstr; } + bool Raw(const char*, size_t) { return !reject_raw; } + bool Key(StringView) { return !reject_key; } + bool String(StringView) { return !reject_string; } + bool StartArray() { return !reject_start_array; } + bool EndArray(uint32_t) { return !reject_end_array; } + bool StartObject() { return !reject_start_object; } + bool EndObject(uint32_t) { return !reject_end_object; } +}; + +static std::vector pad_json_for_parser(const char* json, size_t len) { + std::vector buf(len + 64, 0); + std::memcpy(buf.data(), json, len); + buf[len] = 'x'; + buf[len + 1] = '"'; + buf[len + 2] = 'x'; + return buf; +} + +TEST(Parser, StartArrayFalseAbortsParse) { + const char* json = "[1,2,3]"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_start_array = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, StartObjectFalseAbortsParse) { + const char* json = R"({"a":1})"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_start_object = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, EndArrayFalseAbortsParse) { + const char* json = "[1,2,3]"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_end_array = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, EndObjectFalseAbortsParse) { + const char* json = R"({"a":1})"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_end_object = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, KeyFalseAbortsParseWhenNotCheckKeyReturn) { + const char* json = R"({"a":1})"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_key = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, StringFalseAbortsParse) { + const char* json = R"(["x"])"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_string = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, NumberIntRejectionReportsSaxTermination) { + const char* json = "1"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_uint = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, NumberNegativeIntRejectionReportsSaxTermination) { + const char* json = "-1"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_int = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, NumberDoubleRejectionReportsSaxTermination) { + const char* json = "1.5"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_double = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, NumberIntegerAsRawRejectionReportsSaxTermination) { + const char* json = "123"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_raw = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, NestedNumberRejectionReportsSaxTermination) { + struct Case { + const char* json; + bool reject_double; + } cases[] = { + {R"({"a":1})", false}, + {R"([1])", false}, + {R"({"a":1.5})", true}, + }; + for (const auto& c : cases) { + auto buf = pad_json_for_parser(c.json, std::strlen(c.json)); + RejectingSAX sax; + if (c.reject_double) { + sax.reject_double = true; + } else { + sax.reject_uint = true; + } + Parser p; + auto res = p.Parse(buf.data(), std::strlen(c.json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); + } +} + +TEST(Parser, NumberOverflowAsNumStrRejectionReportsSaxTermination) { + const char* json = "18446744073709551616"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_numstr = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, NullRejectionReportsSaxTermination) { + const char* json = "null"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_null = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +TEST(Parser, BoolRejectionReportsSaxTermination) { + for (const char* json : {"true", "false"}) { + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_bool = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); + } +} + +TEST(Parser, PrimitiveRootStringRejectionReportsSaxTermination) { + const char* json = R"("x")"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + RejectingSAX sax; + sax.reject_string = true; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kSaxTermination, res.Error()); +} + +struct SkippingKeyCheckReturnSAX { + static constexpr bool check_key_return = true; + int keys_seen = 0; + bool Null() { return true; } + bool Bool(bool) { return true; } + bool Int(int64_t) { return true; } + bool Uint(uint64_t) { return true; } + bool Double(double) { return true; } + bool NumStr(StringView) { return true; } + bool Key(StringView) { + ++keys_seen; + return false; + } + bool String(StringView) { return true; } + bool StartArray() { return true; } + bool EndArray(uint32_t) { return true; } + bool StartObject() { return true; } + bool EndObject(uint32_t) { return true; } +}; + +TEST(Parser, KeyFalsePreservesSkipSemanticsUnderCheckKeyReturn) { + const char* json = R"({"a":1,"b":2})"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + SkippingKeyCheckReturnSAX sax; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_EQ(kErrorNone, res.Error()); + EXPECT_EQ(2, sax.keys_seen); +} + +struct RejectAllLazySax { + using Allocator = SONIC_DEFAULT_ALLOCATOR; + Allocator alloc_; + Allocator& GetAllocator() { return alloc_; } + bool StartArray() { return false; } + bool EndArray(size_t) { return false; } + bool StartObject() { return false; } + bool EndObject(size_t) { return false; } + bool Key(const char*, size_t, size_t) { return false; } + bool Raw(const char*, size_t) { return false; } +}; + +TEST(ParseLazy, RawRejectionReportsSaxTermination) { + RejectAllLazySax sax; + Parser p; + const char* j = "42"; + auto buf = pad_json_bytes(j, 2); + auto r = p.ParseLazy(buf.data(), 2, sax); + EXPECT_EQ(r.Error(), kSaxTermination); +} + +TEST(ParseLazy, StartArrayRejectionReportsSaxTermination) { + RejectAllLazySax sax; + Parser p; + const char* j = "[1,2,3]"; + auto buf = pad_json_bytes(j, 7); + auto r = p.ParseLazy(buf.data(), 7, sax); + EXPECT_EQ(r.Error(), kSaxTermination); +} + +TEST(ParseLazy, StartObjectRejectionReportsSaxTermination) { + RejectAllLazySax sax; + Parser p; + const char* j = R"({"k":1})"; + auto buf = pad_json_bytes(j, 7); + auto r = p.ParseLazy(buf.data(), 7, sax); + EXPECT_EQ(r.Error(), kSaxTermination); +} + +struct AcceptAllLazySax { + using Allocator = SONIC_DEFAULT_ALLOCATOR; + Allocator alloc_; + Allocator& GetAllocator() { return alloc_; } + bool StartArray() { return true; } + bool EndArray(size_t) { return true; } + bool StartObject() { return true; } + bool EndObject(size_t) { return true; } + bool Key(const char*, size_t, size_t) { return true; } + bool Raw(const char*, size_t) { return true; } +}; + +TEST(ParseLazy, AcceptAllStillCompletesCleanly) { + AcceptAllLazySax sax; + Parser p; + const char* j = R"({"a":1,"b":[2,3]})"; + auto buf = pad_json_bytes(j, std::strlen(j)); + auto r = p.ParseLazy(buf.data(), std::strlen(j), sax); + EXPECT_EQ(r.Error(), kErrorNone); +} + +struct StringKeyCountingSAX { + int string_calls = 0; + int key_calls = 0; + bool Null() { return true; } + bool Bool(bool) { return true; } + bool Int(int64_t) { return true; } + bool Uint(uint64_t) { return true; } + bool Double(double) { return true; } + bool NumStr(StringView) { return true; } + bool Raw(const char*, size_t) { return true; } + bool Key(StringView) { + ++key_calls; + return true; + } + bool String(StringView) { + ++string_calls; + return true; + } + bool StartArray() { return true; } + bool EndArray(uint32_t) { return true; } + bool StartObject() { return true; } + bool EndObject(uint32_t) { return true; } +}; + +TEST(Parser, InvalidSurrogateInValueDoesNotInvokeStringCallback) { + const char* json = R"(["\uDC00"])"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + StringKeyCountingSAX sax; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_TRUE(res.Error() != kErrorNone); + EXPECT_EQ(0, sax.string_calls); +} + +TEST(Parser, InvalidSurrogateInKeyDoesNotInvokeKeyCallback) { + const char* json = R"({"\uDC00":1})"; + auto buf = pad_json_for_parser(json, std::strlen(json)); + StringKeyCountingSAX sax; + Parser p; + auto res = p.Parse(buf.data(), std::strlen(json), sax); + EXPECT_TRUE(res.Error() != kErrorNone); + EXPECT_EQ(0, sax.key_calls); +} + +} // namespace diff --git a/tests/writebuffer_test.cpp b/tests/writebuffer_test.cpp index 84c9129f..072ab5bb 100644 --- a/tests/writebuffer_test.cpp +++ b/tests/writebuffer_test.cpp @@ -18,6 +18,8 @@ #include +#include "sonic/internal/stack.h" + namespace { using namespace sonic_json; @@ -78,6 +80,17 @@ TEST(WriteBuffer, ToString) { wb.Push('c'); EXPECT_STREQ(wb.ToString(), "c"); } + // ToString()/ToStringView() must be callable on a const WriteBuffer&: + // they write the terminator into pre-reserved slack via a `mutable` + // stack, so the logical observable state does not change. This + // preserves v1.x source compatibility for callers that hold a const + // reference (e.g. CRTP str_impl() methods). + static_assert( + std::is_invocable_v, + "ToString() must be callable on a const WriteBuffer"); + static_assert(std::is_invocable_v, + "ToStringView() must be callable on a const WriteBuffer"); { const WriteBuffer cwb; EXPECT_STREQ(cwb.ToString(), ""); @@ -90,6 +103,36 @@ TEST(WriteBuffer, ToString) { } } +// Reserve must not update cap_ when realloc fails: callers check +// Size() < Capacity() to decide whether writes fit in the backing buffer. +TEST(Stack, ReservePreservesCapOnOom) { + internal::Stack s(256); + ASSERT_EQ(256u, s.Capacity()); + constexpr size_t kHuge = static_cast(1) << 60; + s.Reserve(kHuge); + EXPECT_EQ(256u, s.Capacity()); +} + +// ToString's fast path must not reallocate when one slack byte already +// covers the terminator: Grow(1) would otherwise fire at Size == Cap - 1 +// and invalidate any pointer a prior ToString()/Begin() handed out. +TEST(WriteBuffer, ToStringIsIdempotentWhenCapacityHasSlack) { + WriteBuffer wb(16); + const size_t cap_before = wb.Capacity(); + ASSERT_EQ(cap_before, 16u); + // Per-char pushes avoid Push(s, n)'s Grow(n+1), landing on Size == Cap - 1. + const char* text = "abcdefghijklmno"; + for (size_t i = 0; i < 15; ++i) wb.Push(text[i]); + ASSERT_EQ(wb.Size(), 15u); + ASSERT_EQ(wb.Capacity(), cap_before); + + const char* p1 = wb.ToString(); + EXPECT_STREQ(p1, "abcdefghijklmno"); + EXPECT_EQ(wb.Capacity(), cap_before); + const char* p2 = wb.ToString(); + EXPECT_EQ(p1, p2); +} + TEST(WriteBuffer, StringSize) { { WriteBuffer wb;