@@ -809,23 +809,72 @@ static void jb_free(JsonBuf* jb) {
809809static void jb_append_json_string (JsonBuf * jb , const char * s ) {
810810 jb_append_char (jb , '"' );
811811 if (!s ) s = "" ;
812- for (const unsigned char * p = (const unsigned char * )s ; * p ; p ++ ) {
812+ const unsigned char * p = (const unsigned char * )s ;
813+ while (* p ) {
813814 unsigned char c = * p ;
814- switch (c ) {
815- case '"' : jb_append_str (jb , "\\\"" ); break ;
816- case '\\' : jb_append_str (jb , "\\\\" ); break ;
817- case '\b' : jb_append_str (jb , "\\b" ); break ;
818- case '\f' : jb_append_str (jb , "\\f" ); break ;
819- case '\n' : jb_append_str (jb , "\\n" ); break ;
820- case '\r' : jb_append_str (jb , "\\r" ); break ;
821- case '\t' : jb_append_str (jb , "\\t" ); break ;
822- default :
823- if (c < 0x20 || c >= 0x7f ) {
824- jb_append_fmt (jb , "\\u%04x" , (unsigned int )c );
825- } else {
826- jb_append_char (jb , (unsigned char )c );
827- }
828- break ;
815+ /* Common single-byte escapes */
816+ if (c == '"' ) { jb_append_str (jb , "\\\"" ); p ++ ; continue ; }
817+ if (c == '\\' ) { jb_append_str (jb , "\\\\" ); p ++ ; continue ; }
818+ if (c == '\b' ) { jb_append_str (jb , "\\b" ); p ++ ; continue ; }
819+ if (c == '\f' ) { jb_append_str (jb , "\\f" ); p ++ ; continue ; }
820+ if (c == '\n' ) { jb_append_str (jb , "\\n" ); p ++ ; continue ; }
821+ if (c == '\r' ) { jb_append_str (jb , "\\r" ); p ++ ; continue ; }
822+ if (c == '\t' ) { jb_append_str (jb , "\\t" ); p ++ ; continue ; }
823+
824+ /* Control characters must be escaped as \u00xx */
825+ if (c < 0x20 ) {
826+ jb_append_fmt (jb , "\\u%04x" , (unsigned int )c );
827+ p ++ ;
828+ continue ;
829+ }
830+
831+ /* Decode UTF-8 sequence into a Unicode code point. On invalid
832+ * sequences emit U+FFFD. For BMP code points emit \uHHHH, for
833+ * beyond-BMP emit \UHHHHHHHH per the specification. */
834+ uint32_t codepoint = 0 ;
835+ size_t seq_len = 0 ;
836+ if (c < 0x80 ) {
837+ codepoint = c ;
838+ seq_len = 1 ;
839+ } else if ((c & 0xE0 ) == 0xC0 ) {
840+ if (p [1 ] != '\0' && (p [1 ] & 0xC0 ) == 0x80 ) {
841+ codepoint = ((uint32_t )(c & 0x1F ) << 6 ) | (uint32_t )(p [1 ] & 0x3F );
842+ seq_len = 2 ;
843+ if (codepoint < 0x80 ) seq_len = 0 ; /* overlong */
844+ }
845+ } else if ((c & 0xF0 ) == 0xE0 ) {
846+ if (p [1 ] != '\0' && p [2 ] != '\0' && (p [1 ] & 0xC0 ) == 0x80 && (p [2 ] & 0xC0 ) == 0x80 ) {
847+ codepoint = ((uint32_t )(c & 0x0F ) << 12 ) | ((uint32_t )(p [1 ] & 0x3F ) << 6 ) | (uint32_t )(p [2 ] & 0x3F );
848+ seq_len = 3 ;
849+ if (codepoint < 0x800 ) seq_len = 0 ; /* overlong */
850+ }
851+ } else if ((c & 0xF8 ) == 0xF0 ) {
852+ if (p [1 ] != '\0' && p [2 ] != '\0' && p [3 ] != '\0' && (p [1 ] & 0xC0 ) == 0x80 && (p [2 ] & 0xC0 ) == 0x80 && (p [3 ] & 0xC0 ) == 0x80 ) {
853+ codepoint = ((uint32_t )(c & 0x07 ) << 18 ) | ((uint32_t )(p [1 ] & 0x3F ) << 12 ) | ((uint32_t )(p [2 ] & 0x3F ) << 6 ) | (uint32_t )(p [3 ] & 0x3F );
854+ seq_len = 4 ;
855+ if (codepoint < 0x10000 || codepoint > 0x10FFFF ) seq_len = 0 ; /* overlong or out of range */
856+ }
857+ }
858+
859+ if (seq_len == 0 ) {
860+ /* invalid UTF-8 -> replacement character */
861+ codepoint = 0xFFFD ;
862+ p ++ ;
863+ } else {
864+ /* reject surrogate halves */
865+ if (codepoint >= 0xD800 && codepoint <= 0xDFFF ) {
866+ codepoint = 0xFFFD ;
867+ } else {
868+ p += seq_len ;
869+ }
870+ }
871+
872+ if (codepoint < 0x80 ) {
873+ jb_append_char (jb , (char )codepoint );
874+ } else if (codepoint <= 0xFFFF ) {
875+ jb_append_fmt (jb , "\\u%04x" , (unsigned int )codepoint );
876+ } else {
877+ jb_append_fmt (jb , "\\U%08x" , (unsigned int )codepoint );
829878 }
830879 }
831880 jb_append_char (jb , '"' );
0 commit comments