diff --git a/.wolfssl_known_macro_extras b/.wolfssl_known_macro_extras
index 50377597a8..00dc36c5b7 100644
--- a/.wolfssl_known_macro_extras
+++ b/.wolfssl_known_macro_extras
@@ -196,6 +196,7 @@ CONFIG_WOLFSSL_TLS_VERSION_1_3
CONFIG_WOLFTPM
CONFIG_WOLFTPM_EXAMPLE_NAME_ESPRESSIF
CONFIG_X86
+CONFIG_X86_32
CONV_WITH_DIV
CPA_CY_API_VERSION_NUM_MAJOR
CPA_CY_API_VERSION_NUM_MINOR
@@ -246,6 +247,7 @@ ETHERNET_H
EV_TRIGGER
EXTERNAL_LOADER_APP
FD_CLOEXEC
+FIPS_CODE_REVIEW
FIPS_OPTEST_FULL_RUN_AT_MODULE_INIT
FORCE_FAILURE_GETRANDOM
FP_ECC_CONTROL
diff --git a/IDE/WIN-SRTP-KDF-140-3/test.vcxproj b/IDE/WIN-SRTP-KDF-140-3/test.vcxproj
index a41ff9ac49..2429f2fe9b 100644
--- a/IDE/WIN-SRTP-KDF-140-3/test.vcxproj
+++ b/IDE/WIN-SRTP-KDF-140-3/test.vcxproj
@@ -162,7 +162,13 @@
true
true
UseLinkTimeCodeGeneration
+
false
+ true
@@ -177,6 +183,10 @@
true
+
+ false
+ true
Console
ws2_32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
true
diff --git a/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj b/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj
index 65bb39fffa..390b38f0e9 100644
--- a/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj
+++ b/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj
@@ -314,6 +314,16 @@
+
+
+
+
+
+
+
+
+
diff --git a/configure.ac b/configure.ac
index 2e9fe76069..ad666be2e2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1502,6 +1502,15 @@ then
enable_aesgcm_stream=no
fi
+# FIPS 140-3 v7 guard support: capture whether DSA/DH were EXPLICITLY requested
+# (--enable-dsa / --enable-dh) here, before any default (enable_dsa=yes path,
+# --enable-all) sets them. The v7 out-of-scope guard hard-errors only on an
+# explicit request, otherwise silently forces the algorithm off. (FIPS 186-5
+# retires DSA; classic finite-field DH and DSA are out of scope for the FIPS
+# 140-3 v7 PQ module.)
+explicit_enable_dsa="$enable_dsa"
+explicit_enable_dh="$enable_dh"
+
# All wolfCrypt features:
AC_ARG_ENABLE([all-crypto],
[AS_HELP_STRING([--enable-all-crypto],[Enable all wolfcrypt algorithms (default: disabled)])],
@@ -3781,13 +3790,21 @@ then
AC_MSG_NOTICE([32bit ARMv4 found])
;;
*)
- AM_CPPFLAGS="$AM_CPPFLAGS -mfpu=crypto-neon-fp-armv8 -marm"
+ # AArch32 ARMv8 crypto-extension asm (armv8-32-*-asm.S: sha256h,
+ # aese/aesmc, pmull) needs an explicit -march=armv8-a+crypto: the
+ # ARMv8-A crypto extension is OPTIONAL, so "+crypto" gates these
+ # instructions -- not -mfpu alone, nor a bare -march=armv8-a (nor
+ # -mcpu=cortex-a53 on some toolchains). Cross toolchains defaulting
+ # to ARMv7 (e.g. Xilinx Vitis cortex-a9) otherwise reject them:
+ # "selected processor does not support sha256h.32 in ARM mode".
+ # Mirrors the in-kernel ARM armasm enablement (port/arm/*.S crypto).
+ AM_CPPFLAGS="$AM_CPPFLAGS -march=armv8-a+crypto -mfpu=crypto-neon-fp-armv8 -marm"
# Include options.h
AM_CCASFLAGS="$AM_CCASFLAGS -DEXTERNAL_OPTS_OPENVPN"
ENABLED_ARMASM_CRYPTO=yes
ENABLED_ARMASM_NEON=yes
ENABLED_ARM_32=yes
- AC_MSG_NOTICE([32bit ARMv8 found, setting mfpu to crypto-neon-fp-armv8])
+ AC_MSG_NOTICE([32bit ARMv8 found, setting -march=armv8-a+crypto + mfpu=crypto-neon-fp-armv8])
;;
esac
esac
@@ -5806,6 +5823,13 @@ if test "x$ENABLED_WPAS" = "xyes" || test "x$ENABLED_NGINX" = "xyes" || \
then
ENABLED_ANON=yes
fi
+# Anonymous ciphers require classic DH (enforced below and by settings.h:
+# HAVE_ANON => DH). DH is out of scope only for the LOCKED FIPS 140-3 v7
+# module, so force anon off there however it was enabled (--enable-all, a TLS-
+# integration option like wpas/nginx, or --enable-anon). dev/ready keep DH
+# (and thus anon) when --enable-all/--enable-dh ask.
+AS_IF([test "$FIPS_VERSION" = "v7"],
+ [ENABLED_ANON=no])
if test "x$ENABLED_ANON" = "xyes"
then
if test "$ENABLED_DH" = "no"
@@ -6349,13 +6373,7 @@ AS_CASE([$FIPS_VERSION],
-DWC_RSA_NO_PADDING \
-DECC_USER_CURVES \
-DHAVE_ECC384 \
- -DHAVE_ECC521 \
- -DWOLFSSL_VALIDATE_FFC_IMPORT \
- -DHAVE_FFDHE_Q \
- -DHAVE_FFDHE_3072 \
- -DHAVE_FFDHE_4096 \
- -DHAVE_FFDHE_6144 \
- -DHAVE_FFDHE_8192"
+ -DHAVE_ECC521"
# KCAPI API does not support custom k for sign, don't force enable ECC key sizes and don't use seed callback
AS_IF([test "x$ENABLED_KCAPI_ECC" = "xno"],
@@ -6369,6 +6387,28 @@ AS_CASE([$FIPS_VERSION],
-DHAVE_ECC256"])
DEFAULT_MAX_CLASSIC_ASYM_KEY_BITS=8192
+
+# Classic finite-field DH and DSA scope by FIPS mode (FIPS 186-5 retires DSA;
+# the v7 boundary keeps only ECDH/ECDSA + PQ KEM/DSA):
+# v7 -- LOCKED release module: DH/DSA OUT OF SCOPE. Hard-error on an
+# explicit --enable-dh/--enable-dsa, otherwise force off.
+# dev/ready -- pre-release: DH/DSA OFF BY DEFAULT but turn-on-able
+# (--enable-all, --enable-dh/--enable-dsa) for test coverage and
+# v6 migration.
+ AS_IF([test "$FIPS_VERSION" = "v7"],
+ [AS_IF([test "$explicit_enable_dh" = "yes"],
+ [AC_MSG_ERROR([--enable-dh is not supported with --enable-fips=v7. Classic finite-field DH is out of scope for the FIPS 140-3 v7 PQ module. Use --enable-fips=v6 if you need DH support.])],
+ [test "$ENABLED_DH" != "no"],
+ [ENABLED_DH="no"; enable_dh="no"; AM_CFLAGS="$AM_CFLAGS -DNO_DH"])
+ AS_IF([test "$explicit_enable_dsa" = "yes"],
+ [AC_MSG_ERROR([--enable-dsa is not supported with --enable-fips=v7. DSA is retired by FIPS 186-5 and is out of scope for the FIPS 140-3 v7 PQ module. Use --enable-fips=v6 if you need DSA support.])],
+ [test "$ENABLED_DSA" != "no"],
+ [ENABLED_DSA="no"; enable_dsa="no"; AM_CFLAGS="$AM_CFLAGS -DNO_DSA"])],
+ [AS_IF([test "$explicit_enable_dh" != "yes" && test "x$enable_all" != "xyes"],
+ [ENABLED_DH="no"; enable_dh="no"; AM_CFLAGS="$AM_CFLAGS -DNO_DH"])
+ AS_IF([test "$explicit_enable_dsa" != "yes" && test "x$enable_all" != "xyes"],
+ [ENABLED_DSA="no"; enable_dsa="no"; AM_CFLAGS="$AM_CFLAGS -DNO_DSA"])])
+
# optimizations section
# protocol section
@@ -8926,8 +8966,16 @@ then
fi
if test "x$ENABLED_DH" = "xno"
then
- ENABLED_DH="yes"
- AM_CFLAGS="$AM_CFLAGS -DHAVE_DH"
+ # Classic DH is out of scope for the FIPS 140-3 v7 PQ module. JNI
+ # normally auto-enables DH for legacy TLS suites; with FIPS v7+ we
+ # report and skip rather than silently re-enabling DH off-boundary.
+ if test "$FIPS_VERSION" = "v7" || test "$FIPS_VERSION" = "ready" || test "$FIPS_VERSION" = "dev"
+ then
+ AC_MSG_NOTICE([JNI enabled but FIPS is $FIPS_VERSION, NOT turning on DH with this module])
+ else
+ ENABLED_DH="yes"
+ AM_CFLAGS="$AM_CFLAGS -DHAVE_DH"
+ fi
fi
if test "x$ENABLED_PSK" = "xno"
then
diff --git a/fips-hash.sh b/fips-hash.sh
index 36f320c0bb..309be837e8 100755
--- a/fips-hash.sh
+++ b/fips-hash.sh
@@ -13,7 +13,10 @@ then
fi
OUT=$(./wolfcrypt/test/testwolfcrypt | sed -n 's/hash = \(.*\)/\1/p')
-NEWHASH=$(echo "$OUT" | cut -c1-64)
+# Take the whole hash: FIPS v7.0.0+ is HMAC-SHA-512 (128 hex), older is
+# HMAC-SHA-256 (64 hex). static_assert on sizeof(verifyCore) catches a
+# wrong length at compile time.
+NEWHASH=$(echo "$OUT" | head -n1 | tr -d '[:space:]')
if test -n "$NEWHASH"
then
cp wolfcrypt/src/fips_test.c wolfcrypt/src/fips_test.c.bak
diff --git a/linuxkm/Kbuild b/linuxkm/Kbuild
index e5974e4a93..eb77097a95 100644
--- a/linuxkm/Kbuild
+++ b/linuxkm/Kbuild
@@ -111,6 +111,16 @@ $(LIBWOLFSSL_NAME)-y := $(WOLFSSL_OBJ_FILES) linuxkm/module_hooks.o linuxkm/modu
ifeq "$(FIPS_OPTEST)" "1"
$(LIBWOLFSSL_NAME)-y += linuxkm/optest-140-3/linuxkm_optest_wrapper.o
+ # The optest wrapper (#includes test.c / invalid_tests.c) aggregates several
+ # AES contexts per invalid-input test fn. Under WOLFSSL_AESNI the Aes
+ # struct carries an inline ALIGN16 streamData[5*WC_AES_BLOCK_SIZE]
+ # (wolfssl/wolfcrypt/aes.h) plus use_aesni, so the
+ # aes_{,mac_,ofb_,cfb_,kw_}invalid_data_tests fns exceed the i386
+ # THREAD_SIZE/4 = 2048 frame *warning* (x86_64's 4096 already fits; runtime
+ # is fine in a kernel thread). Relax to 4096 for the wrapper ONLY: it is
+ # test/evidence tooling OUTSIDE the FIPS module boundary, so module objects
+ # keep the strict MAX_STACK_FRAME_SIZE. No effect on x86_64 (default 4096).
+ $(obj)/linuxkm/optest-140-3/linuxkm_optest_wrapper.o: ccflags-y += -Wframe-larger-than=4096
endif
WOLFSSL_CFLAGS_NO_VECTOR_INSNS := $(CFLAGS_SIMD_DISABLE) $(CFLAGS_FPU_DISABLE)
@@ -139,6 +149,10 @@ ifeq "$(ENABLED_LINUXKM_PIE)" "yes"
endif
endif
endif
+ ifeq ($(KERNEL_ARCH),i386)
+ NO_PIE_FLAG := 1
+ $(info Note: disabling -fPIE on 32-bit x86 -- i386 -fPIE routes every local symbol through the GOT (R_386_GOTOFF), which the wolfCrypt PIE containerization forbids.)
+ endif
endif
ifdef NO_PIE_FLAG
@@ -231,6 +245,24 @@ $(obj)/wolfcrypt/src/wc_mlkem_asm.o: OBJECT_FILES_NON_STANDARD := y
$(obj)/wolfcrypt/src/wc_mldsa_asm.o: asflags-y := $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE)
$(obj)/wolfcrypt/src/wc_mldsa_asm.o: OBJECT_FILES_NON_STANDARD := y
+# ARM/ARM64 crypto+NEON asm (wolfcrypt/src/port/arm/*.S) needs crypto/NEON
+# -march at assembly time. The asm carries no .arch/.fpu directives and
+# configure leaves ASFLAGS_*_SIMD_ENABLE empty on ARM: userspace inherits
+# +crypto from the toolchain -mcpu, but the kernel forces its own baseline
+# -march without it, so AES/SHA/PMULL are rejected ("selected processor does not
+# support `aesd ...'"). Supply the right -march per kernel arch. (Wrong-arch
+# port/arm files are #ifdef'd to empty objects, so it is a no-op for them;
+# OBJECT_FILES_NON_STANDARD silences objtool on the hand asm.)
+ifeq ($(CONFIG_ARM64),y)
+ WOLFSSL_ARM_ASM_MARCH := -march=armv8-a+crypto
+else ifeq ($(CONFIG_ARM),y)
+ WOLFSSL_ARM_ASM_MARCH := -march=armv8-a -mfpu=crypto-neon-fp-armv8
+endif
+ifdef WOLFSSL_ARM_ASM_MARCH
+$(obj)/wolfcrypt/src/port/arm/%.o: asflags-y := $(WOLFSSL_ASFLAGS) $(WOLFSSL_ARM_ASM_MARCH)
+$(obj)/wolfcrypt/src/port/arm/%.o: OBJECT_FILES_NON_STANDARD := y
+endif
+
ifndef READELF
READELF := readelf
endif
@@ -339,7 +371,7 @@ RENAME_PIE_TEXT_AND_DATA_SECTIONS := \
next; \
} \
else if ($$4 == "OBJECT") { \
- if (! ($$7 in wolfcrypt_data_sections)) { \
+ if (! ($$7 in wolfcrypt_data_sections) && ! ($$7 in wolfcrypt_text_sections)) { \
if ((other_sections[$$7] == ".printk_index") || \
(($$8 ~ /^_entry\.[0-9]+$$|^kernel_read_file_str$$/) && \
(other_sections[$$7] == ".data.rel.ro.local"))) \
diff --git a/linuxkm/Makefile b/linuxkm/Makefile
index 24a867b935..cccddf0cf5 100644
--- a/linuxkm/Makefile
+++ b/linuxkm/Makefile
@@ -45,7 +45,14 @@ ifndef SRC_TOP
SRC_TOP=$(shell dirname $(MODULE_TOP))
endif
-WOLFSSL_CFLAGS=-DHAVE_CONFIG_H -I$(SRC_TOP) -DBUILDING_WOLFSSL $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -Wno-declaration-after-statement -Wno-redundant-decls -DLIBWOLFSSL_GLOBAL_EXTRA_CFLAGS="\" $(KERNEL_EXTRA_CFLAGS)\""
+# -Wno-nested-externs: the kernel's _compiletime_assert (,
+# via atomic/per-CPU/printk-once macros in x86_vector_register_glue.c) emits an
+# "extern ... __compiletime_assert_N(void)" inside a function body, tripping
+# -Wnested-externs; with -Werror this breaks the i386 + AES-NI build (glue compiles
+# only when WOLFSSL_USE_SAVE_VECTOR_REGISTERS / a PAA is enabled). Strip it
+# build-wide like the other kernel-incompatible warnings; diagnostic-only, so object
+# code (and the FIPS in-core hash) stays byte-identical on every arch.
+WOLFSSL_CFLAGS=-DHAVE_CONFIG_H -I$(SRC_TOP) -DBUILDING_WOLFSSL $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -Wno-declaration-after-statement -Wno-redundant-decls -Wno-nested-externs -DLIBWOLFSSL_GLOBAL_EXTRA_CFLAGS="\" $(KERNEL_EXTRA_CFLAGS)\""
ifdef KERNEL_EXTRA_CFLAGS
WOLFSSL_CFLAGS += $(KERNEL_EXTRA_CFLAGS)
endif
@@ -55,7 +62,12 @@ endif
WOLFSSL_ASFLAGS=-DHAVE_CONFIG_H -I$(SRC_TOP) -DBUILDING_WOLFSSL $(AM_CCASFLAGS) $(CCASFLAGS)
-WOLFSSL_OBJ_FILES=$(patsubst %.lo, %.o, $(patsubst src/src_libwolfssl_la-%, src/%, $(patsubst src/libwolfssl_la-%, src/%, $(patsubst wolfcrypt/src/src_libwolfssl_la-%, wolfcrypt/src/%, $(src_libwolfssl_la_OBJECTS)))))
+# Strip libtool's per-target object prefix (src_libwolfssl_la-) so Kbuild sees the
+# real object names. The innermost patsubst covers wolfcrypt/src/port/arm/ asm
+# (armv8-*/armv8-32-*/thumb2-*), which the src/ and wolfcrypt/src/ patsubsts miss;
+# without it --enable-armasm kernel builds fail ("No rule to make target
+# .../src_libwolfssl_la-armv8-aes-asm.o").
+WOLFSSL_OBJ_FILES=$(patsubst %.lo, %.o, $(patsubst src/src_libwolfssl_la-%, src/%, $(patsubst src/libwolfssl_la-%, src/%, $(patsubst wolfcrypt/src/src_libwolfssl_la-%, wolfcrypt/src/%, $(patsubst wolfcrypt/src/port/arm/src_libwolfssl_la-%, wolfcrypt/src/port/arm/%, $(src_libwolfssl_la_OBJECTS))))))
ifeq "$(ENABLED_CRYPT_TESTS)" "yes"
WOLFSSL_OBJ_FILES+=wolfcrypt/test/test.o
@@ -160,7 +172,7 @@ GENERATE_RELOC_TAB := $(AWK) ' \
function open_seg(seg) { \
seen_seg[seg] = 1; \
printf("%s\n ", \
- "WOLFSSL_LOCAL const struct wc_reloc_table_ent wc_linuxkm_pie_" seg "_reloc_tab[] = { "); \
+ "WOLFSSL_LOCAL const struct wc_reloc_table_ent wc_linuxkm_pie_" seg "_reloc_tab[] = {"); \
cur_seg = seg; \
} \
function close_cur_seg() { \
@@ -197,7 +209,7 @@ GENERATE_RELOC_TAB := $(AWK) ' \
next; \
} \
/^0/ { \
- if ($$3 !~ "^(R_X86_.*|R_AARCH64_.*|R_ARM.*)$$") { \
+ if ($$3 !~ "^(R_X86_.*|R_386_.*|R_AARCH64_.*|R_ARM.*)$$") { \
print "Unexpected relocation type in " cur_seg ":\n" $$0 >"/dev/stderr"; \
++bad_relocs; \
} \
@@ -361,12 +373,12 @@ module-update-fips-hash: $(LIBWOLFSSL_NAME).ko
readarray -t verifyCore_attrs < <($(READELF) --wide --symbols "$<" | \
sed -E -n 's/^[[:space:]]*[0-9]+: ([0-9a-fA-F]+)[[:space:]]+([0-9]+)[[:space:]]+OBJECT[[:space:]]+[A-Z]+[[:space:]]+[A-Z]+[[:space:]]+'"$${rodata_segment[0]}"'[[:space:]]+verifyCore$$/\1\n\2/p'); \
if [[ $${#verifyCore_attrs[@]} != 2 ]]; then echo ' unexpected verifyCore_attrs.' >&2; exit 1; fi; \
- if [[ "$${verifyCore_attrs[1]}" != "65" ]]; then echo " verifyCore has unexpected length $${verifyCore_attrs[1]}." >&2; exit 1; fi; \
+ if [[ "$${verifyCore_attrs[1]}" != "129" ]]; then echo " verifyCore has unexpected length $${verifyCore_attrs[1]}." >&2; exit 1; fi; \
verifyCore_offset=$$((0x$${rodata_segment[1]} + 0x$${verifyCore_attrs[0]})); \
- current_verifyCore=$$(dd bs=1 if="$<" skip=$$verifyCore_offset count=64 status=none); \
+ current_verifyCore=$$(dd bs=1 if="$<" skip=$$verifyCore_offset count=128 status=none); \
if [[ ! "$$current_verifyCore" =~ [0-9a-fA-F]{64} ]]; then echo " verifyCore at offset $$verifyCore_offset has unexpected value." >&2; exit 1; fi; \
if [[ '$(FIPS_HASH)' == "$$current_verifyCore" ]]; then echo ' Supplied FIPS_HASH matches existing verifyCore -- no update needed.'; exit 0; fi; \
- echo -n '$(FIPS_HASH)' | dd bs=1 conv=notrunc of="$<" seek=$$verifyCore_offset count=64 status=none && \
+ echo -n '$(FIPS_HASH)' | dd bs=1 conv=notrunc of="$<" seek=$$verifyCore_offset count=128 status=none && \
echo " FIPS verifyCore updated successfully." && \
if [[ -f '$(LIBWOLFSSL_NAME).ko.signed' ]]; then $(MAKE) $(QFLAG) --no-print-directory --no-silent -C . '$(LIBWOLFSSL_NAME).ko.signed'; fi
diff --git a/linuxkm/linuxkm_memory.c b/linuxkm/linuxkm_memory.c
index 2f1b75e112..fc81280df1 100644
--- a/linuxkm/linuxkm_memory.c
+++ b/linuxkm/linuxkm_memory.c
@@ -52,6 +52,8 @@ static const struct reloc_layout_ent {
[WC_R_X86_64_64] = { "R_X86_64_64", ~0UL, 64, .is_signed = 0, .is_relative = 0 },
[WC_R_X86_64_PC32] = { "R_X86_64_PC32", ~0UL, 32, .is_signed = 1, .is_relative = 1 },
[WC_R_X86_64_PLT32] = { "R_X86_64_PLT32", ~0UL, 32, .is_signed = 1, .is_relative = 1 },
+ [WC_R_386_32] = { "R_386_32", ~0UL, 32, .is_signed = 0, .is_relative = 0 },
+ [WC_R_386_PC32] = { "R_386_PC32", ~0UL, 32, .is_signed = 1, .is_relative = 1 },
[WC_R_AARCH64_ABS32] = { "R_AARCH64_ABS32", ~0UL, 32, .is_signed = 1, .is_relative = 0, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 },
[WC_R_AARCH64_ABS64] = { "R_AARCH64_ABS64", ~0UL, 64, .is_signed = 1, .is_relative = 0, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 },
[WC_R_AARCH64_ADD_ABS_LO12_NC] = { "R_AARCH64_ADD_ABS_LO12_NC", 0b00000000001111111111110000000000, 32, .is_signed = 0, .is_relative = 0, .is_pages = 0, .is_pair_lo = 1, .is_pair_hi = 0 },
@@ -64,6 +66,10 @@ static const struct reloc_layout_ent {
[WC_R_AARCH64_LDST64_ABS_LO12_NC] = { "R_AARCH64_LDST64_ABS_LO12_NC", 0b00000000001111111111110000000000, 32, .is_signed = 0, .is_relative = 0, .is_pages = 0, .is_pair_lo = 1, .is_pair_hi = 0 },
[WC_R_AARCH64_PREL32] = { "R_AARCH64_PREL32", ~0UL, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 },
[WC_R_ARM_ABS32] = { "R_ARM_ABS32", ~0UL, 32, .is_signed = 0, .is_relative = 0, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 },
+ /* ARM-mode BL/B: signed 24-bit word offset in bits [23:0] (cf. AARCH64_CALL26's
+ * 26-bit field). Emitted by the arm32 ARM-mode (non-Thumb) kernel module build. */
+ [WC_R_ARM_CALL] = { "R_ARM_CALL", 0b00000000111111111111111111111111, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 },
+ [WC_R_ARM_JUMP24] = { "R_ARM_JUMP24", 0b00000000111111111111111111111111, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 },
[WC_R_ARM_PREL31] = { "R_ARM_PREL31", 0b01111111111111111111111111111111, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 },
[WC_R_ARM_REL32] = { "R_ARM_REL32", ~0UL, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 },
[WC_R_ARM_THM_CALL] = { "R_ARM_THM_CALL", 0b00000111111111110010111111111111, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 },
@@ -366,6 +372,12 @@ ssize_t wc_reloc_normalize_segment(
case WC_R_X86_64_32:
case WC_R_X86_64_32S:
case WC_R_X86_64_64:
+ /* i386 reuses the x86_64 path: math is driven by
+ * layout->is_relative/is_signed and stays width-correct via
+ * uintptr_t (32-bit on i386). R_386_32 is absolute
+ * (is_relative=0); R_386_PC32 is PC-relative (is_relative=1). */
+ case WC_R_386_32:
+ case WC_R_386_PC32:
if (dest_seg != WC_R_SEG_OTHER) {
#ifdef DEBUG_LINUXKM_PIE_SUPPORT
@@ -413,6 +425,8 @@ ssize_t wc_reloc_normalize_segment(
break;
case WC_R_ARM_ABS32:
+ case WC_R_ARM_CALL:
+ case WC_R_ARM_JUMP24:
case WC_R_ARM_PREL31:
case WC_R_ARM_REL32:
case WC_R_ARM_THM_CALL:
diff --git a/linuxkm/linuxkm_memory.h b/linuxkm/linuxkm_memory.h
index 76e681da80..1c8ef3f662 100644
--- a/linuxkm/linuxkm_memory.h
+++ b/linuxkm/linuxkm_memory.h
@@ -40,6 +40,11 @@ enum wc_reloc_type {
WC_R_X86_64_64,
WC_R_X86_64_PC32,
WC_R_X86_64_PLT32,
+ /* 32-bit x86 (i386): with NO_PIE_FLAG the container emits only R_386_32
+ * (absolute) and R_386_PC32 (PC-relative), equivalent to R_X86_64_32 /
+ * R_X86_64_PC32 and sharing their canonicalization case below. */
+ WC_R_386_32,
+ WC_R_386_PC32,
WC_R_AARCH64_ABS32,
WC_R_AARCH64_ABS64,
WC_R_AARCH64_ADD_ABS_LO12_NC,
@@ -52,6 +57,8 @@ enum wc_reloc_type {
WC_R_AARCH64_LDST64_ABS_LO12_NC,
WC_R_AARCH64_PREL32,
WC_R_ARM_ABS32,
+ WC_R_ARM_CALL,
+ WC_R_ARM_JUMP24,
WC_R_ARM_PREL31,
WC_R_ARM_REL32,
WC_R_ARM_THM_CALL,
diff --git a/linuxkm/linuxkm_wc_port.h b/linuxkm/linuxkm_wc_port.h
index 0e64c14be7..5f06206cc5 100644
--- a/linuxkm/linuxkm_wc_port.h
+++ b/linuxkm/linuxkm_wc_port.h
@@ -712,8 +712,11 @@
#define WOLFSSL_USE_SAVE_VECTOR_REGISTERS
#endif
+ /* x86 (kernel_fpu_*) and ARM/ARM64 (kernel_neon_*) share the arch-neutral
+ * tracker in x86_vector_register_glue.c; the glue keeps its wc_*_x86 names
+ * on all arches (outside-boundary glue via the PIE redirect table). */
#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && \
- defined(CONFIG_X86)
+ (defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64))
extern __must_check int allocate_wolfcrypt_linuxkm_fpu_states(void);
extern void free_wolfcrypt_linuxkm_fpu_states(void);
@@ -721,18 +724,23 @@
WOLFSSL_API __must_check int wc_save_vector_registers_x86(enum wc_svr_flags flags);
WOLFSSL_API void wc_restore_vector_registers_x86(enum wc_svr_flags flags);
- #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
- #include
- #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
- /* added by a62b01cd6c */
- #include
- #endif
- #else
- #include
- #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
- /* added by 266d051601 */
- #include
+ #ifdef CONFIG_X86
+ #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+ #include
+ #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+ /* added by a62b01cd6c */
+ #include
+ #endif
+ #else
+ #include
+ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+ /* added by 266d051601 */
+ #include
+ #endif
#endif
+ #else /* CONFIG_ARM || CONFIG_ARM64 */
+ #include /* may_use_simd() */
+ #include /* kernel_neon_begin() / kernel_neon_end() */
#endif
#ifndef CAN_SAVE_VECTOR_REGISTERS
#define CAN_SAVE_VECTOR_REGISTERS() wc_can_save_vector_registers_x86()
@@ -763,42 +771,6 @@
#define REENABLE_VECTOR_REGISTERS() wc_restore_vector_registers_x86(WC_SVR_FLAG_INHIBIT)
#endif
- #elif defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
-
- #error kernel module ARM SIMD is not yet tested or usable.
-
- #include
-
- static WARN_UNUSED_RESULT inline int save_vector_registers_arm(void)
- {
- preempt_disable();
- if (! may_use_simd()) {
- preempt_enable();
- return BAD_STATE_E;
- } else {
- fpsimd_preserve_current_state();
- return 0;
- }
- }
- static inline void restore_vector_registers_arm(void)
- {
- fpsimd_restore_current_state();
- preempt_enable();
- }
-
- #ifndef SAVE_VECTOR_REGISTERS
- #define SAVE_VECTOR_REGISTERS(fail_clause) { int _svr_ret = save_vector_registers_arm(); if (_svr_ret != 0) { fail_clause } }
- #endif
- #ifndef SAVE_VECTOR_REGISTERS2
- #define SAVE_VECTOR_REGISTERS2() save_vector_registers_arm()
- #endif
- #ifndef CAN_SAVE_VECTOR_REGISTERS
- #define CAN_SAVE_VECTOR_REGISTERS() can_save_vector_registers_arm()
- #endif
- #ifndef RESTORE_VECTOR_REGISTERS
- #define RESTORE_VECTOR_REGISTERS() restore_vector_registers_arm()
- #endif
-
#elif defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS)
#error WOLFSSL_USE_SAVE_VECTOR_REGISTERS is set for an unimplemented architecture.
#endif /* WOLFSSL_USE_SAVE_VECTOR_REGISTERS */
@@ -1008,6 +980,20 @@
extern int memcmp(const void *s1, const void *s2, size_t n);
#endif
+#ifdef CONFIG_X86_32
+ /* arch/x86/include/asm/string_32.h #defines memcpy/memcmp/memset as
+ * __builtin_* macros (x86_64's string_64.h uses plain functions, so this
+ * does not arise on K2). Left active they expand inside the PIE
+ * redirect-table member declarations below ("typeof(memcmp) *memcmp;" ->
+ * "... *__builtin_memcmp;"), dropping those members and breaking the
+ * WC_PIE_INDIRECT_SYM(memcmp) lookups. #undef before the struct;
+ * string_32.h still declares the functions so typeof() and the canonical
+ * names resolve. Mirrors the CONFIG_MIPS handling just above. */
+ #undef memcpy
+ #undef memcmp
+ #undef memset
+#endif
+
struct wolfssl_linuxkm_pie_redirect_table {
#ifdef HAVE_FIPS
typeof(wc_linuxkm_normalize_relocations) *wc_linuxkm_normalize_relocations;
@@ -1144,13 +1130,13 @@
#ifdef WOLFSSL_USE_SAVE_VECTOR_REGISTERS
- #ifdef CONFIG_X86
+ #if defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)
typeof(allocate_wolfcrypt_linuxkm_fpu_states) *allocate_wolfcrypt_linuxkm_fpu_states;
typeof(wc_can_save_vector_registers_x86) *wc_can_save_vector_registers_x86;
typeof(free_wolfcrypt_linuxkm_fpu_states) *free_wolfcrypt_linuxkm_fpu_states;
typeof(wc_restore_vector_registers_x86) *wc_restore_vector_registers_x86;
typeof(wc_save_vector_registers_x86) *wc_save_vector_registers_x86;
- #else /* !CONFIG_X86 */
+ #else
#error WOLFSSL_USE_SAVE_VECTOR_REGISTERS is set for an unimplemented architecture.
#endif /* arch */
@@ -1501,7 +1487,8 @@
#undef get_current
#define get_current WC_PIE_INDIRECT_SYM(get_current)
- #if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86)
+ #if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && \
+ (defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64))
#define allocate_wolfcrypt_linuxkm_fpu_states WC_PIE_INDIRECT_SYM(allocate_wolfcrypt_linuxkm_fpu_states)
#define wc_can_save_vector_registers_x86 WC_PIE_INDIRECT_SYM(wc_can_save_vector_registers_x86)
#define free_wolfcrypt_linuxkm_fpu_states WC_PIE_INDIRECT_SYM(free_wolfcrypt_linuxkm_fpu_states)
@@ -1831,7 +1818,7 @@
#if !defined(BUILDING_WOLFSSL)
/* some caller code needs these. */
#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS)
- #if defined(CONFIG_X86)
+ #if defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)
WOLFSSL_API __must_check int wc_can_save_vector_registers_x86(void);
WOLFSSL_API __must_check int wc_save_vector_registers_x86(enum wc_svr_flags flags);
WOLFSSL_API void wc_restore_vector_registers_x86(enum wc_svr_flags flags);
@@ -1841,9 +1828,9 @@
#ifndef REENABLE_VECTOR_REGISTERS
#define REENABLE_VECTOR_REGISTERS() wc_restore_vector_registers_x86(WC_SVR_FLAG_INHIBIT)
#endif
- #else /* !CONFIG_X86 */
+ #else
#error WOLFSSL_USE_SAVE_VECTOR_REGISTERS is set for an unimplemented architecture.
- #endif /* !CONFIG_X86 */
+ #endif
#endif /* WOLFSSL_USE_SAVE_VECTOR_REGISTERS */
#ifdef WC_LINUXKM_USE_HEAP_WRAPPERS
WOLFSSL_API extern void *wc_linuxkm_malloc(size_t size);
diff --git a/linuxkm/module_hooks.c b/linuxkm/module_hooks.c
index 55fc70e0a7..1ed332be51 100644
--- a/linuxkm/module_hooks.c
+++ b/linuxkm/module_hooks.c
@@ -527,7 +527,9 @@ int wc_linuxkm_GenerateSeed_IntelRD(struct OS_Seed* os, byte* output, word32 sz)
#endif /* WC_LINUXKM_RDSEED_IN_GLUE_LAYER */
-#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86)
+#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && \
+ (defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64))
+ /* arch-generic save/restore tracker (kernel_fpu_* on x86, kernel_neon_* on ARM) */
#include "linuxkm/x86_vector_register_glue.c"
#endif
@@ -1518,7 +1520,8 @@ static int set_up_wolfssl_linuxkm_pie_redirect_table(void) {
wolfssl_linuxkm_pie_redirect_table.get_current = my_get_current_thread;
-#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86)
+#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && \
+ (defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64))
wolfssl_linuxkm_pie_redirect_table.allocate_wolfcrypt_linuxkm_fpu_states = allocate_wolfcrypt_linuxkm_fpu_states;
wolfssl_linuxkm_pie_redirect_table.wc_can_save_vector_registers_x86 = wc_can_save_vector_registers_x86;
wolfssl_linuxkm_pie_redirect_table.free_wolfcrypt_linuxkm_fpu_states = free_wolfcrypt_linuxkm_fpu_states;
@@ -2047,7 +2050,10 @@ static ssize_t FIPS_optest_trig_handler(struct kobject *kobj, struct kobj_attrib
int ret;
int argc;
const char *argv[3];
- char code_buf[5];
+ /* Textual sysfs error code + NUL, plus headroom. Fits the v7.0.0 5-char
+ * codes (-1015 ML_KEM_PCT_E, -1016 ML_DSA_PCT_E, -1017
+ * DRBG_SHA512_KAT_FIPS_E) that the old [5] rejected via the guard below. */
+ char code_buf[8];
size_t corrected_count;
int i;
@@ -2063,7 +2069,7 @@ static ssize_t FIPS_optest_trig_handler(struct kobject *kobj, struct kobj_attrib
corrected_count = count - 1;
else
corrected_count = count;
- if ((corrected_count < 1) || (corrected_count > 4))
+ if ((corrected_count < 1) || (corrected_count > (sizeof(code_buf) - 1)))
return -EINVAL;
XMEMCPY(code_buf, buf, corrected_count);
code_buf[corrected_count] = 0;
diff --git a/linuxkm/pie_redirect_table.c b/linuxkm/pie_redirect_table.c
index 03be2e04fa..1211c7f9d6 100644
--- a/linuxkm/pie_redirect_table.c
+++ b/linuxkm/pie_redirect_table.c
@@ -53,8 +53,15 @@ const struct wolfssl_linuxkm_pie_redirect_table
return &wolfssl_linuxkm_pie_redirect_table;
}
-/* placeholder implementations for missing functions. */
-#if defined(CONFIG_MIPS)
+/* placeholder implementations for missing functions.
+ *
+ * ARM/ARM64 need these like MIPS: --enable-armasm omits -mgeneral-regs-only,
+ * so gcc auto-emits raw memcpy/memset libcalls for aggregate copies in the
+ * PIE FIPS container. WC_PIE_INDIRECT_SYM only redirects source-level
+ * XMEMCPY/XMEMSET, not compiler-emitted libcalls, and the in-core integrity
+ * check forbids ANY undefined symbol, so define them here. (The pure-C C1
+ * build does not auto-vectorize and never references these.) */
+#if defined(CONFIG_MIPS) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)
#undef memcpy
void *memcpy(void *dest, const void *src, size_t n) {
char *dest_i = (char *)dest;
@@ -74,3 +81,68 @@ const struct wolfssl_linuxkm_pie_redirect_table
return dest;
}
#endif
+
+#if defined(CONFIG_ARM)
+ /* 32-bit ARM's baseline ISA has no integer-divide, so gcc emits these EABI
+ * helpers for '/' and '%'. The kernel exports them
+ * (arch/arm/lib/lib1funcs.S), but the self-contained PIE FIPS container may
+ * not reference external symbols (in-core integrity forbids ANY undefined
+ * symbol), so provide them here. Restoring (bit-at-a-time) division --
+ * correctness over speed; crypto-path divisions are on small
+ * sizes/indices. Per the EABI, __aeabi_*idivmod return a little-endian
+ * 64-bit value: quotient in r0 (low word), remainder in r1 (high word). */
+ unsigned int __aeabi_uidiv(unsigned int n, unsigned int d);
+ unsigned int __aeabi_uidiv(unsigned int n, unsigned int d) {
+ unsigned int q = 0, r = 0;
+ int i;
+ if (d == 0)
+ return ~0u;
+ for (i = 31; i >= 0; i--) {
+ r = (r << 1) | ((n >> i) & 1u);
+ if (r >= d) {
+ r -= d;
+ q |= (1u << i);
+ }
+ }
+ return q;
+ }
+
+ unsigned long long __aeabi_uidivmod(unsigned int n, unsigned int d);
+ unsigned long long __aeabi_uidivmod(unsigned int n, unsigned int d) {
+ unsigned int q = 0, r = 0;
+ int i;
+ if (d == 0)
+ return (unsigned long long)n << 32; /* quot=0, rem=n */
+ for (i = 31; i >= 0; i--) {
+ r = (r << 1) | ((n >> i) & 1u);
+ if (r >= d) {
+ r -= d;
+ q |= (1u << i);
+ }
+ }
+ return ((unsigned long long)r << 32) | q;
+ }
+
+ int __aeabi_idiv(int n, int d);
+ int __aeabi_idiv(int n, int d) {
+ int neg = (n < 0) ^ (d < 0);
+ unsigned int un = (n < 0) ? (unsigned int)(-(long)n) : (unsigned int)n;
+ unsigned int ud = (d < 0) ? (unsigned int)(-(long)d) : (unsigned int)d;
+ unsigned int uq = __aeabi_uidiv(un, ud);
+ return neg ? -(int)uq : (int)uq;
+ }
+
+ unsigned long long __aeabi_idivmod(int n, int d);
+ unsigned long long __aeabi_idivmod(int n, int d) {
+ int nneg = (n < 0);
+ int qneg = (n < 0) ^ (d < 0);
+ unsigned int un = nneg ? (unsigned int)(-(long)n) : (unsigned int)n;
+ unsigned int ud = (d < 0) ? (unsigned int)(-(long)d) : (unsigned int)d;
+ unsigned long long um = __aeabi_uidivmod(un, ud);
+ unsigned int uq = (unsigned int)um;
+ unsigned int ur = (unsigned int)(um >> 32);
+ int q = qneg ? -(int)uq : (int)uq;
+ int r = nneg ? -(int)ur : (int)ur;
+ return ((unsigned long long)(unsigned int)r << 32) | (unsigned int)q;
+ }
+#endif /* CONFIG_ARM */
diff --git a/linuxkm/x86_vector_register_glue.c b/linuxkm/x86_vector_register_glue.c
index e33c3d719e..51f508e1d2 100644
--- a/linuxkm/x86_vector_register_glue.c
+++ b/linuxkm/x86_vector_register_glue.c
@@ -23,8 +23,28 @@
/* included by linuxkm/module_hooks.c */
#ifndef WC_SKIP_INCLUDED_C_FILES
-#if !defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) || !defined(CONFIG_X86)
- #error x86_vector_register_glue.c included in non-vectorized/non-x86 project.
+#if !defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) || \
+ !(defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64))
+ #error vector register glue included in non-vectorized or unsupported-arch project.
+#endif
+
+/* The per-CPU tracker below is arch-neutral except for the call that
+ * claims/releases the SIMD/FP unit:
+ * x86 -> kernel_fpu_begin()/kernel_fpu_end() ( via
+ * in linuxkm_wc_port.h)
+ * ARM/ARM64 -> kernel_neon_begin()/kernel_neon_end() ()
+ * Both obey the same context rules the tracker enforces (may_use_simd(),
+ * hard-IRQ/NMI rejection, preempt/bh/migration disable). The wc_*_x86 names
+ * are kept: this glue lives OUTSIDE the FIPS module boundary (reached only via
+ * the PIE redirect table), so the validated x86 symbol set stays byte-for-byte
+ * unchanged. */
+#if defined(CONFIG_X86)
+ #define WC_LINUXKM_FPU_BEGIN() kernel_fpu_begin()
+ #define WC_LINUXKM_FPU_END() kernel_fpu_end()
+#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+ #include
+ #define WC_LINUXKM_FPU_BEGIN() kernel_neon_begin()
+ #define WC_LINUXKM_FPU_END() kernel_neon_end()
#endif
#ifdef WOLFSSL_LINUXKM_VERBOSE_DEBUG
@@ -70,9 +90,11 @@ WARN_UNUSED_RESULT int allocate_wolfcrypt_linuxkm_fpu_states(void)
wc_linuxkm_fpu_states_n_tracked * sizeof(wc_linuxkm_fpu_states[0]));
if (! wc_linuxkm_fpu_states) {
+ /* cast to unsigned long to match %lu: size_t is 32-bit on arm32 but
+ * 64-bit on x86_64/arm64, so the product type is arch-dependent. */
pr_err("ERROR: allocation of %lu bytes for "
"wc_linuxkm_fpu_states failed.\n",
- nr_cpu_ids * sizeof(wc_linuxkm_fpu_states[0]));
+ (unsigned long)(nr_cpu_ids * sizeof(wc_linuxkm_fpu_states[0])));
return MEMORY_E;
}
@@ -454,10 +476,10 @@ WARN_UNUSED_RESULT int wc_save_vector_registers_x86(enum wc_svr_flags flags)
#if IS_ENABLED(CONFIG_PREEMPT_RT)
preempt_disable();
#endif
- kernel_fpu_begin();
+ WC_LINUXKM_FPU_BEGIN();
pstate = wc_linuxkm_fpu_state_assoc(1, 1);
if (pstate == NULL) {
- kernel_fpu_end();
+ WC_LINUXKM_FPU_END();
#if IS_ENABLED(CONFIG_PREEMPT_RT)
preempt_enable();
#endif
@@ -521,7 +543,7 @@ void wc_restore_vector_registers_x86(enum wc_svr_flags flags)
if (pstate->fpu_state == 0U) {
wc_linuxkm_fpu_state_release(pstate);
- kernel_fpu_end();
+ WC_LINUXKM_FPU_END();
#if IS_ENABLED(CONFIG_PREEMPT_RT)
preempt_enable();
#endif
diff --git a/src/include.am b/src/include.am
index 632feb67c1..5b44bc34f8 100644
--- a/src/include.am
+++ b/src/include.am
@@ -111,6 +111,9 @@ if BUILD_AESNI
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S
if BUILD_X86_ASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S
+if BUILD_AESXTS
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_x86_asm.S
+endif
else
if BUILD_AESGCM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S
@@ -273,6 +276,9 @@ if BUILD_AESNI
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S
if BUILD_X86_ASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S
+if BUILD_AESXTS
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_x86_asm.S
+endif
else
if BUILD_AESGCM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S
@@ -604,6 +610,9 @@ if BUILD_AESNI
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S
if BUILD_X86_ASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S
+if BUILD_AESXTS
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_x86_asm.S
+endif
else
if BUILD_AESGCM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S
@@ -986,6 +995,9 @@ src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_x86_64_asm.S
if BUILD_X86_ASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S
+if BUILD_AESXTS
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_x86_asm.S
+endif
else
if BUILD_AESGCM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S
@@ -1932,6 +1944,9 @@ src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_x86_64_asm.S
if BUILD_X86_ASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S
+if BUILD_AESXTS
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_x86_asm.S
+endif
else
if BUILD_AESGCM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S
diff --git a/tests/api/test_aes.c b/tests/api/test_aes.c
index 72221cd04a..c55630b828 100644
--- a/tests/api/test_aes.c
+++ b/tests/api/test_aes.c
@@ -693,7 +693,12 @@ static int test_wc_AesCbcEncryptDecrypt_WithKey(Aes* aes, byte* key,
ExpectIntEQ(wc_AesCbcEncrypt(aes, cipher, vector, vector_len),
0);
ExpectBufEQ(cipher, vector_enc, vector_len);
-#ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS
+ /* BAD_LENGTH_E enforcement lives behind WOLFSSL_AES_CBC_LENGTH_CHECKS in
+ * non-FIPS aes.c. FIPSv2 (cert3389) uses wc_AesCbcEncrypt_fips, which
+ * predates the check and returns 0 on unaligned input; only v5.x+ carry
+ * the wrapper-level check. Skip the assertion for FIPSv2. */
+#if defined(WOLFSSL_AES_CBC_LENGTH_CHECKS) && \
+ (!defined(HAVE_FIPS) || FIPS_VERSION_GE(5,0))
ExpectIntEQ(wc_AesCbcEncrypt(aes, cipher, vector, vector_len - 1),
WC_NO_ERR_TRACE(BAD_LENGTH_E));
#endif
@@ -703,7 +708,9 @@ static int test_wc_AesCbcEncryptDecrypt_WithKey(Aes* aes, byte* key,
ExpectIntEQ(wc_AesCbcDecrypt(aes, decrypted, cipher,
WC_AES_BLOCK_SIZE * 2), 0);
ExpectBufEQ(decrypted, vector, vector_len);
-#ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS
+#if defined(WOLFSSL_AES_CBC_LENGTH_CHECKS) && \
+ (!defined(HAVE_FIPS) || FIPS_VERSION_GE(5,0))
+ /* Same FIPSv2 vs v5+ rationale as the encrypt assertion above. */
ExpectIntEQ(wc_AesCbcDecrypt(aes, decrypted, cipher,
WC_AES_BLOCK_SIZE * 2 - 1), WC_NO_ERR_TRACE(BAD_LENGTH_E));
#else
diff --git a/tests/api/test_evp_pkey.c b/tests/api/test_evp_pkey.c
index 9bdd5b9339..2e106d16d6 100644
--- a/tests/api/test_evp_pkey.c
+++ b/tests/api/test_evp_pkey.c
@@ -1526,7 +1526,7 @@ static int test_wolfSSL_EVP_PKEY_sign_verify(int keyType)
!defined(HAVE_SELFTEST)
#if !defined(HAVE_FIPS) || (defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION>2))
{
- ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL));
+ ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL));
ExpectIntEQ(EVP_PKEY_assign_RSA(pkey, rsa), WOLFSSL_SUCCESS);
}
#endif
@@ -2159,7 +2159,7 @@ int test_wolfSSL_EVP_PKEY_encrypt(void)
XMEMSET(outDec, 0, rsaKeySz);
}
- ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL));
+ ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL));
ExpectNotNull(pkey = wolfSSL_EVP_PKEY_new());
ExpectIntEQ(EVP_PKEY_assign_RSA(pkey, rsa), WOLFSSL_SUCCESS);
if (EXPECT_FAIL()) {
diff --git a/tests/api/test_ossl_rsa.c b/tests/api/test_ossl_rsa.c
index dc0cee665b..250d1df007 100644
--- a/tests/api/test_ossl_rsa.c
+++ b/tests/api/test_ossl_rsa.c
@@ -65,7 +65,7 @@ int test_wolfSSL_RSA(void)
RSA_free(rsa);
rsa = NULL;
- ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL));
+ ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL));
ExpectIntEQ(RSA_size(rsa), 256);
#if (!defined(HAVE_FIPS) || FIPS_VERSION3_GT(6,0,0)) && !defined(HAVE_SELFTEST)
@@ -306,7 +306,7 @@ int test_wolfSSL_RSA(void)
rsa = NULL;
#if !defined(USE_FAST_MATH) || (FP_MAX_BITS >= (3072*2))
- ExpectNotNull(rsa = RSA_generate_key(3072, 17, NULL, NULL));
+ ExpectNotNull(rsa = RSA_generate_key(3072, 65537, NULL, NULL));
ExpectIntEQ(RSA_size(rsa), 384);
ExpectIntEQ(RSA_bits(rsa), 3072);
RSA_free(rsa);
@@ -461,7 +461,7 @@ int test_wolfSSL_RSA_print(void)
RSA_free(rsa);
rsa = NULL;
- ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL));
+ ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL));
ExpectIntEQ(RSA_print(bio, rsa, 0), 1);
ExpectIntEQ(RSA_print(bio, rsa, 4), 1);
@@ -644,11 +644,11 @@ int test_wolfSSL_RSA_meth(void)
RSA_METHOD *rsa_meth = NULL;
#ifdef WOLFSSL_KEY_GEN
- ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL));
+ ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL));
RSA_free(rsa);
rsa = NULL;
#else
- ExpectNull(rsa = RSA_generate_key(2048, 3, NULL, NULL));
+ ExpectNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL));
#endif
ExpectNotNull(RSA_get_default_method());
diff --git a/tests/api/test_slhdsa.c b/tests/api/test_slhdsa.c
index 988bbc579e..922fc686da 100644
--- a/tests/api/test_slhdsa.c
+++ b/tests/api/test_slhdsa.c
@@ -1081,12 +1081,14 @@ int test_wc_slhdsa_sign_hash(void)
WC_HASH_TYPE_SHA256, sig, sigLen),
WC_NO_ERR_TRACE(BAD_LENGTH_E));
- /* Unsupported hashType (FIPS 205 doesn't list WC_HASH_TYPE_NONE) hits
- * the default branch of slhdsakey_validate_prehash. */
+ /* WC_HASH_TYPE_NONE (pure SLH-DSA sentinel) is never a valid pre-hash
+ * (FIPS 205 Section 10.2.2 / Table 9), so HashSLH-DSA signing rejects it
+ * with an explicit early check (BAD_FUNC_ARG), not via the
+ * slhdsa_check_hash_for_n() switch default. */
sigLen = WC_SLHDSA_MAX_SIG_LEN;
ExpectIntEQ(wc_SlhDsaKey_SignHash(&key, ctx, sizeof(ctx), hash, 32,
WC_HASH_TYPE_NONE, sig, &sigLen, &rng),
- WC_NO_ERR_TRACE(NOT_COMPILED_IN));
+ WC_NO_ERR_TRACE(BAD_FUNC_ARG));
/* Test SignHash with SHA-256. */
sigLen = WC_SLHDSA_MAX_SIG_LEN;
diff --git a/wolfcrypt/benchmark/fips_cast_bench.c b/wolfcrypt/benchmark/fips_cast_bench.c
new file mode 100644
index 0000000000..19b0d7c1bf
--- /dev/null
+++ b/wolfcrypt/benchmark/fips_cast_bench.c
@@ -0,0 +1,354 @@
+/* fips_cast_bench.c
+ *
+ * Copyright (C) 2006-2026 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* FIPS CAST benchmark.
+ *
+ * Measures the wall-clock cost of each Conditional Algorithm Self-Test (CAST)
+ * in the wolfCrypt v7.0.0 FIPS module, so operators can budget power-on
+ * latency on constrained OEs (DSP, MCU) where each CAST is a boot-time delay.
+ *
+ * Compiled only under HAVE_FIPS (include.am BUILD_FIPS gate). Runs
+ * wc_RunCast_fips(id) per CAST, reporting mean/stddev/min/max plus the total
+ * for one wc_RunAllCast_fips() pass (the cost callers pay at app start).
+ *
+ * Citations:
+ * FIPS 140-3 sec 7.10 (Self-Tests) - CAST framework
+ * FIPS 140-3 IG 10.3.A - Algorithm-by-algorithm CAST coverage
+ * ISO/IEC 19790:2012 sec 7.10.2 - Conditional self-test execution
+ */
+
+#ifdef HAVE_CONFIG_H
+ #include
+#endif
+
+#if !defined(WOLFSSL_USER_SETTINGS) && !defined(WOLFSSL_NO_OPTIONS_H)
+ #include
+#endif
+#include /* also picks up user_settings.h */
+
+/* wc_RunCast_fips() / wc_RunAllCast_fips() are v7.0.0-only; older 140-3
+ * modules (v5.x, v6.0.0) and FIPSv2 do not export them, so an older-flavor
+ * fips/ tree swapped in by fips-check.sh would fail to link. Gate on
+ * FIPS_VERSION3_GE(7,0,0); older flavors use the empty-main stub below so the
+ * build still produces an executable. */
+#if defined(HAVE_FIPS) && FIPS_VERSION3_GE(7,0,0)
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#ifdef _WIN32
+ #define WIN32_LEAN_AND_MEAN
+ #include
+#else
+ #include
+#endif
+
+
+#define BENCH_DEFAULT_ITERS 10
+
+/* Map FIPS_CAST_* enum value to a printable name. Kept in sync with
+ * wolfssl/wolfcrypt/fips_test.h FipsCastId enum. */
+static const char* cast_name(int id)
+{
+ switch (id) {
+ case FIPS_CAST_AES_CBC: return "AES-CBC";
+ case FIPS_CAST_AES_GCM: return "AES-GCM";
+ case FIPS_CAST_HMAC_SHA1: return "HMAC-SHA-1";
+ case FIPS_CAST_HMAC_SHA2_256: return "HMAC-SHA2-256";
+ case FIPS_CAST_HMAC_SHA2_512: return "HMAC-SHA2-512";
+ case FIPS_CAST_HMAC_SHA3_256: return "HMAC-SHA3-256";
+ case FIPS_CAST_DRBG: return "DRBG (SHA-256)";
+ case FIPS_CAST_RSA_SIGN_PKCS1v15: return "RSA-SIGN-PKCS1v15";
+ case FIPS_CAST_ECC_CDH: return "ECC-CDH";
+ case FIPS_CAST_ECC_PRIMITIVE_Z: return "ECC-Primitive-Z";
+ case FIPS_CAST_DH_PRIMITIVE_Z: return "DH-Primitive-Z";
+ case FIPS_CAST_ECDSA: return "ECDSA";
+ case FIPS_CAST_KDF_TLS12: return "KDF-TLS12";
+ case FIPS_CAST_KDF_TLS13: return "KDF-TLS13";
+ case FIPS_CAST_KDF_SSH: return "KDF-SSH";
+#if defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(6,0)
+ case FIPS_CAST_KDF_SRTP: return "KDF-SRTP";
+ case FIPS_CAST_ED25519: return "Ed25519";
+ case FIPS_CAST_ED448: return "Ed448";
+ case FIPS_CAST_PBKDF2: return "PBKDF2";
+#endif
+#if defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(7,0)
+ case FIPS_CAST_AES_ECB: return "AES-ECB";
+ case FIPS_CAST_ML_KEM: return "ML-KEM";
+ case FIPS_CAST_ML_DSA: return "ML-DSA";
+ case FIPS_CAST_LMS: return "LMS";
+ case FIPS_CAST_XMSS: return "XMSS";
+ case FIPS_CAST_DRBG_SHA512: return "DRBG (SHA-512)";
+ case FIPS_CAST_SLH_DSA: return "SLH-DSA";
+ case FIPS_CAST_AES_CMAC: return "AES-CMAC";
+ case FIPS_CAST_SHAKE: return "SHAKE";
+ case FIPS_CAST_AES_KW: return "AES-KW";
+#endif
+ default: return "(unknown)";
+ }
+}
+
+
+/* Monotonic clock in nanoseconds. POSIX clock_gettime(CLOCK_MONOTONIC) on
+ * Unix-like systems; QueryPerformanceCounter on Windows. */
+static long long now_ns(void)
+{
+#ifdef _WIN32
+ static LARGE_INTEGER freq = { 0 };
+ LARGE_INTEGER count;
+ if (freq.QuadPart == 0)
+ QueryPerformanceFrequency(&freq);
+ QueryPerformanceCounter(&count);
+ /* Multiply before divide to keep precision; freq is typically 10MHz. */
+ return (long long)((count.QuadPart * 1000000000LL) / freq.QuadPart);
+#else
+ struct timespec ts;
+ if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0)
+ return 0;
+ return (long long)ts.tv_sec * 1000000000LL + (long long)ts.tv_nsec;
+#endif
+}
+
+
+/* Run a single CAST iters times, populate stats (in milliseconds).
+ * Returns 0 on success, non-zero on first CAST failure. */
+static int run_one_cast(int id, int iters,
+ double* out_mean_ms, double* out_stddev_ms,
+ double* out_min_ms, double* out_max_ms)
+{
+ int i;
+ long long total = 0;
+ long long mn = LLONG_MAX;
+ long long mx = 0;
+ long long* samples;
+ double mean_ns;
+ double variance_acc = 0.0;
+
+ if (iters <= 0)
+ return BAD_FUNC_ARG;
+
+ samples = (long long*)XMALLOC((size_t)iters * sizeof(long long), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (samples == NULL)
+ return MEMORY_E;
+
+ for (i = 0; i < iters; i++) {
+ long long t0, t1, dt;
+ int rc;
+
+ t0 = now_ns();
+ rc = wc_RunCast_fips(id);
+ t1 = now_ns();
+ if (rc != 0) {
+ XFREE(samples, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ return rc;
+ }
+ dt = t1 - t0;
+ if (dt < 0)
+ dt = 0;
+ samples[i] = dt;
+ total += dt;
+ if (dt < mn)
+ mn = dt;
+ if (dt > mx)
+ mx = dt;
+ }
+
+ mean_ns = (double)total / (double)iters;
+ for (i = 0; i < iters; i++) {
+ double d = (double)samples[i] - mean_ns;
+ variance_acc += d * d;
+ }
+ XFREE(samples, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+ *out_mean_ms = mean_ns / 1.0e6;
+ *out_stddev_ms = sqrt(variance_acc / (double)iters) / 1.0e6;
+ *out_min_ms = (double)mn / 1.0e6;
+ *out_max_ms = (double)mx / 1.0e6;
+ return 0;
+}
+
+
+static void usage(const char* prog)
+{
+ printf("usage: %s [-i ITERS] [-c CAST_ID] [-l]\n", prog);
+ printf(" -i ITERS iterations per CAST (default %d)\n",
+ BENCH_DEFAULT_ITERS);
+ printf(" -c CAST_ID benchmark only the named CAST id\n");
+ printf(" -l list CAST ids and names; do not run\n");
+ printf(" -h show this help\n");
+}
+
+
+int main(int argc, char** argv)
+{
+ int iters = BENCH_DEFAULT_ITERS;
+ int single = -1;
+ int list_only = 0;
+ int i;
+ int first, last;
+ int failures = 0;
+ int run_count = 0;
+ double total_mean_ms = 0.0;
+
+ for (i = 1; i < argc; i++) {
+ if (XSTRCMP(argv[i], "-i") == 0 && i + 1 < argc) {
+ iters = atoi(argv[++i]);
+ if (iters <= 0) {
+ fprintf(stderr, "-i requires a positive iteration count\n");
+ return 2;
+ }
+ } else if (XSTRCMP(argv[i], "-c") == 0 && i + 1 < argc) {
+ single = atoi(argv[++i]);
+ } else if (XSTRCMP(argv[i], "-l") == 0) {
+ list_only = 1;
+ } else if (XSTRCMP(argv[i], "-h") == 0
+ || XSTRCMP(argv[i], "--help") == 0) {
+ usage(argv[0]);
+ return 0;
+ } else {
+ fprintf(stderr, "unknown argument: %s\n", argv[i]);
+ usage(argv[0]);
+ return 2;
+ }
+ }
+
+ if (list_only) {
+ printf("FIPS CAST IDs (FIPS_CAST_COUNT = %d):\n", FIPS_CAST_COUNT);
+ for (i = 0; i < FIPS_CAST_COUNT; i++)
+ printf(" %2d %s\n", i, cast_name(i));
+ return 0;
+ }
+
+ if (single >= 0 && single >= FIPS_CAST_COUNT) {
+ fprintf(stderr, "CAST id %d out of range (0..%d)\n",
+ single, FIPS_CAST_COUNT - 1);
+ return 2;
+ }
+
+ printf("wolfCrypt FIPS CAST benchmark\n");
+ printf("Library version: %s\n", LIBWOLFSSL_VERSION_STRING);
+ printf("FIPS_CAST_COUNT: %d\n", FIPS_CAST_COUNT);
+ printf("Iterations per CAST: %d\n", iters);
+ printf("Clock: %s\n",
+#ifdef _WIN32
+ "QueryPerformanceCounter"
+#else
+ "clock_gettime(CLOCK_MONOTONIC)"
+#endif
+ );
+ printf("\n");
+
+ /* Register the default DRBG seed callback (mirrors benchmark.c and
+ * wolfcrypt/test/test.c). Under WC_RNG_SEED_CB (set by the FIPS optest
+ * CFLAGS) the RNG needs a seed generator before _InitRng can build a
+ * working DRBG; without it, wc_InitRng in the ECC_PRIMITIVE_Z and ECDSA
+ * CASTs returns -199 (RNG_FAILURE_E) and dependent CASTs cascade-fail. */
+#ifdef WC_RNG_SEED_CB
+ {
+ int seed_cb_rc = wc_SetSeed_Cb(WC_GENERATE_SEED_DEFAULT);
+ if (seed_cb_rc != 0) {
+ fprintf(stderr,
+ "wc_SetSeed_Cb returned %d - DRBG-using CASTs will fail.\n",
+ seed_cb_rc);
+ }
+ }
+#endif
+
+ /* Prime every CAST once via wc_RunAllCast_fips() so each reaches
+ * FIPS_CAST_STATE_SUCCESS before measuring. This isolates per-CAST KAT
+ * runtime from the recursive-CAST init chain a cold CAST triggers when
+ * its KAT calls FIPS primitives whose own CASTs are still in INIT.
+ * Customers calling wc_RunAllCast_fips() at boot pay this once, so
+ * priming matches that real-world workflow. */
+ {
+ int prime_rc = wc_RunAllCast_fips();
+ if (prime_rc != 0) {
+ fprintf(stderr,
+ "wc_RunAllCast_fips() prime returned %d - some CASTs may have failed.\n"
+ "Per-CAST measurements continue but failed CASTs will report errors.\n\n",
+ prime_rc);
+ }
+ }
+
+ printf("ID | Name | Mean(ms) | StdDev(ms) | Min(ms) "
+ "| Max(ms)\n");
+ printf("---+---------------------+----------+------------+---------"
+ "+---------\n");
+
+ first = (single >= 0) ? single : 0;
+ last = (single >= 0) ? single + 1 : FIPS_CAST_COUNT;
+
+ for (i = first; i < last; i++) {
+ double mean_ms = 0, sd_ms = 0, mn_ms = 0, mx_ms = 0;
+ int rc = run_one_cast(i, iters, &mean_ms, &sd_ms, &mn_ms, &mx_ms);
+ if (rc != 0) {
+ printf("%2d | %-19s | FAILED rc=%d (%s)\n",
+ i, cast_name(i), rc, wc_GetErrorString(rc));
+ failures++;
+ continue;
+ }
+ printf("%2d | %-19s | %8.3f | %10.3f | %7.3f | %7.3f\n",
+ i, cast_name(i), mean_ms, sd_ms, mn_ms, mx_ms);
+ total_mean_ms += mean_ms;
+ run_count++;
+ }
+
+ printf("\n");
+ if (run_count > 0) {
+ printf("Sum of mean CAST times (one wc_RunAllCast_fips() pass): "
+ "%.3f ms\n", total_mean_ms);
+ }
+ if (failures > 0) {
+ printf("WARN: %d CAST(s) failed.\n", failures);
+ return 1;
+ }
+ return 0;
+}
+
+#else /* !(HAVE_FIPS && FIPS_VERSION3_GE(7,0,0)) */
+
+#include
+
+int main(void)
+{
+#ifndef HAVE_FIPS
+ fprintf(stderr,
+ "fips_cast_bench: built without HAVE_FIPS - nothing to measure\n");
+#else
+ fprintf(stderr,
+ "fips_cast_bench: requires v7.0.0+ FIPS module "
+ "(wc_RunCast_fips / wc_RunAllCast_fips were added in v7) - "
+ "nothing to measure on this older module flavor\n");
+#endif
+ return 0;
+}
+
+#endif /* HAVE_FIPS && FIPS_VERSION3_GE(7,0,0) */
diff --git a/wolfcrypt/benchmark/include.am b/wolfcrypt/benchmark/include.am
index 22cecbdaef..130343a14e 100644
--- a/wolfcrypt/benchmark/include.am
+++ b/wolfcrypt/benchmark/include.am
@@ -10,6 +10,16 @@ wolfcrypt_benchmark_benchmark_LDADD = src/libwolfssl@LIBSUFFIX@.la $(LIB_
wolfcrypt_benchmark_benchmark_DEPENDENCIES = src/libwolfssl@LIBSUFFIX@.la
noinst_HEADERS += wolfcrypt/benchmark/benchmark.h
+# FIPS CAST benchmark - measures wc_RunCast_fips() execution time per CAST.
+# Helps operators of resource-constrained operational environments budget
+# module power-on latency. Compiled only when FIPS is enabled.
+if BUILD_FIPS
+noinst_PROGRAMS += wolfcrypt/benchmark/fips_cast_bench
+wolfcrypt_benchmark_fips_cast_bench_SOURCES = wolfcrypt/benchmark/fips_cast_bench.c
+wolfcrypt_benchmark_fips_cast_bench_LDADD = src/libwolfssl@LIBSUFFIX@.la $(LIB_STATIC_ADD) -lm
+wolfcrypt_benchmark_fips_cast_bench_DEPENDENCIES = src/libwolfssl@LIBSUFFIX@.la
+endif
+
endif
endif
diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c
index 80b94efa14..11bb62be64 100644
--- a/wolfcrypt/src/aes.c
+++ b/wolfcrypt/src/aes.c
@@ -138,6 +138,15 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
#include
+/* Dedicated GCM (PCLMUL/GHASH) asm is x86_64-only: the 32-bit aes_gcm_x86_asm.S
+ * is not position-independent and its .text relocations break the FIPS module
+ * in-core integrity in a shared object. 32-bit x86 GCM uses portable-C GHASH
+ * with AES-NI block encryption -- mirrors x86_64-gating of the GCM-AVX path.
+ * Kaleb 2026-06-17; see memory 32bit-x86-aesni-enablement. */
+#if defined(WOLFSSL_AESNI) && defined(WOLFSSL_X86_64_BUILD)
+ #define WC_AESNI_GCM
+#endif
+
#ifdef WOLF_CRYPTO_CB
#include
#endif
@@ -1161,6 +1170,122 @@ static void Check_CPU_support_HwCrypto(Aes* aes)
}
#endif /* __aarch64__ && !WOLFSSL_ARMASM_NO_HW_CRYPTO */
+/* In a Linux kernel module the 32-bit ARM AES asm (ARMv8 AArch32 AES/PMULL +
+ * NEON) MUST run between kernel_neon_begin()/end() or the first SIMD instruction
+ * faults "undefined instruction" (arm64 tolerates it, so wolfSSL never bracketed
+ * it). Wrap every AES_*_AARCH32 entry with SAVE/RESTORE_VECTOR_REGISTERS and
+ * #define-redirect the call sites below (defined first to avoid recursion).
+ * Scoped to !__aarch64__ so the aarch64 path is byte-identical. (FIPS 197 AES,
+ * SP 800-38D AES-GCM.) On save failure (process-context only) the op is
+ * skipped, not crashed -- never reached by POST/optest/harness. */
+#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(__aarch64__) && \
+ !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
+ static WC_INLINE void wc_svr_AES_set_key_AARCH32(const byte* userKey,
+ int keylen, byte* key, int dir) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_set_key_AARCH32(userKey, keylen, key, dir);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_encrypt_AARCH32(const byte* inBlock,
+ byte* outBlock, byte* key, int nr) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_encrypt_AARCH32(inBlock, outBlock, key, nr);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_decrypt_AARCH32(const byte* inBlock,
+ byte* outBlock, byte* key, int nr) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_decrypt_AARCH32(inBlock, outBlock, key, nr);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_encrypt_blocks_AARCH32(const byte* in,
+ byte* out, word32 sz, byte* key, int nr) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_encrypt_blocks_AARCH32(in, out, sz, key, nr);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_decrypt_blocks_AARCH32(const byte* in,
+ byte* out, word32 sz, byte* key, int nr) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_decrypt_blocks_AARCH32(in, out, sz, key, nr);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_CBC_encrypt_AARCH32(const byte* in,
+ byte* out, word32 sz, byte* reg, byte* key, int rounds) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_CBC_encrypt_AARCH32(in, out, sz, reg, key, rounds);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_CBC_decrypt_AARCH32(const byte* in,
+ byte* out, word32 sz, byte* reg, byte* key, int rounds) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_CBC_decrypt_AARCH32(in, out, sz, reg, key, rounds);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_CTR_encrypt_AARCH32(const byte* in,
+ byte* out, word32 sz, byte* reg, byte* key, byte* tmp, word32* left,
+ word32 rounds) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_CTR_encrypt_AARCH32(in, out, sz, reg, key, tmp, left, rounds);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_GCM_set_key_AARCH32(const byte* nonce,
+ const byte* key, byte* gcm_h, int nr) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_GCM_set_key_AARCH32(nonce, key, gcm_h, nr);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_GCM_encrypt_AARCH32(const byte* in,
+ byte* out, word32 sz, const byte* nonce, word32 nonceSz, byte* tag,
+ word32 tagSz, const byte* aad, word32 aadSz, byte* key, byte* gcm_h,
+ byte* tmp, byte* reg, int nr) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_GCM_encrypt_AARCH32(in, out, sz, nonce, nonceSz, tag, tagSz, aad,
+ aadSz, key, gcm_h, tmp, reg, nr);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE int wc_svr_AES_GCM_decrypt_AARCH32(const byte* in,
+ byte* out, word32 sz, const byte* nonce, word32 nonceSz, const byte* tag,
+ word32 tagSz, const byte* aad, word32 aadSz, byte* key, byte* gcm_h,
+ byte* tmp, byte* reg, int nr) {
+ int _ret, _svr = SAVE_VECTOR_REGISTERS2();
+ if (_svr != 0) return _svr;
+ _ret = AES_GCM_decrypt_AARCH32(in, out, sz, nonce, nonceSz, tag, tagSz,
+ aad, aadSz, key, gcm_h, tmp, reg, nr);
+ RESTORE_VECTOR_REGISTERS();
+ return _ret;
+ }
+ #define AES_set_key_AARCH32 wc_svr_AES_set_key_AARCH32
+ #define AES_encrypt_AARCH32 wc_svr_AES_encrypt_AARCH32
+ #define AES_decrypt_AARCH32 wc_svr_AES_decrypt_AARCH32
+ #define AES_encrypt_blocks_AARCH32 wc_svr_AES_encrypt_blocks_AARCH32
+ #define AES_decrypt_blocks_AARCH32 wc_svr_AES_decrypt_blocks_AARCH32
+ #define AES_CBC_encrypt_AARCH32 wc_svr_AES_CBC_encrypt_AARCH32
+ #define AES_CBC_decrypt_AARCH32 wc_svr_AES_CBC_decrypt_AARCH32
+ #define AES_CTR_encrypt_AARCH32 wc_svr_AES_CTR_encrypt_AARCH32
+ #define AES_GCM_set_key_AARCH32 wc_svr_AES_GCM_set_key_AARCH32
+ #define AES_GCM_encrypt_AARCH32 wc_svr_AES_GCM_encrypt_AARCH32
+ #define AES_GCM_decrypt_AARCH32 wc_svr_AES_GCM_decrypt_AARCH32
+ #ifdef WOLFSSL_AES_XTS
+ static WC_INLINE void wc_svr_AES_XTS_encrypt_AARCH32(const byte* in,
+ byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp,
+ int nr) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_XTS_encrypt_AARCH32(in, out, sz, i, key, key2, tmp, nr);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_XTS_decrypt_AARCH32(const byte* in,
+ byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp,
+ int nr) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_XTS_decrypt_AARCH32(in, out, sz, i, key, key2, tmp, nr);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ #define AES_XTS_encrypt_AARCH32 wc_svr_AES_XTS_encrypt_AARCH32
+ #define AES_XTS_decrypt_AARCH32 wc_svr_AES_XTS_decrypt_AARCH32
+ #endif /* WOLFSSL_AES_XTS */
+#endif /* WOLFSSL_USE_SAVE_VECTOR_REGISTERS && !__aarch64__ && !NO_HW_CRYPTO */
+
#if defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESCCM) || \
defined(WOLFSSL_AESGCM_STREAM)
static WARN_UNUSED_RESULT int wc_AesEncrypt(Aes* aes, const byte* inBlock,
@@ -4819,6 +4944,13 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt(Aes* aes, const byte* inBlock,
static int AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
const byte* iv, int dir)
{
+ /* Reject invalid AES key lengths early (FIPS 197: 128/192/256 only).
+ * wc_AesSetKeyDirect only bounds-checks keylen, so without this a
+ * zero/invalid keylen reaches here on 32-bit ARM armasm; the C path
+ * rejects it in wc_AesSetKeyLocal; check early and BAD_FUNC_ARG out. */
+ if (userKey == NULL || (keylen != 16 && keylen != 24 && keylen != 32)) {
+ return BAD_FUNC_ARG;
+ }
#if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB) || \
defined(WOLFSSL_AES_OFB) || defined(WOLFSSL_AES_XTS) || \
defined(WOLFSSL_AES_CTS)
@@ -8082,8 +8214,15 @@ static WC_INLINE void IncrementGcmCounter(byte* inOutCtr)
#endif
#endif /* !FREESCALE_LTC_AES_GCM */
+/* SP 800-38D AES-GCM software GHASH (FlattenSzInBits length block, RIGHTSHIFTX,
+ * GCM table GMULT/GHASH below). On 32-bit ARM --enable-armasm the HW GCM only
+ * accelerates the one-shot path; the streaming GHASH has no 32-bit asm (only
+ * __aarch64__ + PMULL), so it uses this software path. Compile this block when
+ * WOLFSSL_AESGCM_STREAM is set even for arm32 armasm, else GHASH_FINAL fallback
+ * GHASH_LEN_BLOCK references an undefined FlattenSzInBits (arm64 gets it via
+ * __aarch64__). */
#if !defined(WOLFSSL_ARMASM) || defined(__aarch64__) || \
- defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
+ defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) || defined(WOLFSSL_AESGCM_STREAM)
#if defined(GCM_SMALL) || defined(GCM_TABLE) || defined(GCM_TABLE_4BIT)
static WC_INLINE void FlattenSzInBits(byte* buf, word32 sz)
@@ -8254,7 +8393,7 @@ void GenerateM0(Gcm* gcm)
#endif
#endif
-#if defined(WOLFSSL_AESNI) && defined(GCM_TABLE_4BIT) && \
+#if defined(WC_AESNI_GCM) && defined(GCM_TABLE_4BIT) && \
defined(WC_C_DYNAMIC_FALLBACK)
void GCM_generate_m0_aesni(const unsigned char *h, unsigned char *m)
XASM_LINK("GCM_generate_m0_aesni");
@@ -8342,6 +8481,25 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
#if !defined(__aarch64__)
AES_GCM_set_key_AARCH32(iv, (byte*)aes->key, aes->gcm.H, aes->rounds);
+ #if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT)
+ {
+ /* The 32-bit ARM asm GCM keeps gcm->H in the PMULL (bit-reflected)
+ * form for its one-shot GHASH (vmull.p64). The C streaming GHASH
+ * uses gcm->M0, which GenerateM0() builds from the *standard*
+ * H = E_K(0) order -- not derivable from the PMULL H. Recompute the
+ * standard H into gcm->H, build M0, then restore the PMULL H for the
+ * one-shot path; without this streaming AES-GCM mis-authenticates
+ * (AES_GCM_AUTH_E). SP 800-38D AES-GCM GHASH. */
+ ALIGN16 byte gcmStdH[WC_AES_BLOCK_SIZE];
+ ALIGN16 byte gcmZero[WC_AES_BLOCK_SIZE];
+ XMEMSET(gcmZero, 0, WC_AES_BLOCK_SIZE);
+ XMEMCPY(gcmStdH, aes->gcm.H, WC_AES_BLOCK_SIZE);
+ AES_encrypt_AARCH32(gcmZero, aes->gcm.H, (byte*)aes->key,
+ (int)aes->rounds);
+ GenerateM0(&aes->gcm);
+ XMEMCPY(aes->gcm.H, gcmStdH, WC_AES_BLOCK_SIZE);
+ }
+ #endif
#else
if (aes->use_aes_hw_crypto && aes->use_pmull_hw_crypto) {
AES_GCM_set_key_AARCH64(iv, (byte*)aes->key, aes->gcm.H,
@@ -8384,7 +8542,7 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
if (ret == 0) {
#if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT)
- #if defined(WOLFSSL_AESNI) && defined(GCM_TABLE_4BIT)
+ #if defined(WC_AESNI_GCM) && defined(GCM_TABLE_4BIT)
if (aes->use_aesni) {
#if defined(WC_C_DYNAMIC_FALLBACK)
#ifdef HAVE_INTEL_AVX2
@@ -8445,7 +8603,8 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
}
-#ifdef WOLFSSL_AESNI
+
+#ifdef WC_AESNI_GCM
void AES_GCM_encrypt_aesni(const unsigned char *in, unsigned char *out,
const unsigned char* addt, const unsigned char* ivec,
@@ -8533,8 +8692,13 @@ void AES_GCM_decrypt_vaes(const unsigned char *in, unsigned char *out,
#endif /* WOLFSSL_AESNI */
+/* SP 800-38D software GHASH (GMULT / GHASH / GHASH_ONE_BLOCK_SW per table mode).
+ * As with FlattenSzInBits above, 32-bit ARM --enable-armasm has no asm streaming
+ * GHASH (only __aarch64__ + PMULL), so WOLFSSL_AESGCM_STREAM needs these symbols
+ * compiled even for arm32 armasm. Widen the guard (no effect on x86 / arm64,
+ * which already satisfy it -> their in-core hash is unchanged). */
#if !defined(WOLFSSL_ARMASM) || defined(__aarch64__) || \
- defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
+ defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) || defined(WOLFSSL_AESGCM_STREAM)
#if defined(GCM_SMALL)
static void GMULT(byte* X, byte* Y)
{
@@ -10905,7 +11069,7 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
ret = AES_GCM_encrypt_ASM(aes, out, in, sz, iv, ivSz, authTag, authTagSz,
authIn, authInSz);
#else
-#ifdef WOLFSSL_AESNI
+#ifdef WC_AESNI_GCM
if (aes->use_aesni) {
#ifdef HAVE_INTEL_AVX512
if ((sz >= WC_AES_BLOCK_SIZE * WC_VAES_GCM_MIN_BLOCKS) &&
@@ -11538,7 +11702,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
const byte* authIn, word32 authInSz)
{
int ret;
-#ifdef WOLFSSL_AESNI
+#ifdef WC_AESNI_GCM
int res = WC_NO_ERR_TRACE(AES_GCM_AUTH_E);
#endif
@@ -11687,7 +11851,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
authTagSz, authIn, authInSz);
}
#else
-#ifdef WOLFSSL_AESNI
+#ifdef WC_AESNI_GCM
if (aes->use_aesni) {
#ifdef HAVE_INTEL_AVX512
if ((sz >= WC_AES_BLOCK_SIZE * WC_VAES_GCM_MIN_BLOCKS) &&
@@ -11754,6 +11918,15 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
VECTOR_REGISTERS_POP;
+ /* FIPS 140-3 / SP 800-38D: on authentication failure, decrypted-but-
+ * unauthenticated plaintext in `out` must not be released to the caller.
+ * Wipe it so a caller that ignores the return value cannot observe plaintext
+ * from forged ciphertext. All paths (AES-NI, AVX1/2, ARM HW/NEON, C
+ * fallback) funnel through `ret` here, covering every sub-implementation. */
+ if (ret == WC_NO_ERR_TRACE(AES_GCM_AUTH_E) && out != NULL && sz > 0) {
+ ForceZero(out, sz);
+ }
+
return ret;
}
#endif
@@ -11922,7 +12095,7 @@ static WARN_UNUSED_RESULT int AesGcmFinal_C(
return 0;
}
-#ifdef WOLFSSL_AESNI
+#ifdef WC_AESNI_GCM
#ifdef __cplusplus
extern "C" {
@@ -13403,7 +13576,7 @@ int wc_AesGcmInit(Aes* aes, const byte* key, word32 len, const byte* iv,
if (iv != NULL) {
/* Initialize with the IV. */
- #ifdef WOLFSSL_AESNI
+ #ifdef WC_AESNI_GCM
if (aes->use_aesni) {
SAVE_VECTOR_REGISTERS(return _svr_ret;);
ret = AesGcmInit_aesni(aes, iv, ivSz);
@@ -13530,7 +13703,7 @@ int wc_AesGcmEncryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz,
if (ret == 0) {
/* Encrypt with AAD and/or plaintext. */
- #ifdef WOLFSSL_AESNI
+ #ifdef WC_AESNI_GCM
if (aes->use_aesni) {
SAVE_VECTOR_REGISTERS(return _svr_ret;);
ret = AesGcmEncryptUpdate_aesni(aes, out, in, sz, authIn, authInSz);
@@ -13592,7 +13765,7 @@ int wc_AesGcmEncryptFinal(Aes* aes, byte* authTag, word32 authTagSz)
if (ret == 0) {
/* Calculate authentication tag. */
- #ifdef WOLFSSL_AESNI
+ #ifdef WC_AESNI_GCM
if (aes->use_aesni) {
SAVE_VECTOR_REGISTERS(return _svr_ret;);
ret = AesGcmEncryptFinal_aesni(aes, authTag, authTagSz);
@@ -13676,7 +13849,7 @@ int wc_AesGcmDecryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz,
if (ret == 0) {
/* Decrypt with AAD and/or cipher text. */
- #ifdef WOLFSSL_AESNI
+ #ifdef WC_AESNI_GCM
if (aes->use_aesni) {
SAVE_VECTOR_REGISTERS(return _svr_ret;);
ret = AesGcmDecryptUpdate_aesni(aes, out, in, sz, authIn, authInSz);
@@ -13736,7 +13909,7 @@ int wc_AesGcmDecryptFinal(Aes* aes, const byte* authTag, word32 authTagSz)
if (ret == 0) {
/* Calculate authentication tag and compare with one passed in.. */
- #ifdef WOLFSSL_AESNI
+ #ifdef WC_AESNI_GCM
if (aes->use_aesni) {
SAVE_VECTOR_REGISTERS(return _svr_ret;);
ret = AesGcmDecryptFinal_aesni(aes, authTag, authTagSz);
@@ -13763,6 +13936,10 @@ int wc_AesGcmDecryptFinal(Aes* aes, const byte* authTag, word32 authTagSz)
}
}
+ /* Final cannot zeroize prior Update output buffers (it does not see them).
+ * On AES_GCM_AUTH_E the caller must treat all Update-produced plaintext as
+ * invalid and wipe it. See PL-R34 Security Policy section 8 (Operational
+ * Rules). */
return ret;
}
#endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */
diff --git a/wolfcrypt/src/aes_xts_x86_asm.S b/wolfcrypt/src/aes_xts_x86_asm.S
new file mode 100644
index 0000000000..253d755fd6
--- /dev/null
+++ b/wolfcrypt/src/aes_xts_x86_asm.S
@@ -0,0 +1,840 @@
+/* aes_xts_x86_asm
+ *
+ * Copyright (C) 2006-2026 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#ifdef WOLFSSL_USER_SETTINGS
+#include "wolfssl/wolfcrypt/settings.h"
+#endif
+
+#ifndef HAVE_INTEL_AVX1
+#define HAVE_INTEL_AVX1
+#endif /* HAVE_INTEL_AVX1 */
+#ifndef NO_AVX2_SUPPORT
+#ifndef HAVE_INTEL_AVX2
+#define HAVE_INTEL_AVX2
+#endif /* HAVE_INTEL_AVX2 */
+#endif /* NO_AVX2_SUPPORT */
+
+#ifdef WOLFSSL_AES_XTS
+#ifdef WOLFSSL_X86_BUILD
+ # 32-bit Intel (i386) AES-NI AES-XTS: single-block ports of the x86_64
+ # AES_XTS_*_aesni routines, same algorithm and KAT output but limited to
+ # xmm0-7 and the i386 cdecl stack ABI (no xmm8-15/r8-r15, no 4-block
+ # pipeline). Still AES-NI-accelerated. Added 2026-06-17 so 32-bit Intel
+ # builds (host_cpu=x86 -> WOLFSSL_X86_BUILD) link and run AES-XTS. The
+ # GF(2^128) constant {0x87,1,1,1} is built on the stack (PIC-safe; a 32-bit
+ # shared object would otherwise need a GOT relocation for a .data constant).
+ # void AES_XTS_init_aesni(unsigned char* i, const unsigned char* tweak_key,
+ # int tweak_nr);
+.text
+.globl AES_XTS_init_aesni
+.type AES_XTS_init_aesni,@function
+.align 16
+AES_XTS_init_aesni:
+ movl 4(%esp), %eax
+ movdqu (%eax), %xmm2
+ movl 8(%esp), %ecx
+ pxor (%ecx), %xmm2
+ movdqu 16(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 32(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 48(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 64(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 80(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 96(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 112(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 128(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 144(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ cmpl $11, 12(%esp)
+ movdqu 160(%ecx), %xmm0
+ jl L_AES_XTS_init_aesni_enclast_1
+ aesenc %xmm0, %xmm2
+ movdqu 176(%ecx), %xmm1
+ aesenc %xmm1, %xmm2
+ cmpl $13, 12(%esp)
+ movdqu 192(%ecx), %xmm0
+ jl L_AES_XTS_init_aesni_enclast_1
+ aesenc %xmm0, %xmm2
+ movdqu 208(%ecx), %xmm1
+ aesenc %xmm1, %xmm2
+ movdqu 224(%ecx), %xmm0
+L_AES_XTS_init_aesni_enclast_1:
+ aesenclast %xmm0, %xmm2
+ movdqu %xmm2, (%eax)
+ ret
+.size AES_XTS_init_aesni,.-AES_XTS_init_aesni
+ # void AES_XTS_encrypt_aesni(const unsigned char* in, unsigned char* out,
+ # word32 sz, const unsigned char* i, const unsigned char* key,
+ # const unsigned char* key2, int nr);
+.text
+.globl AES_XTS_encrypt_aesni
+.type AES_XTS_encrypt_aesni,@function
+.align 16
+AES_XTS_encrypt_aesni:
+ pushl %edi
+ pushl %ebx
+ subl $16, %esp
+ movl $0x87, (%esp)
+ movl $0x01, 4(%esp)
+ movl $0x01, 8(%esp)
+ movl $0x01, 12(%esp)
+ movdqu (%esp), %xmm6
+ movl 40(%esp), %eax
+ movdqu (%eax), %xmm2
+ movl 48(%esp), %ecx
+ pxor (%ecx), %xmm2
+ movdqu 16(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 32(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 48(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 64(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 80(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 96(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 112(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 128(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 144(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ cmpl $11, 52(%esp)
+ movdqu 160(%ecx), %xmm0
+ jl L_AES_XTS_encrypt_aesni_enclast_2
+ aesenc %xmm0, %xmm2
+ movdqu 176(%ecx), %xmm1
+ aesenc %xmm1, %xmm2
+ cmpl $13, 52(%esp)
+ movdqu 192(%ecx), %xmm0
+ jl L_AES_XTS_encrypt_aesni_enclast_2
+ aesenc %xmm0, %xmm2
+ movdqu 208(%ecx), %xmm1
+ aesenc %xmm1, %xmm2
+ movdqu 224(%ecx), %xmm0
+L_AES_XTS_encrypt_aesni_enclast_2:
+ aesenclast %xmm0, %xmm2
+ xorl %edi, %edi
+ movl 36(%esp), %edx
+ andl $0xfffffff0, %edx
+L_AES_XTS_encrypt_aesni_loop:
+ cmpl %edx, %edi
+ jge L_AES_XTS_encrypt_aesni_loop_done
+ movl 28(%esp), %eax
+ movdqu (%eax,%edi,1), %xmm3
+ pxor %xmm2, %xmm3
+ movl 44(%esp), %ecx
+ pxor (%ecx), %xmm3
+ movdqu 16(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 32(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 48(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 64(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 80(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 96(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 112(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 128(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 144(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ cmpl $11, 52(%esp)
+ movdqu 160(%ecx), %xmm0
+ jl L_AES_XTS_encrypt_aesni_enclast_3
+ aesenc %xmm0, %xmm3
+ movdqu 176(%ecx), %xmm1
+ aesenc %xmm1, %xmm3
+ cmpl $13, 52(%esp)
+ movdqu 192(%ecx), %xmm0
+ jl L_AES_XTS_encrypt_aesni_enclast_3
+ aesenc %xmm0, %xmm3
+ movdqu 208(%ecx), %xmm1
+ aesenc %xmm1, %xmm3
+ movdqu 224(%ecx), %xmm0
+L_AES_XTS_encrypt_aesni_enclast_3:
+ aesenclast %xmm0, %xmm3
+ pxor %xmm2, %xmm3
+ movl 32(%esp), %eax
+ movdqu %xmm3, (%eax,%edi,1)
+ movdqa %xmm2, %xmm4
+ psrad $31, %xmm4
+ pslld $0x01, %xmm2
+ pshufd $0x93, %xmm4, %xmm4
+ pand %xmm6, %xmm4
+ pxor %xmm4, %xmm2
+ addl $16, %edi
+ jmp L_AES_XTS_encrypt_aesni_loop
+L_AES_XTS_encrypt_aesni_loop_done:
+ movl 36(%esp), %eax
+ cmpl %eax, %edi
+ je L_AES_XTS_encrypt_aesni_done
+ subl $16, %edi
+ movl 32(%esp), %eax
+ movdqu (%eax,%edi,1), %xmm5
+ addl $16, %edi
+ movdqu %xmm5, (%esp)
+ xorl %edx, %edx
+L_AES_XTS_encrypt_aesni_cts:
+ movzbl (%esp,%edx,1), %ecx
+ movl 28(%esp), %eax
+ movzbl (%eax,%edi,1), %ebx
+ movl 32(%esp), %eax
+ movb %cl, (%eax,%edi,1)
+ movb %bl, (%esp,%edx,1)
+ incl %edi
+ incl %edx
+ movl 36(%esp), %eax
+ cmpl %eax, %edi
+ jl L_AES_XTS_encrypt_aesni_cts
+ subl %edx, %edi
+ movdqu (%esp), %xmm3
+ subl $16, %edi
+ pxor %xmm2, %xmm3
+ movl 44(%esp), %ecx
+ pxor (%ecx), %xmm3
+ movdqu 16(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 32(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 48(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 64(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 80(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 96(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 112(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 128(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 144(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ cmpl $11, 52(%esp)
+ movdqu 160(%ecx), %xmm0
+ jl L_AES_XTS_encrypt_aesni_enclast_4
+ aesenc %xmm0, %xmm3
+ movdqu 176(%ecx), %xmm1
+ aesenc %xmm1, %xmm3
+ cmpl $13, 52(%esp)
+ movdqu 192(%ecx), %xmm0
+ jl L_AES_XTS_encrypt_aesni_enclast_4
+ aesenc %xmm0, %xmm3
+ movdqu 208(%ecx), %xmm1
+ aesenc %xmm1, %xmm3
+ movdqu 224(%ecx), %xmm0
+L_AES_XTS_encrypt_aesni_enclast_4:
+ aesenclast %xmm0, %xmm3
+ pxor %xmm2, %xmm3
+ movl 32(%esp), %eax
+ movdqu %xmm3, (%eax,%edi,1)
+L_AES_XTS_encrypt_aesni_done:
+ addl $16, %esp
+ popl %ebx
+ popl %edi
+ ret
+.size AES_XTS_encrypt_aesni,.-AES_XTS_encrypt_aesni
+ # void AES_XTS_encrypt_update_aesni(const unsigned char* in,
+ # unsigned char* out, word32 sz, const unsigned char* key,
+ # unsigned char* i, int nr); Tweak is read (already encrypted) from *i
+ # and the advanced tweak written back to *i.
+.text
+.globl AES_XTS_encrypt_update_aesni
+.type AES_XTS_encrypt_update_aesni,@function
+.align 16
+AES_XTS_encrypt_update_aesni:
+ pushl %edi
+ pushl %ebx
+ subl $16, %esp
+ movl $0x87, (%esp)
+ movl $0x01, 4(%esp)
+ movl $0x01, 8(%esp)
+ movl $0x01, 12(%esp)
+ movdqu (%esp), %xmm6
+ movl 44(%esp), %eax
+ movdqu (%eax), %xmm2
+ xorl %edi, %edi
+ movl 36(%esp), %edx
+ andl $0xfffffff0, %edx
+L_AES_XTS_encrypt_update_aesni_loop:
+ cmpl %edx, %edi
+ jge L_AES_XTS_encrypt_update_aesni_loop_done
+ movl 28(%esp), %eax
+ movdqu (%eax,%edi,1), %xmm3
+ pxor %xmm2, %xmm3
+ movl 40(%esp), %ecx
+ pxor (%ecx), %xmm3
+ movdqu 16(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 32(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 48(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 64(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 80(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 96(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 112(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 128(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 144(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ cmpl $11, 48(%esp)
+ movdqu 160(%ecx), %xmm0
+ jl L_AES_XTS_encrypt_update_aesni_enclast_5
+ aesenc %xmm0, %xmm3
+ movdqu 176(%ecx), %xmm1
+ aesenc %xmm1, %xmm3
+ cmpl $13, 48(%esp)
+ movdqu 192(%ecx), %xmm0
+ jl L_AES_XTS_encrypt_update_aesni_enclast_5
+ aesenc %xmm0, %xmm3
+ movdqu 208(%ecx), %xmm1
+ aesenc %xmm1, %xmm3
+ movdqu 224(%ecx), %xmm0
+L_AES_XTS_encrypt_update_aesni_enclast_5:
+ aesenclast %xmm0, %xmm3
+ pxor %xmm2, %xmm3
+ movl 32(%esp), %eax
+ movdqu %xmm3, (%eax,%edi,1)
+ movdqa %xmm2, %xmm4
+ psrad $31, %xmm4
+ pslld $0x01, %xmm2
+ pshufd $0x93, %xmm4, %xmm4
+ pand %xmm6, %xmm4
+ pxor %xmm4, %xmm2
+ addl $16, %edi
+ jmp L_AES_XTS_encrypt_update_aesni_loop
+L_AES_XTS_encrypt_update_aesni_loop_done:
+ movl 36(%esp), %eax
+ cmpl %eax, %edi
+ je L_AES_XTS_encrypt_update_aesni_done
+ subl $16, %edi
+ movl 32(%esp), %eax
+ movdqu (%eax,%edi,1), %xmm5
+ addl $16, %edi
+ movdqu %xmm5, (%esp)
+ xorl %edx, %edx
+L_AES_XTS_encrypt_update_aesni_cts:
+ movzbl (%esp,%edx,1), %ecx
+ movl 28(%esp), %eax
+ movzbl (%eax,%edi,1), %ebx
+ movl 32(%esp), %eax
+ movb %cl, (%eax,%edi,1)
+ movb %bl, (%esp,%edx,1)
+ incl %edi
+ incl %edx
+ movl 36(%esp), %eax
+ cmpl %eax, %edi
+ jl L_AES_XTS_encrypt_update_aesni_cts
+ subl %edx, %edi
+ movdqu (%esp), %xmm3
+ subl $16, %edi
+ pxor %xmm2, %xmm3
+ movl 40(%esp), %ecx
+ pxor (%ecx), %xmm3
+ movdqu 16(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 32(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 48(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 64(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 80(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 96(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 112(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 128(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ movdqu 144(%ecx), %xmm0
+ aesenc %xmm0, %xmm3
+ cmpl $11, 48(%esp)
+ movdqu 160(%ecx), %xmm0
+ jl L_AES_XTS_encrypt_update_aesni_enclast_6
+ aesenc %xmm0, %xmm3
+ movdqu 176(%ecx), %xmm1
+ aesenc %xmm1, %xmm3
+ cmpl $13, 48(%esp)
+ movdqu 192(%ecx), %xmm0
+ jl L_AES_XTS_encrypt_update_aesni_enclast_6
+ aesenc %xmm0, %xmm3
+ movdqu 208(%ecx), %xmm1
+ aesenc %xmm1, %xmm3
+ movdqu 224(%ecx), %xmm0
+L_AES_XTS_encrypt_update_aesni_enclast_6:
+ aesenclast %xmm0, %xmm3
+ pxor %xmm2, %xmm3
+ movl 32(%esp), %eax
+ movdqu %xmm3, (%eax,%edi,1)
+L_AES_XTS_encrypt_update_aesni_done:
+ movl 44(%esp), %eax
+ movdqu %xmm2, (%eax)
+ addl $16, %esp
+ popl %ebx
+ popl %edi
+ ret
+.size AES_XTS_encrypt_update_aesni,.-AES_XTS_encrypt_update_aesni
+ # void AES_XTS_decrypt_aesni(const unsigned char* in, unsigned char* out,
+ # word32 sz, const unsigned char* i, const unsigned char* key,
+ # const unsigned char* key2, int nr);
+.text
+.globl AES_XTS_decrypt_aesni
+.type AES_XTS_decrypt_aesni,@function
+.align 16
+AES_XTS_decrypt_aesni:
+ pushl %edi
+ pushl %ebx
+ subl $16, %esp
+ movl $0x87, (%esp)
+ movl $0x01, 4(%esp)
+ movl $0x01, 8(%esp)
+ movl $0x01, 12(%esp)
+ movdqu (%esp), %xmm6
+ movl 40(%esp), %eax
+ movdqu (%eax), %xmm2
+ movl 48(%esp), %ecx
+ pxor (%ecx), %xmm2
+ movdqu 16(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 32(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 48(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 64(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 80(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 96(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 112(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 128(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ movdqu 144(%ecx), %xmm0
+ aesenc %xmm0, %xmm2
+ cmpl $11, 52(%esp)
+ movdqu 160(%ecx), %xmm0
+ jl L_AES_XTS_decrypt_aesni_enclast_7
+ aesenc %xmm0, %xmm2
+ movdqu 176(%ecx), %xmm1
+ aesenc %xmm1, %xmm2
+ cmpl $13, 52(%esp)
+ movdqu 192(%ecx), %xmm0
+ jl L_AES_XTS_decrypt_aesni_enclast_7
+ aesenc %xmm0, %xmm2
+ movdqu 208(%ecx), %xmm1
+ aesenc %xmm1, %xmm2
+ movdqu 224(%ecx), %xmm0
+L_AES_XTS_decrypt_aesni_enclast_7:
+ aesenclast %xmm0, %xmm2
+ xorl %edi, %edi
+ movl 36(%esp), %eax
+ movl %eax, %edx
+ andl $0xfffffff0, %edx
+ cmpl %eax, %edx
+ je L_AES_XTS_decrypt_aesni_bound
+ subl $16, %edx
+L_AES_XTS_decrypt_aesni_bound:
+L_AES_XTS_decrypt_aesni_loop:
+ cmpl %edx, %edi
+ jge L_AES_XTS_decrypt_aesni_loop_done
+ movl 28(%esp), %eax
+ movdqu (%eax,%edi,1), %xmm3
+ pxor %xmm2, %xmm3
+ movl 44(%esp), %ecx
+ pxor (%ecx), %xmm3
+ movdqu 16(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 32(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 48(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 64(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 80(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 96(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 112(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 128(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 144(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ cmpl $11, 52(%esp)
+ movdqu 160(%ecx), %xmm0
+ jl L_AES_XTS_decrypt_aesni_declast_8
+ aesdec %xmm0, %xmm3
+ movdqu 176(%ecx), %xmm1
+ aesdec %xmm1, %xmm3
+ cmpl $13, 52(%esp)
+ movdqu 192(%ecx), %xmm0
+ jl L_AES_XTS_decrypt_aesni_declast_8
+ aesdec %xmm0, %xmm3
+ movdqu 208(%ecx), %xmm1
+ aesdec %xmm1, %xmm3
+ movdqu 224(%ecx), %xmm0
+L_AES_XTS_decrypt_aesni_declast_8:
+ aesdeclast %xmm0, %xmm3
+ pxor %xmm2, %xmm3
+ movl 32(%esp), %eax
+ movdqu %xmm3, (%eax,%edi,1)
+ movdqa %xmm2, %xmm4
+ psrad $31, %xmm4
+ pslld $0x01, %xmm2
+ pshufd $0x93, %xmm4, %xmm4
+ pand %xmm6, %xmm4
+ pxor %xmm4, %xmm2
+ addl $16, %edi
+ jmp L_AES_XTS_decrypt_aesni_loop
+L_AES_XTS_decrypt_aesni_loop_done:
+ movl 36(%esp), %eax
+ cmpl %eax, %edi
+ je L_AES_XTS_decrypt_aesni_done
+ movdqa %xmm2, %xmm4
+ movdqa %xmm2, %xmm5
+ psrad $31, %xmm4
+ pslld $0x01, %xmm5
+ pshufd $0x93, %xmm4, %xmm4
+ pand %xmm6, %xmm4
+ pxor %xmm4, %xmm5
+ movl 28(%esp), %eax
+ movdqu (%eax,%edi,1), %xmm3
+ pxor %xmm5, %xmm3
+ movl 44(%esp), %ecx
+ pxor (%ecx), %xmm3
+ movdqu 16(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 32(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 48(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 64(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 80(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 96(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 112(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 128(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 144(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ cmpl $11, 52(%esp)
+ movdqu 160(%ecx), %xmm0
+ jl L_AES_XTS_decrypt_aesni_declast_9
+ aesdec %xmm0, %xmm3
+ movdqu 176(%ecx), %xmm1
+ aesdec %xmm1, %xmm3
+ cmpl $13, 52(%esp)
+ movdqu 192(%ecx), %xmm0
+ jl L_AES_XTS_decrypt_aesni_declast_9
+ aesdec %xmm0, %xmm3
+ movdqu 208(%ecx), %xmm1
+ aesdec %xmm1, %xmm3
+ movdqu 224(%ecx), %xmm0
+L_AES_XTS_decrypt_aesni_declast_9:
+ aesdeclast %xmm0, %xmm3
+ pxor %xmm5, %xmm3
+ movdqu %xmm3, (%esp)
+ addl $16, %edi
+ xorl %edx, %edx
+L_AES_XTS_decrypt_aesni_cts:
+ movzbl (%esp,%edx,1), %ecx
+ movl 28(%esp), %eax
+ movzbl (%eax,%edi,1), %ebx
+ movl 32(%esp), %eax
+ movb %cl, (%eax,%edi,1)
+ movb %bl, (%esp,%edx,1)
+ incl %edi
+ incl %edx
+ movl 36(%esp), %eax
+ cmpl %eax, %edi
+ jl L_AES_XTS_decrypt_aesni_cts
+ subl %edx, %edi
+ movdqu (%esp), %xmm3
+ pxor %xmm2, %xmm3
+ movl 44(%esp), %ecx
+ pxor (%ecx), %xmm3
+ movdqu 16(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 32(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 48(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 64(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 80(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 96(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 112(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 128(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 144(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ cmpl $11, 52(%esp)
+ movdqu 160(%ecx), %xmm0
+ jl L_AES_XTS_decrypt_aesni_declast_10
+ aesdec %xmm0, %xmm3
+ movdqu 176(%ecx), %xmm1
+ aesdec %xmm1, %xmm3
+ cmpl $13, 52(%esp)
+ movdqu 192(%ecx), %xmm0
+ jl L_AES_XTS_decrypt_aesni_declast_10
+ aesdec %xmm0, %xmm3
+ movdqu 208(%ecx), %xmm1
+ aesdec %xmm1, %xmm3
+ movdqu 224(%ecx), %xmm0
+L_AES_XTS_decrypt_aesni_declast_10:
+ aesdeclast %xmm0, %xmm3
+ pxor %xmm2, %xmm3
+ subl $16, %edi
+ movl 32(%esp), %eax
+ movdqu %xmm3, (%eax,%edi,1)
+L_AES_XTS_decrypt_aesni_done:
+ addl $16, %esp
+ popl %ebx
+ popl %edi
+ ret
+.size AES_XTS_decrypt_aesni,.-AES_XTS_decrypt_aesni
+ # void AES_XTS_decrypt_update_aesni(const unsigned char* in,
+ # unsigned char* out, word32 sz, const unsigned char* key,
+ # unsigned char* i, int nr); Tweak is read from *i and the advanced
+ # tweak written back to *i.
+.text
+.globl AES_XTS_decrypt_update_aesni
+.type AES_XTS_decrypt_update_aesni,@function
+.align 16
+AES_XTS_decrypt_update_aesni:
+ pushl %edi
+ pushl %ebx
+ subl $16, %esp
+ movl $0x87, (%esp)
+ movl $0x01, 4(%esp)
+ movl $0x01, 8(%esp)
+ movl $0x01, 12(%esp)
+ movdqu (%esp), %xmm6
+ movl 44(%esp), %eax
+ movdqu (%eax), %xmm2
+ xorl %edi, %edi
+ movl 36(%esp), %eax
+ movl %eax, %edx
+ andl $0xfffffff0, %edx
+ cmpl %eax, %edx
+ je L_AES_XTS_decrypt_update_aesni_bound
+ subl $16, %edx
+L_AES_XTS_decrypt_update_aesni_bound:
+L_AES_XTS_decrypt_update_aesni_loop:
+ cmpl %edx, %edi
+ jge L_AES_XTS_decrypt_update_aesni_loop_done
+ movl 28(%esp), %eax
+ movdqu (%eax,%edi,1), %xmm3
+ pxor %xmm2, %xmm3
+ movl 40(%esp), %ecx
+ pxor (%ecx), %xmm3
+ movdqu 16(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 32(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 48(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 64(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 80(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 96(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 112(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 128(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 144(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ cmpl $11, 48(%esp)
+ movdqu 160(%ecx), %xmm0
+ jl L_AES_XTS_decrypt_update_aesni_declast_11
+ aesdec %xmm0, %xmm3
+ movdqu 176(%ecx), %xmm1
+ aesdec %xmm1, %xmm3
+ cmpl $13, 48(%esp)
+ movdqu 192(%ecx), %xmm0
+ jl L_AES_XTS_decrypt_update_aesni_declast_11
+ aesdec %xmm0, %xmm3
+ movdqu 208(%ecx), %xmm1
+ aesdec %xmm1, %xmm3
+ movdqu 224(%ecx), %xmm0
+L_AES_XTS_decrypt_update_aesni_declast_11:
+ aesdeclast %xmm0, %xmm3
+ pxor %xmm2, %xmm3
+ movl 32(%esp), %eax
+ movdqu %xmm3, (%eax,%edi,1)
+ movdqa %xmm2, %xmm4
+ psrad $31, %xmm4
+ pslld $0x01, %xmm2
+ pshufd $0x93, %xmm4, %xmm4
+ pand %xmm6, %xmm4
+ pxor %xmm4, %xmm2
+ addl $16, %edi
+ jmp L_AES_XTS_decrypt_update_aesni_loop
+L_AES_XTS_decrypt_update_aesni_loop_done:
+ movl 36(%esp), %eax
+ cmpl %eax, %edi
+ je L_AES_XTS_decrypt_update_aesni_done
+ movdqa %xmm2, %xmm4
+ movdqa %xmm2, %xmm5
+ psrad $31, %xmm4
+ pslld $0x01, %xmm5
+ pshufd $0x93, %xmm4, %xmm4
+ pand %xmm6, %xmm4
+ pxor %xmm4, %xmm5
+ movl 28(%esp), %eax
+ movdqu (%eax,%edi,1), %xmm3
+ pxor %xmm5, %xmm3
+ movl 40(%esp), %ecx
+ pxor (%ecx), %xmm3
+ movdqu 16(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 32(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 48(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 64(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 80(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 96(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 112(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 128(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 144(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ cmpl $11, 48(%esp)
+ movdqu 160(%ecx), %xmm0
+ jl L_AES_XTS_decrypt_update_aesni_declast_12
+ aesdec %xmm0, %xmm3
+ movdqu 176(%ecx), %xmm1
+ aesdec %xmm1, %xmm3
+ cmpl $13, 48(%esp)
+ movdqu 192(%ecx), %xmm0
+ jl L_AES_XTS_decrypt_update_aesni_declast_12
+ aesdec %xmm0, %xmm3
+ movdqu 208(%ecx), %xmm1
+ aesdec %xmm1, %xmm3
+ movdqu 224(%ecx), %xmm0
+L_AES_XTS_decrypt_update_aesni_declast_12:
+ aesdeclast %xmm0, %xmm3
+ pxor %xmm5, %xmm3
+ movdqu %xmm3, (%esp)
+ addl $16, %edi
+ xorl %edx, %edx
+L_AES_XTS_decrypt_update_aesni_cts:
+ movzbl (%esp,%edx,1), %ecx
+ movl 28(%esp), %eax
+ movzbl (%eax,%edi,1), %ebx
+ movl 32(%esp), %eax
+ movb %cl, (%eax,%edi,1)
+ movb %bl, (%esp,%edx,1)
+ incl %edi
+ incl %edx
+ movl 36(%esp), %eax
+ cmpl %eax, %edi
+ jl L_AES_XTS_decrypt_update_aesni_cts
+ subl %edx, %edi
+ movdqu (%esp), %xmm3
+ pxor %xmm2, %xmm3
+ movl 40(%esp), %ecx
+ pxor (%ecx), %xmm3
+ movdqu 16(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 32(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 48(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 64(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 80(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 96(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 112(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 128(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ movdqu 144(%ecx), %xmm0
+ aesdec %xmm0, %xmm3
+ cmpl $11, 48(%esp)
+ movdqu 160(%ecx), %xmm0
+ jl L_AES_XTS_decrypt_update_aesni_declast_13
+ aesdec %xmm0, %xmm3
+ movdqu 176(%ecx), %xmm1
+ aesdec %xmm1, %xmm3
+ cmpl $13, 48(%esp)
+ movdqu 192(%ecx), %xmm0
+ jl L_AES_XTS_decrypt_update_aesni_declast_13
+ aesdec %xmm0, %xmm3
+ movdqu 208(%ecx), %xmm1
+ aesdec %xmm1, %xmm3
+ movdqu 224(%ecx), %xmm0
+L_AES_XTS_decrypt_update_aesni_declast_13:
+ aesdeclast %xmm0, %xmm3
+ pxor %xmm2, %xmm3
+ subl $16, %edi
+ movl 32(%esp), %eax
+ movdqu %xmm3, (%eax,%edi,1)
+L_AES_XTS_decrypt_update_aesni_done:
+ movl 44(%esp), %eax
+ movdqu %xmm2, (%eax)
+ addl $16, %esp
+ popl %ebx
+ popl %edi
+ ret
+.size AES_XTS_decrypt_update_aesni,.-AES_XTS_decrypt_update_aesni
+#endif /* WOLFSSL_X86_BUILD */
+#endif /* WOLFSSL_AES_XTS */
+
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/wolfcrypt/src/cpuid.c b/wolfcrypt/src/cpuid.c
index b1ac7e1136..69aa26a5dc 100644
--- a/wolfcrypt/src/cpuid.c
+++ b/wolfcrypt/src/cpuid.c
@@ -75,6 +75,20 @@
#define cpuid(a,b,c) __cpuidex((int*)a,b,c)
#endif /* _MSC_VER */
+ /* On the 32-bit x86 Linux kernel (WOLFSSL_LINUXKM + WOLFSSL_X86_BUILD),
+ * (pulled in via processor.h -> math_emu.h ->
+ * ptrace.h on i386 only) #defines EAX/EBX/ECX/EDX as ptrace register
+ * indices (EAX=6, EBX=0, ECX=1, EDX=2). We reuse these names as
+ * cpuid()-result array indices (0..3), so the clash is a real bug, not a
+ * cosmetic redefinition: leaving the kernel's values in place would
+ * otherwise index reg[6] (past "unsigned int reg[5]") and mis-compare the
+ * vendor string. #undef so our indices win. No-op where the names are
+ * not predefined (x86_64
+ * kernel, all user-space), so i386-kernel codegen is byte-identical. */
+ #undef EAX
+ #undef EBX
+ #undef ECX
+ #undef EDX
#define EAX 0
#define EBX 1
#define ECX 2
diff --git a/wolfcrypt/src/dh.c b/wolfcrypt/src/dh.c
index 55d82c0e23..a537eceb4d 100644
--- a/wolfcrypt/src/dh.c
+++ b/wolfcrypt/src/dh.c
@@ -1424,8 +1424,18 @@ int wc_DhGeneratePublic(DhKey* key, byte* priv, word32 privSz,
#if FIPS_VERSION_GE(5,0) || defined(WOLFSSL_VALIDATE_DH_KEYGEN)
if (ret == 0)
ret = _ffc_validate_public_key(key, pub, *pubSz, NULL, 0, 0);
- if (ret == 0)
- ret = _ffc_pairwise_consistency_test(key, pub, *pubSz, priv, privSz);
+ if (ret == 0) {
+ /* FFC key-pair PCT per SP 800-56A r3 sec 5.6.2.1.4, required
+ * after KeyGen by FIPS 140-3 IG 10.3.B. Under FIPS, failure is
+ * remapped to DH_PCT_E so DEGRADE_STATE moves
+ * FIPS_CAST_DH_PRIMITIVE_Z to the error state. */
+ ret = _ffc_pairwise_consistency_test(key, pub, *pubSz, priv,
+ privSz);
+ #ifdef HAVE_FIPS
+ if (ret != 0)
+ ret = DH_PCT_E;
+ #endif
+ }
#endif /* FIPS V5 or later || WOLFSSL_VALIDATE_DH_KEYGEN */
return ret;
@@ -1448,8 +1458,18 @@ static int wc_DhGenerateKeyPair_Sync(DhKey* key, WC_RNG* rng,
#if FIPS_VERSION_GE(5,0) || defined(WOLFSSL_VALIDATE_DH_KEYGEN)
if (ret == 0)
ret = _ffc_validate_public_key(key, pub, *pubSz, NULL, 0, 0);
- if (ret == 0)
- ret = _ffc_pairwise_consistency_test(key, pub, *pubSz, priv, *privSz);
+ if (ret == 0) {
+ /* FFC key-pair PCT per SP 800-56A r3 sec 5.6.2.1.4, required
+ * after KeyGen by FIPS 140-3 IG 10.3.B. Under FIPS, failure is
+ * remapped to DH_PCT_E so DEGRADE_STATE moves
+ * FIPS_CAST_DH_PRIMITIVE_Z to the error state. */
+ ret = _ffc_pairwise_consistency_test(key, pub, *pubSz, priv,
+ *privSz);
+ #ifdef HAVE_FIPS
+ if (ret != 0)
+ ret = DH_PCT_E;
+ #endif
+ }
#endif /* FIPS V5 or later || WOLFSSL_VALIDATE_DH_KEYGEN */
return ret;
diff --git a/wolfcrypt/src/error.c b/wolfcrypt/src/error.c
index 0f70a84cc8..be0acd033d 100644
--- a/wolfcrypt/src/error.c
+++ b/wolfcrypt/src/error.c
@@ -692,6 +692,21 @@ const char* wc_GetErrorString(int error)
case SLH_DSA_KAT_FIPS_E:
return "SLH-DSA Known Answer Test check FIPS error";
+ case SLH_DSA_PCT_E:
+ return "wolfcrypt SLH-DSA Pairwise Consistency Test Failure";
+
+ case CMAC_KAT_FIPS_E:
+ return "AES-CMAC Known Answer Test FIPS error";
+
+ case SHAKE_KAT_FIPS_E:
+ return "SHAKE Known Answer Test FIPS error";
+
+ case DH_PCT_E:
+ return "wolfcrypt DH (FFC) Pairwise Consistency Test Failure";
+
+ case AES_KW_KAT_FIPS_E:
+ return "AES-KW Known Answer Test FIPS error";
+
case SEQ_OVERFLOW_E:
return "Sequence counter would overflow";
diff --git a/wolfcrypt/src/ge_operations.c b/wolfcrypt/src/ge_operations.c
index 5d3157628d..3c2eea5160 100644
--- a/wolfcrypt/src/ge_operations.c
+++ b/wolfcrypt/src/ge_operations.c
@@ -10196,9 +10196,13 @@ void ge_tobytes_nct(unsigned char *s,const ge_p2 *h)
/* if HAVE_ED25519 but not HAVE_CURVE25519, and an asm implementation is built,
* then curve25519() won't get its WOLFSSL_LOCAL attribute unless we dummy-call
* it here.
- */
+ * Requires the asm port to emit curve25519() when X25519 is off -- true for
+ * x86 and 64-bit ARM, but the 32-bit ARM port gates curve25519() on
+ * HAVE_CURVE25519, so the dummy-call would be an undefined symbol there.
+ * Exclude arm32 armasm (RFC 7748 / SP 800-186 X25519). */
#if defined(CURVED25519_ASM) && defined(WOLFSSL_API_PREFIX_MAP) && \
- !defined(HAVE_CURVE25519) && !defined(FREESCALE_LTC_ECC)
+ !defined(HAVE_CURVE25519) && !defined(FREESCALE_LTC_ECC) && \
+ (!defined(WOLFSSL_ARMASM) || defined(__aarch64__))
WOLFSSL_LOCAL void _wc_curve25519_dummy(void);
WOLFSSL_LOCAL void _wc_curve25519_dummy(void) {
(void)curve25519((byte *)0, (byte *)0, (const byte *)0);
diff --git a/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S
index 6d2f017299..fd817fd247 100644
--- a/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S
+++ b/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S
@@ -27,6 +27,14 @@
#include
+/* Honor WC_SHA3_NO_ASM as sha3.c does (set by KERNEL_MODE_DEFAULTS): suppress
+ * this NEON BlockSha3 so it doesn't multiply-define against sha3.c's C BlockSha3
+ * on arm32. arm64's asm is gated on WOLFSSL_ARMASM_CRYPTO_SHA3 so never hit this;
+ * the arm32 NEON path was only gated on WOLFSSL_ARMASM_NO_NEON. FIPS 202. */
+#ifdef WC_SHA3_NO_ASM
+ #undef WOLFSSL_ARMASM
+#endif
+
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && !defined(WOLFSSL_ARMASM_THUMB2)
#ifndef WOLFSSL_ARMASM_INLINE
diff --git a/wolfcrypt/src/random.c b/wolfcrypt/src/random.c
index eb51965024..cfd415f53b 100644
--- a/wolfcrypt/src/random.c
+++ b/wolfcrypt/src/random.c
@@ -418,6 +418,20 @@ int wc_DrbgState_MutexFree(void)
static int LockDrbgState(void)
{
#ifndef SINGLE_THREADED
+#ifndef WOLFSSL_MUTEX_INITIALIZER
+ /* drbgStateMutex needs run-time init on platforms without a static mutex
+ * initializer (e.g. Windows CRITICAL_SECTION). The FIPS pre-operational
+ * self-test locks the DRBG from a load-time constructor that runs before
+ * wolfCrypt_Init(), and locking an uninitialized CRITICAL_SECTION is UB
+ * (faults on the degraded CAST re-run). Init on demand here -- idempotent,
+ * and the first lock is the single-threaded POST so it is race-free.
+ * Guards the SP 800-90A DRBG enable/disable state. */
+ {
+ int initRet = wc_DrbgState_MutexInit();
+ if (initRet != 0)
+ return initRet;
+ }
+#endif
return wc_LockMutex(&drbgStateMutex);
#else
return 0;
@@ -1825,7 +1839,7 @@ static int _InitRng(WC_RNG* rng, byte* nonce, word32 nonceSz,
#endif
#ifdef HAVE_INTEL_RDRAND
- /* if CPU supports RDRAND, use it directly and by-pass DRBG init */
+ /* if CPU supports RDRAND, use it directly and bypass DRBG init */
if (IS_INTEL_RDRAND(intel_flags)) {
#ifdef HAVE_HASHDRBG
rng->status = DRBG_OK;
@@ -3619,23 +3633,70 @@ int wc_FreeNetRandom(void)
#if defined(HAVE_INTEL_RDRAND) || defined(HAVE_INTEL_RDSEED) || \
defined(HAVE_AMD_RDSEED)
-#ifdef WOLFSSL_ASYNC_CRYPT
- /* need more retries if multiple cores */
- #define INTELRD_RETRY (32 * 8)
-#else
- #define INTELRD_RETRY 32
+/* Bounds the RDRAND/RDSEED retry loop below. RDSEED legitimately sets CF=0
+ * until the on-chip entropy is replenished; per Intel's DRNG guidance software
+ * must retry. Overridable via -D for OEs needing a different budget. */
+#ifndef INTELRD_RETRY
+ #if defined(WOLFSSL_LINUXKM)
+ /* Linux kernel module: boot-time FIPS CASTs poll RDSEED during
+ * module_init while the RNG is warming up and RDSEED is contended
+ * (especially virtualized, funnelled to a busy host CPU). CF=0 then
+ * far exceeds the 32-retry userspace default, making
+ * --enable-{amd,intel}rdseed modules fail the ECDSA CAST and refuse to
+ * load. The budget is a ceiling, not a fixed cost -- RDSEED succeeds in
+ * ~1 read once entropy is up, so post-boot use is unaffected. */
+ #define INTELRD_RETRY 100000
+ #elif defined(WOLFSSL_ASYNC_CRYPT)
+ /* need more retries if multiple cores */
+ #define INTELRD_RETRY (32 * 8)
+ #else
+ #define INTELRD_RETRY 32
+ #endif
#endif
#if defined(HAVE_INTEL_RDSEED) || defined(HAVE_AMD_RDSEED)
+/* Vendor tag for the optional FIPS_CODE_REVIEW evidence prints below. Intel
+ * and AMD RDSEED share the one x86 RDSEED primitive; exactly one of
+ * HAVE_INTEL_RDSEED / HAVE_AMD_RDSEED is set per OE, so this resolves cleanly. */
+#if defined(HAVE_AMD_RDSEED)
+#define WC_RDSEED_VENDOR "AMD"
+#else
+#define WC_RDSEED_VENDOR "Intel"
+#endif
+
#ifndef USE_INTEL_INTRINSICS
- /* return 0 on success */
+ /* return 0 on success. Per the E27 Public Use Document (CMVP entropy
+ * disclosure), wolfSSL polls the x86 Carry Flag to check each RDSEED:
+ * CF=1 -> dest holds 64 bits of conditioned entropy, usable;
+ * CF=0 -> seed pool empty this cycle, dest unusable, must retry
+ * (IntelRDseed64_r below loops up to INTELRD_RETRY times).
+ * "setc %1" materialises CF into (ok); the "=qm" constraint pins it to a
+ * q-class register so setc can target its low byte. */
static WC_INLINE int IntelRDseed64(word64* seed)
{
unsigned char ok;
__asm__ volatile("rdseed %0; setc %1":"=r"(*seed), "=qm"(ok));
+#ifdef FIPS_CODE_REVIEW
+ /* One-shot tracer: confirm this path is alive on the first call, then
+ * go silent -- RDSEED fires per 64-bit chunk, so per-chunk prints would
+ * flood the sanity-log. Per-request volume is shown by the outer
+ * wc_GenerateSeed_IntelRD print below. */
+ {
+ static int printed_asm = 0;
+ if (!printed_asm) {
+ printed_asm = 1;
+ printf("FIPS_CODE_REVIEW IntelRDseed64 [asm path, %s] "
+ "(one-shot): delivered %u bits, CF=%u\n",
+ WC_RDSEED_VENDOR, (unsigned)(sizeof(word64) * 8u),
+ (unsigned)ok);
+ }
+ }
+#endif
+ /* CF set (ok != 0) -> 64 bits captured in *seed, return 0; CF clear ->
+ * sample invalid, return -1 so IntelRDseed64_r() retries. */
return (ok) ? 0 : -1;
}
@@ -3643,7 +3704,14 @@ int wc_FreeNetRandom(void)
/* The compiler Visual Studio uses does not allow inline assembly.
* It does allow for Intel intrinsic functions. */
- /* return 0 on success */
+ /* return 0 on success.
+ *
+ * E27 PUD (NIST CMVP) cited path: _rdseed64_step is the compiler intrinsic
+ * front-end for the same RDSEED instruction documented in the asm path
+ * above. The intrinsic returns 1 when CF was set by the underlying RDSEED
+ * (i.e. the 64-bit conditioned entropy sample in *seed is valid this
+ * cycle) and 0 when CF was clear (caller MUST retry; *seed MUST NOT be
+ * consumed). */
# ifdef __GNUC__
__attribute__((target("rdseed")))
# endif
@@ -3652,6 +3720,23 @@ int wc_FreeNetRandom(void)
int ok;
ok = _rdseed64_step((unsigned long long*) seed);
+#ifdef FIPS_CODE_REVIEW
+ /* One-shot tracer; see asm-path comment above for rationale. */
+ {
+ static int printed_intrinsic = 0;
+ if (!printed_intrinsic) {
+ printed_intrinsic = 1;
+ printf("FIPS_CODE_REVIEW IntelRDseed64 [intrinsic path, %s] "
+ "(one-shot): delivered %u bits, "
+ "intrinsic_ret=%d (== CF)\n",
+ WC_RDSEED_VENDOR, (unsigned)(sizeof(word64) * 8u), ok);
+ }
+ }
+#endif
+ /* intrinsic_ret == 1 -> CF was set, 64 bits of conditioned entropy
+ * captured in *seed; return 0 to signal success to the retry wrapper.
+ * intrinsic_ret == 0 -> CF was clear; return -1 so the retry wrapper
+ * re-attempts. */
return (ok) ? 0 : -1;
}
@@ -3664,6 +3749,12 @@ static WC_INLINE int IntelRDseed64_r(word64* rnd)
for (i = 0; i < INTELRD_RETRY; i++) {
if (IntelRDseed64(rnd) == 0)
return 0;
+ /* Give the hardware entropy source a chance to replenish between
+ * attempts (Intel DRNG guidance) and yield the CPU when it is safe to
+ * block. WC_RELAX_LONG_LOOP() is a no-op where blocking is unsafe, so
+ * this only ever helps -- e.g. it lets other work (and the entropy
+ * conditioner) run during a long boot-time RDSEED starvation. */
+ WC_RELAX_LONG_LOOP();
}
return -1;
}
@@ -3677,6 +3768,19 @@ static int wc_GenerateSeed_IntelRD(OS_Seed* os, byte* output, word32 sz)
(void)os;
+#ifdef FIPS_CODE_REVIEW
+ /* Each conditioned entropy sample produced by IntelRDseed64() is 64 bits
+ * wide. This entry-level trace makes the per-request entropy volume
+ * obvious in evidence logs: sz bytes requested -> ceil(sz/8) RDSEED
+ * invocations expected (plus the two-or-three sanity-status reads on the
+ * first ever call into this function). */
+ printf("FIPS_CODE_REVIEW wc_GenerateSeed_IntelRD [%s]: "
+ "requested %u bytes = %u bits "
+ "(expect %u RDSEED 64-bit samples)\n",
+ WC_RDSEED_VENDOR, (unsigned)sz, (unsigned)(sz * 8u),
+ (unsigned)((sz + sizeof(word64) - 1u) / sizeof(word64)));
+#endif
+
if (!IS_INTEL_RDSEED(intel_flags))
return -1;
diff --git a/wolfcrypt/src/rsa.c b/wolfcrypt/src/rsa.c
index cd66eab2ef..d1be58cfa5 100644
--- a/wolfcrypt/src/rsa.c
+++ b/wolfcrypt/src/rsa.c
@@ -5155,9 +5155,12 @@ static WC_INLINE int RsaSizeCheck(int size)
}
#ifdef HAVE_FIPS
- /* Key size requirements for CAVP */
+ /* Approved RSA key sizes per FIPS 186-5 sec 5.1 and NIST SP 800-131Ar2
+ * sec 4 Table 2 - 2048, 3072, 4096 only (1024 disallowed since
+ * 2014-01-01). wc_MakeRsaKey_fips gates on WC_RSA_FIPS_GEN_MIN, but
+ * RsaSizeCheck is also reached by internal paths bypassing that wrapper -
+ * defense-in-depth removal of 1024 here closes the gap. */
switch (size) {
- case 1024:
case 2048:
case 3072:
case 4096:
@@ -5417,6 +5420,18 @@ int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng)
goto out;
}
+#ifdef HAVE_FIPS
+ /* FIPS 186-5 sec 5.2: 2^16 + 1 <= e < 2^256, e odd. The non-FIPS check
+ * above only requires e >= 3 odd. e is a long (<= 64 bits) so the upper
+ * bound holds structurally; enforce the 65537 lower bound explicitly.
+ * Defense-in-depth - FIPS callers conventionally pass e = 65537
+ * (RSA_F4). */
+ if (e < 65537L) {
+ err = BAD_FUNC_ARG;
+ goto out;
+ }
+#endif
+
#if defined(WOLFSSL_CRYPTOCELL)
err = cc310_RSA_GenerateKeyPair(key, size, e);
goto out;
diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c
index e0aafdcb2e..7fb1891ce0 100644
--- a/wolfcrypt/src/sha256.c
+++ b/wolfcrypt/src/sha256.c
@@ -1231,14 +1231,31 @@ int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
return ret;
}
+/* 32-bit ARM SHA-256 NEON/crypto transforms use vector registers, so in a
+ * kernel module they need SAVE/RESTORE_VECTOR_REGISTERS (kernel_neon_begin/
+ * end) or SIMD faults "undefined instruction". !__aarch64__-scoped so
+ * aarch64 is unchanged. (FIPS 180-4 SHA-256.) */
+#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(__aarch64__) && \
+ !defined(WOLFSSL_ARMASM_THUMB2) && !defined(WOLFSSL_ARMASM_NO_NEON)
+ #define WC_SHA256_ARM_SVR_BEGIN(fail) SAVE_VECTOR_REGISTERS(fail)
+ #define WC_SHA256_ARM_SVR_END() RESTORE_VECTOR_REGISTERS()
+#else
+ #define WC_SHA256_ARM_SVR_BEGIN(fail) WC_DO_NOTHING
+ #define WC_SHA256_ARM_SVR_END() WC_DO_NOTHING
+#endif
+
static WC_INLINE int Transform_Sha256(wc_Sha256* sha256, const byte* data)
{
#if defined(WOLFSSL_ARMASM_THUMB2) || defined(WOLFSSL_ARMASM_NO_NEON)
Transform_Sha256_Len_base(sha256, data, WC_SHA256_BLOCK_SIZE);
-#elif defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
- Transform_Sha256_Len_neon(sha256, data, WC_SHA256_BLOCK_SIZE);
#else
+ WC_SHA256_ARM_SVR_BEGIN(return _svr_ret;);
+ #if defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
+ Transform_Sha256_Len_neon(sha256, data, WC_SHA256_BLOCK_SIZE);
+ #else
Transform_Sha256_Len_crypto(sha256, data, WC_SHA256_BLOCK_SIZE);
+ #endif
+ WC_SHA256_ARM_SVR_END();
#endif
return 0;
}
@@ -1248,10 +1265,14 @@ static WC_INLINE int Transform_Sha256_Len(wc_Sha256* sha256, const byte* data,
{
#if defined(WOLFSSL_ARMASM_THUMB2) || defined(WOLFSSL_ARMASM_NO_NEON)
Transform_Sha256_Len_base(sha256, data, len);
-#elif defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
- Transform_Sha256_Len_neon(sha256, data, len);
#else
+ WC_SHA256_ARM_SVR_BEGIN(return _svr_ret;);
+ #if defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
+ Transform_Sha256_Len_neon(sha256, data, len);
+ #else
Transform_Sha256_Len_crypto(sha256, data, len);
+ #endif
+ WC_SHA256_ARM_SVR_END();
#endif
return 0;
}
diff --git a/wolfcrypt/src/sha512.c b/wolfcrypt/src/sha512.c
index 9eb6635e37..79444fc9c8 100644
--- a/wolfcrypt/src/sha512.c
+++ b/wolfcrypt/src/sha512.c
@@ -1457,15 +1457,31 @@ static void (*Transform_Sha512_p)(wc_Sha512* sha512, const byte* data) = NULL;
static void (*Transform_Sha512_Len_p)(wc_Sha512* sha512, const byte* data,
word32 len) = NULL;
+/* 32-bit ARM SHA-512 NEON asm below needs SAVE/RESTORE_VECTOR_REGISTERS
+ * (kernel_neon_begin/end) in a Linux kernel module, else the first NEON insn
+ * faults "undefined instruction". Scoped to !__aarch64__ so aarch64 and the
+ * THUMB2/NO_NEON base path stay unchanged. (FIPS 180-4 SHA-512.) */
+#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(__aarch64__) && \
+ !defined(WOLFSSL_ARMASM_THUMB2) && !defined(WOLFSSL_ARMASM_NO_NEON)
+ #define WC_SHA512_ARM_SVR_BEGIN(fail) SAVE_VECTOR_REGISTERS(fail)
+ #define WC_SHA512_ARM_SVR_END() RESTORE_VECTOR_REGISTERS()
+#else
+ #define WC_SHA512_ARM_SVR_BEGIN(fail) WC_DO_NOTHING
+ #define WC_SHA512_ARM_SVR_END() WC_DO_NOTHING
+#endif
static WC_INLINE int Transform_Sha512(wc_Sha512 *sha512, const byte* data)
{
+ WC_SHA512_ARM_SVR_BEGIN(return _svr_ret;);
(*Transform_Sha512_p)(sha512, data);
+ WC_SHA512_ARM_SVR_END();
return 0;
}
static WC_INLINE int Transform_Sha512_Len(wc_Sha512 *sha512, const byte* data,
word32 len)
{
+ WC_SHA512_ARM_SVR_BEGIN(return _svr_ret;);
(*Transform_Sha512_Len_p)(sha512, data, len);
+ WC_SHA512_ARM_SVR_END();
return 0;
}
diff --git a/wolfcrypt/src/wc_lms.c b/wolfcrypt/src/wc_lms.c
index 595b93622d..09dd20b3b7 100644
--- a/wolfcrypt/src/wc_lms.c
+++ b/wolfcrypt/src/wc_lms.c
@@ -28,6 +28,14 @@
#if FIPS_VERSION3_GE(2,0,0)
/* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
#define FIPS_NO_WRAPPERS
+
+ /* Windows orders the FIPS in-core integrity boundary by named linker
+ * sections. Keep LMS (SP 800-208) code/const inside the boundary,
+ * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$nc")
+ #pragma const_seg(".fipsB$nc")
+ #endif
#endif
#include
#include
diff --git a/wolfcrypt/src/wc_lms_impl.c b/wolfcrypt/src/wc_lms_impl.c
index e88c032d87..0df71d4d5b 100644
--- a/wolfcrypt/src/wc_lms_impl.c
+++ b/wolfcrypt/src/wc_lms_impl.c
@@ -41,6 +41,15 @@
#include
+#if FIPS_VERSION3_GE(2,0,0)
+ /* Keep this LMS (SP 800-208) implementation's code/const inside the FIPS
+ * in-core integrity boundary (Windows orders it by named sections). */
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$ne")
+ #pragma const_seg(".fipsB$ne")
+ #endif
+#endif
+
#include
#ifdef NO_INLINE
@@ -2319,7 +2328,10 @@ static int wc_lms_treehash_update(LmsState* state, LmsPrivState* privState,
byte* left = dp + LMS_D_LEN;
byte* temp = left + params->hash_len;
WC_DECLARE_VAR(stack, byte, (LMS_MAX_HEIGHT + 1) * LMS_MAX_NODE_LEN, 0);
- byte* sp;
+ /* Init to NULL: sp is set and used only on the ret==0 path, but 32-bit ARM
+ * gcc cannot correlate the two separate `if (ret == 0)` guards and reports a
+ * false-positive -Wmaybe-uninitialized (x86_64/aarch64 gcc do not). */
+ byte* sp = NULL;
word32 max_cb = (word32)1 << params->cacheBits;
word32 i;
diff --git a/wolfcrypt/src/wc_mldsa.c b/wolfcrypt/src/wc_mldsa.c
index 79d18dddb6..209a2883d5 100644
--- a/wolfcrypt/src/wc_mldsa.c
+++ b/wolfcrypt/src/wc_mldsa.c
@@ -142,6 +142,14 @@
#if FIPS_VERSION3_GE(2,0,0)
/* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
#define FIPS_NO_WRAPPERS
+
+ /* Windows orders the FIPS in-core integrity boundary by named linker
+ * sections. Keep ML-DSA (FIPS 204) code/const inside the boundary,
+ * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$nc")
+ #pragma const_seg(".fipsB$nc")
+ #endif
#endif
#ifndef WOLFSSL_MLDSA_NO_ASN1
@@ -772,8 +780,95 @@ static int mldsa_hash256_ctx_msg(wc_Shake* shake256, const byte* tr,
* @return 0 on success.
* @return BAD_FUNC_ARG if hash algorithm not known.
*/
-static int mldsa_get_hash_oid(int hash, byte* oidBuffer, word32* oidLen)
+/* HashML-DSA PH-vs-paramSet enforcement.
+ *
+ * FIPS 204 sec. 5.4 (Table 4) restricts the HashML-DSA pre-hash PH to
+ * algorithms whose collision-resistance strength meets or exceeds the
+ * paramSet's security level; enforced for both sigGen and sigVer. Returns
+ * 0 for an approved (hashAlg, level) pair, else BAD_FUNC_ARG (including any
+ * hash not on the approved list).
+ */
+static int mldsa_check_hash_for_level(int hashAlg, byte level)
{
+ int strengthBits; /* collision-resistance strength of the chosen hash */
+ int requiredBits; /* security level required by the paramSet */
+
+ switch (hashAlg) {
+ #ifndef NO_SHA256
+ case WC_HASH_TYPE_SHA256:
+ strengthBits = 128;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA384
+ case WC_HASH_TYPE_SHA384:
+ strengthBits = 192;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA512
+ case WC_HASH_TYPE_SHA512:
+ strengthBits = 256;
+ break;
+ #ifndef WOLFSSL_NOSHA512_256
+ case WC_HASH_TYPE_SHA512_256:
+ /* SHA-512/256 has 128-bit collision resistance (truncated). */
+ strengthBits = 128;
+ break;
+ #endif
+ #endif
+ #ifdef WOLFSSL_SHA3
+ #ifndef WOLFSSL_NOSHA3_256
+ case WC_HASH_TYPE_SHA3_256:
+ strengthBits = 128;
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_384
+ case WC_HASH_TYPE_SHA3_384:
+ strengthBits = 192;
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_512
+ case WC_HASH_TYPE_SHA3_512:
+ strengthBits = 256;
+ break;
+ #endif
+ #endif
+ #ifdef WOLFSSL_SHAKE128
+ case WC_HASH_TYPE_SHAKE128:
+ strengthBits = 128;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHAKE256
+ case WC_HASH_TYPE_SHAKE256:
+ strengthBits = 256;
+ break;
+ #endif
+ default:
+ /* Hash not on the FIPS 204 Table 4 approved list (e.g. SHA-224,
+ * SHA-512/224, SHA3-224, MD5). Reject regardless of level. */
+ return BAD_FUNC_ARG;
+ }
+
+ switch (level) {
+ case WC_ML_DSA_44:
+ requiredBits = 128;
+ break;
+ case WC_ML_DSA_65:
+ requiredBits = 192;
+ break;
+ case WC_ML_DSA_87:
+ requiredBits = 256;
+ break;
+ default:
+ return BAD_FUNC_ARG;
+ }
+
+ if (strengthBits < requiredBits) {
+ return BAD_FUNC_ARG;
+ }
+ return 0;
+}
+
+static int mldsa_get_hash_oid(int hash, byte* oidBuffer, word32* oidLen){
int ret = 0;
const byte* oid;
@@ -9467,11 +9562,17 @@ static int mldsa_sign_ctx_hash_with_seed(wc_MlDsaKey* key,
byte oidMsgHash[MLDSA_HASH_OID_LEN + WC_MAX_DIGEST_SIZE];
word32 oidMsgHashLen = 0;
- /* Check that the input hash length is valid. */
+ /* Check that the input hash length is valid (guards against caller-side
+ * buffer overruns before we touch hash). */
if ((int)hashLen != wc_HashGetDigestSize((enum wc_HashType)hashAlg)) {
ret = BAD_LENGTH_E;
}
+ /* FIPS 204 sec. 5.4 Table 4: enforce hash <-> paramSet matching. */
+ if (ret == 0) {
+ ret = mldsa_check_hash_for_level(hashAlg, key->level);
+ }
+
if (ret == 0) {
XMEMCPY(seedMu, seed, MLDSA_RND_SZ);
@@ -10140,12 +10241,17 @@ static int mldsa_verify_ctx_hash(wc_MlDsaKey* key, const byte* ctx,
if ((key == NULL) || (key->params == NULL)) {
ret = BAD_FUNC_ARG;
}
- /* Check that the input hash length is valid. */
+ /* Check that the input hash length is valid (guards against caller-side
+ * buffer overruns before we touch hash). */
if ((ret == 0) &&
((int)hashLen != wc_HashGetDigestSize((enum wc_HashType)hashAlg)))
{
ret = BAD_LENGTH_E;
}
+ /* FIPS 204 sec. 5.4 Table 4: enforce hash <-> paramSet matching. */
+ if (ret == 0) {
+ ret = mldsa_check_hash_for_level(hashAlg, key->level);
+ }
if (ret == 0) {
/* Step 6: Hash public key. */
diff --git a/wolfcrypt/src/wc_mlkem.c b/wolfcrypt/src/wc_mlkem.c
index eb96e9526b..f4ec2d39fa 100644
--- a/wolfcrypt/src/wc_mlkem.c
+++ b/wolfcrypt/src/wc_mlkem.c
@@ -83,6 +83,14 @@
#if FIPS_VERSION3_GE(2,0,0)
/* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
#define FIPS_NO_WRAPPERS
+
+ /* Windows orders the FIPS in-core integrity boundary by named linker
+ * sections. Keep ML-KEM (FIPS 203) code/const inside the boundary,
+ * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$na")
+ #pragma const_seg(".fipsB$na")
+ #endif
#endif
#include
@@ -696,49 +704,11 @@ int wc_MlKemKey_MakeKey(MlKemKey* key, WC_RNG* rng)
ret = wc_MlKemKey_MakeKeyWithRandom(key, rand, sizeof(rand));
}
-#ifdef HAVE_FIPS
- /* Pairwise Consistency Test (PCT) per FIPS 140-3 / ISO 19790:2012
- * Section 7.10.3.3: encapsulate with ek, decapsulate with dk,
- * verify shared secrets match. */
- if (ret == 0) {
- WC_DECLARE_VAR(pct_ct, byte, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE,
- key->heap);
- byte pct_ss1[WC_ML_KEM_SS_SZ];
- byte pct_ss2[WC_ML_KEM_SS_SZ];
- word32 ctSz = 0;
-
- WC_ALLOC_VAR_EX(pct_ct, byte, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE,
- key->heap, DYNAMIC_TYPE_TMP_BUFFER, ret = MEMORY_E);
-
- if (ret == 0)
- ret = wc_MlKemKey_CipherTextSize(key, &ctSz);
-
- if (ret == 0)
- ret = wc_MlKemKey_Encapsulate(key, pct_ct, pct_ss1, rng);
-
- if (ret == 0)
- ret = wc_MlKemKey_Decapsulate(key, pct_ss2, pct_ct, ctSz);
-
- if (ret == 0) {
- if (XMEMCMP(pct_ss1, pct_ss2, WC_ML_KEM_SS_SZ) != 0)
- ret = ML_KEM_PCT_E;
- }
-
- ForceZero(pct_ss1, sizeof(pct_ss1));
- ForceZero(pct_ss2, sizeof(pct_ss2));
- if (WC_VAR_OK(pct_ct))
- ForceZero(pct_ct, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE);
-
- WC_FREE_VAR_EX(pct_ct, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
-
- /* FIPS 140-3 IG 10.3.A (TE10.35.02): a key pair that fails the PCT
- * must be rendered unusable. Zeroize the generated key material so
- * a caller that ignores the return value cannot use it. */
- if (ret != 0) {
- wc_MlKemKey_Free(key);
- }
- }
-#endif /* HAVE_FIPS */
+ /* PCT now lives in wc_MlKemKey_MakeKeyWithRandom() (called above) so both
+ * the random-seeded path (here) and the caller-supplied-seed path exercise
+ * the FIPS 140-3 IG 10.3.A 1.B Pairwise Consistency Test.
+ * Audit A16-1: PCT previously lived only here, leaving the
+ * deterministic-seed entry uncovered. */
/* Ensure seeds are zeroized. */
ForceZero((void*)rand, (word32)sizeof(rand));
@@ -987,8 +957,67 @@ int wc_MlKemKey_MakeKeyWithRandom(MlKemKey* key, const unsigned char* rand,
ForceZero(e, (size_t)(k * MLKEM_N) * sizeof(sword16));
#endif
- /* Note: PCT is performed in wc_MlKemKey_MakeKey() which calls this
- * function and has the RNG parameter needed for encapsulation. */
+#ifdef HAVE_FIPS
+ /* Pairwise Consistency Test (PCT) per FIPS 140-3 IG 10.3.A 1.B and
+ * ISO/IEC 19790:2012 Section 7.10.3.3: encapsulate with the generated
+ * encapsulation key (ek), decapsulate with the matching decapsulation
+ * key (dk), and verify the recovered shared secret matches. This is a
+ * deterministic key-gen path with no caller RNG, so the PCT uses
+ * wc_MlKemKey_EncapsulateWithRandom() with a fixed 32-byte `m` (FIPS 203
+ * Algorithm 17 input); `m` need not be unpredictable for a PCT roundtrip.
+ *
+ * Audit A16-1: PCT previously lived only in wc_MlKemKey_MakeKey (which
+ * seeds `rand` from the DRBG), leaving this deterministic-seed entry
+ * without PCT coverage. */
+ if (ret == 0) {
+ WC_DECLARE_VAR(pct_ct, byte, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE,
+ key->heap);
+ byte pct_ss1[WC_ML_KEM_SS_SZ];
+ byte pct_ss2[WC_ML_KEM_SS_SZ];
+ word32 pct_ctSz = 0;
+ /* Fixed 32-byte test pattern for FIPS 203 Alg 17 `m` parameter.
+ * Value is arbitrary - PCT only requires encap/decap roundtrip,
+ * not encap unpredictability. */
+ static const byte pct_m[WC_ML_KEM_ENC_RAND_SZ] = {
+ 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB,
+ 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB,
+ 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB,
+ 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB
+ };
+
+ WC_ALLOC_VAR_EX(pct_ct, byte, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE,
+ key->heap, DYNAMIC_TYPE_TMP_BUFFER, ret = MEMORY_E);
+
+ if (ret == 0)
+ ret = wc_MlKemKey_CipherTextSize(key, &pct_ctSz);
+
+ if (ret == 0)
+ ret = wc_MlKemKey_EncapsulateWithRandom(key, pct_ct, pct_ss1,
+ pct_m, (int)sizeof(pct_m));
+
+ if (ret == 0)
+ ret = wc_MlKemKey_Decapsulate(key, pct_ss2, pct_ct, pct_ctSz);
+
+ if (ret == 0) {
+ if (XMEMCMP(pct_ss1, pct_ss2, WC_ML_KEM_SS_SZ) != 0)
+ ret = ML_KEM_PCT_E;
+ }
+
+ ForceZero(pct_ss1, sizeof(pct_ss1));
+ ForceZero(pct_ss2, sizeof(pct_ss2));
+ if (WC_VAR_OK(pct_ct))
+ ForceZero(pct_ct, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE);
+
+ WC_FREE_VAR_EX(pct_ct, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+ /* FIPS 140-3 IG 10.3.A (TE10.35.02): a key pair that fails the PCT
+ * must be rendered unusable. Zeroize the generated key material so
+ * a caller that ignores the return value cannot use it. */
+ if (ret != 0) {
+ wc_MlKemKey_Free(key);
+ }
+ }
+#endif /* HAVE_FIPS */
return ret;
}
diff --git a/wolfcrypt/src/wc_mlkem_poly.c b/wolfcrypt/src/wc_mlkem_poly.c
index cd067b46e7..8ed957f77d 100644
--- a/wolfcrypt/src/wc_mlkem_poly.c
+++ b/wolfcrypt/src/wc_mlkem_poly.c
@@ -71,6 +71,15 @@
#include
+#if FIPS_VERSION3_GE(2,0,0)
+ /* Keep this ML-KEM (FIPS 203) implementation's code/const inside the FIPS
+ * in-core integrity boundary (Windows orders it by named sections). */
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$nb")
+ #pragma const_seg(".fipsB$nb")
+ #endif
+#endif
+
#ifdef WC_MLKEM_NO_ASM
#undef USE_INTEL_SPEEDUP
#undef WOLFSSL_ARMASM
diff --git a/wolfcrypt/src/wc_slhdsa.c b/wolfcrypt/src/wc_slhdsa.c
index 4d24d5ff96..02de84d109 100644
--- a/wolfcrypt/src/wc_slhdsa.c
+++ b/wolfcrypt/src/wc_slhdsa.c
@@ -26,6 +26,14 @@
#if FIPS_VERSION3_GE(2,0,0)
/* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
#define FIPS_NO_WRAPPERS
+
+ /* Windows orders the FIPS in-core integrity boundary by named linker
+ * sections. Keep SLH-DSA (FIPS 205) code/const inside the boundary,
+ * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$nh")
+ #pragma const_seg(".fipsB$nh")
+ #endif
#endif
#include
@@ -3286,6 +3294,9 @@ static int slhdsakey_wots_pkgen_chain_c(SlhDsaKey* key, const byte* sk_seed,
if (ret == 0)
XMEMSET(sk, 0, (SLHDSA_MAX_MSG_SZ + 3) * SLHDSA_MAX_N);
if (ret == 0) {
+ /* Zero the WOTS+ leaf buffer up front: defensive clearing of secret
+ * key material that also avoids a -Wmaybe-uninitialized read of sk. */
+ XMEMSET(sk, 0, (SLHDSA_MAX_MSG_SZ + 3) * SLHDSA_MAX_N);
/* Step 4. len consecutive addresses. */
for (i = 0; i < len; i++) {
/* Step 5. Set chain address for WOTS PRF. */
@@ -7006,6 +7017,46 @@ int wc_SlhDsaKey_MakeKey(SlhDsaKey* key, WC_RNG* rng)
key->sk + 2 * n, n);
}
+#ifdef HAVE_FIPS
+ /* Pairwise Consistency Test (PCT) per FIPS 140-3 IG 10.3.A (TE10.35.02):
+ * sign with the new sk, verify with the matching pk. SLH-DSA (FIPS 205)
+ * is stateless, so the relaxed PCT rule for stateful HBS (LMS/XMSS) does
+ * not apply -- PCT runs on every KeyGen. SignDeterministic avoids
+ * consuming RNG state; sigLen is heap-allocated for this exact variant
+ * (~8 KB to 50 KB across SLH-DSA variants). */
+ if (ret == 0) {
+ static const byte pct_msg[] = "wolfSSL SLH-DSA PCT";
+ word32 pct_sigLen = key->params->sigLen;
+ byte* pct_sig = (byte*)XMALLOC(pct_sigLen, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ word32 pct_sigSz = pct_sigLen;
+
+ if (pct_sig == NULL) {
+ ret = MEMORY_E;
+ }
+ if (ret == 0) {
+ ret = wc_SlhDsaKey_SignDeterministic(key, NULL, 0,
+ pct_msg, sizeof(pct_msg), pct_sig, &pct_sigSz);
+ }
+ if (ret == 0) {
+ ret = wc_SlhDsaKey_Verify(key, NULL, 0,
+ pct_msg, sizeof(pct_msg), pct_sig, pct_sigSz);
+ if (ret != 0) {
+ ret = SLH_DSA_PCT_E;
+ }
+ }
+ if (pct_sig != NULL) {
+ ForceZero(pct_sig, pct_sigLen);
+ XFREE(pct_sig, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+ /* IG 10.3.A (TE10.35.02): a key pair that fails the PCT must be
+ * rendered unusable. */
+ if (ret != 0) {
+ wc_SlhDsaKey_Free(key);
+ }
+ }
+#endif /* HAVE_FIPS */
+
return ret;
}
@@ -7952,6 +8003,97 @@ static const byte slhdsakey_oid_sha3_512[] = {
#endif
#endif
+/* HashSLH-DSA PH-vs-paramSet enforcement.
+ *
+ * FIPS 205 sec. 10.2.2 (Table 9): the pre-hash PH must have collision-
+ * resistance >= the paramSet security level (key->params->n in bytes):
+ * n = 16 (128-bit): SHA2-256, SHA2-384, SHA2-512, SHA2-512/256,
+ * SHA3-256, SHA3-384, SHA3-512,
+ * SHAKE-128, SHAKE-256
+ * n = 24 (192-bit): SHA2-384, SHA2-512, SHA3-384, SHA3-512, SHAKE-256
+ * n = 32 (256-bit): SHA2-512, SHA3-512, SHAKE-256
+ *
+ * Returns 0 if approved, else BAD_FUNC_ARG (including any hash off the list,
+ * e.g. SHA-224, SHA-512/224, SHA3-224).
+ */
+static int slhdsa_check_hash_for_n(enum wc_HashType hashType, byte n)
+{
+ int strengthBits;
+ int requiredBits;
+
+ switch ((int)hashType) {
+ #ifndef NO_SHA256
+ case WC_HASH_TYPE_SHA256:
+ strengthBits = 128;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA384
+ case WC_HASH_TYPE_SHA384:
+ strengthBits = 192;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA512
+ case WC_HASH_TYPE_SHA512:
+ strengthBits = 256;
+ break;
+ #ifndef WOLFSSL_NOSHA512_256
+ case WC_HASH_TYPE_SHA512_256:
+ /* SHA-512/256 has 128-bit collision resistance (truncated). */
+ strengthBits = 128;
+ break;
+ #endif
+ #endif
+ #ifdef WOLFSSL_SHA3
+ #ifndef WOLFSSL_NOSHA3_256
+ case WC_HASH_TYPE_SHA3_256:
+ strengthBits = 128;
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_384
+ case WC_HASH_TYPE_SHA3_384:
+ strengthBits = 192;
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_512
+ case WC_HASH_TYPE_SHA3_512:
+ strengthBits = 256;
+ break;
+ #endif
+ #endif
+ #ifdef WOLFSSL_SHAKE128
+ case WC_HASH_TYPE_SHAKE128:
+ strengthBits = 128;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHAKE256
+ case WC_HASH_TYPE_SHAKE256:
+ strengthBits = 256;
+ break;
+ #endif
+ default:
+ /* Hash not on the FIPS 205 Table 9 approved list. */
+ return BAD_FUNC_ARG;
+ }
+
+ if (n == WC_SLHDSA_N_128) {
+ requiredBits = 128;
+ }
+ else if (n == WC_SLHDSA_N_192) {
+ requiredBits = 192;
+ }
+ else if (n == WC_SLHDSA_N_256) {
+ requiredBits = 256;
+ }
+ else {
+ return BAD_FUNC_ARG;
+ }
+
+ if (strengthBits < requiredBits) {
+ return BAD_FUNC_ARG;
+ }
+ return 0;
+}
+
/* Validate the caller-supplied pre-hashed digest length and look up the
* corresponding OID for the chosen hash algorithm.
*
@@ -8169,6 +8311,14 @@ static int slhdsakey_signhash_external(SlhDsaKey* key, const byte* ctx,
(sigSz == NULL)) {
ret = BAD_FUNC_ARG;
}
+ /* HashSLH-DSA requires an explicit, approved pre-hash algorithm.
+ * WC_HASH_TYPE_NONE (the "pure SLH-DSA" sentinel) is never valid here;
+ * reject it explicitly (FIPS 205 Section 10.2.2 / Table 9) rather than
+ * via the slhdsa_check_hash_for_n() switch default, so it survives any
+ * future reorder of the validators or a NONE case added to that switch. */
+ else if (hashType == WC_HASH_TYPE_NONE) {
+ ret = BAD_FUNC_ARG;
+ }
/* Check sig buffer is large enough to hold generated signature. */
else if (*sigSz < key->params->sigLen) {
ret = BAD_LENGTH_E;
@@ -8178,6 +8328,12 @@ static int slhdsakey_signhash_external(SlhDsaKey* key, const byte* ctx,
/* Alg 23, Step 6: Return error. */
ret = BAD_FUNC_ARG;
}
+ /* FIPS 205 sec. 10.2.2 Table 9: enforce PH <-> paramSet matching before
+ * pre-hashing the message. Rejects PHs whose collision-resistance
+ * strength is below the paramSet's security level (n). */
+ if (ret == 0) {
+ ret = slhdsa_check_hash_for_n(hashType, key->params->n);
+ }
if (ret == 0) {
/* Alg 23, Steps 8-23: Validate caller-supplied pre-hashed digest length
* and select OID for the chosen hash algorithm. */
@@ -8412,8 +8568,11 @@ int wc_SlhDsaKey_SignHash(SlhDsaKey* key, const byte* ctx, byte ctxSz,
ret = MISSING_KEY;
}
/* First sanity check on hashType; the downstream prehash validator does
- * the detailed check for the actual type. */
- else if ((word32)hashType > (word32)WC_HASH_TYPE_MAX) {
+ * the detailed check. Reject WC_HASH_TYPE_NONE here too -- never a valid
+ * pre-hash (FIPS 205 Section 10.2.2 / Table 9) -- so a known-invalid call
+ * fails before consuming DRBG output below. */
+ else if ((hashType == WC_HASH_TYPE_NONE) ||
+ ((word32)hashType > (word32)WC_HASH_TYPE_MAX)) {
ret = BAD_FUNC_ARG;
}
@@ -8542,6 +8701,12 @@ int wc_SlhDsaKey_VerifyHash(SlhDsaKey* key, const byte* ctx, byte ctxSz,
}
#ifdef WOLF_CRYPTO_CB
+ /* FIPS 205 sec. 10.2.2 Table 9: enforce PH <-> paramSet matching.
+ * Rejects PHs whose collision-resistance strength is below the
+ * paramSet's security level (n). */
+ if (ret == 0) {
+ ret = slhdsa_check_hash_for_n(hashType, key->params->n);
+ }
if (ret == 0) {
#ifndef WOLF_CRYPTO_CB_FIND
if (key->devId != INVALID_DEVID)
diff --git a/wolfcrypt/src/wc_xmss.c b/wolfcrypt/src/wc_xmss.c
index b4f4c76185..3f0105030c 100644
--- a/wolfcrypt/src/wc_xmss.c
+++ b/wolfcrypt/src/wc_xmss.c
@@ -28,6 +28,14 @@
#if FIPS_VERSION3_GE(2,0,0)
/* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
#define FIPS_NO_WRAPPERS
+
+ /* Windows orders the FIPS in-core integrity boundary by named linker
+ * sections. Keep XMSS (SP 800-208) code/const inside the boundary,
+ * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$nf")
+ #pragma const_seg(".fipsB$nf")
+ #endif
#endif
#include
#include
diff --git a/wolfcrypt/src/wc_xmss_impl.c b/wolfcrypt/src/wc_xmss_impl.c
index 9029fca4a0..135fc40111 100644
--- a/wolfcrypt/src/wc_xmss_impl.c
+++ b/wolfcrypt/src/wc_xmss_impl.c
@@ -33,6 +33,15 @@
#include
+#if FIPS_VERSION3_GE(2,0,0)
+ /* Keep this XMSS (SP 800-208) implementation's code/const inside the FIPS
+ * in-core integrity boundary (Windows orders it by named sections). */
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$ng")
+ #pragma const_seg(".fipsB$ng")
+ #endif
+#endif
+
#include
#include
diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c
index 6e574e3b31..7ca294cff7 100644
--- a/wolfcrypt/test/test.c
+++ b/wolfcrypt/test/test.c
@@ -137,7 +137,7 @@ static const byte const_byte_array[] = "A+Gd\0\0\0";
esp_start_heap = esp_this_heap; \
} \
ESP_LOGI(ESPIDF_TAG, "%s #%d; Heap free: %d", \
- ((b) ? (b) : ""), /* breadcumb string */ \
+ ((b) ? (b) : ""), /* breadcrumb string */ \
((i) ? (i) : 0), /* index */ \
esp_this_heap);
@@ -56056,6 +56056,132 @@ static wc_test_ret_t mldsa_param_test(int param, WC_RNG* rng)
#endif
return ret;
}
+
+#if !defined(WOLFSSL_DILITHIUM_NO_SIGN) && \
+ !defined(WOLFSSL_DILITHIUM_NO_VERIFY)
+/* Negative test: HashML-DSA must reject a pre-hash whose collision resistance
+ * is below the parameter set's claimed security strength (FIPS 204 sec. 5.4,
+ * Table 4: approved PH per level). Targets here:
+ * ML-DSA-65 (192-bit): SHA-256 (128-bit) disallowed.
+ * ML-DSA-87 (256-bit): SHA-384 (192-bit) disallowed.
+ * Asserts sigGen and sigVer both reject (non-zero). Fails until the in-module
+ * hash-vs-paramSet check exists in wc_dilithium_{sign,verify}_ctx_hash. */
+static wc_test_ret_t mldsa_hash_paramset_rejection_test(WC_RNG* rng)
+{
+ wc_test_ret_t ret = 0;
+ int i;
+#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC)
+ dilithium_key* key = NULL;
+ byte* sig = NULL;
+#else
+ dilithium_key key[1];
+ byte sig[DILITHIUM_MAX_SIG_SIZE];
+#endif
+ word32 sigLen;
+ int verified;
+
+ /* Fixed-content digests; only the (paramSet, hashAlg, hashLen) tuple
+ * matters for rejection. Sizes match each digest length so the length
+ * sanity check in wc_dilithium_*_ctx_hash() does not short-circuit before
+ * the hash-vs-paramSet gate under test. */
+ static const byte hash32[32] = { /* SHA-256 digest size */
+ 0xBA,0x78,0x16,0xBF,0x8F,0x01,0xCF,0xEA,
+ 0x41,0x41,0x40,0xDE,0x5D,0xAE,0x22,0x23,
+ 0xB0,0x03,0x61,0xA3,0x96,0x17,0x7A,0x9C,
+ 0xB4,0x10,0xFF,0x61,0xF2,0x00,0x15,0xAD
+ };
+ static const byte hash48[48] = { /* SHA-384 digest size */
+ 0xCB,0x00,0x75,0x3F,0x45,0xA3,0x5E,0x8B,
+ 0xB5,0xA0,0x3D,0x69,0x9A,0xC6,0x50,0x07,
+ 0x27,0x2C,0x32,0xAB,0x0E,0xDE,0xD1,0x63,
+ 0x1A,0x8B,0x60,0x5A,0x43,0xFF,0x5B,0xED,
+ 0x80,0x86,0x07,0x2B,0xA1,0xE7,0xCC,0x23,
+ 0x58,0xBA,0xEC,0xA1,0x34,0xC8,0x25,0xA7
+ };
+
+ struct {
+ int level;
+ int hashAlg;
+ const byte* hash;
+ word32 hashLen;
+ } forbidden[] = {
+ /* ML-DSA-65 needs >=192-bit collision strength; SHA-256 = 128-bit. */
+ { WC_ML_DSA_65, WC_HASH_TYPE_SHA256, hash32, 32 },
+ /* ML-DSA-87 needs >=256-bit collision strength; SHA-384 = 192-bit. */
+ { WC_ML_DSA_87, WC_HASH_TYPE_SHA384, hash48, 48 }
+ };
+
+#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC)
+ key = (dilithium_key*)XMALLOC(sizeof(*key), HEAP_HINT,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ sig = (byte*)XMALLOC(DILITHIUM_MAX_SIG_SIZE, HEAP_HINT,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if ((key == NULL) || (sig == NULL)) {
+ ERROR_OUT(WC_TEST_RET_ENC_ERRNO, neg_out);
+ }
+#endif
+ XMEMSET(sig, 0, DILITHIUM_MAX_SIG_SIZE);
+
+ for (i = 0; i < (int)(sizeof(forbidden) / sizeof(forbidden[0])); i++) {
+ #ifdef WOLFSSL_NO_ML_DSA_65
+ if (forbidden[i].level == WC_ML_DSA_65) continue;
+ #endif
+ #ifdef WOLFSSL_NO_ML_DSA_87
+ if (forbidden[i].level == WC_ML_DSA_87) continue;
+ #endif
+
+ ret = wc_dilithium_init_ex(key, NULL, devId);
+ if (ret != 0) {
+ ERROR_OUT(WC_TEST_RET_ENC_EC(ret), neg_out);
+ }
+ ret = wc_dilithium_set_level(key, (byte)forbidden[i].level);
+ if (ret != 0) {
+ wc_dilithium_free(key);
+ ERROR_OUT(WC_TEST_RET_ENC_EC(ret), neg_out);
+ }
+ ret = wc_dilithium_make_key(key, rng);
+ if (ret != 0) {
+ wc_dilithium_free(key);
+ ERROR_OUT(WC_TEST_RET_ENC_EC(ret), neg_out);
+ }
+
+ sigLen = (word32)wc_dilithium_sig_size(key);
+
+ /* sigGen with disallowed PH must be REJECTED. */
+ PRIVATE_KEY_UNLOCK();
+ ret = wc_dilithium_sign_ctx_hash(NULL, 0, forbidden[i].hashAlg,
+ forbidden[i].hash, forbidden[i].hashLen, sig, &sigLen, key, rng);
+ PRIVATE_KEY_LOCK();
+ if (ret == 0) {
+ /* Module did NOT reject -- this is the missing-enforcement bug. */
+ wc_dilithium_free(key);
+ ERROR_OUT(WC_TEST_RET_ENC_NC, neg_out);
+ }
+
+ /* sigVer with disallowed PH must ALSO be REJECTED. */
+ verified = -1;
+ sigLen = (word32)wc_dilithium_sig_size(key);
+ ret = wc_dilithium_verify_ctx_hash(sig, sigLen, NULL, 0,
+ forbidden[i].hashAlg, forbidden[i].hash, forbidden[i].hashLen,
+ &verified, key);
+ if (ret == 0) {
+ wc_dilithium_free(key);
+ ERROR_OUT(WC_TEST_RET_ENC_NC, neg_out);
+ }
+
+ wc_dilithium_free(key);
+ ret = 0;
+ }
+
+neg_out:
+#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC)
+ if (sig != NULL) XFREE(sig, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (key != NULL) XFREE(key, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return ret;
+}
+#endif /* !WOLFSSL_DILITHIUM_NO_SIGN && !WOLFSSL_DILITHIUM_NO_VERIFY */
+
#endif
#if defined(WC_MLDSA_CACHE_MATRIX_A) && \
@@ -56501,6 +56627,18 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t mldsa_test(void)
#endif /* (WOLFSSL_MLDSA_PUBLIC_KEY && !WOLFSSL_MLDSA_NO_VERIFY) ||
* (WOLFSSL_MLDSA_PRIVATE_KEY && !WOLFSSL_MLDSA_NO_SIGN) */
+#if !defined(WOLFSSL_MLDSA_NO_MAKE_KEY) && \
+ !defined(WOLFSSL_MLDSA_NO_SIGN) && \
+ !defined(WOLFSSL_MLDSA_NO_VERIFY) && \
+ (!defined(WOLFSSL_NO_ML_DSA_65) || !defined(WOLFSSL_NO_ML_DSA_87))
+ /* FIPS 204 sec. 5.4 -- HashML-DSA must reject pre-hashes weaker than
+ * the parameter set's security level. */
+ ret = mldsa_hash_paramset_rejection_test(&rng);
+ if (ret != 0) {
+ ERROR_OUT(ret, out);
+ }
+#endif
+
#if !defined(WOLFSSL_MLDSA_NO_MAKE_KEY) || \
!defined(WOLFSSL_MLDSA_NO_VERIFY) || \
defined(WOLFSSL_MLDSA_PRIVATE_KEY) || \
@@ -57946,29 +58084,17 @@ static wc_test_ret_t slhdsa_test_param(enum SlhDsaParam param)
ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out);
}
- /* HashSLH-DSA takes the caller's pre-hashed digest as input. */
+ /* HashSLH-DSA takes the caller's pre-hashed digest as input. SHAKE-256 is
+ * approved for all SLH-DSA-{128,192,256} variants (FIPS 205 sec. 10.2.2
+ * Table 9), so use it unconditionally for the positive round-trip -- it
+ * never trips the hash-vs-paramSet gate for higher-security paramSets. */
{
-#ifdef WOLFSSL_SLHDSA_SHA2
- enum wc_HashType phType = SLHDSA_IS_SHA2(param) ?
- WC_HASH_TYPE_SHA256 : WC_HASH_TYPE_SHAKE256;
-#else
enum wc_HashType phType = WC_HASH_TYPE_SHAKE256;
-#endif
byte digest[WC_SHA3_512_DIGEST_SIZE];
- word32 digestLen;
+ word32 digestLen = WC_SHA3_512_DIGEST_SIZE;
-#ifdef WOLFSSL_SLHDSA_SHA2
- if (phType == WC_HASH_TYPE_SHA256) {
- ret = wc_Sha256Hash(msg, (word32)sizeof(msg), digest);
- digestLen = WC_SHA256_DIGEST_SIZE;
- }
- else
-#endif
- {
- ret = wc_Shake256Hash(msg, (word32)sizeof(msg), digest,
- WC_SHA3_512_DIGEST_SIZE);
- digestLen = WC_SHA3_512_DIGEST_SIZE;
- }
+ ret = wc_Shake256Hash(msg, (word32)sizeof(msg), digest,
+ WC_SHA3_512_DIGEST_SIZE);
if (ret != 0) {
ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out);
}
@@ -57987,9 +58113,11 @@ static wc_test_ret_t slhdsa_test_param(enum SlhDsaParam param)
ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out);
}
- /* Additional pre-hash test: SHA-384 exercises a different OID path */
+ /* Additional pre-hash test: SHA-384 exercises a different OID path. Skip
+ * for SLH-DSA-256 -- SHA-384 (192-bit collision) is below its 256-bit
+ * security level (FIPS 205 sec. 10.2.2 Table 9). */
#ifdef WOLFSSL_SHA384
- {
+ if (key->params->n != WC_SLHDSA_N_256) {
byte digest384[WC_SHA384_DIGEST_SIZE];
ret = wc_Sha384Hash(msg, (word32)sizeof(msg), digest384);
@@ -58049,6 +58177,87 @@ static wc_test_ret_t slhdsa_test_param(enum SlhDsaParam param)
return ret;
}
+
+/* Negative test: HashSLH-DSA must reject a pre-hash whose collision resistance
+ * is below the parameter set's claimed security strength (FIPS 205 sec.
+ * 10.2.2, Table 9: approved PH per level). Here a 192/256-bit paramSet is
+ * given SHA-256 (128-bit, approved only for 128-bit paramSets). Asserts
+ * sigGen and sigVer both reject. Fails until the in-module hash-vs-paramSet
+ * check exists in wc_SlhDsaKey_{Sign,Verify}Hash. */
+static wc_test_ret_t slhdsa_hash_paramset_rejection_test(enum SlhDsaParam param)
+{
+ int ret = 0;
+ WC_RNG rng;
+ SlhDsaKey key[1];
+ byte sig[WC_SLHDSA_MAX_SIG_LEN];
+ word32 sigLen;
+ static const byte msg[] = {
+ 0x48,0x65,0x6c,0x6c,0x6f,0x20,0x57,0x6f,
+ 0x72,0x6c,0x64,0x21
+ };
+ byte ctx[1];
+ /* SHA-256 (128-bit collision) is approved only for 128-bit paramSets, so
+ * any 192/256-bit paramSet must reject it. */
+ enum wc_HashType badHash = WC_HASH_TYPE_SHA256;
+
+ XMEMSET(&key, 0, sizeof(key));
+
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
+ ret = wc_InitRng(&rng);
+#endif
+ if (ret != 0) return WC_TEST_RET_ENC_EC(ret);
+
+ ret = wc_SlhDsaKey_Init(key, param, NULL, INVALID_DEVID);
+ if (ret != 0) {
+ wc_FreeRng(&rng);
+ return WC_TEST_RET_ENC_EC(ret);
+ }
+
+ ret = wc_SlhDsaKey_MakeKey(key, &rng);
+ if (ret != 0) {
+ wc_SlhDsaKey_Free(key);
+ wc_FreeRng(&rng);
+ return WC_TEST_RET_ENC_EC(ret);
+ }
+
+ /* Only enforce on paramSets above 128-bit security; SHA-256 is approved
+ * for 128-bit so wouldn't be a rejection target there. */
+ if (key->params->n == WC_SLHDSA_N_128) {
+ wc_SlhDsaKey_Free(key);
+ wc_FreeRng(&rng);
+ return 0;
+ }
+
+ /* sigGen with too-weak PH must be REJECTED. */
+ sigLen = WC_SLHDSA_MAX_SIG_LEN;
+ PRIVATE_KEY_UNLOCK();
+ ret = wc_SlhDsaKey_SignHash(key, ctx, 0, msg, (word32)sizeof(msg),
+ badHash, sig, &sigLen, &rng);
+ PRIVATE_KEY_LOCK();
+ if (ret == 0) {
+ /* Module did NOT reject -- this is the missing-enforcement bug. */
+ wc_SlhDsaKey_Free(key);
+ wc_FreeRng(&rng);
+ return WC_TEST_RET_ENC_NC;
+ }
+
+ /* sigVer with too-weak PH must ALSO be REJECTED. */
+ sigLen = WC_SLHDSA_MAX_SIG_LEN;
+ XMEMSET(sig, 0, sigLen);
+ ret = wc_SlhDsaKey_VerifyHash(key, ctx, 0, msg, (word32)sizeof(msg),
+ badHash, sig, sigLen);
+ if (ret == 0) {
+ wc_SlhDsaKey_Free(key);
+ wc_FreeRng(&rng);
+ return WC_TEST_RET_ENC_NC;
+ }
+
+ wc_SlhDsaKey_Free(key);
+ wc_FreeRng(&rng);
+ return 0;
+}
#endif
/* True iff slhdsa_test() actually emits at least one `goto out;` /
@@ -60042,6 +60251,39 @@ wc_test_ret_t slhdsa_test(void)
}
#endif
+ /* FIPS 205 sec. 10.2.2 -- HashSLH-DSA must reject pre-hashes below the
+ * paramSet's security level. Use any available 192/256-bit paramSet;
+ * 128-bit paramSets allow SHA-256 so are not useful targets here. */
+#ifdef WOLFSSL_SLHDSA_PARAM_192S
+ ret = slhdsa_hash_paramset_rejection_test(SLHDSA_SHAKE192S);
+ if (ret != 0) {
+ wc_test_render_error_message("SLHDSA_SHAKE192S (hash-paramset reject)",
+ 0);
+ goto out;
+ }
+#elif defined(WOLFSSL_SLHDSA_PARAM_256S)
+ ret = slhdsa_hash_paramset_rejection_test(SLHDSA_SHAKE256S);
+ if (ret != 0) {
+ wc_test_render_error_message("SLHDSA_SHAKE256S (hash-paramset reject)",
+ 0);
+ goto out;
+ }
+#elif defined(WOLFSSL_SLHDSA_PARAM_SHA2_192S)
+ ret = slhdsa_hash_paramset_rejection_test(SLHDSA_SHA2_192S);
+ if (ret != 0) {
+ wc_test_render_error_message("SLHDSA_SHA2_192S (hash-paramset reject)",
+ 0);
+ goto out;
+ }
+#elif defined(WOLFSSL_SLHDSA_PARAM_SHA2_256S)
+ ret = slhdsa_hash_paramset_rejection_test(SLHDSA_SHA2_256S);
+ if (ret != 0) {
+ wc_test_render_error_message("SLHDSA_SHA2_256S (hash-paramset reject)",
+ 0);
+ goto out;
+ }
+#endif
+
#endif /* !WOLFSSL_SLHDSA_VERIFY_ONLY */
#if defined(WOLF_PRIVATE_KEY_ID) && \
diff --git a/wolfssl/wolfcrypt/aes.h b/wolfssl/wolfcrypt/aes.h
index c0d6a789bf..e5f434a0e8 100644
--- a/wolfssl/wolfcrypt/aes.h
+++ b/wolfssl/wolfcrypt/aes.h
@@ -66,8 +66,13 @@ typedef struct Gcm {
#endif
WOLFSSL_LOCAL void GenerateM0(Gcm* gcm);
+/* This two-byte-pointer GMULT is the GCM_SMALL form; GCM_TABLE/
+ * GCM_TABLE_4BIT use a static GMULT taking byte m[N][16]. Scope to GCM_SMALL
+ * so it doesn't clash with the table-mode GMULT on 32-bit ARM armasm +
+ * WOLFSSL_AESGCM_STREAM (streaming path now uses the software table GHASH).
+ * See SP 800-38D AES-GCM GHASH. */
#if !defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \
- !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
+ !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) && defined(GCM_SMALL)
WOLFSSL_LOCAL void GMULT(byte* X, byte* Y);
#endif
WOLFSSL_LOCAL void WC_ARG_NOT_NULL(1) GHASH(Gcm* gcm, const byte* a,
diff --git a/wolfssl/wolfcrypt/error-crypt.h b/wolfssl/wolfcrypt/error-crypt.h
index 5b089f118b..286a8739f4 100644
--- a/wolfssl/wolfcrypt/error-crypt.h
+++ b/wolfssl/wolfcrypt/error-crypt.h
@@ -327,9 +327,17 @@ enum wolfCrypt_ErrorCodes {
ML_DSA_PCT_E = -1016, /* ML-DSA Pairwise Consistency Test failure */
DRBG_SHA512_KAT_FIPS_E = -1017, /* SHA-512 DRBG KAT failure */
SLH_DSA_KAT_FIPS_E = -1018, /* SLH-DSA CAST KAT failure */
-
- WC_SPAN2_LAST_E = -1018, /* Update to indicate last used error code */
- WC_LAST_E = -1018, /* the last code used either here or in
+ SLH_DSA_PCT_E = -1019, /* SLH-DSA Pairwise Consistency Test failure */
+ CMAC_KAT_FIPS_E = -1020, /* AES-CMAC KAT failure (vendor-elected) */
+ SHAKE_KAT_FIPS_E = -1021, /* SHAKE KAT failure (vendor-elected) */
+ DH_PCT_E = -1022, /* DH (FFC) Pairwise Consistency Test
+ * failure (SP 800-56A r3 sec 5.6.2.1.4,
+ * FIPS 140-3 IG 10.3.B) */
+ AES_KW_KAT_FIPS_E = -1023, /* AES-KW KAT failure (vendor-elected,
+ * SP 800-38F sec 6.2 / RFC 3394) */
+
+ WC_SPAN2_LAST_E = -1023, /* Update to indicate last used error code */
+ WC_LAST_E = -1023, /* the last code used either here or in
* error-ssl.h */
WC_SPAN2_MIN_CODE_E = -1999, /* Last usable code in span 2 */
diff --git a/wolfssl/wolfcrypt/fips_test.h b/wolfssl/wolfcrypt/fips_test.h
index de2b506df2..38d65af5a3 100644
--- a/wolfssl/wolfcrypt/fips_test.h
+++ b/wolfssl/wolfcrypt/fips_test.h
@@ -31,8 +31,22 @@
extern "C" {
#endif
-/* Added for FIPS v5.3 or later */
-#if defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(5,3)
+/* Added for FIPS v5.3 or later.
+ *
+ * v7.0.0+ upgrades the in-core integrity HMAC to SHA-512 (512-bit key) for
+ * NSA 2.0 compliance, leaving no SHA-256 integrity material in the module.
+ * v5.3 and v6.x retain HMAC-SHA-256.
+ */
+#if defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(7,0)
+ #ifdef WOLFSSL_SHA512
+ #define FIPS_IN_CORE_DIGEST_SIZE 64
+ #define FIPS_IN_CORE_HASH_TYPE WC_SHA512
+ #define FIPS_IN_CORE_KEY_SZ 64
+ #define FIPS_IN_CORE_VERIFY_SZ FIPS_IN_CORE_KEY_SZ
+ #else
+ #error FIPS v7+ integrity test requires WOLFSSL_SHA512
+ #endif
+#elif defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(5,3)
/* Determine FIPS in core hash type and size */
#ifndef NO_SHA256
#define FIPS_IN_CORE_DIGEST_SIZE 32
@@ -62,7 +76,10 @@ enum FipsCastId {
FIPS_CAST_RSA_SIGN_PKCS1v15 = 7,
FIPS_CAST_ECC_CDH = 8,
FIPS_CAST_ECC_PRIMITIVE_Z = 9,
- FIPS_CAST_DH_PRIMITIVE_Z = 10,
+ FIPS_CAST_DH_PRIMITIVE_Z = 10, /* RETIRED (v7+): classic DH dropped
+ * from the FIPS 140-3 v7 PQ module
+ * boundary. Kept for ABI; do not
+ * reuse this id. */
FIPS_CAST_ECDSA = 11,
FIPS_CAST_KDF_TLS12 = 12,
FIPS_CAST_KDF_TLS13 = 13,
@@ -80,7 +97,10 @@ enum FipsCastId {
FIPS_CAST_XMSS = 23,
FIPS_CAST_DRBG_SHA512 = 24,
FIPS_CAST_SLH_DSA = 25,
- FIPS_CAST_COUNT = 26
+ FIPS_CAST_AES_CMAC = 26,
+ FIPS_CAST_SHAKE = 27,
+ FIPS_CAST_AES_KW = 28,
+ FIPS_CAST_COUNT = 29
};
enum FipsCastStateId {
diff --git a/wolfssl/wolfcrypt/random.h b/wolfssl/wolfcrypt/random.h
index 102f05d6b5..339c9f6fa1 100644
--- a/wolfssl/wolfcrypt/random.h
+++ b/wolfssl/wolfcrypt/random.h
@@ -57,8 +57,12 @@
#define DRBG_SEED_LEN (440/8)
#endif
+/* Size of the DRBG seed (SHA-512) */
#ifdef WOLFSSL_DRBG_SHA512
- #define DRBG_SHA512_SEED_LEN (888/8) /* 111 bytes per SP 800-90A Table 2 */
+ #ifndef DRBG_SHA512_SEED_LEN
+ #define DRBG_SHA512_SEED_LEN (888/8) /* 111 bytes per SP 800-90A
+ * Table 2 */
+ #endif
#endif
@@ -212,12 +216,16 @@ struct OS_Seed {
*/
#define ENTROPY_SCALE_FACTOR (512)
#elif defined(HAVE_INTEL_RDSEED) || defined(HAVE_INTEL_RDRAND)
- /* The value of 2 applies to Intel's RDSEED which provides about
- * 0.5 bits minimum of entropy per bit. The value of 4 gives a
- * conservative margin for FIPS. */
+ /* Intel RDSEED nominally provides ~0.5 bits min entropy per bit
+ * (NIST CMVP cert3389 PUD). As of v7, FIPS mode uses scale=512 on
+ * Intel too (was 8), matching the AMD worst case: AMD "Tyzen
+ * V1xxxx" PUD Table 3 documents 0.656040 bits per 128-bit block as
+ * the floor across the CMVP-validated AMD family. One worst-case
+ * seeding budget then covers any x86 OE. Non-FIPS Intel keeps the
+ * lighter scale=2 (Intel-PUD-derived) for performance. */
#if defined(HAVE_FIPS) && defined(HAVE_FIPS_VERSION) && \
(HAVE_FIPS_VERSION >= 2)
- #define ENTROPY_SCALE_FACTOR (2*4)
+ #define ENTROPY_SCALE_FACTOR (512)
#else
/* Not FIPS, but Intel RDSEED, only double. */
#define ENTROPY_SCALE_FACTOR (2)
diff --git a/wolfssl/wolfcrypt/settings.h b/wolfssl/wolfcrypt/settings.h
index e3a3b884a4..eee8855814 100644
--- a/wolfssl/wolfcrypt/settings.h
+++ b/wolfssl/wolfcrypt/settings.h
@@ -557,6 +557,15 @@
#endif
/* blinding adds API not available yet in FIPS mode */
#undef WC_RSA_BLINDING
+
+ /* NIST SP 800-38A sec 6.2: CBC requires plaintext a multiple of the
+ * block size, and the cipher does not pad (project_aes_no_padding_policy).
+ * Force the wc_AesCbcEncrypt / wc_AesCbcDecrypt block-alignment check
+ * so a length not a multiple of WC_AES_BLOCK_SIZE returns BAD_LENGTH_E
+ * instead of silently truncating to the largest aligned prefix. */
+ #ifndef WOLFSSL_AES_CBC_LENGTH_CHECKS
+ #define WOLFSSL_AES_CBC_LENGTH_CHECKS
+ #endif
#endif
/* old FIPS has only AES_BLOCK_SIZE. */