From 6a9d5f14118795a0c079374ade7722448581f6a0 Mon Sep 17 00:00:00 2001 From: kaleb-himes Date: Mon, 22 Jun 2026 13:41:30 -0600 Subject: [PATCH] Phase 3: Security and FIPS Compliance Audit --- .wolfssl_known_macro_extras | 2 + IDE/WIN-SRTP-KDF-140-3/test.vcxproj | 10 + IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj | 10 + configure.ac | 70 +- fips-hash.sh | 5 +- linuxkm/Kbuild | 34 +- linuxkm/Makefile | 26 +- linuxkm/linuxkm_memory.c | 14 + linuxkm/linuxkm_memory.h | 7 + linuxkm/linuxkm_wc_port.h | 95 +-- linuxkm/module_hooks.c | 14 +- linuxkm/pie_redirect_table.c | 76 +- linuxkm/x86_vector_register_glue.c | 34 +- src/include.am | 15 + tests/api/test_aes.c | 11 +- tests/api/test_evp_pkey.c | 4 +- tests/api/test_ossl_rsa.c | 10 +- tests/api/test_slhdsa.c | 8 +- wolfcrypt/benchmark/fips_cast_bench.c | 354 +++++++++ wolfcrypt/benchmark/include.am | 10 + wolfcrypt/src/aes.c | 205 ++++- wolfcrypt/src/aes_xts_x86_asm.S | 840 ++++++++++++++++++++ wolfcrypt/src/cpuid.c | 14 + wolfcrypt/src/dh.c | 28 +- wolfcrypt/src/error.c | 15 + wolfcrypt/src/ge_operations.c | 8 +- wolfcrypt/src/port/arm/armv8-32-sha3-asm.S | 8 + wolfcrypt/src/random.c | 120 ++- wolfcrypt/src/rsa.c | 19 +- wolfcrypt/src/sha256.c | 29 +- wolfcrypt/src/sha512.c | 16 + wolfcrypt/src/wc_lms.c | 8 + wolfcrypt/src/wc_lms_impl.c | 14 +- wolfcrypt/src/wc_mldsa.c | 112 ++- wolfcrypt/src/wc_mlkem.c | 119 +-- wolfcrypt/src/wc_mlkem_poly.c | 9 + wolfcrypt/src/wc_slhdsa.c | 169 +++- wolfcrypt/src/wc_xmss.c | 8 + wolfcrypt/src/wc_xmss_impl.c | 9 + wolfcrypt/test/test.c | 286 ++++++- wolfssl/wolfcrypt/aes.h | 7 +- wolfssl/wolfcrypt/error-crypt.h | 14 +- wolfssl/wolfcrypt/fips_test.h | 28 +- wolfssl/wolfcrypt/random.h | 18 +- wolfssl/wolfcrypt/settings.h | 9 + 45 files changed, 2703 insertions(+), 218 deletions(-) create mode 100644 wolfcrypt/benchmark/fips_cast_bench.c create mode 100644 wolfcrypt/src/aes_xts_x86_asm.S diff --git a/.wolfssl_known_macro_extras b/.wolfssl_known_macro_extras index 50377597a88..00dc36c5b74 100644 --- a/.wolfssl_known_macro_extras +++ b/.wolfssl_known_macro_extras @@ -196,6 +196,7 @@ CONFIG_WOLFSSL_TLS_VERSION_1_3 CONFIG_WOLFTPM CONFIG_WOLFTPM_EXAMPLE_NAME_ESPRESSIF CONFIG_X86 +CONFIG_X86_32 CONV_WITH_DIV CPA_CY_API_VERSION_NUM_MAJOR CPA_CY_API_VERSION_NUM_MINOR @@ -246,6 +247,7 @@ ETHERNET_H EV_TRIGGER EXTERNAL_LOADER_APP FD_CLOEXEC +FIPS_CODE_REVIEW FIPS_OPTEST_FULL_RUN_AT_MODULE_INIT FORCE_FAILURE_GETRANDOM FP_ECC_CONTROL diff --git a/IDE/WIN-SRTP-KDF-140-3/test.vcxproj b/IDE/WIN-SRTP-KDF-140-3/test.vcxproj index a41ff9ac49f..2429f2fe9bb 100644 --- a/IDE/WIN-SRTP-KDF-140-3/test.vcxproj +++ b/IDE/WIN-SRTP-KDF-140-3/test.vcxproj @@ -162,7 +162,13 @@ true true UseLinkTimeCodeGeneration + false + true @@ -177,6 +183,10 @@ true + + false + true Console ws2_32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) true diff --git a/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj b/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj index 65bb39fffa2..390b38f0e92 100644 --- a/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj +++ b/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj @@ -314,6 +314,16 @@ + + + + + + + + + diff --git a/configure.ac b/configure.ac index 2e9fe76069a..ad666be2e26 100644 --- a/configure.ac +++ b/configure.ac @@ -1502,6 +1502,15 @@ then enable_aesgcm_stream=no fi +# FIPS 140-3 v7 guard support: capture whether DSA/DH were EXPLICITLY requested +# (--enable-dsa / --enable-dh) here, before any default (enable_dsa=yes path, +# --enable-all) sets them. The v7 out-of-scope guard hard-errors only on an +# explicit request, otherwise silently forces the algorithm off. (FIPS 186-5 +# retires DSA; classic finite-field DH and DSA are out of scope for the FIPS +# 140-3 v7 PQ module.) +explicit_enable_dsa="$enable_dsa" +explicit_enable_dh="$enable_dh" + # All wolfCrypt features: AC_ARG_ENABLE([all-crypto], [AS_HELP_STRING([--enable-all-crypto],[Enable all wolfcrypt algorithms (default: disabled)])], @@ -3781,13 +3790,21 @@ then AC_MSG_NOTICE([32bit ARMv4 found]) ;; *) - AM_CPPFLAGS="$AM_CPPFLAGS -mfpu=crypto-neon-fp-armv8 -marm" + # AArch32 ARMv8 crypto-extension asm (armv8-32-*-asm.S: sha256h, + # aese/aesmc, pmull) needs an explicit -march=armv8-a+crypto: the + # ARMv8-A crypto extension is OPTIONAL, so "+crypto" gates these + # instructions -- not -mfpu alone, nor a bare -march=armv8-a (nor + # -mcpu=cortex-a53 on some toolchains). Cross toolchains defaulting + # to ARMv7 (e.g. Xilinx Vitis cortex-a9) otherwise reject them: + # "selected processor does not support sha256h.32 in ARM mode". + # Mirrors the in-kernel ARM armasm enablement (port/arm/*.S crypto). + AM_CPPFLAGS="$AM_CPPFLAGS -march=armv8-a+crypto -mfpu=crypto-neon-fp-armv8 -marm" # Include options.h AM_CCASFLAGS="$AM_CCASFLAGS -DEXTERNAL_OPTS_OPENVPN" ENABLED_ARMASM_CRYPTO=yes ENABLED_ARMASM_NEON=yes ENABLED_ARM_32=yes - AC_MSG_NOTICE([32bit ARMv8 found, setting mfpu to crypto-neon-fp-armv8]) + AC_MSG_NOTICE([32bit ARMv8 found, setting -march=armv8-a+crypto + mfpu=crypto-neon-fp-armv8]) ;; esac esac @@ -5806,6 +5823,13 @@ if test "x$ENABLED_WPAS" = "xyes" || test "x$ENABLED_NGINX" = "xyes" || \ then ENABLED_ANON=yes fi +# Anonymous ciphers require classic DH (enforced below and by settings.h: +# HAVE_ANON => DH). DH is out of scope only for the LOCKED FIPS 140-3 v7 +# module, so force anon off there however it was enabled (--enable-all, a TLS- +# integration option like wpas/nginx, or --enable-anon). dev/ready keep DH +# (and thus anon) when --enable-all/--enable-dh ask. +AS_IF([test "$FIPS_VERSION" = "v7"], + [ENABLED_ANON=no]) if test "x$ENABLED_ANON" = "xyes" then if test "$ENABLED_DH" = "no" @@ -6349,13 +6373,7 @@ AS_CASE([$FIPS_VERSION], -DWC_RSA_NO_PADDING \ -DECC_USER_CURVES \ -DHAVE_ECC384 \ - -DHAVE_ECC521 \ - -DWOLFSSL_VALIDATE_FFC_IMPORT \ - -DHAVE_FFDHE_Q \ - -DHAVE_FFDHE_3072 \ - -DHAVE_FFDHE_4096 \ - -DHAVE_FFDHE_6144 \ - -DHAVE_FFDHE_8192" + -DHAVE_ECC521" # KCAPI API does not support custom k for sign, don't force enable ECC key sizes and don't use seed callback AS_IF([test "x$ENABLED_KCAPI_ECC" = "xno"], @@ -6369,6 +6387,28 @@ AS_CASE([$FIPS_VERSION], -DHAVE_ECC256"]) DEFAULT_MAX_CLASSIC_ASYM_KEY_BITS=8192 + +# Classic finite-field DH and DSA scope by FIPS mode (FIPS 186-5 retires DSA; +# the v7 boundary keeps only ECDH/ECDSA + PQ KEM/DSA): +# v7 -- LOCKED release module: DH/DSA OUT OF SCOPE. Hard-error on an +# explicit --enable-dh/--enable-dsa, otherwise force off. +# dev/ready -- pre-release: DH/DSA OFF BY DEFAULT but turn-on-able +# (--enable-all, --enable-dh/--enable-dsa) for test coverage and +# v6 migration. + AS_IF([test "$FIPS_VERSION" = "v7"], + [AS_IF([test "$explicit_enable_dh" = "yes"], + [AC_MSG_ERROR([--enable-dh is not supported with --enable-fips=v7. Classic finite-field DH is out of scope for the FIPS 140-3 v7 PQ module. Use --enable-fips=v6 if you need DH support.])], + [test "$ENABLED_DH" != "no"], + [ENABLED_DH="no"; enable_dh="no"; AM_CFLAGS="$AM_CFLAGS -DNO_DH"]) + AS_IF([test "$explicit_enable_dsa" = "yes"], + [AC_MSG_ERROR([--enable-dsa is not supported with --enable-fips=v7. DSA is retired by FIPS 186-5 and is out of scope for the FIPS 140-3 v7 PQ module. Use --enable-fips=v6 if you need DSA support.])], + [test "$ENABLED_DSA" != "no"], + [ENABLED_DSA="no"; enable_dsa="no"; AM_CFLAGS="$AM_CFLAGS -DNO_DSA"])], + [AS_IF([test "$explicit_enable_dh" != "yes" && test "x$enable_all" != "xyes"], + [ENABLED_DH="no"; enable_dh="no"; AM_CFLAGS="$AM_CFLAGS -DNO_DH"]) + AS_IF([test "$explicit_enable_dsa" != "yes" && test "x$enable_all" != "xyes"], + [ENABLED_DSA="no"; enable_dsa="no"; AM_CFLAGS="$AM_CFLAGS -DNO_DSA"])]) + # optimizations section # protocol section @@ -8926,8 +8966,16 @@ then fi if test "x$ENABLED_DH" = "xno" then - ENABLED_DH="yes" - AM_CFLAGS="$AM_CFLAGS -DHAVE_DH" + # Classic DH is out of scope for the FIPS 140-3 v7 PQ module. JNI + # normally auto-enables DH for legacy TLS suites; with FIPS v7+ we + # report and skip rather than silently re-enabling DH off-boundary. + if test "$FIPS_VERSION" = "v7" || test "$FIPS_VERSION" = "ready" || test "$FIPS_VERSION" = "dev" + then + AC_MSG_NOTICE([JNI enabled but FIPS is $FIPS_VERSION, NOT turning on DH with this module]) + else + ENABLED_DH="yes" + AM_CFLAGS="$AM_CFLAGS -DHAVE_DH" + fi fi if test "x$ENABLED_PSK" = "xno" then diff --git a/fips-hash.sh b/fips-hash.sh index 36f320c0bbd..309be837e85 100755 --- a/fips-hash.sh +++ b/fips-hash.sh @@ -13,7 +13,10 @@ then fi OUT=$(./wolfcrypt/test/testwolfcrypt | sed -n 's/hash = \(.*\)/\1/p') -NEWHASH=$(echo "$OUT" | cut -c1-64) +# Take the whole hash: FIPS v7.0.0+ is HMAC-SHA-512 (128 hex), older is +# HMAC-SHA-256 (64 hex). static_assert on sizeof(verifyCore) catches a +# wrong length at compile time. +NEWHASH=$(echo "$OUT" | head -n1 | tr -d '[:space:]') if test -n "$NEWHASH" then cp wolfcrypt/src/fips_test.c wolfcrypt/src/fips_test.c.bak diff --git a/linuxkm/Kbuild b/linuxkm/Kbuild index e5974e4a930..eb77097a95e 100644 --- a/linuxkm/Kbuild +++ b/linuxkm/Kbuild @@ -111,6 +111,16 @@ $(LIBWOLFSSL_NAME)-y := $(WOLFSSL_OBJ_FILES) linuxkm/module_hooks.o linuxkm/modu ifeq "$(FIPS_OPTEST)" "1" $(LIBWOLFSSL_NAME)-y += linuxkm/optest-140-3/linuxkm_optest_wrapper.o + # The optest wrapper (#includes test.c / invalid_tests.c) aggregates several + # AES contexts per invalid-input test fn. Under WOLFSSL_AESNI the Aes + # struct carries an inline ALIGN16 streamData[5*WC_AES_BLOCK_SIZE] + # (wolfssl/wolfcrypt/aes.h) plus use_aesni, so the + # aes_{,mac_,ofb_,cfb_,kw_}invalid_data_tests fns exceed the i386 + # THREAD_SIZE/4 = 2048 frame *warning* (x86_64's 4096 already fits; runtime + # is fine in a kernel thread). Relax to 4096 for the wrapper ONLY: it is + # test/evidence tooling OUTSIDE the FIPS module boundary, so module objects + # keep the strict MAX_STACK_FRAME_SIZE. No effect on x86_64 (default 4096). + $(obj)/linuxkm/optest-140-3/linuxkm_optest_wrapper.o: ccflags-y += -Wframe-larger-than=4096 endif WOLFSSL_CFLAGS_NO_VECTOR_INSNS := $(CFLAGS_SIMD_DISABLE) $(CFLAGS_FPU_DISABLE) @@ -139,6 +149,10 @@ ifeq "$(ENABLED_LINUXKM_PIE)" "yes" endif endif endif + ifeq ($(KERNEL_ARCH),i386) + NO_PIE_FLAG := 1 + $(info Note: disabling -fPIE on 32-bit x86 -- i386 -fPIE routes every local symbol through the GOT (R_386_GOTOFF), which the wolfCrypt PIE containerization forbids.) + endif endif ifdef NO_PIE_FLAG @@ -231,6 +245,24 @@ $(obj)/wolfcrypt/src/wc_mlkem_asm.o: OBJECT_FILES_NON_STANDARD := y $(obj)/wolfcrypt/src/wc_mldsa_asm.o: asflags-y := $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE) $(obj)/wolfcrypt/src/wc_mldsa_asm.o: OBJECT_FILES_NON_STANDARD := y +# ARM/ARM64 crypto+NEON asm (wolfcrypt/src/port/arm/*.S) needs crypto/NEON +# -march at assembly time. The asm carries no .arch/.fpu directives and +# configure leaves ASFLAGS_*_SIMD_ENABLE empty on ARM: userspace inherits +# +crypto from the toolchain -mcpu, but the kernel forces its own baseline +# -march without it, so AES/SHA/PMULL are rejected ("selected processor does not +# support `aesd ...'"). Supply the right -march per kernel arch. (Wrong-arch +# port/arm files are #ifdef'd to empty objects, so it is a no-op for them; +# OBJECT_FILES_NON_STANDARD silences objtool on the hand asm.) +ifeq ($(CONFIG_ARM64),y) + WOLFSSL_ARM_ASM_MARCH := -march=armv8-a+crypto +else ifeq ($(CONFIG_ARM),y) + WOLFSSL_ARM_ASM_MARCH := -march=armv8-a -mfpu=crypto-neon-fp-armv8 +endif +ifdef WOLFSSL_ARM_ASM_MARCH +$(obj)/wolfcrypt/src/port/arm/%.o: asflags-y := $(WOLFSSL_ASFLAGS) $(WOLFSSL_ARM_ASM_MARCH) +$(obj)/wolfcrypt/src/port/arm/%.o: OBJECT_FILES_NON_STANDARD := y +endif + ifndef READELF READELF := readelf endif @@ -339,7 +371,7 @@ RENAME_PIE_TEXT_AND_DATA_SECTIONS := \ next; \ } \ else if ($$4 == "OBJECT") { \ - if (! ($$7 in wolfcrypt_data_sections)) { \ + if (! ($$7 in wolfcrypt_data_sections) && ! ($$7 in wolfcrypt_text_sections)) { \ if ((other_sections[$$7] == ".printk_index") || \ (($$8 ~ /^_entry\.[0-9]+$$|^kernel_read_file_str$$/) && \ (other_sections[$$7] == ".data.rel.ro.local"))) \ diff --git a/linuxkm/Makefile b/linuxkm/Makefile index 24a867b9356..cccddf0cf58 100644 --- a/linuxkm/Makefile +++ b/linuxkm/Makefile @@ -45,7 +45,14 @@ ifndef SRC_TOP SRC_TOP=$(shell dirname $(MODULE_TOP)) endif -WOLFSSL_CFLAGS=-DHAVE_CONFIG_H -I$(SRC_TOP) -DBUILDING_WOLFSSL $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -Wno-declaration-after-statement -Wno-redundant-decls -DLIBWOLFSSL_GLOBAL_EXTRA_CFLAGS="\" $(KERNEL_EXTRA_CFLAGS)\"" +# -Wno-nested-externs: the kernel's _compiletime_assert (, +# via atomic/per-CPU/printk-once macros in x86_vector_register_glue.c) emits an +# "extern ... __compiletime_assert_N(void)" inside a function body, tripping +# -Wnested-externs; with -Werror this breaks the i386 + AES-NI build (glue compiles +# only when WOLFSSL_USE_SAVE_VECTOR_REGISTERS / a PAA is enabled). Strip it +# build-wide like the other kernel-incompatible warnings; diagnostic-only, so object +# code (and the FIPS in-core hash) stays byte-identical on every arch. +WOLFSSL_CFLAGS=-DHAVE_CONFIG_H -I$(SRC_TOP) -DBUILDING_WOLFSSL $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -Wno-declaration-after-statement -Wno-redundant-decls -Wno-nested-externs -DLIBWOLFSSL_GLOBAL_EXTRA_CFLAGS="\" $(KERNEL_EXTRA_CFLAGS)\"" ifdef KERNEL_EXTRA_CFLAGS WOLFSSL_CFLAGS += $(KERNEL_EXTRA_CFLAGS) endif @@ -55,7 +62,12 @@ endif WOLFSSL_ASFLAGS=-DHAVE_CONFIG_H -I$(SRC_TOP) -DBUILDING_WOLFSSL $(AM_CCASFLAGS) $(CCASFLAGS) -WOLFSSL_OBJ_FILES=$(patsubst %.lo, %.o, $(patsubst src/src_libwolfssl_la-%, src/%, $(patsubst src/libwolfssl_la-%, src/%, $(patsubst wolfcrypt/src/src_libwolfssl_la-%, wolfcrypt/src/%, $(src_libwolfssl_la_OBJECTS))))) +# Strip libtool's per-target object prefix (src_libwolfssl_la-) so Kbuild sees the +# real object names. The innermost patsubst covers wolfcrypt/src/port/arm/ asm +# (armv8-*/armv8-32-*/thumb2-*), which the src/ and wolfcrypt/src/ patsubsts miss; +# without it --enable-armasm kernel builds fail ("No rule to make target +# .../src_libwolfssl_la-armv8-aes-asm.o"). +WOLFSSL_OBJ_FILES=$(patsubst %.lo, %.o, $(patsubst src/src_libwolfssl_la-%, src/%, $(patsubst src/libwolfssl_la-%, src/%, $(patsubst wolfcrypt/src/src_libwolfssl_la-%, wolfcrypt/src/%, $(patsubst wolfcrypt/src/port/arm/src_libwolfssl_la-%, wolfcrypt/src/port/arm/%, $(src_libwolfssl_la_OBJECTS)))))) ifeq "$(ENABLED_CRYPT_TESTS)" "yes" WOLFSSL_OBJ_FILES+=wolfcrypt/test/test.o @@ -160,7 +172,7 @@ GENERATE_RELOC_TAB := $(AWK) ' \ function open_seg(seg) { \ seen_seg[seg] = 1; \ printf("%s\n ", \ - "WOLFSSL_LOCAL const struct wc_reloc_table_ent wc_linuxkm_pie_" seg "_reloc_tab[] = { "); \ + "WOLFSSL_LOCAL const struct wc_reloc_table_ent wc_linuxkm_pie_" seg "_reloc_tab[] = {"); \ cur_seg = seg; \ } \ function close_cur_seg() { \ @@ -197,7 +209,7 @@ GENERATE_RELOC_TAB := $(AWK) ' \ next; \ } \ /^0/ { \ - if ($$3 !~ "^(R_X86_.*|R_AARCH64_.*|R_ARM.*)$$") { \ + if ($$3 !~ "^(R_X86_.*|R_386_.*|R_AARCH64_.*|R_ARM.*)$$") { \ print "Unexpected relocation type in " cur_seg ":\n" $$0 >"/dev/stderr"; \ ++bad_relocs; \ } \ @@ -361,12 +373,12 @@ module-update-fips-hash: $(LIBWOLFSSL_NAME).ko readarray -t verifyCore_attrs < <($(READELF) --wide --symbols "$<" | \ sed -E -n 's/^[[:space:]]*[0-9]+: ([0-9a-fA-F]+)[[:space:]]+([0-9]+)[[:space:]]+OBJECT[[:space:]]+[A-Z]+[[:space:]]+[A-Z]+[[:space:]]+'"$${rodata_segment[0]}"'[[:space:]]+verifyCore$$/\1\n\2/p'); \ if [[ $${#verifyCore_attrs[@]} != 2 ]]; then echo ' unexpected verifyCore_attrs.' >&2; exit 1; fi; \ - if [[ "$${verifyCore_attrs[1]}" != "65" ]]; then echo " verifyCore has unexpected length $${verifyCore_attrs[1]}." >&2; exit 1; fi; \ + if [[ "$${verifyCore_attrs[1]}" != "129" ]]; then echo " verifyCore has unexpected length $${verifyCore_attrs[1]}." >&2; exit 1; fi; \ verifyCore_offset=$$((0x$${rodata_segment[1]} + 0x$${verifyCore_attrs[0]})); \ - current_verifyCore=$$(dd bs=1 if="$<" skip=$$verifyCore_offset count=64 status=none); \ + current_verifyCore=$$(dd bs=1 if="$<" skip=$$verifyCore_offset count=128 status=none); \ if [[ ! "$$current_verifyCore" =~ [0-9a-fA-F]{64} ]]; then echo " verifyCore at offset $$verifyCore_offset has unexpected value." >&2; exit 1; fi; \ if [[ '$(FIPS_HASH)' == "$$current_verifyCore" ]]; then echo ' Supplied FIPS_HASH matches existing verifyCore -- no update needed.'; exit 0; fi; \ - echo -n '$(FIPS_HASH)' | dd bs=1 conv=notrunc of="$<" seek=$$verifyCore_offset count=64 status=none && \ + echo -n '$(FIPS_HASH)' | dd bs=1 conv=notrunc of="$<" seek=$$verifyCore_offset count=128 status=none && \ echo " FIPS verifyCore updated successfully." && \ if [[ -f '$(LIBWOLFSSL_NAME).ko.signed' ]]; then $(MAKE) $(QFLAG) --no-print-directory --no-silent -C . '$(LIBWOLFSSL_NAME).ko.signed'; fi diff --git a/linuxkm/linuxkm_memory.c b/linuxkm/linuxkm_memory.c index 2f1b75e1125..fc81280df16 100644 --- a/linuxkm/linuxkm_memory.c +++ b/linuxkm/linuxkm_memory.c @@ -52,6 +52,8 @@ static const struct reloc_layout_ent { [WC_R_X86_64_64] = { "R_X86_64_64", ~0UL, 64, .is_signed = 0, .is_relative = 0 }, [WC_R_X86_64_PC32] = { "R_X86_64_PC32", ~0UL, 32, .is_signed = 1, .is_relative = 1 }, [WC_R_X86_64_PLT32] = { "R_X86_64_PLT32", ~0UL, 32, .is_signed = 1, .is_relative = 1 }, + [WC_R_386_32] = { "R_386_32", ~0UL, 32, .is_signed = 0, .is_relative = 0 }, + [WC_R_386_PC32] = { "R_386_PC32", ~0UL, 32, .is_signed = 1, .is_relative = 1 }, [WC_R_AARCH64_ABS32] = { "R_AARCH64_ABS32", ~0UL, 32, .is_signed = 1, .is_relative = 0, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 }, [WC_R_AARCH64_ABS64] = { "R_AARCH64_ABS64", ~0UL, 64, .is_signed = 1, .is_relative = 0, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 }, [WC_R_AARCH64_ADD_ABS_LO12_NC] = { "R_AARCH64_ADD_ABS_LO12_NC", 0b00000000001111111111110000000000, 32, .is_signed = 0, .is_relative = 0, .is_pages = 0, .is_pair_lo = 1, .is_pair_hi = 0 }, @@ -64,6 +66,10 @@ static const struct reloc_layout_ent { [WC_R_AARCH64_LDST64_ABS_LO12_NC] = { "R_AARCH64_LDST64_ABS_LO12_NC", 0b00000000001111111111110000000000, 32, .is_signed = 0, .is_relative = 0, .is_pages = 0, .is_pair_lo = 1, .is_pair_hi = 0 }, [WC_R_AARCH64_PREL32] = { "R_AARCH64_PREL32", ~0UL, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 }, [WC_R_ARM_ABS32] = { "R_ARM_ABS32", ~0UL, 32, .is_signed = 0, .is_relative = 0, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 }, + /* ARM-mode BL/B: signed 24-bit word offset in bits [23:0] (cf. AARCH64_CALL26's + * 26-bit field). Emitted by the arm32 ARM-mode (non-Thumb) kernel module build. */ + [WC_R_ARM_CALL] = { "R_ARM_CALL", 0b00000000111111111111111111111111, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 }, + [WC_R_ARM_JUMP24] = { "R_ARM_JUMP24", 0b00000000111111111111111111111111, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 }, [WC_R_ARM_PREL31] = { "R_ARM_PREL31", 0b01111111111111111111111111111111, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 }, [WC_R_ARM_REL32] = { "R_ARM_REL32", ~0UL, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 }, [WC_R_ARM_THM_CALL] = { "R_ARM_THM_CALL", 0b00000111111111110010111111111111, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 }, @@ -366,6 +372,12 @@ ssize_t wc_reloc_normalize_segment( case WC_R_X86_64_32: case WC_R_X86_64_32S: case WC_R_X86_64_64: + /* i386 reuses the x86_64 path: math is driven by + * layout->is_relative/is_signed and stays width-correct via + * uintptr_t (32-bit on i386). R_386_32 is absolute + * (is_relative=0); R_386_PC32 is PC-relative (is_relative=1). */ + case WC_R_386_32: + case WC_R_386_PC32: if (dest_seg != WC_R_SEG_OTHER) { #ifdef DEBUG_LINUXKM_PIE_SUPPORT @@ -413,6 +425,8 @@ ssize_t wc_reloc_normalize_segment( break; case WC_R_ARM_ABS32: + case WC_R_ARM_CALL: + case WC_R_ARM_JUMP24: case WC_R_ARM_PREL31: case WC_R_ARM_REL32: case WC_R_ARM_THM_CALL: diff --git a/linuxkm/linuxkm_memory.h b/linuxkm/linuxkm_memory.h index 76e681da805..1c8ef3f6625 100644 --- a/linuxkm/linuxkm_memory.h +++ b/linuxkm/linuxkm_memory.h @@ -40,6 +40,11 @@ enum wc_reloc_type { WC_R_X86_64_64, WC_R_X86_64_PC32, WC_R_X86_64_PLT32, + /* 32-bit x86 (i386): with NO_PIE_FLAG the container emits only R_386_32 + * (absolute) and R_386_PC32 (PC-relative), equivalent to R_X86_64_32 / + * R_X86_64_PC32 and sharing their canonicalization case below. */ + WC_R_386_32, + WC_R_386_PC32, WC_R_AARCH64_ABS32, WC_R_AARCH64_ABS64, WC_R_AARCH64_ADD_ABS_LO12_NC, @@ -52,6 +57,8 @@ enum wc_reloc_type { WC_R_AARCH64_LDST64_ABS_LO12_NC, WC_R_AARCH64_PREL32, WC_R_ARM_ABS32, + WC_R_ARM_CALL, + WC_R_ARM_JUMP24, WC_R_ARM_PREL31, WC_R_ARM_REL32, WC_R_ARM_THM_CALL, diff --git a/linuxkm/linuxkm_wc_port.h b/linuxkm/linuxkm_wc_port.h index 0e64c14be70..5f06206cc5c 100644 --- a/linuxkm/linuxkm_wc_port.h +++ b/linuxkm/linuxkm_wc_port.h @@ -712,8 +712,11 @@ #define WOLFSSL_USE_SAVE_VECTOR_REGISTERS #endif + /* x86 (kernel_fpu_*) and ARM/ARM64 (kernel_neon_*) share the arch-neutral + * tracker in x86_vector_register_glue.c; the glue keeps its wc_*_x86 names + * on all arches (outside-boundary glue via the PIE redirect table). */ #if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && \ - defined(CONFIG_X86) + (defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)) extern __must_check int allocate_wolfcrypt_linuxkm_fpu_states(void); extern void free_wolfcrypt_linuxkm_fpu_states(void); @@ -721,18 +724,23 @@ WOLFSSL_API __must_check int wc_save_vector_registers_x86(enum wc_svr_flags flags); WOLFSSL_API void wc_restore_vector_registers_x86(enum wc_svr_flags flags); - #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) - #include - #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) - /* added by a62b01cd6c */ - #include - #endif - #else - #include - #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) - /* added by 266d051601 */ - #include + #ifdef CONFIG_X86 + #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) + #include + #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) + /* added by a62b01cd6c */ + #include + #endif + #else + #include + #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + /* added by 266d051601 */ + #include + #endif #endif + #else /* CONFIG_ARM || CONFIG_ARM64 */ + #include /* may_use_simd() */ + #include /* kernel_neon_begin() / kernel_neon_end() */ #endif #ifndef CAN_SAVE_VECTOR_REGISTERS #define CAN_SAVE_VECTOR_REGISTERS() wc_can_save_vector_registers_x86() @@ -763,42 +771,6 @@ #define REENABLE_VECTOR_REGISTERS() wc_restore_vector_registers_x86(WC_SVR_FLAG_INHIBIT) #endif - #elif defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && (defined(CONFIG_ARM) || defined(CONFIG_ARM64)) - - #error kernel module ARM SIMD is not yet tested or usable. - - #include - - static WARN_UNUSED_RESULT inline int save_vector_registers_arm(void) - { - preempt_disable(); - if (! may_use_simd()) { - preempt_enable(); - return BAD_STATE_E; - } else { - fpsimd_preserve_current_state(); - return 0; - } - } - static inline void restore_vector_registers_arm(void) - { - fpsimd_restore_current_state(); - preempt_enable(); - } - - #ifndef SAVE_VECTOR_REGISTERS - #define SAVE_VECTOR_REGISTERS(fail_clause) { int _svr_ret = save_vector_registers_arm(); if (_svr_ret != 0) { fail_clause } } - #endif - #ifndef SAVE_VECTOR_REGISTERS2 - #define SAVE_VECTOR_REGISTERS2() save_vector_registers_arm() - #endif - #ifndef CAN_SAVE_VECTOR_REGISTERS - #define CAN_SAVE_VECTOR_REGISTERS() can_save_vector_registers_arm() - #endif - #ifndef RESTORE_VECTOR_REGISTERS - #define RESTORE_VECTOR_REGISTERS() restore_vector_registers_arm() - #endif - #elif defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) #error WOLFSSL_USE_SAVE_VECTOR_REGISTERS is set for an unimplemented architecture. #endif /* WOLFSSL_USE_SAVE_VECTOR_REGISTERS */ @@ -1008,6 +980,20 @@ extern int memcmp(const void *s1, const void *s2, size_t n); #endif +#ifdef CONFIG_X86_32 + /* arch/x86/include/asm/string_32.h #defines memcpy/memcmp/memset as + * __builtin_* macros (x86_64's string_64.h uses plain functions, so this + * does not arise on K2). Left active they expand inside the PIE + * redirect-table member declarations below ("typeof(memcmp) *memcmp;" -> + * "... *__builtin_memcmp;"), dropping those members and breaking the + * WC_PIE_INDIRECT_SYM(memcmp) lookups. #undef before the struct; + * string_32.h still declares the functions so typeof() and the canonical + * names resolve. Mirrors the CONFIG_MIPS handling just above. */ + #undef memcpy + #undef memcmp + #undef memset +#endif + struct wolfssl_linuxkm_pie_redirect_table { #ifdef HAVE_FIPS typeof(wc_linuxkm_normalize_relocations) *wc_linuxkm_normalize_relocations; @@ -1144,13 +1130,13 @@ #ifdef WOLFSSL_USE_SAVE_VECTOR_REGISTERS - #ifdef CONFIG_X86 + #if defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) typeof(allocate_wolfcrypt_linuxkm_fpu_states) *allocate_wolfcrypt_linuxkm_fpu_states; typeof(wc_can_save_vector_registers_x86) *wc_can_save_vector_registers_x86; typeof(free_wolfcrypt_linuxkm_fpu_states) *free_wolfcrypt_linuxkm_fpu_states; typeof(wc_restore_vector_registers_x86) *wc_restore_vector_registers_x86; typeof(wc_save_vector_registers_x86) *wc_save_vector_registers_x86; - #else /* !CONFIG_X86 */ + #else #error WOLFSSL_USE_SAVE_VECTOR_REGISTERS is set for an unimplemented architecture. #endif /* arch */ @@ -1501,7 +1487,8 @@ #undef get_current #define get_current WC_PIE_INDIRECT_SYM(get_current) - #if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86) + #if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && \ + (defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)) #define allocate_wolfcrypt_linuxkm_fpu_states WC_PIE_INDIRECT_SYM(allocate_wolfcrypt_linuxkm_fpu_states) #define wc_can_save_vector_registers_x86 WC_PIE_INDIRECT_SYM(wc_can_save_vector_registers_x86) #define free_wolfcrypt_linuxkm_fpu_states WC_PIE_INDIRECT_SYM(free_wolfcrypt_linuxkm_fpu_states) @@ -1831,7 +1818,7 @@ #if !defined(BUILDING_WOLFSSL) /* some caller code needs these. */ #if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) - #if defined(CONFIG_X86) + #if defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) WOLFSSL_API __must_check int wc_can_save_vector_registers_x86(void); WOLFSSL_API __must_check int wc_save_vector_registers_x86(enum wc_svr_flags flags); WOLFSSL_API void wc_restore_vector_registers_x86(enum wc_svr_flags flags); @@ -1841,9 +1828,9 @@ #ifndef REENABLE_VECTOR_REGISTERS #define REENABLE_VECTOR_REGISTERS() wc_restore_vector_registers_x86(WC_SVR_FLAG_INHIBIT) #endif - #else /* !CONFIG_X86 */ + #else #error WOLFSSL_USE_SAVE_VECTOR_REGISTERS is set for an unimplemented architecture. - #endif /* !CONFIG_X86 */ + #endif #endif /* WOLFSSL_USE_SAVE_VECTOR_REGISTERS */ #ifdef WC_LINUXKM_USE_HEAP_WRAPPERS WOLFSSL_API extern void *wc_linuxkm_malloc(size_t size); diff --git a/linuxkm/module_hooks.c b/linuxkm/module_hooks.c index 55fc70e0a7f..1ed332be516 100644 --- a/linuxkm/module_hooks.c +++ b/linuxkm/module_hooks.c @@ -527,7 +527,9 @@ int wc_linuxkm_GenerateSeed_IntelRD(struct OS_Seed* os, byte* output, word32 sz) #endif /* WC_LINUXKM_RDSEED_IN_GLUE_LAYER */ -#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86) +#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && \ + (defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)) + /* arch-generic save/restore tracker (kernel_fpu_* on x86, kernel_neon_* on ARM) */ #include "linuxkm/x86_vector_register_glue.c" #endif @@ -1518,7 +1520,8 @@ static int set_up_wolfssl_linuxkm_pie_redirect_table(void) { wolfssl_linuxkm_pie_redirect_table.get_current = my_get_current_thread; -#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86) +#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && \ + (defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)) wolfssl_linuxkm_pie_redirect_table.allocate_wolfcrypt_linuxkm_fpu_states = allocate_wolfcrypt_linuxkm_fpu_states; wolfssl_linuxkm_pie_redirect_table.wc_can_save_vector_registers_x86 = wc_can_save_vector_registers_x86; wolfssl_linuxkm_pie_redirect_table.free_wolfcrypt_linuxkm_fpu_states = free_wolfcrypt_linuxkm_fpu_states; @@ -2047,7 +2050,10 @@ static ssize_t FIPS_optest_trig_handler(struct kobject *kobj, struct kobj_attrib int ret; int argc; const char *argv[3]; - char code_buf[5]; + /* Textual sysfs error code + NUL, plus headroom. Fits the v7.0.0 5-char + * codes (-1015 ML_KEM_PCT_E, -1016 ML_DSA_PCT_E, -1017 + * DRBG_SHA512_KAT_FIPS_E) that the old [5] rejected via the guard below. */ + char code_buf[8]; size_t corrected_count; int i; @@ -2063,7 +2069,7 @@ static ssize_t FIPS_optest_trig_handler(struct kobject *kobj, struct kobj_attrib corrected_count = count - 1; else corrected_count = count; - if ((corrected_count < 1) || (corrected_count > 4)) + if ((corrected_count < 1) || (corrected_count > (sizeof(code_buf) - 1))) return -EINVAL; XMEMCPY(code_buf, buf, corrected_count); code_buf[corrected_count] = 0; diff --git a/linuxkm/pie_redirect_table.c b/linuxkm/pie_redirect_table.c index 03be2e04fa0..1211c7f9d6a 100644 --- a/linuxkm/pie_redirect_table.c +++ b/linuxkm/pie_redirect_table.c @@ -53,8 +53,15 @@ const struct wolfssl_linuxkm_pie_redirect_table return &wolfssl_linuxkm_pie_redirect_table; } -/* placeholder implementations for missing functions. */ -#if defined(CONFIG_MIPS) +/* placeholder implementations for missing functions. + * + * ARM/ARM64 need these like MIPS: --enable-armasm omits -mgeneral-regs-only, + * so gcc auto-emits raw memcpy/memset libcalls for aggregate copies in the + * PIE FIPS container. WC_PIE_INDIRECT_SYM only redirects source-level + * XMEMCPY/XMEMSET, not compiler-emitted libcalls, and the in-core integrity + * check forbids ANY undefined symbol, so define them here. (The pure-C C1 + * build does not auto-vectorize and never references these.) */ +#if defined(CONFIG_MIPS) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) #undef memcpy void *memcpy(void *dest, const void *src, size_t n) { char *dest_i = (char *)dest; @@ -74,3 +81,68 @@ const struct wolfssl_linuxkm_pie_redirect_table return dest; } #endif + +#if defined(CONFIG_ARM) + /* 32-bit ARM's baseline ISA has no integer-divide, so gcc emits these EABI + * helpers for '/' and '%'. The kernel exports them + * (arch/arm/lib/lib1funcs.S), but the self-contained PIE FIPS container may + * not reference external symbols (in-core integrity forbids ANY undefined + * symbol), so provide them here. Restoring (bit-at-a-time) division -- + * correctness over speed; crypto-path divisions are on small + * sizes/indices. Per the EABI, __aeabi_*idivmod return a little-endian + * 64-bit value: quotient in r0 (low word), remainder in r1 (high word). */ + unsigned int __aeabi_uidiv(unsigned int n, unsigned int d); + unsigned int __aeabi_uidiv(unsigned int n, unsigned int d) { + unsigned int q = 0, r = 0; + int i; + if (d == 0) + return ~0u; + for (i = 31; i >= 0; i--) { + r = (r << 1) | ((n >> i) & 1u); + if (r >= d) { + r -= d; + q |= (1u << i); + } + } + return q; + } + + unsigned long long __aeabi_uidivmod(unsigned int n, unsigned int d); + unsigned long long __aeabi_uidivmod(unsigned int n, unsigned int d) { + unsigned int q = 0, r = 0; + int i; + if (d == 0) + return (unsigned long long)n << 32; /* quot=0, rem=n */ + for (i = 31; i >= 0; i--) { + r = (r << 1) | ((n >> i) & 1u); + if (r >= d) { + r -= d; + q |= (1u << i); + } + } + return ((unsigned long long)r << 32) | q; + } + + int __aeabi_idiv(int n, int d); + int __aeabi_idiv(int n, int d) { + int neg = (n < 0) ^ (d < 0); + unsigned int un = (n < 0) ? (unsigned int)(-(long)n) : (unsigned int)n; + unsigned int ud = (d < 0) ? (unsigned int)(-(long)d) : (unsigned int)d; + unsigned int uq = __aeabi_uidiv(un, ud); + return neg ? -(int)uq : (int)uq; + } + + unsigned long long __aeabi_idivmod(int n, int d); + unsigned long long __aeabi_idivmod(int n, int d) { + int nneg = (n < 0); + int qneg = (n < 0) ^ (d < 0); + unsigned int un = nneg ? (unsigned int)(-(long)n) : (unsigned int)n; + unsigned int ud = (d < 0) ? (unsigned int)(-(long)d) : (unsigned int)d; + unsigned long long um = __aeabi_uidivmod(un, ud); + unsigned int uq = (unsigned int)um; + unsigned int ur = (unsigned int)(um >> 32); + int q = qneg ? -(int)uq : (int)uq; + int r = nneg ? -(int)ur : (int)ur; + return ((unsigned long long)(unsigned int)r << 32) | (unsigned int)q; + } +#endif /* CONFIG_ARM */ diff --git a/linuxkm/x86_vector_register_glue.c b/linuxkm/x86_vector_register_glue.c index e33c3d719e8..51f508e1d23 100644 --- a/linuxkm/x86_vector_register_glue.c +++ b/linuxkm/x86_vector_register_glue.c @@ -23,8 +23,28 @@ /* included by linuxkm/module_hooks.c */ #ifndef WC_SKIP_INCLUDED_C_FILES -#if !defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) || !defined(CONFIG_X86) - #error x86_vector_register_glue.c included in non-vectorized/non-x86 project. +#if !defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) || \ + !(defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)) + #error vector register glue included in non-vectorized or unsupported-arch project. +#endif + +/* The per-CPU tracker below is arch-neutral except for the call that + * claims/releases the SIMD/FP unit: + * x86 -> kernel_fpu_begin()/kernel_fpu_end() ( via + * in linuxkm_wc_port.h) + * ARM/ARM64 -> kernel_neon_begin()/kernel_neon_end() () + * Both obey the same context rules the tracker enforces (may_use_simd(), + * hard-IRQ/NMI rejection, preempt/bh/migration disable). The wc_*_x86 names + * are kept: this glue lives OUTSIDE the FIPS module boundary (reached only via + * the PIE redirect table), so the validated x86 symbol set stays byte-for-byte + * unchanged. */ +#if defined(CONFIG_X86) + #define WC_LINUXKM_FPU_BEGIN() kernel_fpu_begin() + #define WC_LINUXKM_FPU_END() kernel_fpu_end() +#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64) + #include + #define WC_LINUXKM_FPU_BEGIN() kernel_neon_begin() + #define WC_LINUXKM_FPU_END() kernel_neon_end() #endif #ifdef WOLFSSL_LINUXKM_VERBOSE_DEBUG @@ -70,9 +90,11 @@ WARN_UNUSED_RESULT int allocate_wolfcrypt_linuxkm_fpu_states(void) wc_linuxkm_fpu_states_n_tracked * sizeof(wc_linuxkm_fpu_states[0])); if (! wc_linuxkm_fpu_states) { + /* cast to unsigned long to match %lu: size_t is 32-bit on arm32 but + * 64-bit on x86_64/arm64, so the product type is arch-dependent. */ pr_err("ERROR: allocation of %lu bytes for " "wc_linuxkm_fpu_states failed.\n", - nr_cpu_ids * sizeof(wc_linuxkm_fpu_states[0])); + (unsigned long)(nr_cpu_ids * sizeof(wc_linuxkm_fpu_states[0]))); return MEMORY_E; } @@ -454,10 +476,10 @@ WARN_UNUSED_RESULT int wc_save_vector_registers_x86(enum wc_svr_flags flags) #if IS_ENABLED(CONFIG_PREEMPT_RT) preempt_disable(); #endif - kernel_fpu_begin(); + WC_LINUXKM_FPU_BEGIN(); pstate = wc_linuxkm_fpu_state_assoc(1, 1); if (pstate == NULL) { - kernel_fpu_end(); + WC_LINUXKM_FPU_END(); #if IS_ENABLED(CONFIG_PREEMPT_RT) preempt_enable(); #endif @@ -521,7 +543,7 @@ void wc_restore_vector_registers_x86(enum wc_svr_flags flags) if (pstate->fpu_state == 0U) { wc_linuxkm_fpu_state_release(pstate); - kernel_fpu_end(); + WC_LINUXKM_FPU_END(); #if IS_ENABLED(CONFIG_PREEMPT_RT) preempt_enable(); #endif diff --git a/src/include.am b/src/include.am index 632feb67c1b..5b44bc34f87 100644 --- a/src/include.am +++ b/src/include.am @@ -111,6 +111,9 @@ if BUILD_AESNI src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S if BUILD_X86_ASM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S +if BUILD_AESXTS +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_x86_asm.S +endif else if BUILD_AESGCM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S @@ -273,6 +276,9 @@ if BUILD_AESNI src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S if BUILD_X86_ASM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S +if BUILD_AESXTS +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_x86_asm.S +endif else if BUILD_AESGCM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S @@ -604,6 +610,9 @@ if BUILD_AESNI src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S if BUILD_X86_ASM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S +if BUILD_AESXTS +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_x86_asm.S +endif else if BUILD_AESGCM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S @@ -986,6 +995,9 @@ src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_x86_64_asm.S if BUILD_X86_ASM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S +if BUILD_AESXTS +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_x86_asm.S +endif else if BUILD_AESGCM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S @@ -1932,6 +1944,9 @@ src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_x86_64_asm.S if BUILD_X86_ASM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S +if BUILD_AESXTS +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_x86_asm.S +endif else if BUILD_AESGCM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S diff --git a/tests/api/test_aes.c b/tests/api/test_aes.c index 72221cd04ad..c55630b8288 100644 --- a/tests/api/test_aes.c +++ b/tests/api/test_aes.c @@ -693,7 +693,12 @@ static int test_wc_AesCbcEncryptDecrypt_WithKey(Aes* aes, byte* key, ExpectIntEQ(wc_AesCbcEncrypt(aes, cipher, vector, vector_len), 0); ExpectBufEQ(cipher, vector_enc, vector_len); -#ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS + /* BAD_LENGTH_E enforcement lives behind WOLFSSL_AES_CBC_LENGTH_CHECKS in + * non-FIPS aes.c. FIPSv2 (cert3389) uses wc_AesCbcEncrypt_fips, which + * predates the check and returns 0 on unaligned input; only v5.x+ carry + * the wrapper-level check. Skip the assertion for FIPSv2. */ +#if defined(WOLFSSL_AES_CBC_LENGTH_CHECKS) && \ + (!defined(HAVE_FIPS) || FIPS_VERSION_GE(5,0)) ExpectIntEQ(wc_AesCbcEncrypt(aes, cipher, vector, vector_len - 1), WC_NO_ERR_TRACE(BAD_LENGTH_E)); #endif @@ -703,7 +708,9 @@ static int test_wc_AesCbcEncryptDecrypt_WithKey(Aes* aes, byte* key, ExpectIntEQ(wc_AesCbcDecrypt(aes, decrypted, cipher, WC_AES_BLOCK_SIZE * 2), 0); ExpectBufEQ(decrypted, vector, vector_len); -#ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS +#if defined(WOLFSSL_AES_CBC_LENGTH_CHECKS) && \ + (!defined(HAVE_FIPS) || FIPS_VERSION_GE(5,0)) + /* Same FIPSv2 vs v5+ rationale as the encrypt assertion above. */ ExpectIntEQ(wc_AesCbcDecrypt(aes, decrypted, cipher, WC_AES_BLOCK_SIZE * 2 - 1), WC_NO_ERR_TRACE(BAD_LENGTH_E)); #else diff --git a/tests/api/test_evp_pkey.c b/tests/api/test_evp_pkey.c index 9bdd5b9339d..2e106d16d6a 100644 --- a/tests/api/test_evp_pkey.c +++ b/tests/api/test_evp_pkey.c @@ -1526,7 +1526,7 @@ static int test_wolfSSL_EVP_PKEY_sign_verify(int keyType) !defined(HAVE_SELFTEST) #if !defined(HAVE_FIPS) || (defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION>2)) { - ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL)); + ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL)); ExpectIntEQ(EVP_PKEY_assign_RSA(pkey, rsa), WOLFSSL_SUCCESS); } #endif @@ -2159,7 +2159,7 @@ int test_wolfSSL_EVP_PKEY_encrypt(void) XMEMSET(outDec, 0, rsaKeySz); } - ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL)); + ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL)); ExpectNotNull(pkey = wolfSSL_EVP_PKEY_new()); ExpectIntEQ(EVP_PKEY_assign_RSA(pkey, rsa), WOLFSSL_SUCCESS); if (EXPECT_FAIL()) { diff --git a/tests/api/test_ossl_rsa.c b/tests/api/test_ossl_rsa.c index dc0cee665ba..250d1df0070 100644 --- a/tests/api/test_ossl_rsa.c +++ b/tests/api/test_ossl_rsa.c @@ -65,7 +65,7 @@ int test_wolfSSL_RSA(void) RSA_free(rsa); rsa = NULL; - ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL)); + ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL)); ExpectIntEQ(RSA_size(rsa), 256); #if (!defined(HAVE_FIPS) || FIPS_VERSION3_GT(6,0,0)) && !defined(HAVE_SELFTEST) @@ -306,7 +306,7 @@ int test_wolfSSL_RSA(void) rsa = NULL; #if !defined(USE_FAST_MATH) || (FP_MAX_BITS >= (3072*2)) - ExpectNotNull(rsa = RSA_generate_key(3072, 17, NULL, NULL)); + ExpectNotNull(rsa = RSA_generate_key(3072, 65537, NULL, NULL)); ExpectIntEQ(RSA_size(rsa), 384); ExpectIntEQ(RSA_bits(rsa), 3072); RSA_free(rsa); @@ -461,7 +461,7 @@ int test_wolfSSL_RSA_print(void) RSA_free(rsa); rsa = NULL; - ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL)); + ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL)); ExpectIntEQ(RSA_print(bio, rsa, 0), 1); ExpectIntEQ(RSA_print(bio, rsa, 4), 1); @@ -644,11 +644,11 @@ int test_wolfSSL_RSA_meth(void) RSA_METHOD *rsa_meth = NULL; #ifdef WOLFSSL_KEY_GEN - ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL)); + ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL)); RSA_free(rsa); rsa = NULL; #else - ExpectNull(rsa = RSA_generate_key(2048, 3, NULL, NULL)); + ExpectNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL)); #endif ExpectNotNull(RSA_get_default_method()); diff --git a/tests/api/test_slhdsa.c b/tests/api/test_slhdsa.c index 988bbc579e0..922fc686da7 100644 --- a/tests/api/test_slhdsa.c +++ b/tests/api/test_slhdsa.c @@ -1081,12 +1081,14 @@ int test_wc_slhdsa_sign_hash(void) WC_HASH_TYPE_SHA256, sig, sigLen), WC_NO_ERR_TRACE(BAD_LENGTH_E)); - /* Unsupported hashType (FIPS 205 doesn't list WC_HASH_TYPE_NONE) hits - * the default branch of slhdsakey_validate_prehash. */ + /* WC_HASH_TYPE_NONE (pure SLH-DSA sentinel) is never a valid pre-hash + * (FIPS 205 Section 10.2.2 / Table 9), so HashSLH-DSA signing rejects it + * with an explicit early check (BAD_FUNC_ARG), not via the + * slhdsa_check_hash_for_n() switch default. */ sigLen = WC_SLHDSA_MAX_SIG_LEN; ExpectIntEQ(wc_SlhDsaKey_SignHash(&key, ctx, sizeof(ctx), hash, 32, WC_HASH_TYPE_NONE, sig, &sigLen, &rng), - WC_NO_ERR_TRACE(NOT_COMPILED_IN)); + WC_NO_ERR_TRACE(BAD_FUNC_ARG)); /* Test SignHash with SHA-256. */ sigLen = WC_SLHDSA_MAX_SIG_LEN; diff --git a/wolfcrypt/benchmark/fips_cast_bench.c b/wolfcrypt/benchmark/fips_cast_bench.c new file mode 100644 index 00000000000..19b0d7c1bfe --- /dev/null +++ b/wolfcrypt/benchmark/fips_cast_bench.c @@ -0,0 +1,354 @@ +/* fips_cast_bench.c + * + * Copyright (C) 2006-2026 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* FIPS CAST benchmark. + * + * Measures the wall-clock cost of each Conditional Algorithm Self-Test (CAST) + * in the wolfCrypt v7.0.0 FIPS module, so operators can budget power-on + * latency on constrained OEs (DSP, MCU) where each CAST is a boot-time delay. + * + * Compiled only under HAVE_FIPS (include.am BUILD_FIPS gate). Runs + * wc_RunCast_fips(id) per CAST, reporting mean/stddev/min/max plus the total + * for one wc_RunAllCast_fips() pass (the cost callers pay at app start). + * + * Citations: + * FIPS 140-3 sec 7.10 (Self-Tests) - CAST framework + * FIPS 140-3 IG 10.3.A - Algorithm-by-algorithm CAST coverage + * ISO/IEC 19790:2012 sec 7.10.2 - Conditional self-test execution + */ + +#ifdef HAVE_CONFIG_H + #include +#endif + +#if !defined(WOLFSSL_USER_SETTINGS) && !defined(WOLFSSL_NO_OPTIONS_H) + #include +#endif +#include /* also picks up user_settings.h */ + +/* wc_RunCast_fips() / wc_RunAllCast_fips() are v7.0.0-only; older 140-3 + * modules (v5.x, v6.0.0) and FIPSv2 do not export them, so an older-flavor + * fips/ tree swapped in by fips-check.sh would fail to link. Gate on + * FIPS_VERSION3_GE(7,0,0); older flavors use the empty-main stub below so the + * build still produces an executable. */ +#if defined(HAVE_FIPS) && FIPS_VERSION3_GE(7,0,0) + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef _WIN32 + #define WIN32_LEAN_AND_MEAN + #include +#else + #include +#endif + + +#define BENCH_DEFAULT_ITERS 10 + +/* Map FIPS_CAST_* enum value to a printable name. Kept in sync with + * wolfssl/wolfcrypt/fips_test.h FipsCastId enum. */ +static const char* cast_name(int id) +{ + switch (id) { + case FIPS_CAST_AES_CBC: return "AES-CBC"; + case FIPS_CAST_AES_GCM: return "AES-GCM"; + case FIPS_CAST_HMAC_SHA1: return "HMAC-SHA-1"; + case FIPS_CAST_HMAC_SHA2_256: return "HMAC-SHA2-256"; + case FIPS_CAST_HMAC_SHA2_512: return "HMAC-SHA2-512"; + case FIPS_CAST_HMAC_SHA3_256: return "HMAC-SHA3-256"; + case FIPS_CAST_DRBG: return "DRBG (SHA-256)"; + case FIPS_CAST_RSA_SIGN_PKCS1v15: return "RSA-SIGN-PKCS1v15"; + case FIPS_CAST_ECC_CDH: return "ECC-CDH"; + case FIPS_CAST_ECC_PRIMITIVE_Z: return "ECC-Primitive-Z"; + case FIPS_CAST_DH_PRIMITIVE_Z: return "DH-Primitive-Z"; + case FIPS_CAST_ECDSA: return "ECDSA"; + case FIPS_CAST_KDF_TLS12: return "KDF-TLS12"; + case FIPS_CAST_KDF_TLS13: return "KDF-TLS13"; + case FIPS_CAST_KDF_SSH: return "KDF-SSH"; +#if defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(6,0) + case FIPS_CAST_KDF_SRTP: return "KDF-SRTP"; + case FIPS_CAST_ED25519: return "Ed25519"; + case FIPS_CAST_ED448: return "Ed448"; + case FIPS_CAST_PBKDF2: return "PBKDF2"; +#endif +#if defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(7,0) + case FIPS_CAST_AES_ECB: return "AES-ECB"; + case FIPS_CAST_ML_KEM: return "ML-KEM"; + case FIPS_CAST_ML_DSA: return "ML-DSA"; + case FIPS_CAST_LMS: return "LMS"; + case FIPS_CAST_XMSS: return "XMSS"; + case FIPS_CAST_DRBG_SHA512: return "DRBG (SHA-512)"; + case FIPS_CAST_SLH_DSA: return "SLH-DSA"; + case FIPS_CAST_AES_CMAC: return "AES-CMAC"; + case FIPS_CAST_SHAKE: return "SHAKE"; + case FIPS_CAST_AES_KW: return "AES-KW"; +#endif + default: return "(unknown)"; + } +} + + +/* Monotonic clock in nanoseconds. POSIX clock_gettime(CLOCK_MONOTONIC) on + * Unix-like systems; QueryPerformanceCounter on Windows. */ +static long long now_ns(void) +{ +#ifdef _WIN32 + static LARGE_INTEGER freq = { 0 }; + LARGE_INTEGER count; + if (freq.QuadPart == 0) + QueryPerformanceFrequency(&freq); + QueryPerformanceCounter(&count); + /* Multiply before divide to keep precision; freq is typically 10MHz. */ + return (long long)((count.QuadPart * 1000000000LL) / freq.QuadPart); +#else + struct timespec ts; + if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) + return 0; + return (long long)ts.tv_sec * 1000000000LL + (long long)ts.tv_nsec; +#endif +} + + +/* Run a single CAST iters times, populate stats (in milliseconds). + * Returns 0 on success, non-zero on first CAST failure. */ +static int run_one_cast(int id, int iters, + double* out_mean_ms, double* out_stddev_ms, + double* out_min_ms, double* out_max_ms) +{ + int i; + long long total = 0; + long long mn = LLONG_MAX; + long long mx = 0; + long long* samples; + double mean_ns; + double variance_acc = 0.0; + + if (iters <= 0) + return BAD_FUNC_ARG; + + samples = (long long*)XMALLOC((size_t)iters * sizeof(long long), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (samples == NULL) + return MEMORY_E; + + for (i = 0; i < iters; i++) { + long long t0, t1, dt; + int rc; + + t0 = now_ns(); + rc = wc_RunCast_fips(id); + t1 = now_ns(); + if (rc != 0) { + XFREE(samples, NULL, DYNAMIC_TYPE_TMP_BUFFER); + return rc; + } + dt = t1 - t0; + if (dt < 0) + dt = 0; + samples[i] = dt; + total += dt; + if (dt < mn) + mn = dt; + if (dt > mx) + mx = dt; + } + + mean_ns = (double)total / (double)iters; + for (i = 0; i < iters; i++) { + double d = (double)samples[i] - mean_ns; + variance_acc += d * d; + } + XFREE(samples, NULL, DYNAMIC_TYPE_TMP_BUFFER); + + *out_mean_ms = mean_ns / 1.0e6; + *out_stddev_ms = sqrt(variance_acc / (double)iters) / 1.0e6; + *out_min_ms = (double)mn / 1.0e6; + *out_max_ms = (double)mx / 1.0e6; + return 0; +} + + +static void usage(const char* prog) +{ + printf("usage: %s [-i ITERS] [-c CAST_ID] [-l]\n", prog); + printf(" -i ITERS iterations per CAST (default %d)\n", + BENCH_DEFAULT_ITERS); + printf(" -c CAST_ID benchmark only the named CAST id\n"); + printf(" -l list CAST ids and names; do not run\n"); + printf(" -h show this help\n"); +} + + +int main(int argc, char** argv) +{ + int iters = BENCH_DEFAULT_ITERS; + int single = -1; + int list_only = 0; + int i; + int first, last; + int failures = 0; + int run_count = 0; + double total_mean_ms = 0.0; + + for (i = 1; i < argc; i++) { + if (XSTRCMP(argv[i], "-i") == 0 && i + 1 < argc) { + iters = atoi(argv[++i]); + if (iters <= 0) { + fprintf(stderr, "-i requires a positive iteration count\n"); + return 2; + } + } else if (XSTRCMP(argv[i], "-c") == 0 && i + 1 < argc) { + single = atoi(argv[++i]); + } else if (XSTRCMP(argv[i], "-l") == 0) { + list_only = 1; + } else if (XSTRCMP(argv[i], "-h") == 0 + || XSTRCMP(argv[i], "--help") == 0) { + usage(argv[0]); + return 0; + } else { + fprintf(stderr, "unknown argument: %s\n", argv[i]); + usage(argv[0]); + return 2; + } + } + + if (list_only) { + printf("FIPS CAST IDs (FIPS_CAST_COUNT = %d):\n", FIPS_CAST_COUNT); + for (i = 0; i < FIPS_CAST_COUNT; i++) + printf(" %2d %s\n", i, cast_name(i)); + return 0; + } + + if (single >= 0 && single >= FIPS_CAST_COUNT) { + fprintf(stderr, "CAST id %d out of range (0..%d)\n", + single, FIPS_CAST_COUNT - 1); + return 2; + } + + printf("wolfCrypt FIPS CAST benchmark\n"); + printf("Library version: %s\n", LIBWOLFSSL_VERSION_STRING); + printf("FIPS_CAST_COUNT: %d\n", FIPS_CAST_COUNT); + printf("Iterations per CAST: %d\n", iters); + printf("Clock: %s\n", +#ifdef _WIN32 + "QueryPerformanceCounter" +#else + "clock_gettime(CLOCK_MONOTONIC)" +#endif + ); + printf("\n"); + + /* Register the default DRBG seed callback (mirrors benchmark.c and + * wolfcrypt/test/test.c). Under WC_RNG_SEED_CB (set by the FIPS optest + * CFLAGS) the RNG needs a seed generator before _InitRng can build a + * working DRBG; without it, wc_InitRng in the ECC_PRIMITIVE_Z and ECDSA + * CASTs returns -199 (RNG_FAILURE_E) and dependent CASTs cascade-fail. */ +#ifdef WC_RNG_SEED_CB + { + int seed_cb_rc = wc_SetSeed_Cb(WC_GENERATE_SEED_DEFAULT); + if (seed_cb_rc != 0) { + fprintf(stderr, + "wc_SetSeed_Cb returned %d - DRBG-using CASTs will fail.\n", + seed_cb_rc); + } + } +#endif + + /* Prime every CAST once via wc_RunAllCast_fips() so each reaches + * FIPS_CAST_STATE_SUCCESS before measuring. This isolates per-CAST KAT + * runtime from the recursive-CAST init chain a cold CAST triggers when + * its KAT calls FIPS primitives whose own CASTs are still in INIT. + * Customers calling wc_RunAllCast_fips() at boot pay this once, so + * priming matches that real-world workflow. */ + { + int prime_rc = wc_RunAllCast_fips(); + if (prime_rc != 0) { + fprintf(stderr, + "wc_RunAllCast_fips() prime returned %d - some CASTs may have failed.\n" + "Per-CAST measurements continue but failed CASTs will report errors.\n\n", + prime_rc); + } + } + + printf("ID | Name | Mean(ms) | StdDev(ms) | Min(ms) " + "| Max(ms)\n"); + printf("---+---------------------+----------+------------+---------" + "+---------\n"); + + first = (single >= 0) ? single : 0; + last = (single >= 0) ? single + 1 : FIPS_CAST_COUNT; + + for (i = first; i < last; i++) { + double mean_ms = 0, sd_ms = 0, mn_ms = 0, mx_ms = 0; + int rc = run_one_cast(i, iters, &mean_ms, &sd_ms, &mn_ms, &mx_ms); + if (rc != 0) { + printf("%2d | %-19s | FAILED rc=%d (%s)\n", + i, cast_name(i), rc, wc_GetErrorString(rc)); + failures++; + continue; + } + printf("%2d | %-19s | %8.3f | %10.3f | %7.3f | %7.3f\n", + i, cast_name(i), mean_ms, sd_ms, mn_ms, mx_ms); + total_mean_ms += mean_ms; + run_count++; + } + + printf("\n"); + if (run_count > 0) { + printf("Sum of mean CAST times (one wc_RunAllCast_fips() pass): " + "%.3f ms\n", total_mean_ms); + } + if (failures > 0) { + printf("WARN: %d CAST(s) failed.\n", failures); + return 1; + } + return 0; +} + +#else /* !(HAVE_FIPS && FIPS_VERSION3_GE(7,0,0)) */ + +#include + +int main(void) +{ +#ifndef HAVE_FIPS + fprintf(stderr, + "fips_cast_bench: built without HAVE_FIPS - nothing to measure\n"); +#else + fprintf(stderr, + "fips_cast_bench: requires v7.0.0+ FIPS module " + "(wc_RunCast_fips / wc_RunAllCast_fips were added in v7) - " + "nothing to measure on this older module flavor\n"); +#endif + return 0; +} + +#endif /* HAVE_FIPS && FIPS_VERSION3_GE(7,0,0) */ diff --git a/wolfcrypt/benchmark/include.am b/wolfcrypt/benchmark/include.am index 22cecbdaefe..130343a14e1 100644 --- a/wolfcrypt/benchmark/include.am +++ b/wolfcrypt/benchmark/include.am @@ -10,6 +10,16 @@ wolfcrypt_benchmark_benchmark_LDADD = src/libwolfssl@LIBSUFFIX@.la $(LIB_ wolfcrypt_benchmark_benchmark_DEPENDENCIES = src/libwolfssl@LIBSUFFIX@.la noinst_HEADERS += wolfcrypt/benchmark/benchmark.h +# FIPS CAST benchmark - measures wc_RunCast_fips() execution time per CAST. +# Helps operators of resource-constrained operational environments budget +# module power-on latency. Compiled only when FIPS is enabled. +if BUILD_FIPS +noinst_PROGRAMS += wolfcrypt/benchmark/fips_cast_bench +wolfcrypt_benchmark_fips_cast_bench_SOURCES = wolfcrypt/benchmark/fips_cast_bench.c +wolfcrypt_benchmark_fips_cast_bench_LDADD = src/libwolfssl@LIBSUFFIX@.la $(LIB_STATIC_ADD) -lm +wolfcrypt_benchmark_fips_cast_bench_DEPENDENCIES = src/libwolfssl@LIBSUFFIX@.la +endif + endif endif diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index 80b94efa14c..11bb62be64d 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -138,6 +138,15 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits #include +/* Dedicated GCM (PCLMUL/GHASH) asm is x86_64-only: the 32-bit aes_gcm_x86_asm.S + * is not position-independent and its .text relocations break the FIPS module + * in-core integrity in a shared object. 32-bit x86 GCM uses portable-C GHASH + * with AES-NI block encryption -- mirrors x86_64-gating of the GCM-AVX path. + * Kaleb 2026-06-17; see memory 32bit-x86-aesni-enablement. */ +#if defined(WOLFSSL_AESNI) && defined(WOLFSSL_X86_64_BUILD) + #define WC_AESNI_GCM +#endif + #ifdef WOLF_CRYPTO_CB #include #endif @@ -1161,6 +1170,122 @@ static void Check_CPU_support_HwCrypto(Aes* aes) } #endif /* __aarch64__ && !WOLFSSL_ARMASM_NO_HW_CRYPTO */ +/* In a Linux kernel module the 32-bit ARM AES asm (ARMv8 AArch32 AES/PMULL + + * NEON) MUST run between kernel_neon_begin()/end() or the first SIMD instruction + * faults "undefined instruction" (arm64 tolerates it, so wolfSSL never bracketed + * it). Wrap every AES_*_AARCH32 entry with SAVE/RESTORE_VECTOR_REGISTERS and + * #define-redirect the call sites below (defined first to avoid recursion). + * Scoped to !__aarch64__ so the aarch64 path is byte-identical. (FIPS 197 AES, + * SP 800-38D AES-GCM.) On save failure (process-context only) the op is + * skipped, not crashed -- never reached by POST/optest/harness. */ +#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(__aarch64__) && \ + !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) + static WC_INLINE void wc_svr_AES_set_key_AARCH32(const byte* userKey, + int keylen, byte* key, int dir) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_set_key_AARCH32(userKey, keylen, key, dir); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_encrypt_AARCH32(const byte* inBlock, + byte* outBlock, byte* key, int nr) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_encrypt_AARCH32(inBlock, outBlock, key, nr); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_decrypt_AARCH32(const byte* inBlock, + byte* outBlock, byte* key, int nr) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_decrypt_AARCH32(inBlock, outBlock, key, nr); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_encrypt_blocks_AARCH32(const byte* in, + byte* out, word32 sz, byte* key, int nr) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_encrypt_blocks_AARCH32(in, out, sz, key, nr); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_decrypt_blocks_AARCH32(const byte* in, + byte* out, word32 sz, byte* key, int nr) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_decrypt_blocks_AARCH32(in, out, sz, key, nr); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_CBC_encrypt_AARCH32(const byte* in, + byte* out, word32 sz, byte* reg, byte* key, int rounds) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_CBC_encrypt_AARCH32(in, out, sz, reg, key, rounds); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_CBC_decrypt_AARCH32(const byte* in, + byte* out, word32 sz, byte* reg, byte* key, int rounds) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_CBC_decrypt_AARCH32(in, out, sz, reg, key, rounds); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_CTR_encrypt_AARCH32(const byte* in, + byte* out, word32 sz, byte* reg, byte* key, byte* tmp, word32* left, + word32 rounds) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_CTR_encrypt_AARCH32(in, out, sz, reg, key, tmp, left, rounds); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_GCM_set_key_AARCH32(const byte* nonce, + const byte* key, byte* gcm_h, int nr) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_GCM_set_key_AARCH32(nonce, key, gcm_h, nr); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_GCM_encrypt_AARCH32(const byte* in, + byte* out, word32 sz, const byte* nonce, word32 nonceSz, byte* tag, + word32 tagSz, const byte* aad, word32 aadSz, byte* key, byte* gcm_h, + byte* tmp, byte* reg, int nr) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_GCM_encrypt_AARCH32(in, out, sz, nonce, nonceSz, tag, tagSz, aad, + aadSz, key, gcm_h, tmp, reg, nr); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE int wc_svr_AES_GCM_decrypt_AARCH32(const byte* in, + byte* out, word32 sz, const byte* nonce, word32 nonceSz, const byte* tag, + word32 tagSz, const byte* aad, word32 aadSz, byte* key, byte* gcm_h, + byte* tmp, byte* reg, int nr) { + int _ret, _svr = SAVE_VECTOR_REGISTERS2(); + if (_svr != 0) return _svr; + _ret = AES_GCM_decrypt_AARCH32(in, out, sz, nonce, nonceSz, tag, tagSz, + aad, aadSz, key, gcm_h, tmp, reg, nr); + RESTORE_VECTOR_REGISTERS(); + return _ret; + } + #define AES_set_key_AARCH32 wc_svr_AES_set_key_AARCH32 + #define AES_encrypt_AARCH32 wc_svr_AES_encrypt_AARCH32 + #define AES_decrypt_AARCH32 wc_svr_AES_decrypt_AARCH32 + #define AES_encrypt_blocks_AARCH32 wc_svr_AES_encrypt_blocks_AARCH32 + #define AES_decrypt_blocks_AARCH32 wc_svr_AES_decrypt_blocks_AARCH32 + #define AES_CBC_encrypt_AARCH32 wc_svr_AES_CBC_encrypt_AARCH32 + #define AES_CBC_decrypt_AARCH32 wc_svr_AES_CBC_decrypt_AARCH32 + #define AES_CTR_encrypt_AARCH32 wc_svr_AES_CTR_encrypt_AARCH32 + #define AES_GCM_set_key_AARCH32 wc_svr_AES_GCM_set_key_AARCH32 + #define AES_GCM_encrypt_AARCH32 wc_svr_AES_GCM_encrypt_AARCH32 + #define AES_GCM_decrypt_AARCH32 wc_svr_AES_GCM_decrypt_AARCH32 + #ifdef WOLFSSL_AES_XTS + static WC_INLINE void wc_svr_AES_XTS_encrypt_AARCH32(const byte* in, + byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp, + int nr) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_XTS_encrypt_AARCH32(in, out, sz, i, key, key2, tmp, nr); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_XTS_decrypt_AARCH32(const byte* in, + byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp, + int nr) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_XTS_decrypt_AARCH32(in, out, sz, i, key, key2, tmp, nr); + RESTORE_VECTOR_REGISTERS(); + } + #define AES_XTS_encrypt_AARCH32 wc_svr_AES_XTS_encrypt_AARCH32 + #define AES_XTS_decrypt_AARCH32 wc_svr_AES_XTS_decrypt_AARCH32 + #endif /* WOLFSSL_AES_XTS */ +#endif /* WOLFSSL_USE_SAVE_VECTOR_REGISTERS && !__aarch64__ && !NO_HW_CRYPTO */ + #if defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESCCM) || \ defined(WOLFSSL_AESGCM_STREAM) static WARN_UNUSED_RESULT int wc_AesEncrypt(Aes* aes, const byte* inBlock, @@ -4819,6 +4944,13 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt(Aes* aes, const byte* inBlock, static int AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, int dir) { + /* Reject invalid AES key lengths early (FIPS 197: 128/192/256 only). + * wc_AesSetKeyDirect only bounds-checks keylen, so without this a + * zero/invalid keylen reaches here on 32-bit ARM armasm; the C path + * rejects it in wc_AesSetKeyLocal; check early and BAD_FUNC_ARG out. */ + if (userKey == NULL || (keylen != 16 && keylen != 24 && keylen != 32)) { + return BAD_FUNC_ARG; + } #if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB) || \ defined(WOLFSSL_AES_OFB) || defined(WOLFSSL_AES_XTS) || \ defined(WOLFSSL_AES_CTS) @@ -8082,8 +8214,15 @@ static WC_INLINE void IncrementGcmCounter(byte* inOutCtr) #endif #endif /* !FREESCALE_LTC_AES_GCM */ +/* SP 800-38D AES-GCM software GHASH (FlattenSzInBits length block, RIGHTSHIFTX, + * GCM table GMULT/GHASH below). On 32-bit ARM --enable-armasm the HW GCM only + * accelerates the one-shot path; the streaming GHASH has no 32-bit asm (only + * __aarch64__ + PMULL), so it uses this software path. Compile this block when + * WOLFSSL_AESGCM_STREAM is set even for arm32 armasm, else GHASH_FINAL fallback + * GHASH_LEN_BLOCK references an undefined FlattenSzInBits (arm64 gets it via + * __aarch64__). */ #if !defined(WOLFSSL_ARMASM) || defined(__aarch64__) || \ - defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) + defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) || defined(WOLFSSL_AESGCM_STREAM) #if defined(GCM_SMALL) || defined(GCM_TABLE) || defined(GCM_TABLE_4BIT) static WC_INLINE void FlattenSzInBits(byte* buf, word32 sz) @@ -8254,7 +8393,7 @@ void GenerateM0(Gcm* gcm) #endif #endif -#if defined(WOLFSSL_AESNI) && defined(GCM_TABLE_4BIT) && \ +#if defined(WC_AESNI_GCM) && defined(GCM_TABLE_4BIT) && \ defined(WC_C_DYNAMIC_FALLBACK) void GCM_generate_m0_aesni(const unsigned char *h, unsigned char *m) XASM_LINK("GCM_generate_m0_aesni"); @@ -8342,6 +8481,25 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) #ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO #if !defined(__aarch64__) AES_GCM_set_key_AARCH32(iv, (byte*)aes->key, aes->gcm.H, aes->rounds); + #if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT) + { + /* The 32-bit ARM asm GCM keeps gcm->H in the PMULL (bit-reflected) + * form for its one-shot GHASH (vmull.p64). The C streaming GHASH + * uses gcm->M0, which GenerateM0() builds from the *standard* + * H = E_K(0) order -- not derivable from the PMULL H. Recompute the + * standard H into gcm->H, build M0, then restore the PMULL H for the + * one-shot path; without this streaming AES-GCM mis-authenticates + * (AES_GCM_AUTH_E). SP 800-38D AES-GCM GHASH. */ + ALIGN16 byte gcmStdH[WC_AES_BLOCK_SIZE]; + ALIGN16 byte gcmZero[WC_AES_BLOCK_SIZE]; + XMEMSET(gcmZero, 0, WC_AES_BLOCK_SIZE); + XMEMCPY(gcmStdH, aes->gcm.H, WC_AES_BLOCK_SIZE); + AES_encrypt_AARCH32(gcmZero, aes->gcm.H, (byte*)aes->key, + (int)aes->rounds); + GenerateM0(&aes->gcm); + XMEMCPY(aes->gcm.H, gcmStdH, WC_AES_BLOCK_SIZE); + } + #endif #else if (aes->use_aes_hw_crypto && aes->use_pmull_hw_crypto) { AES_GCM_set_key_AARCH64(iv, (byte*)aes->key, aes->gcm.H, @@ -8384,7 +8542,7 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) if (ret == 0) { #if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT) - #if defined(WOLFSSL_AESNI) && defined(GCM_TABLE_4BIT) + #if defined(WC_AESNI_GCM) && defined(GCM_TABLE_4BIT) if (aes->use_aesni) { #if defined(WC_C_DYNAMIC_FALLBACK) #ifdef HAVE_INTEL_AVX2 @@ -8445,7 +8603,8 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) } -#ifdef WOLFSSL_AESNI + +#ifdef WC_AESNI_GCM void AES_GCM_encrypt_aesni(const unsigned char *in, unsigned char *out, const unsigned char* addt, const unsigned char* ivec, @@ -8533,8 +8692,13 @@ void AES_GCM_decrypt_vaes(const unsigned char *in, unsigned char *out, #endif /* WOLFSSL_AESNI */ +/* SP 800-38D software GHASH (GMULT / GHASH / GHASH_ONE_BLOCK_SW per table mode). + * As with FlattenSzInBits above, 32-bit ARM --enable-armasm has no asm streaming + * GHASH (only __aarch64__ + PMULL), so WOLFSSL_AESGCM_STREAM needs these symbols + * compiled even for arm32 armasm. Widen the guard (no effect on x86 / arm64, + * which already satisfy it -> their in-core hash is unchanged). */ #if !defined(WOLFSSL_ARMASM) || defined(__aarch64__) || \ - defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) + defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) || defined(WOLFSSL_AESGCM_STREAM) #if defined(GCM_SMALL) static void GMULT(byte* X, byte* Y) { @@ -10905,7 +11069,7 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, ret = AES_GCM_encrypt_ASM(aes, out, in, sz, iv, ivSz, authTag, authTagSz, authIn, authInSz); #else -#ifdef WOLFSSL_AESNI +#ifdef WC_AESNI_GCM if (aes->use_aesni) { #ifdef HAVE_INTEL_AVX512 if ((sz >= WC_AES_BLOCK_SIZE * WC_VAES_GCM_MIN_BLOCKS) && @@ -11538,7 +11702,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, const byte* authIn, word32 authInSz) { int ret; -#ifdef WOLFSSL_AESNI +#ifdef WC_AESNI_GCM int res = WC_NO_ERR_TRACE(AES_GCM_AUTH_E); #endif @@ -11687,7 +11851,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, authTagSz, authIn, authInSz); } #else -#ifdef WOLFSSL_AESNI +#ifdef WC_AESNI_GCM if (aes->use_aesni) { #ifdef HAVE_INTEL_AVX512 if ((sz >= WC_AES_BLOCK_SIZE * WC_VAES_GCM_MIN_BLOCKS) && @@ -11754,6 +11918,15 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, VECTOR_REGISTERS_POP; + /* FIPS 140-3 / SP 800-38D: on authentication failure, decrypted-but- + * unauthenticated plaintext in `out` must not be released to the caller. + * Wipe it so a caller that ignores the return value cannot observe plaintext + * from forged ciphertext. All paths (AES-NI, AVX1/2, ARM HW/NEON, C + * fallback) funnel through `ret` here, covering every sub-implementation. */ + if (ret == WC_NO_ERR_TRACE(AES_GCM_AUTH_E) && out != NULL && sz > 0) { + ForceZero(out, sz); + } + return ret; } #endif @@ -11922,7 +12095,7 @@ static WARN_UNUSED_RESULT int AesGcmFinal_C( return 0; } -#ifdef WOLFSSL_AESNI +#ifdef WC_AESNI_GCM #ifdef __cplusplus extern "C" { @@ -13403,7 +13576,7 @@ int wc_AesGcmInit(Aes* aes, const byte* key, word32 len, const byte* iv, if (iv != NULL) { /* Initialize with the IV. */ - #ifdef WOLFSSL_AESNI + #ifdef WC_AESNI_GCM if (aes->use_aesni) { SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = AesGcmInit_aesni(aes, iv, ivSz); @@ -13530,7 +13703,7 @@ int wc_AesGcmEncryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz, if (ret == 0) { /* Encrypt with AAD and/or plaintext. */ - #ifdef WOLFSSL_AESNI + #ifdef WC_AESNI_GCM if (aes->use_aesni) { SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = AesGcmEncryptUpdate_aesni(aes, out, in, sz, authIn, authInSz); @@ -13592,7 +13765,7 @@ int wc_AesGcmEncryptFinal(Aes* aes, byte* authTag, word32 authTagSz) if (ret == 0) { /* Calculate authentication tag. */ - #ifdef WOLFSSL_AESNI + #ifdef WC_AESNI_GCM if (aes->use_aesni) { SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = AesGcmEncryptFinal_aesni(aes, authTag, authTagSz); @@ -13676,7 +13849,7 @@ int wc_AesGcmDecryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz, if (ret == 0) { /* Decrypt with AAD and/or cipher text. */ - #ifdef WOLFSSL_AESNI + #ifdef WC_AESNI_GCM if (aes->use_aesni) { SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = AesGcmDecryptUpdate_aesni(aes, out, in, sz, authIn, authInSz); @@ -13736,7 +13909,7 @@ int wc_AesGcmDecryptFinal(Aes* aes, const byte* authTag, word32 authTagSz) if (ret == 0) { /* Calculate authentication tag and compare with one passed in.. */ - #ifdef WOLFSSL_AESNI + #ifdef WC_AESNI_GCM if (aes->use_aesni) { SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = AesGcmDecryptFinal_aesni(aes, authTag, authTagSz); @@ -13763,6 +13936,10 @@ int wc_AesGcmDecryptFinal(Aes* aes, const byte* authTag, word32 authTagSz) } } + /* Final cannot zeroize prior Update output buffers (it does not see them). + * On AES_GCM_AUTH_E the caller must treat all Update-produced plaintext as + * invalid and wipe it. See PL-R34 Security Policy section 8 (Operational + * Rules). */ return ret; } #endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */ diff --git a/wolfcrypt/src/aes_xts_x86_asm.S b/wolfcrypt/src/aes_xts_x86_asm.S new file mode 100644 index 00000000000..253d755fd66 --- /dev/null +++ b/wolfcrypt/src/aes_xts_x86_asm.S @@ -0,0 +1,840 @@ +/* aes_xts_x86_asm + * + * Copyright (C) 2006-2026 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifdef WOLFSSL_USER_SETTINGS +#include "wolfssl/wolfcrypt/settings.h" +#endif + +#ifndef HAVE_INTEL_AVX1 +#define HAVE_INTEL_AVX1 +#endif /* HAVE_INTEL_AVX1 */ +#ifndef NO_AVX2_SUPPORT +#ifndef HAVE_INTEL_AVX2 +#define HAVE_INTEL_AVX2 +#endif /* HAVE_INTEL_AVX2 */ +#endif /* NO_AVX2_SUPPORT */ + +#ifdef WOLFSSL_AES_XTS +#ifdef WOLFSSL_X86_BUILD + # 32-bit Intel (i386) AES-NI AES-XTS: single-block ports of the x86_64 + # AES_XTS_*_aesni routines, same algorithm and KAT output but limited to + # xmm0-7 and the i386 cdecl stack ABI (no xmm8-15/r8-r15, no 4-block + # pipeline). Still AES-NI-accelerated. Added 2026-06-17 so 32-bit Intel + # builds (host_cpu=x86 -> WOLFSSL_X86_BUILD) link and run AES-XTS. The + # GF(2^128) constant {0x87,1,1,1} is built on the stack (PIC-safe; a 32-bit + # shared object would otherwise need a GOT relocation for a .data constant). + # void AES_XTS_init_aesni(unsigned char* i, const unsigned char* tweak_key, + # int tweak_nr); +.text +.globl AES_XTS_init_aesni +.type AES_XTS_init_aesni,@function +.align 16 +AES_XTS_init_aesni: + movl 4(%esp), %eax + movdqu (%eax), %xmm2 + movl 8(%esp), %ecx + pxor (%ecx), %xmm2 + movdqu 16(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 32(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 48(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 64(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 80(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 96(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 112(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 128(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 144(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + cmpl $11, 12(%esp) + movdqu 160(%ecx), %xmm0 + jl L_AES_XTS_init_aesni_enclast_1 + aesenc %xmm0, %xmm2 + movdqu 176(%ecx), %xmm1 + aesenc %xmm1, %xmm2 + cmpl $13, 12(%esp) + movdqu 192(%ecx), %xmm0 + jl L_AES_XTS_init_aesni_enclast_1 + aesenc %xmm0, %xmm2 + movdqu 208(%ecx), %xmm1 + aesenc %xmm1, %xmm2 + movdqu 224(%ecx), %xmm0 +L_AES_XTS_init_aesni_enclast_1: + aesenclast %xmm0, %xmm2 + movdqu %xmm2, (%eax) + ret +.size AES_XTS_init_aesni,.-AES_XTS_init_aesni + # void AES_XTS_encrypt_aesni(const unsigned char* in, unsigned char* out, + # word32 sz, const unsigned char* i, const unsigned char* key, + # const unsigned char* key2, int nr); +.text +.globl AES_XTS_encrypt_aesni +.type AES_XTS_encrypt_aesni,@function +.align 16 +AES_XTS_encrypt_aesni: + pushl %edi + pushl %ebx + subl $16, %esp + movl $0x87, (%esp) + movl $0x01, 4(%esp) + movl $0x01, 8(%esp) + movl $0x01, 12(%esp) + movdqu (%esp), %xmm6 + movl 40(%esp), %eax + movdqu (%eax), %xmm2 + movl 48(%esp), %ecx + pxor (%ecx), %xmm2 + movdqu 16(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 32(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 48(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 64(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 80(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 96(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 112(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 128(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 144(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + cmpl $11, 52(%esp) + movdqu 160(%ecx), %xmm0 + jl L_AES_XTS_encrypt_aesni_enclast_2 + aesenc %xmm0, %xmm2 + movdqu 176(%ecx), %xmm1 + aesenc %xmm1, %xmm2 + cmpl $13, 52(%esp) + movdqu 192(%ecx), %xmm0 + jl L_AES_XTS_encrypt_aesni_enclast_2 + aesenc %xmm0, %xmm2 + movdqu 208(%ecx), %xmm1 + aesenc %xmm1, %xmm2 + movdqu 224(%ecx), %xmm0 +L_AES_XTS_encrypt_aesni_enclast_2: + aesenclast %xmm0, %xmm2 + xorl %edi, %edi + movl 36(%esp), %edx + andl $0xfffffff0, %edx +L_AES_XTS_encrypt_aesni_loop: + cmpl %edx, %edi + jge L_AES_XTS_encrypt_aesni_loop_done + movl 28(%esp), %eax + movdqu (%eax,%edi,1), %xmm3 + pxor %xmm2, %xmm3 + movl 44(%esp), %ecx + pxor (%ecx), %xmm3 + movdqu 16(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 32(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 48(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 64(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 80(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 96(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 112(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 128(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 144(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + cmpl $11, 52(%esp) + movdqu 160(%ecx), %xmm0 + jl L_AES_XTS_encrypt_aesni_enclast_3 + aesenc %xmm0, %xmm3 + movdqu 176(%ecx), %xmm1 + aesenc %xmm1, %xmm3 + cmpl $13, 52(%esp) + movdqu 192(%ecx), %xmm0 + jl L_AES_XTS_encrypt_aesni_enclast_3 + aesenc %xmm0, %xmm3 + movdqu 208(%ecx), %xmm1 + aesenc %xmm1, %xmm3 + movdqu 224(%ecx), %xmm0 +L_AES_XTS_encrypt_aesni_enclast_3: + aesenclast %xmm0, %xmm3 + pxor %xmm2, %xmm3 + movl 32(%esp), %eax + movdqu %xmm3, (%eax,%edi,1) + movdqa %xmm2, %xmm4 + psrad $31, %xmm4 + pslld $0x01, %xmm2 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm6, %xmm4 + pxor %xmm4, %xmm2 + addl $16, %edi + jmp L_AES_XTS_encrypt_aesni_loop +L_AES_XTS_encrypt_aesni_loop_done: + movl 36(%esp), %eax + cmpl %eax, %edi + je L_AES_XTS_encrypt_aesni_done + subl $16, %edi + movl 32(%esp), %eax + movdqu (%eax,%edi,1), %xmm5 + addl $16, %edi + movdqu %xmm5, (%esp) + xorl %edx, %edx +L_AES_XTS_encrypt_aesni_cts: + movzbl (%esp,%edx,1), %ecx + movl 28(%esp), %eax + movzbl (%eax,%edi,1), %ebx + movl 32(%esp), %eax + movb %cl, (%eax,%edi,1) + movb %bl, (%esp,%edx,1) + incl %edi + incl %edx + movl 36(%esp), %eax + cmpl %eax, %edi + jl L_AES_XTS_encrypt_aesni_cts + subl %edx, %edi + movdqu (%esp), %xmm3 + subl $16, %edi + pxor %xmm2, %xmm3 + movl 44(%esp), %ecx + pxor (%ecx), %xmm3 + movdqu 16(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 32(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 48(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 64(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 80(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 96(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 112(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 128(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 144(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + cmpl $11, 52(%esp) + movdqu 160(%ecx), %xmm0 + jl L_AES_XTS_encrypt_aesni_enclast_4 + aesenc %xmm0, %xmm3 + movdqu 176(%ecx), %xmm1 + aesenc %xmm1, %xmm3 + cmpl $13, 52(%esp) + movdqu 192(%ecx), %xmm0 + jl L_AES_XTS_encrypt_aesni_enclast_4 + aesenc %xmm0, %xmm3 + movdqu 208(%ecx), %xmm1 + aesenc %xmm1, %xmm3 + movdqu 224(%ecx), %xmm0 +L_AES_XTS_encrypt_aesni_enclast_4: + aesenclast %xmm0, %xmm3 + pxor %xmm2, %xmm3 + movl 32(%esp), %eax + movdqu %xmm3, (%eax,%edi,1) +L_AES_XTS_encrypt_aesni_done: + addl $16, %esp + popl %ebx + popl %edi + ret +.size AES_XTS_encrypt_aesni,.-AES_XTS_encrypt_aesni + # void AES_XTS_encrypt_update_aesni(const unsigned char* in, + # unsigned char* out, word32 sz, const unsigned char* key, + # unsigned char* i, int nr); Tweak is read (already encrypted) from *i + # and the advanced tweak written back to *i. +.text +.globl AES_XTS_encrypt_update_aesni +.type AES_XTS_encrypt_update_aesni,@function +.align 16 +AES_XTS_encrypt_update_aesni: + pushl %edi + pushl %ebx + subl $16, %esp + movl $0x87, (%esp) + movl $0x01, 4(%esp) + movl $0x01, 8(%esp) + movl $0x01, 12(%esp) + movdqu (%esp), %xmm6 + movl 44(%esp), %eax + movdqu (%eax), %xmm2 + xorl %edi, %edi + movl 36(%esp), %edx + andl $0xfffffff0, %edx +L_AES_XTS_encrypt_update_aesni_loop: + cmpl %edx, %edi + jge L_AES_XTS_encrypt_update_aesni_loop_done + movl 28(%esp), %eax + movdqu (%eax,%edi,1), %xmm3 + pxor %xmm2, %xmm3 + movl 40(%esp), %ecx + pxor (%ecx), %xmm3 + movdqu 16(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 32(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 48(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 64(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 80(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 96(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 112(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 128(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 144(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + cmpl $11, 48(%esp) + movdqu 160(%ecx), %xmm0 + jl L_AES_XTS_encrypt_update_aesni_enclast_5 + aesenc %xmm0, %xmm3 + movdqu 176(%ecx), %xmm1 + aesenc %xmm1, %xmm3 + cmpl $13, 48(%esp) + movdqu 192(%ecx), %xmm0 + jl L_AES_XTS_encrypt_update_aesni_enclast_5 + aesenc %xmm0, %xmm3 + movdqu 208(%ecx), %xmm1 + aesenc %xmm1, %xmm3 + movdqu 224(%ecx), %xmm0 +L_AES_XTS_encrypt_update_aesni_enclast_5: + aesenclast %xmm0, %xmm3 + pxor %xmm2, %xmm3 + movl 32(%esp), %eax + movdqu %xmm3, (%eax,%edi,1) + movdqa %xmm2, %xmm4 + psrad $31, %xmm4 + pslld $0x01, %xmm2 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm6, %xmm4 + pxor %xmm4, %xmm2 + addl $16, %edi + jmp L_AES_XTS_encrypt_update_aesni_loop +L_AES_XTS_encrypt_update_aesni_loop_done: + movl 36(%esp), %eax + cmpl %eax, %edi + je L_AES_XTS_encrypt_update_aesni_done + subl $16, %edi + movl 32(%esp), %eax + movdqu (%eax,%edi,1), %xmm5 + addl $16, %edi + movdqu %xmm5, (%esp) + xorl %edx, %edx +L_AES_XTS_encrypt_update_aesni_cts: + movzbl (%esp,%edx,1), %ecx + movl 28(%esp), %eax + movzbl (%eax,%edi,1), %ebx + movl 32(%esp), %eax + movb %cl, (%eax,%edi,1) + movb %bl, (%esp,%edx,1) + incl %edi + incl %edx + movl 36(%esp), %eax + cmpl %eax, %edi + jl L_AES_XTS_encrypt_update_aesni_cts + subl %edx, %edi + movdqu (%esp), %xmm3 + subl $16, %edi + pxor %xmm2, %xmm3 + movl 40(%esp), %ecx + pxor (%ecx), %xmm3 + movdqu 16(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 32(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 48(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 64(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 80(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 96(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 112(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 128(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + movdqu 144(%ecx), %xmm0 + aesenc %xmm0, %xmm3 + cmpl $11, 48(%esp) + movdqu 160(%ecx), %xmm0 + jl L_AES_XTS_encrypt_update_aesni_enclast_6 + aesenc %xmm0, %xmm3 + movdqu 176(%ecx), %xmm1 + aesenc %xmm1, %xmm3 + cmpl $13, 48(%esp) + movdqu 192(%ecx), %xmm0 + jl L_AES_XTS_encrypt_update_aesni_enclast_6 + aesenc %xmm0, %xmm3 + movdqu 208(%ecx), %xmm1 + aesenc %xmm1, %xmm3 + movdqu 224(%ecx), %xmm0 +L_AES_XTS_encrypt_update_aesni_enclast_6: + aesenclast %xmm0, %xmm3 + pxor %xmm2, %xmm3 + movl 32(%esp), %eax + movdqu %xmm3, (%eax,%edi,1) +L_AES_XTS_encrypt_update_aesni_done: + movl 44(%esp), %eax + movdqu %xmm2, (%eax) + addl $16, %esp + popl %ebx + popl %edi + ret +.size AES_XTS_encrypt_update_aesni,.-AES_XTS_encrypt_update_aesni + # void AES_XTS_decrypt_aesni(const unsigned char* in, unsigned char* out, + # word32 sz, const unsigned char* i, const unsigned char* key, + # const unsigned char* key2, int nr); +.text +.globl AES_XTS_decrypt_aesni +.type AES_XTS_decrypt_aesni,@function +.align 16 +AES_XTS_decrypt_aesni: + pushl %edi + pushl %ebx + subl $16, %esp + movl $0x87, (%esp) + movl $0x01, 4(%esp) + movl $0x01, 8(%esp) + movl $0x01, 12(%esp) + movdqu (%esp), %xmm6 + movl 40(%esp), %eax + movdqu (%eax), %xmm2 + movl 48(%esp), %ecx + pxor (%ecx), %xmm2 + movdqu 16(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 32(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 48(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 64(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 80(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 96(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 112(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 128(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + movdqu 144(%ecx), %xmm0 + aesenc %xmm0, %xmm2 + cmpl $11, 52(%esp) + movdqu 160(%ecx), %xmm0 + jl L_AES_XTS_decrypt_aesni_enclast_7 + aesenc %xmm0, %xmm2 + movdqu 176(%ecx), %xmm1 + aesenc %xmm1, %xmm2 + cmpl $13, 52(%esp) + movdqu 192(%ecx), %xmm0 + jl L_AES_XTS_decrypt_aesni_enclast_7 + aesenc %xmm0, %xmm2 + movdqu 208(%ecx), %xmm1 + aesenc %xmm1, %xmm2 + movdqu 224(%ecx), %xmm0 +L_AES_XTS_decrypt_aesni_enclast_7: + aesenclast %xmm0, %xmm2 + xorl %edi, %edi + movl 36(%esp), %eax + movl %eax, %edx + andl $0xfffffff0, %edx + cmpl %eax, %edx + je L_AES_XTS_decrypt_aesni_bound + subl $16, %edx +L_AES_XTS_decrypt_aesni_bound: +L_AES_XTS_decrypt_aesni_loop: + cmpl %edx, %edi + jge L_AES_XTS_decrypt_aesni_loop_done + movl 28(%esp), %eax + movdqu (%eax,%edi,1), %xmm3 + pxor %xmm2, %xmm3 + movl 44(%esp), %ecx + pxor (%ecx), %xmm3 + movdqu 16(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 32(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 48(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 64(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 80(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 96(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 112(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 128(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 144(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + cmpl $11, 52(%esp) + movdqu 160(%ecx), %xmm0 + jl L_AES_XTS_decrypt_aesni_declast_8 + aesdec %xmm0, %xmm3 + movdqu 176(%ecx), %xmm1 + aesdec %xmm1, %xmm3 + cmpl $13, 52(%esp) + movdqu 192(%ecx), %xmm0 + jl L_AES_XTS_decrypt_aesni_declast_8 + aesdec %xmm0, %xmm3 + movdqu 208(%ecx), %xmm1 + aesdec %xmm1, %xmm3 + movdqu 224(%ecx), %xmm0 +L_AES_XTS_decrypt_aesni_declast_8: + aesdeclast %xmm0, %xmm3 + pxor %xmm2, %xmm3 + movl 32(%esp), %eax + movdqu %xmm3, (%eax,%edi,1) + movdqa %xmm2, %xmm4 + psrad $31, %xmm4 + pslld $0x01, %xmm2 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm6, %xmm4 + pxor %xmm4, %xmm2 + addl $16, %edi + jmp L_AES_XTS_decrypt_aesni_loop +L_AES_XTS_decrypt_aesni_loop_done: + movl 36(%esp), %eax + cmpl %eax, %edi + je L_AES_XTS_decrypt_aesni_done + movdqa %xmm2, %xmm4 + movdqa %xmm2, %xmm5 + psrad $31, %xmm4 + pslld $0x01, %xmm5 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm6, %xmm4 + pxor %xmm4, %xmm5 + movl 28(%esp), %eax + movdqu (%eax,%edi,1), %xmm3 + pxor %xmm5, %xmm3 + movl 44(%esp), %ecx + pxor (%ecx), %xmm3 + movdqu 16(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 32(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 48(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 64(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 80(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 96(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 112(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 128(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 144(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + cmpl $11, 52(%esp) + movdqu 160(%ecx), %xmm0 + jl L_AES_XTS_decrypt_aesni_declast_9 + aesdec %xmm0, %xmm3 + movdqu 176(%ecx), %xmm1 + aesdec %xmm1, %xmm3 + cmpl $13, 52(%esp) + movdqu 192(%ecx), %xmm0 + jl L_AES_XTS_decrypt_aesni_declast_9 + aesdec %xmm0, %xmm3 + movdqu 208(%ecx), %xmm1 + aesdec %xmm1, %xmm3 + movdqu 224(%ecx), %xmm0 +L_AES_XTS_decrypt_aesni_declast_9: + aesdeclast %xmm0, %xmm3 + pxor %xmm5, %xmm3 + movdqu %xmm3, (%esp) + addl $16, %edi + xorl %edx, %edx +L_AES_XTS_decrypt_aesni_cts: + movzbl (%esp,%edx,1), %ecx + movl 28(%esp), %eax + movzbl (%eax,%edi,1), %ebx + movl 32(%esp), %eax + movb %cl, (%eax,%edi,1) + movb %bl, (%esp,%edx,1) + incl %edi + incl %edx + movl 36(%esp), %eax + cmpl %eax, %edi + jl L_AES_XTS_decrypt_aesni_cts + subl %edx, %edi + movdqu (%esp), %xmm3 + pxor %xmm2, %xmm3 + movl 44(%esp), %ecx + pxor (%ecx), %xmm3 + movdqu 16(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 32(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 48(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 64(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 80(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 96(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 112(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 128(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 144(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + cmpl $11, 52(%esp) + movdqu 160(%ecx), %xmm0 + jl L_AES_XTS_decrypt_aesni_declast_10 + aesdec %xmm0, %xmm3 + movdqu 176(%ecx), %xmm1 + aesdec %xmm1, %xmm3 + cmpl $13, 52(%esp) + movdqu 192(%ecx), %xmm0 + jl L_AES_XTS_decrypt_aesni_declast_10 + aesdec %xmm0, %xmm3 + movdqu 208(%ecx), %xmm1 + aesdec %xmm1, %xmm3 + movdqu 224(%ecx), %xmm0 +L_AES_XTS_decrypt_aesni_declast_10: + aesdeclast %xmm0, %xmm3 + pxor %xmm2, %xmm3 + subl $16, %edi + movl 32(%esp), %eax + movdqu %xmm3, (%eax,%edi,1) +L_AES_XTS_decrypt_aesni_done: + addl $16, %esp + popl %ebx + popl %edi + ret +.size AES_XTS_decrypt_aesni,.-AES_XTS_decrypt_aesni + # void AES_XTS_decrypt_update_aesni(const unsigned char* in, + # unsigned char* out, word32 sz, const unsigned char* key, + # unsigned char* i, int nr); Tweak is read from *i and the advanced + # tweak written back to *i. +.text +.globl AES_XTS_decrypt_update_aesni +.type AES_XTS_decrypt_update_aesni,@function +.align 16 +AES_XTS_decrypt_update_aesni: + pushl %edi + pushl %ebx + subl $16, %esp + movl $0x87, (%esp) + movl $0x01, 4(%esp) + movl $0x01, 8(%esp) + movl $0x01, 12(%esp) + movdqu (%esp), %xmm6 + movl 44(%esp), %eax + movdqu (%eax), %xmm2 + xorl %edi, %edi + movl 36(%esp), %eax + movl %eax, %edx + andl $0xfffffff0, %edx + cmpl %eax, %edx + je L_AES_XTS_decrypt_update_aesni_bound + subl $16, %edx +L_AES_XTS_decrypt_update_aesni_bound: +L_AES_XTS_decrypt_update_aesni_loop: + cmpl %edx, %edi + jge L_AES_XTS_decrypt_update_aesni_loop_done + movl 28(%esp), %eax + movdqu (%eax,%edi,1), %xmm3 + pxor %xmm2, %xmm3 + movl 40(%esp), %ecx + pxor (%ecx), %xmm3 + movdqu 16(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 32(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 48(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 64(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 80(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 96(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 112(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 128(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 144(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + cmpl $11, 48(%esp) + movdqu 160(%ecx), %xmm0 + jl L_AES_XTS_decrypt_update_aesni_declast_11 + aesdec %xmm0, %xmm3 + movdqu 176(%ecx), %xmm1 + aesdec %xmm1, %xmm3 + cmpl $13, 48(%esp) + movdqu 192(%ecx), %xmm0 + jl L_AES_XTS_decrypt_update_aesni_declast_11 + aesdec %xmm0, %xmm3 + movdqu 208(%ecx), %xmm1 + aesdec %xmm1, %xmm3 + movdqu 224(%ecx), %xmm0 +L_AES_XTS_decrypt_update_aesni_declast_11: + aesdeclast %xmm0, %xmm3 + pxor %xmm2, %xmm3 + movl 32(%esp), %eax + movdqu %xmm3, (%eax,%edi,1) + movdqa %xmm2, %xmm4 + psrad $31, %xmm4 + pslld $0x01, %xmm2 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm6, %xmm4 + pxor %xmm4, %xmm2 + addl $16, %edi + jmp L_AES_XTS_decrypt_update_aesni_loop +L_AES_XTS_decrypt_update_aesni_loop_done: + movl 36(%esp), %eax + cmpl %eax, %edi + je L_AES_XTS_decrypt_update_aesni_done + movdqa %xmm2, %xmm4 + movdqa %xmm2, %xmm5 + psrad $31, %xmm4 + pslld $0x01, %xmm5 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm6, %xmm4 + pxor %xmm4, %xmm5 + movl 28(%esp), %eax + movdqu (%eax,%edi,1), %xmm3 + pxor %xmm5, %xmm3 + movl 40(%esp), %ecx + pxor (%ecx), %xmm3 + movdqu 16(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 32(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 48(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 64(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 80(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 96(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 112(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 128(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 144(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + cmpl $11, 48(%esp) + movdqu 160(%ecx), %xmm0 + jl L_AES_XTS_decrypt_update_aesni_declast_12 + aesdec %xmm0, %xmm3 + movdqu 176(%ecx), %xmm1 + aesdec %xmm1, %xmm3 + cmpl $13, 48(%esp) + movdqu 192(%ecx), %xmm0 + jl L_AES_XTS_decrypt_update_aesni_declast_12 + aesdec %xmm0, %xmm3 + movdqu 208(%ecx), %xmm1 + aesdec %xmm1, %xmm3 + movdqu 224(%ecx), %xmm0 +L_AES_XTS_decrypt_update_aesni_declast_12: + aesdeclast %xmm0, %xmm3 + pxor %xmm5, %xmm3 + movdqu %xmm3, (%esp) + addl $16, %edi + xorl %edx, %edx +L_AES_XTS_decrypt_update_aesni_cts: + movzbl (%esp,%edx,1), %ecx + movl 28(%esp), %eax + movzbl (%eax,%edi,1), %ebx + movl 32(%esp), %eax + movb %cl, (%eax,%edi,1) + movb %bl, (%esp,%edx,1) + incl %edi + incl %edx + movl 36(%esp), %eax + cmpl %eax, %edi + jl L_AES_XTS_decrypt_update_aesni_cts + subl %edx, %edi + movdqu (%esp), %xmm3 + pxor %xmm2, %xmm3 + movl 40(%esp), %ecx + pxor (%ecx), %xmm3 + movdqu 16(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 32(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 48(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 64(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 80(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 96(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 112(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 128(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + movdqu 144(%ecx), %xmm0 + aesdec %xmm0, %xmm3 + cmpl $11, 48(%esp) + movdqu 160(%ecx), %xmm0 + jl L_AES_XTS_decrypt_update_aesni_declast_13 + aesdec %xmm0, %xmm3 + movdqu 176(%ecx), %xmm1 + aesdec %xmm1, %xmm3 + cmpl $13, 48(%esp) + movdqu 192(%ecx), %xmm0 + jl L_AES_XTS_decrypt_update_aesni_declast_13 + aesdec %xmm0, %xmm3 + movdqu 208(%ecx), %xmm1 + aesdec %xmm1, %xmm3 + movdqu 224(%ecx), %xmm0 +L_AES_XTS_decrypt_update_aesni_declast_13: + aesdeclast %xmm0, %xmm3 + pxor %xmm2, %xmm3 + subl $16, %edi + movl 32(%esp), %eax + movdqu %xmm3, (%eax,%edi,1) +L_AES_XTS_decrypt_update_aesni_done: + movl 44(%esp), %eax + movdqu %xmm2, (%eax) + addl $16, %esp + popl %ebx + popl %edi + ret +.size AES_XTS_decrypt_update_aesni,.-AES_XTS_decrypt_update_aesni +#endif /* WOLFSSL_X86_BUILD */ +#endif /* WOLFSSL_AES_XTS */ + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/wolfcrypt/src/cpuid.c b/wolfcrypt/src/cpuid.c index b1ac7e1136f..69aa26a5dc5 100644 --- a/wolfcrypt/src/cpuid.c +++ b/wolfcrypt/src/cpuid.c @@ -75,6 +75,20 @@ #define cpuid(a,b,c) __cpuidex((int*)a,b,c) #endif /* _MSC_VER */ + /* On the 32-bit x86 Linux kernel (WOLFSSL_LINUXKM + WOLFSSL_X86_BUILD), + * (pulled in via processor.h -> math_emu.h -> + * ptrace.h on i386 only) #defines EAX/EBX/ECX/EDX as ptrace register + * indices (EAX=6, EBX=0, ECX=1, EDX=2). We reuse these names as + * cpuid()-result array indices (0..3), so the clash is a real bug, not a + * cosmetic redefinition: leaving the kernel's values in place would + * otherwise index reg[6] (past "unsigned int reg[5]") and mis-compare the + * vendor string. #undef so our indices win. No-op where the names are + * not predefined (x86_64 + * kernel, all user-space), so i386-kernel codegen is byte-identical. */ + #undef EAX + #undef EBX + #undef ECX + #undef EDX #define EAX 0 #define EBX 1 #define ECX 2 diff --git a/wolfcrypt/src/dh.c b/wolfcrypt/src/dh.c index 55d82c0e233..a537eceb4d6 100644 --- a/wolfcrypt/src/dh.c +++ b/wolfcrypt/src/dh.c @@ -1424,8 +1424,18 @@ int wc_DhGeneratePublic(DhKey* key, byte* priv, word32 privSz, #if FIPS_VERSION_GE(5,0) || defined(WOLFSSL_VALIDATE_DH_KEYGEN) if (ret == 0) ret = _ffc_validate_public_key(key, pub, *pubSz, NULL, 0, 0); - if (ret == 0) - ret = _ffc_pairwise_consistency_test(key, pub, *pubSz, priv, privSz); + if (ret == 0) { + /* FFC key-pair PCT per SP 800-56A r3 sec 5.6.2.1.4, required + * after KeyGen by FIPS 140-3 IG 10.3.B. Under FIPS, failure is + * remapped to DH_PCT_E so DEGRADE_STATE moves + * FIPS_CAST_DH_PRIMITIVE_Z to the error state. */ + ret = _ffc_pairwise_consistency_test(key, pub, *pubSz, priv, + privSz); + #ifdef HAVE_FIPS + if (ret != 0) + ret = DH_PCT_E; + #endif + } #endif /* FIPS V5 or later || WOLFSSL_VALIDATE_DH_KEYGEN */ return ret; @@ -1448,8 +1458,18 @@ static int wc_DhGenerateKeyPair_Sync(DhKey* key, WC_RNG* rng, #if FIPS_VERSION_GE(5,0) || defined(WOLFSSL_VALIDATE_DH_KEYGEN) if (ret == 0) ret = _ffc_validate_public_key(key, pub, *pubSz, NULL, 0, 0); - if (ret == 0) - ret = _ffc_pairwise_consistency_test(key, pub, *pubSz, priv, *privSz); + if (ret == 0) { + /* FFC key-pair PCT per SP 800-56A r3 sec 5.6.2.1.4, required + * after KeyGen by FIPS 140-3 IG 10.3.B. Under FIPS, failure is + * remapped to DH_PCT_E so DEGRADE_STATE moves + * FIPS_CAST_DH_PRIMITIVE_Z to the error state. */ + ret = _ffc_pairwise_consistency_test(key, pub, *pubSz, priv, + *privSz); + #ifdef HAVE_FIPS + if (ret != 0) + ret = DH_PCT_E; + #endif + } #endif /* FIPS V5 or later || WOLFSSL_VALIDATE_DH_KEYGEN */ return ret; diff --git a/wolfcrypt/src/error.c b/wolfcrypt/src/error.c index 0f70a84cc8b..be0acd033d3 100644 --- a/wolfcrypt/src/error.c +++ b/wolfcrypt/src/error.c @@ -692,6 +692,21 @@ const char* wc_GetErrorString(int error) case SLH_DSA_KAT_FIPS_E: return "SLH-DSA Known Answer Test check FIPS error"; + case SLH_DSA_PCT_E: + return "wolfcrypt SLH-DSA Pairwise Consistency Test Failure"; + + case CMAC_KAT_FIPS_E: + return "AES-CMAC Known Answer Test FIPS error"; + + case SHAKE_KAT_FIPS_E: + return "SHAKE Known Answer Test FIPS error"; + + case DH_PCT_E: + return "wolfcrypt DH (FFC) Pairwise Consistency Test Failure"; + + case AES_KW_KAT_FIPS_E: + return "AES-KW Known Answer Test FIPS error"; + case SEQ_OVERFLOW_E: return "Sequence counter would overflow"; diff --git a/wolfcrypt/src/ge_operations.c b/wolfcrypt/src/ge_operations.c index 5d3157628d2..3c2eea5160a 100644 --- a/wolfcrypt/src/ge_operations.c +++ b/wolfcrypt/src/ge_operations.c @@ -10196,9 +10196,13 @@ void ge_tobytes_nct(unsigned char *s,const ge_p2 *h) /* if HAVE_ED25519 but not HAVE_CURVE25519, and an asm implementation is built, * then curve25519() won't get its WOLFSSL_LOCAL attribute unless we dummy-call * it here. - */ + * Requires the asm port to emit curve25519() when X25519 is off -- true for + * x86 and 64-bit ARM, but the 32-bit ARM port gates curve25519() on + * HAVE_CURVE25519, so the dummy-call would be an undefined symbol there. + * Exclude arm32 armasm (RFC 7748 / SP 800-186 X25519). */ #if defined(CURVED25519_ASM) && defined(WOLFSSL_API_PREFIX_MAP) && \ - !defined(HAVE_CURVE25519) && !defined(FREESCALE_LTC_ECC) + !defined(HAVE_CURVE25519) && !defined(FREESCALE_LTC_ECC) && \ + (!defined(WOLFSSL_ARMASM) || defined(__aarch64__)) WOLFSSL_LOCAL void _wc_curve25519_dummy(void); WOLFSSL_LOCAL void _wc_curve25519_dummy(void) { (void)curve25519((byte *)0, (byte *)0, (const byte *)0); diff --git a/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S index 6d2f0172994..fd817fd2471 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S @@ -27,6 +27,14 @@ #include +/* Honor WC_SHA3_NO_ASM as sha3.c does (set by KERNEL_MODE_DEFAULTS): suppress + * this NEON BlockSha3 so it doesn't multiply-define against sha3.c's C BlockSha3 + * on arm32. arm64's asm is gated on WOLFSSL_ARMASM_CRYPTO_SHA3 so never hit this; + * the arm32 NEON path was only gated on WOLFSSL_ARMASM_NO_NEON. FIPS 202. */ +#ifdef WC_SHA3_NO_ASM + #undef WOLFSSL_ARMASM +#endif + #ifdef WOLFSSL_ARMASM #if !defined(__aarch64__) && !defined(WOLFSSL_ARMASM_THUMB2) #ifndef WOLFSSL_ARMASM_INLINE diff --git a/wolfcrypt/src/random.c b/wolfcrypt/src/random.c index eb519650248..cfd415f53b5 100644 --- a/wolfcrypt/src/random.c +++ b/wolfcrypt/src/random.c @@ -418,6 +418,20 @@ int wc_DrbgState_MutexFree(void) static int LockDrbgState(void) { #ifndef SINGLE_THREADED +#ifndef WOLFSSL_MUTEX_INITIALIZER + /* drbgStateMutex needs run-time init on platforms without a static mutex + * initializer (e.g. Windows CRITICAL_SECTION). The FIPS pre-operational + * self-test locks the DRBG from a load-time constructor that runs before + * wolfCrypt_Init(), and locking an uninitialized CRITICAL_SECTION is UB + * (faults on the degraded CAST re-run). Init on demand here -- idempotent, + * and the first lock is the single-threaded POST so it is race-free. + * Guards the SP 800-90A DRBG enable/disable state. */ + { + int initRet = wc_DrbgState_MutexInit(); + if (initRet != 0) + return initRet; + } +#endif return wc_LockMutex(&drbgStateMutex); #else return 0; @@ -1825,7 +1839,7 @@ static int _InitRng(WC_RNG* rng, byte* nonce, word32 nonceSz, #endif #ifdef HAVE_INTEL_RDRAND - /* if CPU supports RDRAND, use it directly and by-pass DRBG init */ + /* if CPU supports RDRAND, use it directly and bypass DRBG init */ if (IS_INTEL_RDRAND(intel_flags)) { #ifdef HAVE_HASHDRBG rng->status = DRBG_OK; @@ -3619,23 +3633,70 @@ int wc_FreeNetRandom(void) #if defined(HAVE_INTEL_RDRAND) || defined(HAVE_INTEL_RDSEED) || \ defined(HAVE_AMD_RDSEED) -#ifdef WOLFSSL_ASYNC_CRYPT - /* need more retries if multiple cores */ - #define INTELRD_RETRY (32 * 8) -#else - #define INTELRD_RETRY 32 +/* Bounds the RDRAND/RDSEED retry loop below. RDSEED legitimately sets CF=0 + * until the on-chip entropy is replenished; per Intel's DRNG guidance software + * must retry. Overridable via -D for OEs needing a different budget. */ +#ifndef INTELRD_RETRY + #if defined(WOLFSSL_LINUXKM) + /* Linux kernel module: boot-time FIPS CASTs poll RDSEED during + * module_init while the RNG is warming up and RDSEED is contended + * (especially virtualized, funnelled to a busy host CPU). CF=0 then + * far exceeds the 32-retry userspace default, making + * --enable-{amd,intel}rdseed modules fail the ECDSA CAST and refuse to + * load. The budget is a ceiling, not a fixed cost -- RDSEED succeeds in + * ~1 read once entropy is up, so post-boot use is unaffected. */ + #define INTELRD_RETRY 100000 + #elif defined(WOLFSSL_ASYNC_CRYPT) + /* need more retries if multiple cores */ + #define INTELRD_RETRY (32 * 8) + #else + #define INTELRD_RETRY 32 + #endif #endif #if defined(HAVE_INTEL_RDSEED) || defined(HAVE_AMD_RDSEED) +/* Vendor tag for the optional FIPS_CODE_REVIEW evidence prints below. Intel + * and AMD RDSEED share the one x86 RDSEED primitive; exactly one of + * HAVE_INTEL_RDSEED / HAVE_AMD_RDSEED is set per OE, so this resolves cleanly. */ +#if defined(HAVE_AMD_RDSEED) +#define WC_RDSEED_VENDOR "AMD" +#else +#define WC_RDSEED_VENDOR "Intel" +#endif + #ifndef USE_INTEL_INTRINSICS - /* return 0 on success */ + /* return 0 on success. Per the E27 Public Use Document (CMVP entropy + * disclosure), wolfSSL polls the x86 Carry Flag to check each RDSEED: + * CF=1 -> dest holds 64 bits of conditioned entropy, usable; + * CF=0 -> seed pool empty this cycle, dest unusable, must retry + * (IntelRDseed64_r below loops up to INTELRD_RETRY times). + * "setc %1" materialises CF into (ok); the "=qm" constraint pins it to a + * q-class register so setc can target its low byte. */ static WC_INLINE int IntelRDseed64(word64* seed) { unsigned char ok; __asm__ volatile("rdseed %0; setc %1":"=r"(*seed), "=qm"(ok)); +#ifdef FIPS_CODE_REVIEW + /* One-shot tracer: confirm this path is alive on the first call, then + * go silent -- RDSEED fires per 64-bit chunk, so per-chunk prints would + * flood the sanity-log. Per-request volume is shown by the outer + * wc_GenerateSeed_IntelRD print below. */ + { + static int printed_asm = 0; + if (!printed_asm) { + printed_asm = 1; + printf("FIPS_CODE_REVIEW IntelRDseed64 [asm path, %s] " + "(one-shot): delivered %u bits, CF=%u\n", + WC_RDSEED_VENDOR, (unsigned)(sizeof(word64) * 8u), + (unsigned)ok); + } + } +#endif + /* CF set (ok != 0) -> 64 bits captured in *seed, return 0; CF clear -> + * sample invalid, return -1 so IntelRDseed64_r() retries. */ return (ok) ? 0 : -1; } @@ -3643,7 +3704,14 @@ int wc_FreeNetRandom(void) /* The compiler Visual Studio uses does not allow inline assembly. * It does allow for Intel intrinsic functions. */ - /* return 0 on success */ + /* return 0 on success. + * + * E27 PUD (NIST CMVP) cited path: _rdseed64_step is the compiler intrinsic + * front-end for the same RDSEED instruction documented in the asm path + * above. The intrinsic returns 1 when CF was set by the underlying RDSEED + * (i.e. the 64-bit conditioned entropy sample in *seed is valid this + * cycle) and 0 when CF was clear (caller MUST retry; *seed MUST NOT be + * consumed). */ # ifdef __GNUC__ __attribute__((target("rdseed"))) # endif @@ -3652,6 +3720,23 @@ int wc_FreeNetRandom(void) int ok; ok = _rdseed64_step((unsigned long long*) seed); +#ifdef FIPS_CODE_REVIEW + /* One-shot tracer; see asm-path comment above for rationale. */ + { + static int printed_intrinsic = 0; + if (!printed_intrinsic) { + printed_intrinsic = 1; + printf("FIPS_CODE_REVIEW IntelRDseed64 [intrinsic path, %s] " + "(one-shot): delivered %u bits, " + "intrinsic_ret=%d (== CF)\n", + WC_RDSEED_VENDOR, (unsigned)(sizeof(word64) * 8u), ok); + } + } +#endif + /* intrinsic_ret == 1 -> CF was set, 64 bits of conditioned entropy + * captured in *seed; return 0 to signal success to the retry wrapper. + * intrinsic_ret == 0 -> CF was clear; return -1 so the retry wrapper + * re-attempts. */ return (ok) ? 0 : -1; } @@ -3664,6 +3749,12 @@ static WC_INLINE int IntelRDseed64_r(word64* rnd) for (i = 0; i < INTELRD_RETRY; i++) { if (IntelRDseed64(rnd) == 0) return 0; + /* Give the hardware entropy source a chance to replenish between + * attempts (Intel DRNG guidance) and yield the CPU when it is safe to + * block. WC_RELAX_LONG_LOOP() is a no-op where blocking is unsafe, so + * this only ever helps -- e.g. it lets other work (and the entropy + * conditioner) run during a long boot-time RDSEED starvation. */ + WC_RELAX_LONG_LOOP(); } return -1; } @@ -3677,6 +3768,19 @@ static int wc_GenerateSeed_IntelRD(OS_Seed* os, byte* output, word32 sz) (void)os; +#ifdef FIPS_CODE_REVIEW + /* Each conditioned entropy sample produced by IntelRDseed64() is 64 bits + * wide. This entry-level trace makes the per-request entropy volume + * obvious in evidence logs: sz bytes requested -> ceil(sz/8) RDSEED + * invocations expected (plus the two-or-three sanity-status reads on the + * first ever call into this function). */ + printf("FIPS_CODE_REVIEW wc_GenerateSeed_IntelRD [%s]: " + "requested %u bytes = %u bits " + "(expect %u RDSEED 64-bit samples)\n", + WC_RDSEED_VENDOR, (unsigned)sz, (unsigned)(sz * 8u), + (unsigned)((sz + sizeof(word64) - 1u) / sizeof(word64))); +#endif + if (!IS_INTEL_RDSEED(intel_flags)) return -1; diff --git a/wolfcrypt/src/rsa.c b/wolfcrypt/src/rsa.c index cd66eab2efd..d1be58cfa5f 100644 --- a/wolfcrypt/src/rsa.c +++ b/wolfcrypt/src/rsa.c @@ -5155,9 +5155,12 @@ static WC_INLINE int RsaSizeCheck(int size) } #ifdef HAVE_FIPS - /* Key size requirements for CAVP */ + /* Approved RSA key sizes per FIPS 186-5 sec 5.1 and NIST SP 800-131Ar2 + * sec 4 Table 2 - 2048, 3072, 4096 only (1024 disallowed since + * 2014-01-01). wc_MakeRsaKey_fips gates on WC_RSA_FIPS_GEN_MIN, but + * RsaSizeCheck is also reached by internal paths bypassing that wrapper - + * defense-in-depth removal of 1024 here closes the gap. */ switch (size) { - case 1024: case 2048: case 3072: case 4096: @@ -5417,6 +5420,18 @@ int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng) goto out; } +#ifdef HAVE_FIPS + /* FIPS 186-5 sec 5.2: 2^16 + 1 <= e < 2^256, e odd. The non-FIPS check + * above only requires e >= 3 odd. e is a long (<= 64 bits) so the upper + * bound holds structurally; enforce the 65537 lower bound explicitly. + * Defense-in-depth - FIPS callers conventionally pass e = 65537 + * (RSA_F4). */ + if (e < 65537L) { + err = BAD_FUNC_ARG; + goto out; + } +#endif + #if defined(WOLFSSL_CRYPTOCELL) err = cc310_RSA_GenerateKeyPair(key, size, e); goto out; diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c index e0aafdcb2ef..7fb1891ce06 100644 --- a/wolfcrypt/src/sha256.c +++ b/wolfcrypt/src/sha256.c @@ -1231,14 +1231,31 @@ int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId) return ret; } +/* 32-bit ARM SHA-256 NEON/crypto transforms use vector registers, so in a + * kernel module they need SAVE/RESTORE_VECTOR_REGISTERS (kernel_neon_begin/ + * end) or SIMD faults "undefined instruction". !__aarch64__-scoped so + * aarch64 is unchanged. (FIPS 180-4 SHA-256.) */ +#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(__aarch64__) && \ + !defined(WOLFSSL_ARMASM_THUMB2) && !defined(WOLFSSL_ARMASM_NO_NEON) + #define WC_SHA256_ARM_SVR_BEGIN(fail) SAVE_VECTOR_REGISTERS(fail) + #define WC_SHA256_ARM_SVR_END() RESTORE_VECTOR_REGISTERS() +#else + #define WC_SHA256_ARM_SVR_BEGIN(fail) WC_DO_NOTHING + #define WC_SHA256_ARM_SVR_END() WC_DO_NOTHING +#endif + static WC_INLINE int Transform_Sha256(wc_Sha256* sha256, const byte* data) { #if defined(WOLFSSL_ARMASM_THUMB2) || defined(WOLFSSL_ARMASM_NO_NEON) Transform_Sha256_Len_base(sha256, data, WC_SHA256_BLOCK_SIZE); -#elif defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - Transform_Sha256_Len_neon(sha256, data, WC_SHA256_BLOCK_SIZE); #else + WC_SHA256_ARM_SVR_BEGIN(return _svr_ret;); + #if defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) + Transform_Sha256_Len_neon(sha256, data, WC_SHA256_BLOCK_SIZE); + #else Transform_Sha256_Len_crypto(sha256, data, WC_SHA256_BLOCK_SIZE); + #endif + WC_SHA256_ARM_SVR_END(); #endif return 0; } @@ -1248,10 +1265,14 @@ static WC_INLINE int Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, { #if defined(WOLFSSL_ARMASM_THUMB2) || defined(WOLFSSL_ARMASM_NO_NEON) Transform_Sha256_Len_base(sha256, data, len); -#elif defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - Transform_Sha256_Len_neon(sha256, data, len); #else + WC_SHA256_ARM_SVR_BEGIN(return _svr_ret;); + #if defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) + Transform_Sha256_Len_neon(sha256, data, len); + #else Transform_Sha256_Len_crypto(sha256, data, len); + #endif + WC_SHA256_ARM_SVR_END(); #endif return 0; } diff --git a/wolfcrypt/src/sha512.c b/wolfcrypt/src/sha512.c index 9eb6635e375..79444fc9c89 100644 --- a/wolfcrypt/src/sha512.c +++ b/wolfcrypt/src/sha512.c @@ -1457,15 +1457,31 @@ static void (*Transform_Sha512_p)(wc_Sha512* sha512, const byte* data) = NULL; static void (*Transform_Sha512_Len_p)(wc_Sha512* sha512, const byte* data, word32 len) = NULL; +/* 32-bit ARM SHA-512 NEON asm below needs SAVE/RESTORE_VECTOR_REGISTERS + * (kernel_neon_begin/end) in a Linux kernel module, else the first NEON insn + * faults "undefined instruction". Scoped to !__aarch64__ so aarch64 and the + * THUMB2/NO_NEON base path stay unchanged. (FIPS 180-4 SHA-512.) */ +#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(__aarch64__) && \ + !defined(WOLFSSL_ARMASM_THUMB2) && !defined(WOLFSSL_ARMASM_NO_NEON) + #define WC_SHA512_ARM_SVR_BEGIN(fail) SAVE_VECTOR_REGISTERS(fail) + #define WC_SHA512_ARM_SVR_END() RESTORE_VECTOR_REGISTERS() +#else + #define WC_SHA512_ARM_SVR_BEGIN(fail) WC_DO_NOTHING + #define WC_SHA512_ARM_SVR_END() WC_DO_NOTHING +#endif static WC_INLINE int Transform_Sha512(wc_Sha512 *sha512, const byte* data) { + WC_SHA512_ARM_SVR_BEGIN(return _svr_ret;); (*Transform_Sha512_p)(sha512, data); + WC_SHA512_ARM_SVR_END(); return 0; } static WC_INLINE int Transform_Sha512_Len(wc_Sha512 *sha512, const byte* data, word32 len) { + WC_SHA512_ARM_SVR_BEGIN(return _svr_ret;); (*Transform_Sha512_Len_p)(sha512, data, len); + WC_SHA512_ARM_SVR_END(); return 0; } diff --git a/wolfcrypt/src/wc_lms.c b/wolfcrypt/src/wc_lms.c index 595b93622d8..09dd20b3b75 100644 --- a/wolfcrypt/src/wc_lms.c +++ b/wolfcrypt/src/wc_lms.c @@ -28,6 +28,14 @@ #if FIPS_VERSION3_GE(2,0,0) /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ #define FIPS_NO_WRAPPERS + + /* Windows orders the FIPS in-core integrity boundary by named linker + * sections. Keep LMS (SP 800-208) code/const inside the boundary, + * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */ + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$nc") + #pragma const_seg(".fipsB$nc") + #endif #endif #include #include diff --git a/wolfcrypt/src/wc_lms_impl.c b/wolfcrypt/src/wc_lms_impl.c index e88c032d87e..0df71d4d5b5 100644 --- a/wolfcrypt/src/wc_lms_impl.c +++ b/wolfcrypt/src/wc_lms_impl.c @@ -41,6 +41,15 @@ #include +#if FIPS_VERSION3_GE(2,0,0) + /* Keep this LMS (SP 800-208) implementation's code/const inside the FIPS + * in-core integrity boundary (Windows orders it by named sections). */ + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$ne") + #pragma const_seg(".fipsB$ne") + #endif +#endif + #include #ifdef NO_INLINE @@ -2319,7 +2328,10 @@ static int wc_lms_treehash_update(LmsState* state, LmsPrivState* privState, byte* left = dp + LMS_D_LEN; byte* temp = left + params->hash_len; WC_DECLARE_VAR(stack, byte, (LMS_MAX_HEIGHT + 1) * LMS_MAX_NODE_LEN, 0); - byte* sp; + /* Init to NULL: sp is set and used only on the ret==0 path, but 32-bit ARM + * gcc cannot correlate the two separate `if (ret == 0)` guards and reports a + * false-positive -Wmaybe-uninitialized (x86_64/aarch64 gcc do not). */ + byte* sp = NULL; word32 max_cb = (word32)1 << params->cacheBits; word32 i; diff --git a/wolfcrypt/src/wc_mldsa.c b/wolfcrypt/src/wc_mldsa.c index 79d18dddb67..209a2883d5c 100644 --- a/wolfcrypt/src/wc_mldsa.c +++ b/wolfcrypt/src/wc_mldsa.c @@ -142,6 +142,14 @@ #if FIPS_VERSION3_GE(2,0,0) /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ #define FIPS_NO_WRAPPERS + + /* Windows orders the FIPS in-core integrity boundary by named linker + * sections. Keep ML-DSA (FIPS 204) code/const inside the boundary, + * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */ + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$nc") + #pragma const_seg(".fipsB$nc") + #endif #endif #ifndef WOLFSSL_MLDSA_NO_ASN1 @@ -772,8 +780,95 @@ static int mldsa_hash256_ctx_msg(wc_Shake* shake256, const byte* tr, * @return 0 on success. * @return BAD_FUNC_ARG if hash algorithm not known. */ -static int mldsa_get_hash_oid(int hash, byte* oidBuffer, word32* oidLen) +/* HashML-DSA PH-vs-paramSet enforcement. + * + * FIPS 204 sec. 5.4 (Table 4) restricts the HashML-DSA pre-hash PH to + * algorithms whose collision-resistance strength meets or exceeds the + * paramSet's security level; enforced for both sigGen and sigVer. Returns + * 0 for an approved (hashAlg, level) pair, else BAD_FUNC_ARG (including any + * hash not on the approved list). + */ +static int mldsa_check_hash_for_level(int hashAlg, byte level) { + int strengthBits; /* collision-resistance strength of the chosen hash */ + int requiredBits; /* security level required by the paramSet */ + + switch (hashAlg) { + #ifndef NO_SHA256 + case WC_HASH_TYPE_SHA256: + strengthBits = 128; + break; + #endif + #ifdef WOLFSSL_SHA384 + case WC_HASH_TYPE_SHA384: + strengthBits = 192; + break; + #endif + #ifdef WOLFSSL_SHA512 + case WC_HASH_TYPE_SHA512: + strengthBits = 256; + break; + #ifndef WOLFSSL_NOSHA512_256 + case WC_HASH_TYPE_SHA512_256: + /* SHA-512/256 has 128-bit collision resistance (truncated). */ + strengthBits = 128; + break; + #endif + #endif + #ifdef WOLFSSL_SHA3 + #ifndef WOLFSSL_NOSHA3_256 + case WC_HASH_TYPE_SHA3_256: + strengthBits = 128; + break; + #endif + #ifndef WOLFSSL_NOSHA3_384 + case WC_HASH_TYPE_SHA3_384: + strengthBits = 192; + break; + #endif + #ifndef WOLFSSL_NOSHA3_512 + case WC_HASH_TYPE_SHA3_512: + strengthBits = 256; + break; + #endif + #endif + #ifdef WOLFSSL_SHAKE128 + case WC_HASH_TYPE_SHAKE128: + strengthBits = 128; + break; + #endif + #ifdef WOLFSSL_SHAKE256 + case WC_HASH_TYPE_SHAKE256: + strengthBits = 256; + break; + #endif + default: + /* Hash not on the FIPS 204 Table 4 approved list (e.g. SHA-224, + * SHA-512/224, SHA3-224, MD5). Reject regardless of level. */ + return BAD_FUNC_ARG; + } + + switch (level) { + case WC_ML_DSA_44: + requiredBits = 128; + break; + case WC_ML_DSA_65: + requiredBits = 192; + break; + case WC_ML_DSA_87: + requiredBits = 256; + break; + default: + return BAD_FUNC_ARG; + } + + if (strengthBits < requiredBits) { + return BAD_FUNC_ARG; + } + return 0; +} + +static int mldsa_get_hash_oid(int hash, byte* oidBuffer, word32* oidLen){ int ret = 0; const byte* oid; @@ -9467,11 +9562,17 @@ static int mldsa_sign_ctx_hash_with_seed(wc_MlDsaKey* key, byte oidMsgHash[MLDSA_HASH_OID_LEN + WC_MAX_DIGEST_SIZE]; word32 oidMsgHashLen = 0; - /* Check that the input hash length is valid. */ + /* Check that the input hash length is valid (guards against caller-side + * buffer overruns before we touch hash). */ if ((int)hashLen != wc_HashGetDigestSize((enum wc_HashType)hashAlg)) { ret = BAD_LENGTH_E; } + /* FIPS 204 sec. 5.4 Table 4: enforce hash <-> paramSet matching. */ + if (ret == 0) { + ret = mldsa_check_hash_for_level(hashAlg, key->level); + } + if (ret == 0) { XMEMCPY(seedMu, seed, MLDSA_RND_SZ); @@ -10140,12 +10241,17 @@ static int mldsa_verify_ctx_hash(wc_MlDsaKey* key, const byte* ctx, if ((key == NULL) || (key->params == NULL)) { ret = BAD_FUNC_ARG; } - /* Check that the input hash length is valid. */ + /* Check that the input hash length is valid (guards against caller-side + * buffer overruns before we touch hash). */ if ((ret == 0) && ((int)hashLen != wc_HashGetDigestSize((enum wc_HashType)hashAlg))) { ret = BAD_LENGTH_E; } + /* FIPS 204 sec. 5.4 Table 4: enforce hash <-> paramSet matching. */ + if (ret == 0) { + ret = mldsa_check_hash_for_level(hashAlg, key->level); + } if (ret == 0) { /* Step 6: Hash public key. */ diff --git a/wolfcrypt/src/wc_mlkem.c b/wolfcrypt/src/wc_mlkem.c index eb96e9526b7..f4ec2d39fa5 100644 --- a/wolfcrypt/src/wc_mlkem.c +++ b/wolfcrypt/src/wc_mlkem.c @@ -83,6 +83,14 @@ #if FIPS_VERSION3_GE(2,0,0) /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ #define FIPS_NO_WRAPPERS + + /* Windows orders the FIPS in-core integrity boundary by named linker + * sections. Keep ML-KEM (FIPS 203) code/const inside the boundary, + * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */ + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$na") + #pragma const_seg(".fipsB$na") + #endif #endif #include @@ -696,49 +704,11 @@ int wc_MlKemKey_MakeKey(MlKemKey* key, WC_RNG* rng) ret = wc_MlKemKey_MakeKeyWithRandom(key, rand, sizeof(rand)); } -#ifdef HAVE_FIPS - /* Pairwise Consistency Test (PCT) per FIPS 140-3 / ISO 19790:2012 - * Section 7.10.3.3: encapsulate with ek, decapsulate with dk, - * verify shared secrets match. */ - if (ret == 0) { - WC_DECLARE_VAR(pct_ct, byte, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE, - key->heap); - byte pct_ss1[WC_ML_KEM_SS_SZ]; - byte pct_ss2[WC_ML_KEM_SS_SZ]; - word32 ctSz = 0; - - WC_ALLOC_VAR_EX(pct_ct, byte, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE, - key->heap, DYNAMIC_TYPE_TMP_BUFFER, ret = MEMORY_E); - - if (ret == 0) - ret = wc_MlKemKey_CipherTextSize(key, &ctSz); - - if (ret == 0) - ret = wc_MlKemKey_Encapsulate(key, pct_ct, pct_ss1, rng); - - if (ret == 0) - ret = wc_MlKemKey_Decapsulate(key, pct_ss2, pct_ct, ctSz); - - if (ret == 0) { - if (XMEMCMP(pct_ss1, pct_ss2, WC_ML_KEM_SS_SZ) != 0) - ret = ML_KEM_PCT_E; - } - - ForceZero(pct_ss1, sizeof(pct_ss1)); - ForceZero(pct_ss2, sizeof(pct_ss2)); - if (WC_VAR_OK(pct_ct)) - ForceZero(pct_ct, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE); - - WC_FREE_VAR_EX(pct_ct, key->heap, DYNAMIC_TYPE_TMP_BUFFER); - - /* FIPS 140-3 IG 10.3.A (TE10.35.02): a key pair that fails the PCT - * must be rendered unusable. Zeroize the generated key material so - * a caller that ignores the return value cannot use it. */ - if (ret != 0) { - wc_MlKemKey_Free(key); - } - } -#endif /* HAVE_FIPS */ + /* PCT now lives in wc_MlKemKey_MakeKeyWithRandom() (called above) so both + * the random-seeded path (here) and the caller-supplied-seed path exercise + * the FIPS 140-3 IG 10.3.A 1.B Pairwise Consistency Test. + * Audit A16-1: PCT previously lived only here, leaving the + * deterministic-seed entry uncovered. */ /* Ensure seeds are zeroized. */ ForceZero((void*)rand, (word32)sizeof(rand)); @@ -987,8 +957,67 @@ int wc_MlKemKey_MakeKeyWithRandom(MlKemKey* key, const unsigned char* rand, ForceZero(e, (size_t)(k * MLKEM_N) * sizeof(sword16)); #endif - /* Note: PCT is performed in wc_MlKemKey_MakeKey() which calls this - * function and has the RNG parameter needed for encapsulation. */ +#ifdef HAVE_FIPS + /* Pairwise Consistency Test (PCT) per FIPS 140-3 IG 10.3.A 1.B and + * ISO/IEC 19790:2012 Section 7.10.3.3: encapsulate with the generated + * encapsulation key (ek), decapsulate with the matching decapsulation + * key (dk), and verify the recovered shared secret matches. This is a + * deterministic key-gen path with no caller RNG, so the PCT uses + * wc_MlKemKey_EncapsulateWithRandom() with a fixed 32-byte `m` (FIPS 203 + * Algorithm 17 input); `m` need not be unpredictable for a PCT roundtrip. + * + * Audit A16-1: PCT previously lived only in wc_MlKemKey_MakeKey (which + * seeds `rand` from the DRBG), leaving this deterministic-seed entry + * without PCT coverage. */ + if (ret == 0) { + WC_DECLARE_VAR(pct_ct, byte, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE, + key->heap); + byte pct_ss1[WC_ML_KEM_SS_SZ]; + byte pct_ss2[WC_ML_KEM_SS_SZ]; + word32 pct_ctSz = 0; + /* Fixed 32-byte test pattern for FIPS 203 Alg 17 `m` parameter. + * Value is arbitrary - PCT only requires encap/decap roundtrip, + * not encap unpredictability. */ + static const byte pct_m[WC_ML_KEM_ENC_RAND_SZ] = { + 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, + 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, + 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, + 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB + }; + + WC_ALLOC_VAR_EX(pct_ct, byte, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE, + key->heap, DYNAMIC_TYPE_TMP_BUFFER, ret = MEMORY_E); + + if (ret == 0) + ret = wc_MlKemKey_CipherTextSize(key, &pct_ctSz); + + if (ret == 0) + ret = wc_MlKemKey_EncapsulateWithRandom(key, pct_ct, pct_ss1, + pct_m, (int)sizeof(pct_m)); + + if (ret == 0) + ret = wc_MlKemKey_Decapsulate(key, pct_ss2, pct_ct, pct_ctSz); + + if (ret == 0) { + if (XMEMCMP(pct_ss1, pct_ss2, WC_ML_KEM_SS_SZ) != 0) + ret = ML_KEM_PCT_E; + } + + ForceZero(pct_ss1, sizeof(pct_ss1)); + ForceZero(pct_ss2, sizeof(pct_ss2)); + if (WC_VAR_OK(pct_ct)) + ForceZero(pct_ct, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE); + + WC_FREE_VAR_EX(pct_ct, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + + /* FIPS 140-3 IG 10.3.A (TE10.35.02): a key pair that fails the PCT + * must be rendered unusable. Zeroize the generated key material so + * a caller that ignores the return value cannot use it. */ + if (ret != 0) { + wc_MlKemKey_Free(key); + } + } +#endif /* HAVE_FIPS */ return ret; } diff --git a/wolfcrypt/src/wc_mlkem_poly.c b/wolfcrypt/src/wc_mlkem_poly.c index cd067b46e78..8ed957f77dc 100644 --- a/wolfcrypt/src/wc_mlkem_poly.c +++ b/wolfcrypt/src/wc_mlkem_poly.c @@ -71,6 +71,15 @@ #include +#if FIPS_VERSION3_GE(2,0,0) + /* Keep this ML-KEM (FIPS 203) implementation's code/const inside the FIPS + * in-core integrity boundary (Windows orders it by named sections). */ + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$nb") + #pragma const_seg(".fipsB$nb") + #endif +#endif + #ifdef WC_MLKEM_NO_ASM #undef USE_INTEL_SPEEDUP #undef WOLFSSL_ARMASM diff --git a/wolfcrypt/src/wc_slhdsa.c b/wolfcrypt/src/wc_slhdsa.c index 4d24d5ff968..02de84d1092 100644 --- a/wolfcrypt/src/wc_slhdsa.c +++ b/wolfcrypt/src/wc_slhdsa.c @@ -26,6 +26,14 @@ #if FIPS_VERSION3_GE(2,0,0) /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ #define FIPS_NO_WRAPPERS + + /* Windows orders the FIPS in-core integrity boundary by named linker + * sections. Keep SLH-DSA (FIPS 205) code/const inside the boundary, + * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */ + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$nh") + #pragma const_seg(".fipsB$nh") + #endif #endif #include @@ -3286,6 +3294,9 @@ static int slhdsakey_wots_pkgen_chain_c(SlhDsaKey* key, const byte* sk_seed, if (ret == 0) XMEMSET(sk, 0, (SLHDSA_MAX_MSG_SZ + 3) * SLHDSA_MAX_N); if (ret == 0) { + /* Zero the WOTS+ leaf buffer up front: defensive clearing of secret + * key material that also avoids a -Wmaybe-uninitialized read of sk. */ + XMEMSET(sk, 0, (SLHDSA_MAX_MSG_SZ + 3) * SLHDSA_MAX_N); /* Step 4. len consecutive addresses. */ for (i = 0; i < len; i++) { /* Step 5. Set chain address for WOTS PRF. */ @@ -7006,6 +7017,46 @@ int wc_SlhDsaKey_MakeKey(SlhDsaKey* key, WC_RNG* rng) key->sk + 2 * n, n); } +#ifdef HAVE_FIPS + /* Pairwise Consistency Test (PCT) per FIPS 140-3 IG 10.3.A (TE10.35.02): + * sign with the new sk, verify with the matching pk. SLH-DSA (FIPS 205) + * is stateless, so the relaxed PCT rule for stateful HBS (LMS/XMSS) does + * not apply -- PCT runs on every KeyGen. SignDeterministic avoids + * consuming RNG state; sigLen is heap-allocated for this exact variant + * (~8 KB to 50 KB across SLH-DSA variants). */ + if (ret == 0) { + static const byte pct_msg[] = "wolfSSL SLH-DSA PCT"; + word32 pct_sigLen = key->params->sigLen; + byte* pct_sig = (byte*)XMALLOC(pct_sigLen, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + word32 pct_sigSz = pct_sigLen; + + if (pct_sig == NULL) { + ret = MEMORY_E; + } + if (ret == 0) { + ret = wc_SlhDsaKey_SignDeterministic(key, NULL, 0, + pct_msg, sizeof(pct_msg), pct_sig, &pct_sigSz); + } + if (ret == 0) { + ret = wc_SlhDsaKey_Verify(key, NULL, 0, + pct_msg, sizeof(pct_msg), pct_sig, pct_sigSz); + if (ret != 0) { + ret = SLH_DSA_PCT_E; + } + } + if (pct_sig != NULL) { + ForceZero(pct_sig, pct_sigLen); + XFREE(pct_sig, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + /* IG 10.3.A (TE10.35.02): a key pair that fails the PCT must be + * rendered unusable. */ + if (ret != 0) { + wc_SlhDsaKey_Free(key); + } + } +#endif /* HAVE_FIPS */ + return ret; } @@ -7952,6 +8003,97 @@ static const byte slhdsakey_oid_sha3_512[] = { #endif #endif +/* HashSLH-DSA PH-vs-paramSet enforcement. + * + * FIPS 205 sec. 10.2.2 (Table 9): the pre-hash PH must have collision- + * resistance >= the paramSet security level (key->params->n in bytes): + * n = 16 (128-bit): SHA2-256, SHA2-384, SHA2-512, SHA2-512/256, + * SHA3-256, SHA3-384, SHA3-512, + * SHAKE-128, SHAKE-256 + * n = 24 (192-bit): SHA2-384, SHA2-512, SHA3-384, SHA3-512, SHAKE-256 + * n = 32 (256-bit): SHA2-512, SHA3-512, SHAKE-256 + * + * Returns 0 if approved, else BAD_FUNC_ARG (including any hash off the list, + * e.g. SHA-224, SHA-512/224, SHA3-224). + */ +static int slhdsa_check_hash_for_n(enum wc_HashType hashType, byte n) +{ + int strengthBits; + int requiredBits; + + switch ((int)hashType) { + #ifndef NO_SHA256 + case WC_HASH_TYPE_SHA256: + strengthBits = 128; + break; + #endif + #ifdef WOLFSSL_SHA384 + case WC_HASH_TYPE_SHA384: + strengthBits = 192; + break; + #endif + #ifdef WOLFSSL_SHA512 + case WC_HASH_TYPE_SHA512: + strengthBits = 256; + break; + #ifndef WOLFSSL_NOSHA512_256 + case WC_HASH_TYPE_SHA512_256: + /* SHA-512/256 has 128-bit collision resistance (truncated). */ + strengthBits = 128; + break; + #endif + #endif + #ifdef WOLFSSL_SHA3 + #ifndef WOLFSSL_NOSHA3_256 + case WC_HASH_TYPE_SHA3_256: + strengthBits = 128; + break; + #endif + #ifndef WOLFSSL_NOSHA3_384 + case WC_HASH_TYPE_SHA3_384: + strengthBits = 192; + break; + #endif + #ifndef WOLFSSL_NOSHA3_512 + case WC_HASH_TYPE_SHA3_512: + strengthBits = 256; + break; + #endif + #endif + #ifdef WOLFSSL_SHAKE128 + case WC_HASH_TYPE_SHAKE128: + strengthBits = 128; + break; + #endif + #ifdef WOLFSSL_SHAKE256 + case WC_HASH_TYPE_SHAKE256: + strengthBits = 256; + break; + #endif + default: + /* Hash not on the FIPS 205 Table 9 approved list. */ + return BAD_FUNC_ARG; + } + + if (n == WC_SLHDSA_N_128) { + requiredBits = 128; + } + else if (n == WC_SLHDSA_N_192) { + requiredBits = 192; + } + else if (n == WC_SLHDSA_N_256) { + requiredBits = 256; + } + else { + return BAD_FUNC_ARG; + } + + if (strengthBits < requiredBits) { + return BAD_FUNC_ARG; + } + return 0; +} + /* Validate the caller-supplied pre-hashed digest length and look up the * corresponding OID for the chosen hash algorithm. * @@ -8169,6 +8311,14 @@ static int slhdsakey_signhash_external(SlhDsaKey* key, const byte* ctx, (sigSz == NULL)) { ret = BAD_FUNC_ARG; } + /* HashSLH-DSA requires an explicit, approved pre-hash algorithm. + * WC_HASH_TYPE_NONE (the "pure SLH-DSA" sentinel) is never valid here; + * reject it explicitly (FIPS 205 Section 10.2.2 / Table 9) rather than + * via the slhdsa_check_hash_for_n() switch default, so it survives any + * future reorder of the validators or a NONE case added to that switch. */ + else if (hashType == WC_HASH_TYPE_NONE) { + ret = BAD_FUNC_ARG; + } /* Check sig buffer is large enough to hold generated signature. */ else if (*sigSz < key->params->sigLen) { ret = BAD_LENGTH_E; @@ -8178,6 +8328,12 @@ static int slhdsakey_signhash_external(SlhDsaKey* key, const byte* ctx, /* Alg 23, Step 6: Return error. */ ret = BAD_FUNC_ARG; } + /* FIPS 205 sec. 10.2.2 Table 9: enforce PH <-> paramSet matching before + * pre-hashing the message. Rejects PHs whose collision-resistance + * strength is below the paramSet's security level (n). */ + if (ret == 0) { + ret = slhdsa_check_hash_for_n(hashType, key->params->n); + } if (ret == 0) { /* Alg 23, Steps 8-23: Validate caller-supplied pre-hashed digest length * and select OID for the chosen hash algorithm. */ @@ -8412,8 +8568,11 @@ int wc_SlhDsaKey_SignHash(SlhDsaKey* key, const byte* ctx, byte ctxSz, ret = MISSING_KEY; } /* First sanity check on hashType; the downstream prehash validator does - * the detailed check for the actual type. */ - else if ((word32)hashType > (word32)WC_HASH_TYPE_MAX) { + * the detailed check. Reject WC_HASH_TYPE_NONE here too -- never a valid + * pre-hash (FIPS 205 Section 10.2.2 / Table 9) -- so a known-invalid call + * fails before consuming DRBG output below. */ + else if ((hashType == WC_HASH_TYPE_NONE) || + ((word32)hashType > (word32)WC_HASH_TYPE_MAX)) { ret = BAD_FUNC_ARG; } @@ -8542,6 +8701,12 @@ int wc_SlhDsaKey_VerifyHash(SlhDsaKey* key, const byte* ctx, byte ctxSz, } #ifdef WOLF_CRYPTO_CB + /* FIPS 205 sec. 10.2.2 Table 9: enforce PH <-> paramSet matching. + * Rejects PHs whose collision-resistance strength is below the + * paramSet's security level (n). */ + if (ret == 0) { + ret = slhdsa_check_hash_for_n(hashType, key->params->n); + } if (ret == 0) { #ifndef WOLF_CRYPTO_CB_FIND if (key->devId != INVALID_DEVID) diff --git a/wolfcrypt/src/wc_xmss.c b/wolfcrypt/src/wc_xmss.c index b4f4c761850..3f0105030c6 100644 --- a/wolfcrypt/src/wc_xmss.c +++ b/wolfcrypt/src/wc_xmss.c @@ -28,6 +28,14 @@ #if FIPS_VERSION3_GE(2,0,0) /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ #define FIPS_NO_WRAPPERS + + /* Windows orders the FIPS in-core integrity boundary by named linker + * sections. Keep XMSS (SP 800-208) code/const inside the boundary, + * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */ + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$nf") + #pragma const_seg(".fipsB$nf") + #endif #endif #include #include diff --git a/wolfcrypt/src/wc_xmss_impl.c b/wolfcrypt/src/wc_xmss_impl.c index 9029fca4a06..135fc401116 100644 --- a/wolfcrypt/src/wc_xmss_impl.c +++ b/wolfcrypt/src/wc_xmss_impl.c @@ -33,6 +33,15 @@ #include +#if FIPS_VERSION3_GE(2,0,0) + /* Keep this XMSS (SP 800-208) implementation's code/const inside the FIPS + * in-core integrity boundary (Windows orders it by named sections). */ + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$ng") + #pragma const_seg(".fipsB$ng") + #endif +#endif + #include #include diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index 6e574e3b311..7ca294cff77 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -137,7 +137,7 @@ static const byte const_byte_array[] = "A+Gd\0\0\0"; esp_start_heap = esp_this_heap; \ } \ ESP_LOGI(ESPIDF_TAG, "%s #%d; Heap free: %d", \ - ((b) ? (b) : ""), /* breadcumb string */ \ + ((b) ? (b) : ""), /* breadcrumb string */ \ ((i) ? (i) : 0), /* index */ \ esp_this_heap); @@ -56056,6 +56056,132 @@ static wc_test_ret_t mldsa_param_test(int param, WC_RNG* rng) #endif return ret; } + +#if !defined(WOLFSSL_DILITHIUM_NO_SIGN) && \ + !defined(WOLFSSL_DILITHIUM_NO_VERIFY) +/* Negative test: HashML-DSA must reject a pre-hash whose collision resistance + * is below the parameter set's claimed security strength (FIPS 204 sec. 5.4, + * Table 4: approved PH per level). Targets here: + * ML-DSA-65 (192-bit): SHA-256 (128-bit) disallowed. + * ML-DSA-87 (256-bit): SHA-384 (192-bit) disallowed. + * Asserts sigGen and sigVer both reject (non-zero). Fails until the in-module + * hash-vs-paramSet check exists in wc_dilithium_{sign,verify}_ctx_hash. */ +static wc_test_ret_t mldsa_hash_paramset_rejection_test(WC_RNG* rng) +{ + wc_test_ret_t ret = 0; + int i; +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + dilithium_key* key = NULL; + byte* sig = NULL; +#else + dilithium_key key[1]; + byte sig[DILITHIUM_MAX_SIG_SIZE]; +#endif + word32 sigLen; + int verified; + + /* Fixed-content digests; only the (paramSet, hashAlg, hashLen) tuple + * matters for rejection. Sizes match each digest length so the length + * sanity check in wc_dilithium_*_ctx_hash() does not short-circuit before + * the hash-vs-paramSet gate under test. */ + static const byte hash32[32] = { /* SHA-256 digest size */ + 0xBA,0x78,0x16,0xBF,0x8F,0x01,0xCF,0xEA, + 0x41,0x41,0x40,0xDE,0x5D,0xAE,0x22,0x23, + 0xB0,0x03,0x61,0xA3,0x96,0x17,0x7A,0x9C, + 0xB4,0x10,0xFF,0x61,0xF2,0x00,0x15,0xAD + }; + static const byte hash48[48] = { /* SHA-384 digest size */ + 0xCB,0x00,0x75,0x3F,0x45,0xA3,0x5E,0x8B, + 0xB5,0xA0,0x3D,0x69,0x9A,0xC6,0x50,0x07, + 0x27,0x2C,0x32,0xAB,0x0E,0xDE,0xD1,0x63, + 0x1A,0x8B,0x60,0x5A,0x43,0xFF,0x5B,0xED, + 0x80,0x86,0x07,0x2B,0xA1,0xE7,0xCC,0x23, + 0x58,0xBA,0xEC,0xA1,0x34,0xC8,0x25,0xA7 + }; + + struct { + int level; + int hashAlg; + const byte* hash; + word32 hashLen; + } forbidden[] = { + /* ML-DSA-65 needs >=192-bit collision strength; SHA-256 = 128-bit. */ + { WC_ML_DSA_65, WC_HASH_TYPE_SHA256, hash32, 32 }, + /* ML-DSA-87 needs >=256-bit collision strength; SHA-384 = 192-bit. */ + { WC_ML_DSA_87, WC_HASH_TYPE_SHA384, hash48, 48 } + }; + +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + key = (dilithium_key*)XMALLOC(sizeof(*key), HEAP_HINT, + DYNAMIC_TYPE_TMP_BUFFER); + sig = (byte*)XMALLOC(DILITHIUM_MAX_SIG_SIZE, HEAP_HINT, + DYNAMIC_TYPE_TMP_BUFFER); + if ((key == NULL) || (sig == NULL)) { + ERROR_OUT(WC_TEST_RET_ENC_ERRNO, neg_out); + } +#endif + XMEMSET(sig, 0, DILITHIUM_MAX_SIG_SIZE); + + for (i = 0; i < (int)(sizeof(forbidden) / sizeof(forbidden[0])); i++) { + #ifdef WOLFSSL_NO_ML_DSA_65 + if (forbidden[i].level == WC_ML_DSA_65) continue; + #endif + #ifdef WOLFSSL_NO_ML_DSA_87 + if (forbidden[i].level == WC_ML_DSA_87) continue; + #endif + + ret = wc_dilithium_init_ex(key, NULL, devId); + if (ret != 0) { + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), neg_out); + } + ret = wc_dilithium_set_level(key, (byte)forbidden[i].level); + if (ret != 0) { + wc_dilithium_free(key); + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), neg_out); + } + ret = wc_dilithium_make_key(key, rng); + if (ret != 0) { + wc_dilithium_free(key); + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), neg_out); + } + + sigLen = (word32)wc_dilithium_sig_size(key); + + /* sigGen with disallowed PH must be REJECTED. */ + PRIVATE_KEY_UNLOCK(); + ret = wc_dilithium_sign_ctx_hash(NULL, 0, forbidden[i].hashAlg, + forbidden[i].hash, forbidden[i].hashLen, sig, &sigLen, key, rng); + PRIVATE_KEY_LOCK(); + if (ret == 0) { + /* Module did NOT reject -- this is the missing-enforcement bug. */ + wc_dilithium_free(key); + ERROR_OUT(WC_TEST_RET_ENC_NC, neg_out); + } + + /* sigVer with disallowed PH must ALSO be REJECTED. */ + verified = -1; + sigLen = (word32)wc_dilithium_sig_size(key); + ret = wc_dilithium_verify_ctx_hash(sig, sigLen, NULL, 0, + forbidden[i].hashAlg, forbidden[i].hash, forbidden[i].hashLen, + &verified, key); + if (ret == 0) { + wc_dilithium_free(key); + ERROR_OUT(WC_TEST_RET_ENC_NC, neg_out); + } + + wc_dilithium_free(key); + ret = 0; + } + +neg_out: +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + if (sig != NULL) XFREE(sig, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + if (key != NULL) XFREE(key, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; +} +#endif /* !WOLFSSL_DILITHIUM_NO_SIGN && !WOLFSSL_DILITHIUM_NO_VERIFY */ + #endif #if defined(WC_MLDSA_CACHE_MATRIX_A) && \ @@ -56501,6 +56627,18 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t mldsa_test(void) #endif /* (WOLFSSL_MLDSA_PUBLIC_KEY && !WOLFSSL_MLDSA_NO_VERIFY) || * (WOLFSSL_MLDSA_PRIVATE_KEY && !WOLFSSL_MLDSA_NO_SIGN) */ +#if !defined(WOLFSSL_MLDSA_NO_MAKE_KEY) && \ + !defined(WOLFSSL_MLDSA_NO_SIGN) && \ + !defined(WOLFSSL_MLDSA_NO_VERIFY) && \ + (!defined(WOLFSSL_NO_ML_DSA_65) || !defined(WOLFSSL_NO_ML_DSA_87)) + /* FIPS 204 sec. 5.4 -- HashML-DSA must reject pre-hashes weaker than + * the parameter set's security level. */ + ret = mldsa_hash_paramset_rejection_test(&rng); + if (ret != 0) { + ERROR_OUT(ret, out); + } +#endif + #if !defined(WOLFSSL_MLDSA_NO_MAKE_KEY) || \ !defined(WOLFSSL_MLDSA_NO_VERIFY) || \ defined(WOLFSSL_MLDSA_PRIVATE_KEY) || \ @@ -57946,29 +58084,17 @@ static wc_test_ret_t slhdsa_test_param(enum SlhDsaParam param) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); } - /* HashSLH-DSA takes the caller's pre-hashed digest as input. */ + /* HashSLH-DSA takes the caller's pre-hashed digest as input. SHAKE-256 is + * approved for all SLH-DSA-{128,192,256} variants (FIPS 205 sec. 10.2.2 + * Table 9), so use it unconditionally for the positive round-trip -- it + * never trips the hash-vs-paramSet gate for higher-security paramSets. */ { -#ifdef WOLFSSL_SLHDSA_SHA2 - enum wc_HashType phType = SLHDSA_IS_SHA2(param) ? - WC_HASH_TYPE_SHA256 : WC_HASH_TYPE_SHAKE256; -#else enum wc_HashType phType = WC_HASH_TYPE_SHAKE256; -#endif byte digest[WC_SHA3_512_DIGEST_SIZE]; - word32 digestLen; + word32 digestLen = WC_SHA3_512_DIGEST_SIZE; -#ifdef WOLFSSL_SLHDSA_SHA2 - if (phType == WC_HASH_TYPE_SHA256) { - ret = wc_Sha256Hash(msg, (word32)sizeof(msg), digest); - digestLen = WC_SHA256_DIGEST_SIZE; - } - else -#endif - { - ret = wc_Shake256Hash(msg, (word32)sizeof(msg), digest, - WC_SHA3_512_DIGEST_SIZE); - digestLen = WC_SHA3_512_DIGEST_SIZE; - } + ret = wc_Shake256Hash(msg, (word32)sizeof(msg), digest, + WC_SHA3_512_DIGEST_SIZE); if (ret != 0) { ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); } @@ -57987,9 +58113,11 @@ static wc_test_ret_t slhdsa_test_param(enum SlhDsaParam param) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); } - /* Additional pre-hash test: SHA-384 exercises a different OID path */ + /* Additional pre-hash test: SHA-384 exercises a different OID path. Skip + * for SLH-DSA-256 -- SHA-384 (192-bit collision) is below its 256-bit + * security level (FIPS 205 sec. 10.2.2 Table 9). */ #ifdef WOLFSSL_SHA384 - { + if (key->params->n != WC_SLHDSA_N_256) { byte digest384[WC_SHA384_DIGEST_SIZE]; ret = wc_Sha384Hash(msg, (word32)sizeof(msg), digest384); @@ -58049,6 +58177,87 @@ static wc_test_ret_t slhdsa_test_param(enum SlhDsaParam param) return ret; } + +/* Negative test: HashSLH-DSA must reject a pre-hash whose collision resistance + * is below the parameter set's claimed security strength (FIPS 205 sec. + * 10.2.2, Table 9: approved PH per level). Here a 192/256-bit paramSet is + * given SHA-256 (128-bit, approved only for 128-bit paramSets). Asserts + * sigGen and sigVer both reject. Fails until the in-module hash-vs-paramSet + * check exists in wc_SlhDsaKey_{Sign,Verify}Hash. */ +static wc_test_ret_t slhdsa_hash_paramset_rejection_test(enum SlhDsaParam param) +{ + int ret = 0; + WC_RNG rng; + SlhDsaKey key[1]; + byte sig[WC_SLHDSA_MAX_SIG_LEN]; + word32 sigLen; + static const byte msg[] = { + 0x48,0x65,0x6c,0x6c,0x6f,0x20,0x57,0x6f, + 0x72,0x6c,0x64,0x21 + }; + byte ctx[1]; + /* SHA-256 (128-bit collision) is approved only for 128-bit paramSets, so + * any 192/256-bit paramSet must reject it. */ + enum wc_HashType badHash = WC_HASH_TYPE_SHA256; + + XMEMSET(&key, 0, sizeof(key)); + +#ifndef HAVE_FIPS + ret = wc_InitRng_ex(&rng, HEAP_HINT, devId); +#else + ret = wc_InitRng(&rng); +#endif + if (ret != 0) return WC_TEST_RET_ENC_EC(ret); + + ret = wc_SlhDsaKey_Init(key, param, NULL, INVALID_DEVID); + if (ret != 0) { + wc_FreeRng(&rng); + return WC_TEST_RET_ENC_EC(ret); + } + + ret = wc_SlhDsaKey_MakeKey(key, &rng); + if (ret != 0) { + wc_SlhDsaKey_Free(key); + wc_FreeRng(&rng); + return WC_TEST_RET_ENC_EC(ret); + } + + /* Only enforce on paramSets above 128-bit security; SHA-256 is approved + * for 128-bit so wouldn't be a rejection target there. */ + if (key->params->n == WC_SLHDSA_N_128) { + wc_SlhDsaKey_Free(key); + wc_FreeRng(&rng); + return 0; + } + + /* sigGen with too-weak PH must be REJECTED. */ + sigLen = WC_SLHDSA_MAX_SIG_LEN; + PRIVATE_KEY_UNLOCK(); + ret = wc_SlhDsaKey_SignHash(key, ctx, 0, msg, (word32)sizeof(msg), + badHash, sig, &sigLen, &rng); + PRIVATE_KEY_LOCK(); + if (ret == 0) { + /* Module did NOT reject -- this is the missing-enforcement bug. */ + wc_SlhDsaKey_Free(key); + wc_FreeRng(&rng); + return WC_TEST_RET_ENC_NC; + } + + /* sigVer with too-weak PH must ALSO be REJECTED. */ + sigLen = WC_SLHDSA_MAX_SIG_LEN; + XMEMSET(sig, 0, sigLen); + ret = wc_SlhDsaKey_VerifyHash(key, ctx, 0, msg, (word32)sizeof(msg), + badHash, sig, sigLen); + if (ret == 0) { + wc_SlhDsaKey_Free(key); + wc_FreeRng(&rng); + return WC_TEST_RET_ENC_NC; + } + + wc_SlhDsaKey_Free(key); + wc_FreeRng(&rng); + return 0; +} #endif /* True iff slhdsa_test() actually emits at least one `goto out;` / @@ -60042,6 +60251,39 @@ wc_test_ret_t slhdsa_test(void) } #endif + /* FIPS 205 sec. 10.2.2 -- HashSLH-DSA must reject pre-hashes below the + * paramSet's security level. Use any available 192/256-bit paramSet; + * 128-bit paramSets allow SHA-256 so are not useful targets here. */ +#ifdef WOLFSSL_SLHDSA_PARAM_192S + ret = slhdsa_hash_paramset_rejection_test(SLHDSA_SHAKE192S); + if (ret != 0) { + wc_test_render_error_message("SLHDSA_SHAKE192S (hash-paramset reject)", + 0); + goto out; + } +#elif defined(WOLFSSL_SLHDSA_PARAM_256S) + ret = slhdsa_hash_paramset_rejection_test(SLHDSA_SHAKE256S); + if (ret != 0) { + wc_test_render_error_message("SLHDSA_SHAKE256S (hash-paramset reject)", + 0); + goto out; + } +#elif defined(WOLFSSL_SLHDSA_PARAM_SHA2_192S) + ret = slhdsa_hash_paramset_rejection_test(SLHDSA_SHA2_192S); + if (ret != 0) { + wc_test_render_error_message("SLHDSA_SHA2_192S (hash-paramset reject)", + 0); + goto out; + } +#elif defined(WOLFSSL_SLHDSA_PARAM_SHA2_256S) + ret = slhdsa_hash_paramset_rejection_test(SLHDSA_SHA2_256S); + if (ret != 0) { + wc_test_render_error_message("SLHDSA_SHA2_256S (hash-paramset reject)", + 0); + goto out; + } +#endif + #endif /* !WOLFSSL_SLHDSA_VERIFY_ONLY */ #if defined(WOLF_PRIVATE_KEY_ID) && \ diff --git a/wolfssl/wolfcrypt/aes.h b/wolfssl/wolfcrypt/aes.h index c0d6a789bf9..e5f434a0e80 100644 --- a/wolfssl/wolfcrypt/aes.h +++ b/wolfssl/wolfcrypt/aes.h @@ -66,8 +66,13 @@ typedef struct Gcm { #endif WOLFSSL_LOCAL void GenerateM0(Gcm* gcm); +/* This two-byte-pointer GMULT is the GCM_SMALL form; GCM_TABLE/ + * GCM_TABLE_4BIT use a static GMULT taking byte m[N][16]. Scope to GCM_SMALL + * so it doesn't clash with the table-mode GMULT on 32-bit ARM armasm + + * WOLFSSL_AESGCM_STREAM (streaming path now uses the software table GHASH). + * See SP 800-38D AES-GCM GHASH. */ #if !defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) + !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) && defined(GCM_SMALL) WOLFSSL_LOCAL void GMULT(byte* X, byte* Y); #endif WOLFSSL_LOCAL void WC_ARG_NOT_NULL(1) GHASH(Gcm* gcm, const byte* a, diff --git a/wolfssl/wolfcrypt/error-crypt.h b/wolfssl/wolfcrypt/error-crypt.h index 5b089f118b4..286a8739f44 100644 --- a/wolfssl/wolfcrypt/error-crypt.h +++ b/wolfssl/wolfcrypt/error-crypt.h @@ -327,9 +327,17 @@ enum wolfCrypt_ErrorCodes { ML_DSA_PCT_E = -1016, /* ML-DSA Pairwise Consistency Test failure */ DRBG_SHA512_KAT_FIPS_E = -1017, /* SHA-512 DRBG KAT failure */ SLH_DSA_KAT_FIPS_E = -1018, /* SLH-DSA CAST KAT failure */ - - WC_SPAN2_LAST_E = -1018, /* Update to indicate last used error code */ - WC_LAST_E = -1018, /* the last code used either here or in + SLH_DSA_PCT_E = -1019, /* SLH-DSA Pairwise Consistency Test failure */ + CMAC_KAT_FIPS_E = -1020, /* AES-CMAC KAT failure (vendor-elected) */ + SHAKE_KAT_FIPS_E = -1021, /* SHAKE KAT failure (vendor-elected) */ + DH_PCT_E = -1022, /* DH (FFC) Pairwise Consistency Test + * failure (SP 800-56A r3 sec 5.6.2.1.4, + * FIPS 140-3 IG 10.3.B) */ + AES_KW_KAT_FIPS_E = -1023, /* AES-KW KAT failure (vendor-elected, + * SP 800-38F sec 6.2 / RFC 3394) */ + + WC_SPAN2_LAST_E = -1023, /* Update to indicate last used error code */ + WC_LAST_E = -1023, /* the last code used either here or in * error-ssl.h */ WC_SPAN2_MIN_CODE_E = -1999, /* Last usable code in span 2 */ diff --git a/wolfssl/wolfcrypt/fips_test.h b/wolfssl/wolfcrypt/fips_test.h index de2b506df2c..38d65af5a36 100644 --- a/wolfssl/wolfcrypt/fips_test.h +++ b/wolfssl/wolfcrypt/fips_test.h @@ -31,8 +31,22 @@ extern "C" { #endif -/* Added for FIPS v5.3 or later */ -#if defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(5,3) +/* Added for FIPS v5.3 or later. + * + * v7.0.0+ upgrades the in-core integrity HMAC to SHA-512 (512-bit key) for + * NSA 2.0 compliance, leaving no SHA-256 integrity material in the module. + * v5.3 and v6.x retain HMAC-SHA-256. + */ +#if defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(7,0) + #ifdef WOLFSSL_SHA512 + #define FIPS_IN_CORE_DIGEST_SIZE 64 + #define FIPS_IN_CORE_HASH_TYPE WC_SHA512 + #define FIPS_IN_CORE_KEY_SZ 64 + #define FIPS_IN_CORE_VERIFY_SZ FIPS_IN_CORE_KEY_SZ + #else + #error FIPS v7+ integrity test requires WOLFSSL_SHA512 + #endif +#elif defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(5,3) /* Determine FIPS in core hash type and size */ #ifndef NO_SHA256 #define FIPS_IN_CORE_DIGEST_SIZE 32 @@ -62,7 +76,10 @@ enum FipsCastId { FIPS_CAST_RSA_SIGN_PKCS1v15 = 7, FIPS_CAST_ECC_CDH = 8, FIPS_CAST_ECC_PRIMITIVE_Z = 9, - FIPS_CAST_DH_PRIMITIVE_Z = 10, + FIPS_CAST_DH_PRIMITIVE_Z = 10, /* RETIRED (v7+): classic DH dropped + * from the FIPS 140-3 v7 PQ module + * boundary. Kept for ABI; do not + * reuse this id. */ FIPS_CAST_ECDSA = 11, FIPS_CAST_KDF_TLS12 = 12, FIPS_CAST_KDF_TLS13 = 13, @@ -80,7 +97,10 @@ enum FipsCastId { FIPS_CAST_XMSS = 23, FIPS_CAST_DRBG_SHA512 = 24, FIPS_CAST_SLH_DSA = 25, - FIPS_CAST_COUNT = 26 + FIPS_CAST_AES_CMAC = 26, + FIPS_CAST_SHAKE = 27, + FIPS_CAST_AES_KW = 28, + FIPS_CAST_COUNT = 29 }; enum FipsCastStateId { diff --git a/wolfssl/wolfcrypt/random.h b/wolfssl/wolfcrypt/random.h index 102f05d6b55..339c9f6fa13 100644 --- a/wolfssl/wolfcrypt/random.h +++ b/wolfssl/wolfcrypt/random.h @@ -57,8 +57,12 @@ #define DRBG_SEED_LEN (440/8) #endif +/* Size of the DRBG seed (SHA-512) */ #ifdef WOLFSSL_DRBG_SHA512 - #define DRBG_SHA512_SEED_LEN (888/8) /* 111 bytes per SP 800-90A Table 2 */ + #ifndef DRBG_SHA512_SEED_LEN + #define DRBG_SHA512_SEED_LEN (888/8) /* 111 bytes per SP 800-90A + * Table 2 */ + #endif #endif @@ -212,12 +216,16 @@ struct OS_Seed { */ #define ENTROPY_SCALE_FACTOR (512) #elif defined(HAVE_INTEL_RDSEED) || defined(HAVE_INTEL_RDRAND) - /* The value of 2 applies to Intel's RDSEED which provides about - * 0.5 bits minimum of entropy per bit. The value of 4 gives a - * conservative margin for FIPS. */ + /* Intel RDSEED nominally provides ~0.5 bits min entropy per bit + * (NIST CMVP cert3389 PUD). As of v7, FIPS mode uses scale=512 on + * Intel too (was 8), matching the AMD worst case: AMD "Tyzen + * V1xxxx" PUD Table 3 documents 0.656040 bits per 128-bit block as + * the floor across the CMVP-validated AMD family. One worst-case + * seeding budget then covers any x86 OE. Non-FIPS Intel keeps the + * lighter scale=2 (Intel-PUD-derived) for performance. */ #if defined(HAVE_FIPS) && defined(HAVE_FIPS_VERSION) && \ (HAVE_FIPS_VERSION >= 2) - #define ENTROPY_SCALE_FACTOR (2*4) + #define ENTROPY_SCALE_FACTOR (512) #else /* Not FIPS, but Intel RDSEED, only double. */ #define ENTROPY_SCALE_FACTOR (2) diff --git a/wolfssl/wolfcrypt/settings.h b/wolfssl/wolfcrypt/settings.h index e3a3b884a46..eee88558145 100644 --- a/wolfssl/wolfcrypt/settings.h +++ b/wolfssl/wolfcrypt/settings.h @@ -557,6 +557,15 @@ #endif /* blinding adds API not available yet in FIPS mode */ #undef WC_RSA_BLINDING + + /* NIST SP 800-38A sec 6.2: CBC requires plaintext a multiple of the + * block size, and the cipher does not pad (project_aes_no_padding_policy). + * Force the wc_AesCbcEncrypt / wc_AesCbcDecrypt block-alignment check + * so a length not a multiple of WC_AES_BLOCK_SIZE returns BAD_LENGTH_E + * instead of silently truncating to the largest aligned prefix. */ + #ifndef WOLFSSL_AES_CBC_LENGTH_CHECKS + #define WOLFSSL_AES_CBC_LENGTH_CHECKS + #endif #endif /* old FIPS has only AES_BLOCK_SIZE. */