diff --git a/.github/workflows/hol_light.yml b/.github/workflows/hol_light.yml index eb01bc5e9..e1e9a7f61 100644 --- a/.github/workflows/hol_light.yml +++ b/.github/workflows/hol_light.yml @@ -9,6 +9,7 @@ on: branches: ["main"] paths: - '.github/workflows/hol_light.yml' + - 'proofs/hol_light/common/**/*.ml' - 'proofs/hol_light/aarch64/Makefile' - 'proofs/hol_light/aarch64/**/*.S' - 'proofs/hol_light/aarch64/**/*.ml' @@ -23,6 +24,7 @@ on: branches: ["main"] paths: - '.github/workflows/hol_light.yml' + - 'proofs/hol_light/common/**/*.ml' - 'proofs/hol_light/aarch64/Makefile' - 'proofs/hol_light/aarch64/**/*.S' - 'proofs/hol_light/aarch64/**/*.ml' @@ -79,8 +81,12 @@ jobs: matrix: proof: # Dependencies on {name}.{S,ml} are implicit + - name: mldsa_ntt + needs: ["mldsa_specs.ml", "mldsa_zetas.ml", "aarch64_utils.ml", "subroutine_signatures.ml"] - name: mldsa_poly_caddq needs: ["aarch64_utils.ml"] + - name: mldsa_rej_uniform + needs: ["mldsa_specs.ml", "aarch64_utils.ml", "mldsa_rej_uniform_table.ml"] name: AArch64 HOL Light proof for ${{ matrix.proof.name }}.S runs-on: pqcp-arm64 if: github.repository_owner == 'pq-code-package' && !github.event.pull_request.head.repo.fork @@ -106,8 +112,8 @@ jobs: done done - # Always re-run upon change to nix files for HOL-Light - if [[ "$changed_files" == *"nix/"* ]] || [[ "$changed_files" == *"hol_light.yml"* ]] || [[ "$changed_files" == *"flake"* ]] || [[ "$changed_files" == *"proofs/hol_light/aarch64/Makefile"* ]]; then + # Always re-run upon change to nix files, common specs, or HOL-Light config + if [[ "$changed_files" == *"nix/"* ]] || [[ "$changed_files" == *"hol_light.yml"* ]] || [[ "$changed_files" == *"flake"* ]] || [[ "$changed_files" == *"proofs/hol_light/aarch64/Makefile"* ]] || [[ "$changed_files" == *"proofs/hol_light/common/"* ]]; then run_needed=1 fi @@ -153,7 +159,7 @@ jobs: # TODO: Enable when we have a cheaper proof # make -C proofs/hol_light/x86_64 mldsa/mldsa_ntt.o # echo 'needs "x86_64/proofs/mldsa_ntt.ml";;' | hol.sh - echo 'needs "x86/proofs/base.ml";; needs "x86_64/proofs/mldsa_specs.ml";; #quit;;' | hol.sh + echo 'needs "x86/proofs/base.ml";; needs "common/mldsa_specs.ml";; #quit;;' | hol.sh hol_light_proofs_x86_64: needs: [ hol_light_bytecode_x86_64 ] strategy: @@ -190,8 +196,8 @@ jobs: done done - # Always re-run upon change to nix files for HOL-Light - if [[ "$changed_files" == *"nix/"* ]] || [[ "$changed_files" == *"hol_light.yml"* ]] || [[ "$changed_files" == *"flake"* ]] || [[ "$changed_files" == *"proofs/hol_light/x86_64/Makefile"* ]]; then + # Always re-run upon change to nix files, common specs, or HOL-Light config + if [[ "$changed_files" == *"nix/"* ]] || [[ "$changed_files" == *"hol_light.yml"* ]] || [[ "$changed_files" == *"flake"* ]] || [[ "$changed_files" == *"proofs/hol_light/x86_64/Makefile"* ]] || [[ "$changed_files" == *"proofs/hol_light/common/"* ]]; then run_needed=1 fi diff --git a/dev/aarch64_clean/src/arith_native_aarch64.h b/dev/aarch64_clean/src/arith_native_aarch64.h index f78a1487d..49bbe5533 100644 --- a/dev/aarch64_clean/src/arith_native_aarch64.h +++ b/dev/aarch64_clean/src/arith_native_aarch64.h @@ -50,7 +50,20 @@ extern const uint8_t mld_polyz_unpack_19_indices[]; #define MLD_AARCH64_REJ_UNIFORM_ETA4_BUFLEN (2 * 136) #define mld_ntt_asm MLD_NAMESPACE(ntt_asm) -void mld_ntt_asm(int32_t *, const int32_t *, const int32_t *); +void mld_ntt_asm(int32_t *r, const int32_t *zetas_l123456, + const int32_t *zetas_l78) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/aarch64/proofs/mldsa_ntt.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(array_abs_bound(r, 0, MLDSA_N, 8380417)) + requires(zetas_l123456 == mld_aarch64_ntt_zetas_layer123456) + requires(zetas_l78 == mld_aarch64_ntt_zetas_layer78) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + /* check-magic: off */ + ensures(array_abs_bound(r, 0, MLDSA_N, 75423753)) + /* check-magic: on */ +); #define mld_intt_asm MLD_NAMESPACE(intt_asm) void mld_intt_asm(int32_t *, const int32_t *, const int32_t *); diff --git a/dev/aarch64_opt/src/arith_native_aarch64.h b/dev/aarch64_opt/src/arith_native_aarch64.h index f78a1487d..49bbe5533 100644 --- a/dev/aarch64_opt/src/arith_native_aarch64.h +++ b/dev/aarch64_opt/src/arith_native_aarch64.h @@ -50,7 +50,20 @@ extern const uint8_t mld_polyz_unpack_19_indices[]; #define MLD_AARCH64_REJ_UNIFORM_ETA4_BUFLEN (2 * 136) #define mld_ntt_asm MLD_NAMESPACE(ntt_asm) -void mld_ntt_asm(int32_t *, const int32_t *, const int32_t *); +void mld_ntt_asm(int32_t *r, const int32_t *zetas_l123456, + const int32_t *zetas_l78) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/aarch64/proofs/mldsa_ntt.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(array_abs_bound(r, 0, MLDSA_N, 8380417)) + requires(zetas_l123456 == mld_aarch64_ntt_zetas_layer123456) + requires(zetas_l78 == mld_aarch64_ntt_zetas_layer78) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + /* check-magic: off */ + ensures(array_abs_bound(r, 0, MLDSA_N, 75423753)) + /* check-magic: on */ +); #define mld_intt_asm MLD_NAMESPACE(intt_asm) void mld_intt_asm(int32_t *, const int32_t *, const int32_t *); diff --git a/mldsa/src/native/aarch64/src/arith_native_aarch64.h b/mldsa/src/native/aarch64/src/arith_native_aarch64.h index f78a1487d..49bbe5533 100644 --- a/mldsa/src/native/aarch64/src/arith_native_aarch64.h +++ b/mldsa/src/native/aarch64/src/arith_native_aarch64.h @@ -50,7 +50,20 @@ extern const uint8_t mld_polyz_unpack_19_indices[]; #define MLD_AARCH64_REJ_UNIFORM_ETA4_BUFLEN (2 * 136) #define mld_ntt_asm MLD_NAMESPACE(ntt_asm) -void mld_ntt_asm(int32_t *, const int32_t *, const int32_t *); +void mld_ntt_asm(int32_t *r, const int32_t *zetas_l123456, + const int32_t *zetas_l78) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/aarch64/proofs/mldsa_ntt.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(array_abs_bound(r, 0, MLDSA_N, 8380417)) + requires(zetas_l123456 == mld_aarch64_ntt_zetas_layer123456) + requires(zetas_l78 == mld_aarch64_ntt_zetas_layer78) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + /* check-magic: off */ + ensures(array_abs_bound(r, 0, MLDSA_N, 75423753)) + /* check-magic: on */ +); #define mld_intt_asm MLD_NAMESPACE(intt_asm) void mld_intt_asm(int32_t *, const int32_t *, const int32_t *); diff --git a/proofs/cbmc/ntt_native_aarch64/Makefile b/proofs/cbmc/ntt_native_aarch64/Makefile new file mode 100644 index 000000000..3a9f1dbca --- /dev/null +++ b/proofs/cbmc/ntt_native_aarch64/Makefile @@ -0,0 +1,58 @@ +# Copyright (c) The mldsa-native project authors +# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +include ../Makefile_params.common + +HARNESS_ENTRY = harness +HARNESS_FILE = ntt_native_aarch64_harness + +# This should be a unique identifier for this proof, and will appear on the +# Litani dashboard. It can be human-readable and contain spaces if you wish. +PROOF_UID = ntt_native_aarch64 + +# We need to set MLD_CHECK_APIS as otherwise mldsa/src/native/api.h won't be +# included, which contains the CBMC specifications. +DEFINES += -DMLD_CONFIG_USE_NATIVE_BACKEND_ARITH -DMLD_CONFIG_ARITH_BACKEND_FILE="\"$(SRCDIR)/mldsa/src/native/aarch64/meta.h\"" -DMLD_CHECK_APIS +INCLUDES += + +REMOVE_FUNCTION_BODY += +UNWINDSET += + +PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c +PROJECT_SOURCES += $(SRCDIR)/mldsa/src/poly.c $(SRCDIR)/mldsa/src/native/aarch64/src/aarch64_zetas.c + +CHECK_FUNCTION_CONTRACTS=mld_ntt_native +USE_FUNCTION_CONTRACTS=mld_ntt_asm +USE_FUNCTION_CONTRACTS+=mld_sys_check_capability +APPLY_LOOP_CONTRACTS=on +USE_DYNAMIC_FRAMES=1 + +# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead +EXTERNAL_SAT_SOLVER= +CBMCFLAGS=--smt2 + +FUNCTION_NAME = ntt_native_aarch64 + +# If this proof is found to consume huge amounts of RAM, you can set the +# EXPENSIVE variable. With new enough versions of the proof tools, this will +# restrict the number of EXPENSIVE CBMC jobs running at once. See the +# documentation in Makefile.common under the "Job Pools" heading for details. +# EXPENSIVE = true + +# This function is large enough to need... +CBMC_OBJECT_BITS = 8 + +# If you require access to a file-local ("static") function or object to conduct +# your proof, set the following (and do not include the original source file +# ("mldsa/src/poly.c") in PROJECT_SOURCES). +# REWRITTEN_SOURCES = $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i +# include ../Makefile.common +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_SOURCE = $(SRCDIR)/mldsa/src/poly.c +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_FUNCTIONS = foo bar +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_OBJECTS = baz +# Care is required with variables on the left-hand side: REWRITTEN_SOURCES must +# be set before including Makefile.common, but any use of variables on the +# left-hand side requires those variables to be defined. Hence, _SOURCE, +# _FUNCTIONS, _OBJECTS is set after including Makefile.common. + +include ../Makefile.common diff --git a/proofs/cbmc/ntt_native_aarch64/ntt_native_aarch64_harness.c b/proofs/cbmc/ntt_native_aarch64/ntt_native_aarch64_harness.c new file mode 100644 index 000000000..a985c3492 --- /dev/null +++ b/proofs/cbmc/ntt_native_aarch64/ntt_native_aarch64_harness.c @@ -0,0 +1,16 @@ +// Copyright (c) The mldsa-native project authors +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +#include +#include "cbmc.h" +#include "params.h" + + +int mld_ntt_native(int32_t data[MLDSA_N]); + +void harness(void) +{ + int32_t *r; + int t; + t = mld_ntt_native(r); +} diff --git a/proofs/hol_light/aarch64/Makefile b/proofs/hol_light/aarch64/Makefile index 91c8bc96d..db8f3ea39 100644 --- a/proofs/hol_light/aarch64/Makefile +++ b/proofs/hol_light/aarch64/Makefile @@ -53,7 +53,9 @@ endif SPLIT=tr ';' '\n' -OBJ = mldsa/mldsa_poly_caddq.o +OBJ = mldsa/mldsa_ntt.o \ + mldsa/mldsa_poly_caddq.o \ + mldsa/mldsa_rej_uniform.o # According to # https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms, diff --git a/proofs/hol_light/aarch64/mldsa/mldsa_ntt.S b/proofs/hol_light/aarch64/mldsa/mldsa_ntt.S new file mode 100644 index 000000000..393258843 --- /dev/null +++ b/proofs/hol_light/aarch64/mldsa/mldsa_ntt.S @@ -0,0 +1,615 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + + +/* + * WARNING: This file is auto-derived from the mldsa-native source file + * dev/aarch64_opt/src/ntt.S using scripts/simpasm. Do not modify it directly. + */ + +#if defined(__ELF__) +.section .note.GNU-stack,"",@progbits +#endif + +.text +.balign 4 +#ifdef __APPLE__ +.global _PQCP_MLDSA_NATIVE_MLDSA44_ntt_asm +_PQCP_MLDSA_NATIVE_MLDSA44_ntt_asm: +#else +.global PQCP_MLDSA_NATIVE_MLDSA44_ntt_asm +PQCP_MLDSA_NATIVE_MLDSA44_ntt_asm: +#endif + + .cfi_startproc + sub sp, sp, #0x40 + stp d8, d9, [sp] + stp d10, d11, [sp, #0x10] + stp d12, d13, [sp, #0x20] + stp d14, d15, [sp, #0x30] + mov w5, #0xe001 // =57345 + movk w5, #0x7f, lsl #16 + dup v7.4s, w5 + mov x3, x0 + mov x4, #0x8 // =8 + ldr q0, [x1], #0x40 + ldur q1, [x1, #-0x30] + ldur q2, [x1, #-0x20] + ldur q3, [x1, #-0x10] + ldr q23, [x0, #0x390] + ldr q13, [x0, #0x380] + ldr q22, [x0, #0x80] + ldr q26, [x0, #0x190] + ldr q8, [x0, #0x280] + ldr q6, [x0, #0x210] + mul v10.4s, v13.4s, v0.s[0] + sqrdmulh v13.4s, v13.4s, v0.s[1] + mul v12.4s, v8.4s, v0.s[0] + sqrdmulh v27.4s, v8.4s, v0.s[1] + mul v4.4s, v6.4s, v0.s[0] + mls v10.4s, v13.4s, v7.s[0] + ldr q13, [x0, #0x180] + sqrdmulh v14.4s, v23.4s, v0.s[1] + mls v12.4s, v27.4s, v7.s[0] + add v31.4s, v13.4s, v10.4s + sub v13.4s, v13.4s, v10.4s + mul v10.4s, v23.4s, v0.s[0] + sqrdmulh v8.4s, v13.4s, v1.s[1] + sub v18.4s, v22.4s, v12.4s + mls v10.4s, v14.4s, v7.s[0] + mul v13.4s, v13.4s, v1.s[0] + mls v13.4s, v8.4s, v7.s[0] + sub v29.4s, v26.4s, v10.4s + add v25.4s, v26.4s, v10.4s + mul v10.4s, v31.4s, v0.s[2] + mul v14.4s, v25.4s, v0.s[2] + add v17.4s, v18.4s, v13.4s + sub v15.4s, v18.4s, v13.4s + sqrdmulh v13.4s, v31.4s, v0.s[3] + sqrdmulh v20.4s, v15.4s, v3.s[1] + sqrdmulh v5.4s, v17.4s, v2.s[3] + mls v10.4s, v13.4s, v7.s[0] + ldr q13, [x0, #0x300] + mul v18.4s, v17.4s, v2.s[2] + add v31.4s, v22.4s, v12.4s + mul v23.4s, v15.4s, v3.s[0] + ldr q17, [x0, #0x90] + add v19.4s, v31.4s, v10.4s + sub v16.4s, v31.4s, v10.4s + mul v10.4s, v13.4s, v0.s[0] + sqrdmulh v13.4s, v13.4s, v0.s[1] + sqrdmulh v27.4s, v16.4s, v2.s[1] + mul v11.4s, v16.4s, v2.s[0] + mls v10.4s, v13.4s, v7.s[0] + ldr q13, [x0, #0x290] + ldr q22, [x0, #0x100] + mls v11.4s, v27.4s, v7.s[0] + sqrdmulh v15.4s, v13.4s, v0.s[1] + sub v12.4s, v22.4s, v10.4s + add v30.4s, v22.4s, v10.4s + mul v10.4s, v13.4s, v0.s[0] + ldr q28, [x0] + sqrdmulh v13.4s, v25.4s, v0.s[3] + sqrdmulh v27.4s, v30.4s, v0.s[3] + mls v10.4s, v15.4s, v7.s[0] + mls v14.4s, v13.4s, v7.s[0] + ldr q13, [x0, #0x200] + sqrdmulh v25.4s, v12.4s, v1.s[1] + add v24.4s, v17.4s, v10.4s + sub v21.4s, v17.4s, v10.4s + sqrdmulh v8.4s, v13.4s, v0.s[1] + sub v9.4s, v24.4s, v14.4s + mul v26.4s, v12.4s, v1.s[0] + mul v13.4s, v13.4s, v0.s[0] + mls v13.4s, v8.4s, v7.s[0] + mul v8.4s, v30.4s, v0.s[2] + mls v8.4s, v27.4s, v7.s[0] + add v16.4s, v28.4s, v13.4s + sub v10.4s, v28.4s, v13.4s + mls v26.4s, v25.4s, v7.s[0] + sqrdmulh v12.4s, v19.4s, v1.s[3] + sub v25.4s, v16.4s, v8.4s + mls v23.4s, v20.4s, v7.s[0] + sub v22.4s, v25.4s, v11.4s + sqrdmulh v20.4s, v9.4s, v2.s[1] + sub v15.4s, v10.4s, v26.4s + sub x4, x4, #0x2 + +Lntt_layer123_start: + add v31.4s, v10.4s, v26.4s + mul v17.4s, v19.4s, v1.s[2] + add v26.4s, v15.4s, v23.4s + ldr q30, [x0, #0x2a0] + sub v13.4s, v15.4s, v23.4s + mul v23.4s, v29.4s, v1.s[0] + add v25.4s, v25.4s, v11.4s + str q22, [x0, #0x180] + mul v11.4s, v9.4s, v2.s[0] + str q13, [x0, #0x380] + ldr q28, [x0, #0x10] + add v10.4s, v16.4s, v8.4s + mls v17.4s, v12.4s, v7.s[0] + ldr q13, [x0, #0x3a0] + str q26, [x0, #0x300] + sqrdmulh v27.4s, v30.4s, v0.s[1] + mls v18.4s, v5.4s, v7.s[0] + ldr q9, [x0, #0x1a0] + sub v16.4s, v10.4s, v17.4s + add v15.4s, v10.4s, v17.4s + sqrdmulh v10.4s, v6.4s, v0.s[1] + str q16, [x0, #0x80] + str q15, [x0], #0x10 + sqrdmulh v19.4s, v13.4s, v0.s[1] + sub v15.4s, v31.4s, v18.4s + mul v8.4s, v13.4s, v0.s[0] + add v26.4s, v31.4s, v18.4s + str q15, [x0, #0x270] + sqrdmulh v13.4s, v29.4s, v1.s[1] + str q26, [x0, #0x1f0] + mls v8.4s, v19.4s, v7.s[0] + mls v11.4s, v20.4s, v7.s[0] + mls v23.4s, v13.4s, v7.s[0] + add v22.4s, v9.4s, v8.4s + ldr q6, [x0, #0x210] + sub v29.4s, v9.4s, v8.4s + mul v17.4s, v30.4s, v0.s[0] + ldr q9, [x0, #0x300] + sqrdmulh v13.4s, v22.4s, v0.s[3] + add v18.4s, v21.4s, v23.4s + mls v4.4s, v10.4s, v7.s[0] + sub v31.4s, v21.4s, v23.4s + sqrdmulh v16.4s, v31.4s, v3.s[1] + add v19.4s, v24.4s, v14.4s + mul v14.4s, v22.4s, v0.s[2] + sub v10.4s, v28.4s, v4.4s + mls v14.4s, v13.4s, v7.s[0] + ldr q13, [x0, #0x100] + sqrdmulh v22.4s, v9.4s, v0.s[1] + mul v8.4s, v9.4s, v0.s[0] + mul v23.4s, v31.4s, v3.s[0] + mls v8.4s, v22.4s, v7.s[0] + mls v23.4s, v16.4s, v7.s[0] + add v16.4s, v28.4s, v4.4s + ldr q22, [x0, #0x90] + mul v4.4s, v6.4s, v0.s[0] + mls v17.4s, v27.4s, v7.s[0] + add v21.4s, v13.4s, v8.4s + sub v27.4s, v13.4s, v8.4s + sqrdmulh v31.4s, v21.4s, v0.s[3] + str q25, [x0, #0xf0] + mul v8.4s, v21.4s, v0.s[2] + add v24.4s, v22.4s, v17.4s + sub v21.4s, v22.4s, v17.4s + sqrdmulh v5.4s, v18.4s, v2.s[3] + mls v8.4s, v31.4s, v7.s[0] + sub v9.4s, v24.4s, v14.4s + sqrdmulh v20.4s, v27.4s, v1.s[1] + mul v26.4s, v27.4s, v1.s[0] + sub v25.4s, v16.4s, v8.4s + mul v18.4s, v18.4s, v2.s[2] + sub v22.4s, v25.4s, v11.4s + mls v26.4s, v20.4s, v7.s[0] + sqrdmulh v20.4s, v9.4s, v2.s[1] + sqrdmulh v12.4s, v19.4s, v1.s[3] + sub v15.4s, v10.4s, v26.4s + subs x4, x4, #0x1 + cbnz x4, Lntt_layer123_start + add v13.4s, v10.4s, v26.4s + mls v18.4s, v5.4s, v7.s[0] + str q22, [x0, #0x180] + add v27.4s, v16.4s, v8.4s + mul v22.4s, v19.4s, v1.s[2] + add v26.4s, v24.4s, v14.4s + ldr q31, [x0, #0x110] + sub v14.4s, v15.4s, v23.4s + add v17.4s, v15.4s, v23.4s + mls v22.4s, v12.4s, v7.s[0] + add v28.4s, v13.4s, v18.4s + str q14, [x0, #0x380] + sqrdmulh v24.4s, v6.4s, v0.s[1] + add v5.4s, v25.4s, v11.4s + sub v19.4s, v13.4s, v18.4s + str q17, [x0, #0x300] + str q5, [x0, #0x100] + mul v16.4s, v9.4s, v2.s[0] + ldr q18, [x0, #0x310] + str q19, [x0, #0x280] + mls v16.4s, v20.4s, v7.s[0] + str q28, [x0, #0x200] + add v13.4s, v27.4s, v22.4s + ldr q15, [x0, #0x10] + sub v10.4s, v27.4s, v22.4s + mls v4.4s, v24.4s, v7.s[0] + str q13, [x0], #0x10 + str q10, [x0, #0x70] + sqrdmulh v12.4s, v29.4s, v1.s[1] + mul v23.4s, v29.4s, v1.s[0] + mul v8.4s, v26.4s, v1.s[2] + add v20.4s, v15.4s, v4.4s + sub v6.4s, v15.4s, v4.4s + mls v23.4s, v12.4s, v7.s[0] + sqrdmulh v22.4s, v18.4s, v0.s[1] + mul v5.4s, v18.4s, v0.s[0] + sub v28.4s, v21.4s, v23.4s + sqrdmulh v10.4s, v26.4s, v1.s[3] + mls v5.4s, v22.4s, v7.s[0] + sqrdmulh v30.4s, v28.4s, v3.s[1] + add v4.4s, v21.4s, v23.4s + mls v8.4s, v10.4s, v7.s[0] + add v12.4s, v31.4s, v5.4s + sub v9.4s, v31.4s, v5.4s + sqrdmulh v25.4s, v4.4s, v2.s[3] + sqrdmulh v15.4s, v9.4s, v1.s[1] + sqrdmulh v31.4s, v12.4s, v0.s[3] + mul v18.4s, v12.4s, v0.s[2] + mul v11.4s, v9.4s, v1.s[0] + mls v18.4s, v31.4s, v7.s[0] + mul v29.4s, v4.4s, v2.s[2] + mls v29.4s, v25.4s, v7.s[0] + add v23.4s, v20.4s, v18.4s + mls v11.4s, v15.4s, v7.s[0] + sub v31.4s, v20.4s, v18.4s + add v17.4s, v23.4s, v8.4s + add v5.4s, v31.4s, v16.4s + mul v24.4s, v28.4s, v3.s[0] + str q17, [x0], #0x10 + sub v19.4s, v31.4s, v16.4s + mls v24.4s, v30.4s, v7.s[0] + str q5, [x0, #0xf0] + add v31.4s, v6.4s, v11.4s + sub v26.4s, v23.4s, v8.4s + str q19, [x0, #0x170] + add v4.4s, v31.4s, v29.4s + sub v13.4s, v6.4s, v11.4s + str q26, [x0, #0x70] + sub v11.4s, v31.4s, v29.4s + sub v22.4s, v13.4s, v24.4s + add v23.4s, v13.4s, v24.4s + str q4, [x0, #0x1f0] + str q11, [x0, #0x270] + str q23, [x0, #0x2f0] + str q22, [x0, #0x370] + mov x0, x3 + mov x4, #0x8 // =8 + ldr q9, [x0, #0x40] + ldr q23, [x1], #0x40 + ldr q21, [x2, #0x60] + ldr q1, [x0, #0x20] + ldur q14, [x1, #-0x30] + ldr q13, [x0] + ldr q11, [x2, #0x50] + sqrdmulh v16.4s, v9.4s, v23.s[1] + ldr q17, [x0, #0x50] + mul v15.4s, v9.4s, v23.s[0] + ldr q30, [x0, #0x70] + ldr q27, [x0, #0x60] + ldr q8, [x2, #0x30] + sqrdmulh v12.4s, v17.4s, v23.s[1] + ldr q6, [x0, #0x30] + mls v15.4s, v16.4s, v7.s[0] + sqrdmulh v18.4s, v27.4s, v23.s[1] + sqrdmulh v19.4s, v30.4s, v23.s[1] + add v5.4s, v13.4s, v15.4s + mul v25.4s, v27.4s, v23.s[0] + sub v26.4s, v13.4s, v15.4s + mls v25.4s, v18.4s, v7.s[0] + mul v10.4s, v17.4s, v23.s[0] + mls v10.4s, v12.4s, v7.s[0] + mul v4.4s, v30.4s, v23.s[0] + sub v22.4s, v1.4s, v25.4s + mls v4.4s, v19.4s, v7.s[0] + add v28.4s, v1.4s, v25.4s + sqrdmulh v19.4s, v28.4s, v23.s[3] + sqrdmulh v9.4s, v22.4s, v14.s[1] + add v2.4s, v6.4s, v4.4s + mul v0.4s, v28.4s, v23.s[2] + sqrdmulh v27.4s, v2.4s, v23.s[3] + sub v17.4s, v6.4s, v4.4s + mul v3.4s, v2.4s, v23.s[2] + sqrdmulh v20.4s, v17.4s, v14.s[1] + ldr q1, [x0, #0x10] + mls v3.4s, v27.4s, v7.s[0] + mls v0.4s, v19.4s, v7.s[0] + ldur q16, [x1, #-0x20] + add v31.4s, v1.4s, v10.4s + mul v30.4s, v17.4s, v14.s[0] + mls v30.4s, v20.4s, v7.s[0] + add v27.4s, v31.4s, v3.4s + sub v23.4s, v1.4s, v10.4s + sub v24.4s, v31.4s, v3.4s + sqrdmulh v4.4s, v27.4s, v14.s[3] + sqrdmulh v10.4s, v24.4s, v16.s[1] + mul v18.4s, v24.4s, v16.s[0] + add v15.4s, v23.4s, v30.4s + sub v23.4s, v23.4s, v30.4s + mul v29.4s, v27.4s, v14.s[2] + sub v2.4s, v5.4s, v0.4s + add v12.4s, v5.4s, v0.4s + mls v18.4s, v10.4s, v7.s[0] + ldur q3, [x1, #-0x10] + mls v29.4s, v4.4s, v7.s[0] + mul v4.4s, v22.4s, v14.s[0] + add v1.4s, v2.4s, v18.4s + sub v24.4s, v2.4s, v18.4s + mls v4.4s, v9.4s, v7.s[0] + ldr q20, [x2, #0x10] + add v25.4s, v12.4s, v29.4s + mul v9.4s, v23.4s, v3.s[0] + sub v5.4s, v12.4s, v29.4s + sqrdmulh v31.4s, v23.4s, v3.s[1] + trn2 v6.4s, v1.4s, v24.4s + trn2 v10.4s, v25.4s, v5.4s + sqrdmulh v13.4s, v15.4s, v16.s[3] + trn2 v30.2d, v10.2d, v6.2d + ldr q3, [x2], #0xc0 + mul v12.4s, v15.4s, v16.s[2] + trn1 v27.2d, v10.2d, v6.2d + mls v9.4s, v31.4s, v7.s[0] + trn1 v22.4s, v25.4s, v5.4s + sub v6.4s, v26.4s, v4.4s + mls v12.4s, v13.4s, v7.s[0] + trn1 v1.4s, v1.4s, v24.4s + add v13.4s, v26.4s, v4.4s + trn2 v10.2d, v22.2d, v1.2d + mul v28.4s, v30.4s, v3.4s + sub v31.4s, v6.4s, v9.4s + sub x4, x4, #0x1 + +Lntt_layer45678_start: + add v2.4s, v13.4s, v12.4s + sqrdmulh v5.4s, v30.4s, v20.4s + sub v25.4s, v13.4s, v12.4s + add v17.4s, v6.4s, v9.4s + mul v19.4s, v10.4s, v3.4s + trn2 v4.4s, v2.4s, v25.4s + ldur q24, [x2, #-0x50] + trn2 v29.4s, v17.4s, v31.4s + sqrdmulh v15.4s, v10.4s, v20.4s + mls v28.4s, v5.4s, v7.s[0] + trn2 v3.2d, v4.2d, v29.2d + sqrdmulh v12.4s, v3.4s, v24.4s + mul v16.4s, v3.4s, v21.4s + mls v19.4s, v15.4s, v7.s[0] + ldur q10, [x2, #-0xa0] + add v13.4s, v27.4s, v28.4s + mls v16.4s, v12.4s, v7.s[0] + sqrdmulh v9.4s, v13.4s, v8.4s + sub v30.4s, v27.4s, v28.4s + ldr q18, [x1], #0x40 + mul v8.4s, v13.4s, v10.4s + ldr q10, [x0, #0xd0] + sqrdmulh v14.4s, v30.4s, v11.4s + ldr q23, [x0, #0xe0] + sqrdmulh v13.4s, v10.4s, v18.s[1] + sqrdmulh v12.4s, v23.4s, v18.s[1] + ldur q6, [x2, #-0x80] + mul v3.4s, v10.4s, v18.s[0] + mls v3.4s, v13.4s, v7.s[0] + ldr q13, [x0, #0xf0] + trn1 v27.4s, v2.4s, v25.4s + mul v2.4s, v30.4s, v6.4s + trn1 v20.4s, v17.4s, v31.4s + trn1 v25.2d, v4.2d, v29.2d + sqrdmulh v10.4s, v13.4s, v18.s[1] + trn2 v5.2d, v27.2d, v20.2d + ldur q6, [x2, #-0x10] + mls v8.4s, v9.4s, v7.s[0] + sub v15.4s, v25.4s, v16.4s + sqrdmulh v31.4s, v5.4s, v24.4s + sqrdmulh v30.4s, v15.4s, v6.4s + ldur q9, [x2, #-0x30] + mul v4.4s, v5.4s, v21.4s + ldur q21, [x2, #-0x40] + ldur q6, [x2, #-0x20] + add v5.4s, v25.4s, v16.4s + mls v4.4s, v31.4s, v7.s[0] + mul v0.4s, v5.4s, v21.4s + mul v17.4s, v13.4s, v18.s[0] + mls v17.4s, v10.4s, v7.s[0] + ldr q28, [x0, #0xb0] + sqrdmulh v26.4s, v5.4s, v9.4s + mul v9.4s, v15.4s, v6.4s + trn1 v6.2d, v22.2d, v1.2d + mls v9.4s, v30.4s, v7.s[0] + add v25.4s, v28.4s, v17.4s + mls v2.4s, v14.4s, v7.s[0] + trn1 v5.2d, v27.2d, v20.2d + ldr q20, [x2, #0x10] + mul v29.4s, v25.4s, v18.s[2] + add v15.4s, v6.4s, v19.4s + ldr q30, [x0, #0xc0] + sub v19.4s, v6.4s, v19.4s + add v31.4s, v15.4s, v8.4s + mls v0.4s, v26.4s, v7.s[0] + ldur q14, [x1, #-0x30] + add v21.4s, v19.4s, v2.4s + sub v24.4s, v19.4s, v2.4s + sqrdmulh v27.4s, v25.4s, v18.s[3] + sub v26.4s, v15.4s, v8.4s + ldr q2, [x0, #0x90] + mul v16.4s, v30.4s, v18.s[0] + sub v25.4s, v28.4s, v17.4s + trn1 v11.4s, v31.4s, v26.4s + ldr q1, [x0, #0xa0] + trn1 v6.4s, v21.4s, v24.4s + sqrdmulh v13.4s, v25.4s, v14.s[1] + add v8.4s, v2.4s, v3.4s + trn2 v28.2d, v11.2d, v6.2d + sqrdmulh v19.4s, v30.4s, v18.s[1] + sub v10.4s, v5.4s, v4.4s + ldur q22, [x1, #-0x20] + str q28, [x0, #0x20] + mls v29.4s, v27.4s, v7.s[0] + add v15.4s, v10.4s, v9.4s + mul v25.4s, v25.4s, v14.s[0] + ldur q27, [x1, #-0x10] + trn2 v17.4s, v31.4s, v26.4s + trn2 v21.4s, v21.4s, v24.4s + mls v16.4s, v19.4s, v7.s[0] + sub v24.4s, v8.4s, v29.4s + sub v10.4s, v10.4s, v9.4s + mls v25.4s, v13.4s, v7.s[0] + trn1 v13.2d, v11.2d, v6.2d + ldr q28, [x0, #0x80] + sqrdmulh v30.4s, v24.4s, v22.s[1] + trn2 v19.2d, v17.2d, v21.2d + trn1 v6.2d, v17.2d, v21.2d + mul v31.4s, v23.4s, v18.s[0] + str q13, [x0], #0x80 + stur q6, [x0, #-0x70] + stur q19, [x0, #-0x50] + ldr q11, [x2, #0x50] + mls v31.4s, v12.4s, v7.s[0] + ldr q21, [x2, #0x60] + trn1 v9.4s, v15.4s, v10.4s + trn2 v6.4s, v15.4s, v10.4s + mul v24.4s, v24.4s, v22.s[0] + sub v10.4s, v2.4s, v3.4s + ldr q3, [x2], #0xc0 + mls v24.4s, v30.4s, v7.s[0] + add v26.4s, v8.4s, v29.4s + ldur q8, [x2, #-0x90] + add v17.4s, v5.4s, v4.4s + sqrdmulh v2.4s, v26.4s, v14.s[3] + sub v13.4s, v1.4s, v31.4s + add v30.4s, v1.4s, v31.4s + add v15.4s, v10.4s, v25.4s + sqrdmulh v19.4s, v13.4s, v14.s[1] + sub v25.4s, v10.4s, v25.4s + mul v29.4s, v13.4s, v14.s[0] + sub v5.4s, v28.4s, v16.4s + sqrdmulh v4.4s, v30.4s, v18.s[3] + sub v23.4s, v17.4s, v0.4s + add v31.4s, v17.4s, v0.4s + mul v18.4s, v30.4s, v18.s[2] + add v1.4s, v28.4s, v16.4s + trn2 v12.4s, v31.4s, v23.4s + mls v29.4s, v19.4s, v7.s[0] + trn1 v13.4s, v31.4s, v23.4s + trn2 v30.2d, v12.2d, v6.2d + mls v18.4s, v4.4s, v7.s[0] + trn2 v10.2d, v13.2d, v9.2d + trn1 v31.2d, v13.2d, v9.2d + mul v19.4s, v26.4s, v14.s[2] + trn1 v12.2d, v12.2d, v6.2d + sub v6.4s, v5.4s, v29.4s + mls v19.4s, v2.4s, v7.s[0] + add v13.4s, v5.4s, v29.4s + stur q10, [x0, #-0x20] + sub v10.4s, v1.4s, v18.4s + add v28.4s, v1.4s, v18.4s + sqrdmulh v5.4s, v25.4s, v27.s[1] + stur q31, [x0, #-0x40] + add v26.4s, v10.4s, v24.4s + sub v31.4s, v10.4s, v24.4s + mul v9.4s, v25.4s, v27.s[0] + stur q12, [x0, #-0x30] + sub v24.4s, v28.4s, v19.4s + sqrdmulh v10.4s, v15.4s, v22.s[3] + trn1 v1.4s, v26.4s, v31.4s + stur q30, [x0, #-0x10] + add v30.4s, v28.4s, v19.4s + mls v9.4s, v5.4s, v7.s[0] + trn2 v25.4s, v26.4s, v31.4s + trn2 v14.4s, v30.4s, v24.4s + mul v12.4s, v15.4s, v22.s[2] + trn1 v22.4s, v30.4s, v24.4s + trn1 v27.2d, v14.2d, v25.2d + mls v12.4s, v10.4s, v7.s[0] + trn2 v30.2d, v14.2d, v25.2d + sub v31.4s, v6.4s, v9.4s + trn2 v10.2d, v22.2d, v1.2d + mul v28.4s, v30.4s, v3.4s + subs x4, x4, #0x1 + cbnz x4, Lntt_layer45678_start + add v9.4s, v6.4s, v9.4s + sqrdmulh v6.4s, v30.4s, v20.4s + ldur q24, [x2, #-0xa0] + add v25.4s, v13.4s, v12.4s + sub v15.4s, v13.4s, v12.4s + mul v19.4s, v10.4s, v3.4s + trn2 v5.4s, v9.4s, v31.4s + sqrdmulh v3.4s, v10.4s, v20.4s + trn2 v10.4s, v25.4s, v15.4s + mls v28.4s, v6.4s, v7.s[0] + trn2 v13.2d, v10.2d, v5.2d + ldur q30, [x2, #-0x50] + mul v12.4s, v13.4s, v21.4s + mls v19.4s, v3.4s, v7.s[0] + add v20.4s, v27.4s, v28.4s + sqrdmulh v13.4s, v13.4s, v30.4s + sub v3.4s, v27.4s, v28.4s + mul v24.4s, v20.4s, v24.4s + sqrdmulh v6.4s, v3.4s, v11.4s + ldur q27, [x2, #-0x80] + mls v12.4s, v13.4s, v7.s[0] + trn1 v25.4s, v25.4s, v15.4s + mul v27.4s, v3.4s, v27.4s + trn1 v31.4s, v9.4s, v31.4s + trn1 v3.2d, v10.2d, v5.2d + ldur q13, [x2, #-0x30] + ldur q15, [x2, #-0x40] + sqrdmulh v9.4s, v20.4s, v8.4s + trn2 v20.2d, v25.2d, v31.2d + ldur q10, [x2, #-0x10] + mls v27.4s, v6.4s, v7.s[0] + add v5.4s, v3.4s, v12.4s + sub v6.4s, v3.4s, v12.4s + sqrdmulh v3.4s, v20.4s, v30.4s + trn1 v12.2d, v22.2d, v1.2d + sqrdmulh v10.4s, v6.4s, v10.4s + mls v24.4s, v9.4s, v7.s[0] + sub v9.4s, v12.4s, v19.4s + trn1 v25.2d, v25.2d, v31.2d + sqrdmulh v31.4s, v5.4s, v13.4s + add v30.4s, v9.4s, v27.4s + add v13.4s, v12.4s, v19.4s + mul v1.4s, v20.4s, v21.4s + ldur q12, [x2, #-0x20] + add v21.4s, v13.4s, v24.4s + sub v13.4s, v13.4s, v24.4s + mls v1.4s, v3.4s, v7.s[0] + sub v3.4s, v9.4s, v27.4s + mul v9.4s, v6.4s, v12.4s + trn2 v12.4s, v21.4s, v13.4s + trn1 v6.4s, v30.4s, v3.4s + trn2 v30.4s, v30.4s, v3.4s + mls v9.4s, v10.4s, v7.s[0] + trn1 v13.4s, v21.4s, v13.4s + mul v15.4s, v5.4s, v15.4s + sub v3.4s, v25.4s, v1.4s + add v5.4s, v25.4s, v1.4s + mls v15.4s, v31.4s, v7.s[0] + trn1 v21.2d, v13.2d, v6.2d + trn2 v6.2d, v13.2d, v6.2d + add v10.4s, v3.4s, v9.4s + sub v13.4s, v3.4s, v9.4s + str q21, [x0], #0x80 + trn1 v3.2d, v12.2d, v30.2d + trn2 v31.2d, v12.2d, v30.2d + trn1 v21.4s, v10.4s, v13.4s + sub v30.4s, v5.4s, v15.4s + add v12.4s, v5.4s, v15.4s + stur q3, [x0, #-0x70] + trn2 v13.4s, v10.4s, v13.4s + trn1 v19.4s, v12.4s, v30.4s + trn2 v12.4s, v12.4s, v30.4s + stur q6, [x0, #-0x60] + stur q31, [x0, #-0x50] + trn1 v10.2d, v19.2d, v21.2d + trn2 v3.2d, v19.2d, v21.2d + trn1 v21.2d, v12.2d, v13.2d + trn2 v13.2d, v12.2d, v13.2d + stur q10, [x0, #-0x40] + stur q3, [x0, #-0x20] + stur q13, [x0, #-0x10] + stur q21, [x0, #-0x30] + ldp d8, d9, [sp] + ldp d10, d11, [sp, #0x10] + ldp d12, d13, [sp, #0x20] + ldp d14, d15, [sp, #0x30] + add sp, sp, #0x40 + ret + .cfi_endproc diff --git a/proofs/hol_light/aarch64/mldsa/mldsa_rej_uniform.S b/proofs/hol_light/aarch64/mldsa/mldsa_rej_uniform.S new file mode 100644 index 000000000..794a1cde6 --- /dev/null +++ b/proofs/hol_light/aarch64/mldsa/mldsa_rej_uniform.S @@ -0,0 +1,187 @@ +/* + * Copyright (c) The mldsa-native project authors + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + + +/* + * WARNING: This file is auto-derived from the mldsa-native source file + * dev/aarch64_clean/src/rej_uniform_asm.S using scripts/simpasm. Do not modify it directly. + */ + +#if defined(__ELF__) +.section .note.GNU-stack,"",@progbits +#endif + +.text +.balign 4 +#ifdef __APPLE__ +.global _PQCP_MLDSA_NATIVE_MLDSA44_rej_uniform_asm +_PQCP_MLDSA_NATIVE_MLDSA44_rej_uniform_asm: +#else +.global PQCP_MLDSA_NATIVE_MLDSA44_rej_uniform_asm +PQCP_MLDSA_NATIVE_MLDSA44_rej_uniform_asm: +#endif + + .cfi_startproc + sub sp, sp, #0x440 + .cfi_adjust_cfa_offset 0x440 + mov x7, #0x1 // =1 + movk x7, #0x2, lsl #32 + mov v31.d[0], x7 + mov x7, #0x4 // =4 + movk x7, #0x8, lsl #32 + mov v31.d[1], x7 + mov w7, #0xe001 // =57345 + movk w7, #0x7f, lsl #16 + dup v30.4s, w7 + mov x8, sp + mov x7, x8 + mov x11, #0x0 // =0 + eor v16.16b, v16.16b, v16.16b + +Lrej_uniform_initial_zero: + str q16, [x7], #0x40 + stur q16, [x7, #-0x30] + stur q16, [x7, #-0x20] + stur q16, [x7, #-0x10] + add x11, x11, #0x10 + cmp x11, #0x100 + b.lt Lrej_uniform_initial_zero + mov x7, x8 + mov x9, #0x0 // =0 + mov x4, #0x100 // =256 + cmp x2, #0x30 + b.lo Lrej_uniform_loop48_end + +Lrej_uniform_loop48: + cmp x9, x4 + b.hs Lrej_uniform_memory_copy + sub x2, x2, #0x30 + ld3 { v0.16b, v1.16b, v2.16b }, [x1], #48 + movi v4.16b, #0x80 + bic v2.16b, v2.16b, v4.16b + zip1 v4.16b, v0.16b, v1.16b + zip2 v5.16b, v0.16b, v1.16b + ushll v6.8h, v2.8b, #0x0 + ushll2 v7.8h, v2.16b, #0x0 + zip1 v16.8h, v4.8h, v6.8h + zip2 v17.8h, v4.8h, v6.8h + zip1 v18.8h, v5.8h, v7.8h + zip2 v19.8h, v5.8h, v7.8h + cmhi v4.4s, v30.4s, v16.4s + cmhi v5.4s, v30.4s, v17.4s + cmhi v6.4s, v30.4s, v18.4s + cmhi v7.4s, v30.4s, v19.4s + and v4.16b, v4.16b, v31.16b + and v5.16b, v5.16b, v31.16b + and v6.16b, v6.16b, v31.16b + and v7.16b, v7.16b, v31.16b + uaddlv d20, v4.4s + uaddlv d21, v5.4s + uaddlv d22, v6.4s + uaddlv d23, v7.4s + fmov x12, d20 + fmov x13, d21 + fmov x14, d22 + fmov x15, d23 + ldr q24, [x3, x12, lsl #4] + ldr q25, [x3, x13, lsl #4] + ldr q26, [x3, x14, lsl #4] + ldr q27, [x3, x15, lsl #4] + cnt v4.16b, v4.16b + cnt v5.16b, v5.16b + cnt v6.16b, v6.16b + cnt v7.16b, v7.16b + uaddlv d20, v4.4s + uaddlv d21, v5.4s + uaddlv d22, v6.4s + uaddlv d23, v7.4s + fmov x12, d20 + fmov x13, d21 + fmov x14, d22 + fmov x15, d23 + tbl v16.16b, { v16.16b }, v24.16b + tbl v17.16b, { v17.16b }, v25.16b + tbl v18.16b, { v18.16b }, v26.16b + tbl v19.16b, { v19.16b }, v27.16b + st1 { v16.4s }, [x7] + add x7, x7, x12, lsl #2 + st1 { v17.4s }, [x7] + add x7, x7, x13, lsl #2 + st1 { v18.4s }, [x7] + add x7, x7, x14, lsl #2 + st1 { v19.4s }, [x7] + add x7, x7, x15, lsl #2 + add x12, x12, x13 + add x14, x14, x15 + add x9, x9, x12 + add x9, x9, x14 + cmp x2, #0x30 + b.hs Lrej_uniform_loop48 + +Lrej_uniform_loop48_end: + cmp x9, x4 + b.hs Lrej_uniform_memory_copy + cmp x2, #0x18 + b.lo Lrej_uniform_memory_copy + sub x2, x2, #0x18 + ld3 { v0.8b, v1.8b, v2.8b }, [x1], #24 + movi v4.16b, #0x80 + bic v2.16b, v2.16b, v4.16b + zip1 v4.16b, v0.16b, v1.16b + ushll v6.8h, v2.8b, #0x0 + zip1 v16.8h, v4.8h, v6.8h + zip2 v17.8h, v4.8h, v6.8h + cmhi v4.4s, v30.4s, v16.4s + cmhi v5.4s, v30.4s, v17.4s + and v4.16b, v4.16b, v31.16b + and v5.16b, v5.16b, v31.16b + uaddlv d20, v4.4s + uaddlv d21, v5.4s + fmov x12, d20 + fmov x13, d21 + ldr q24, [x3, x12, lsl #4] + ldr q25, [x3, x13, lsl #4] + cnt v4.16b, v4.16b + cnt v5.16b, v5.16b + uaddlv d20, v4.4s + uaddlv d21, v5.4s + fmov x12, d20 + fmov x13, d21 + tbl v16.16b, { v16.16b }, v24.16b + tbl v17.16b, { v17.16b }, v25.16b + str q16, [x7] + add x7, x7, x12, lsl #2 + str q17, [x7] + add x7, x7, x13, lsl #2 + add x9, x9, x12 + add x9, x9, x13 + +Lrej_uniform_memory_copy: + cmp x9, x4 + csel x9, x9, x4, lo + mov x11, #0x0 // =0 + mov x7, x8 + +Lrej_uniform_final_copy: + ldr q16, [x7], #0x40 + ldur q17, [x7, #-0x30] + ldur q18, [x7, #-0x20] + ldur q19, [x7, #-0x10] + str q16, [x0], #0x40 + stur q17, [x0, #-0x30] + stur q18, [x0, #-0x20] + stur q19, [x0, #-0x10] + add x11, x11, #0x10 + cmp x11, #0x100 + b.lt Lrej_uniform_final_copy + mov x0, x9 + b Lrej_uniform_return + +Lrej_uniform_return: + add sp, sp, #0x440 + .cfi_adjust_cfa_offset -0x440 + ret + .cfi_endproc diff --git a/proofs/hol_light/aarch64/proofs/aarch64_utils.ml b/proofs/hol_light/aarch64/proofs/aarch64_utils.ml index e63fa0409..1187b79aa 100644 --- a/proofs/hol_light/aarch64/proofs/aarch64_utils.ml +++ b/proofs/hol_light/aarch64/proofs/aarch64_utils.ml @@ -3,6 +3,17 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT *) +(* Merge 4 x bytes32 reads into bytes128 reads *) +let MEMORY_128_FROM_32_TAC = + let a_tm = `a:int64` and n_tm = `n:num` and i64_ty = `:int64` + and pat = `read (memory :> bytes128(word_add a (word n))) s0` in + fun v boff n -> + let pat' = subst[mk_var(v,i64_ty),a_tm] pat in + let f i = + let itm = mk_small_numeral(boff + 16*i) in + READ_MEMORY_MERGE_CONV 2 (subst[itm,n_tm] pat') in + MP_TAC(end_itlist CONJ (map f (0--(n-1))));; + (* Utility for executing until target PC is reached *) let MAP_UNTIL_TARGET_PC f n = fun (asl, w) -> let is_pc_condition = can (term_match [] `read PC some_state = some_value`) in diff --git a/proofs/hol_light/aarch64/proofs/dump_bytecode.ml b/proofs/hol_light/aarch64/proofs/dump_bytecode.ml index d64561e32..6d016ba9e 100644 --- a/proofs/hol_light/aarch64/proofs/dump_bytecode.ml +++ b/proofs/hol_light/aarch64/proofs/dump_bytecode.ml @@ -6,6 +6,14 @@ (* Load base theories for AArch64 from s2n-bignum *) needs "arm/proofs/base.ml";; +print_string "=== bytecode start: aarch64/mldsa/mldsa_ntt.o ===\n";; +print_literal_from_elf "aarch64/mldsa/mldsa_ntt.o";; +print_string "==== bytecode end =====================================\n\n";; + print_string "=== bytecode start: aarch64/mldsa/mldsa_poly_caddq.o ===\n";; print_literal_from_elf "aarch64/mldsa/mldsa_poly_caddq.o";; print_string "==== bytecode end =====================================\n\n";; + +print_string "=== bytecode start: aarch64/mldsa/mldsa_rej_uniform.o ===\n";; +print_literal_from_elf "aarch64/mldsa/mldsa_rej_uniform.o";; +print_string "==== bytecode end =====================================\n\n";; diff --git a/proofs/hol_light/aarch64/proofs/mldsa_ntt.ml b/proofs/hol_light/aarch64/proofs/mldsa_ntt.ml new file mode 100644 index 000000000..2bd6c66d2 --- /dev/null +++ b/proofs/hol_light/aarch64/proofs/mldsa_ntt.ml @@ -0,0 +1,818 @@ +(* + * Copyright (c) The mldsa-native project authors + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* Forward number theoretic transform. *) +(* ========================================================================= *) + +needs "arm/proofs/base.ml";; +needs "common/mldsa_specs.ml";; +needs "aarch64/proofs/aarch64_utils.ml";; +needs "aarch64/proofs/mldsa_zetas.ml";; + +(**** print_literal_from_elf "aarch64/mldsa/mldsa_ntt.o";; + ****) + +let mldsa_ntt_mc = define_assert_from_elf + "mldsa_ntt_mc" "aarch64/mldsa/mldsa_ntt.o" +(*** BYTECODE START ***) +[ + 0xd10103ff; (* arm_SUB SP SP (rvalue (word 64)) *) + 0x6d0027e8; (* arm_STP D8 D9 SP (Immediate_Offset (iword (&0))) *) + 0x6d012fea; (* arm_STP D10 D11 SP (Immediate_Offset (iword (&16))) *) + 0x6d0237ec; (* arm_STP D12 D13 SP (Immediate_Offset (iword (&32))) *) + 0x6d033fee; (* arm_STP D14 D15 SP (Immediate_Offset (iword (&48))) *) + 0x529c0025; (* arm_MOV W5 (rvalue (word 57345)) *) + 0x72a00fe5; (* arm_MOVK W5 (word 127) 16 *) + 0x4e040ca7; (* arm_DUP_GEN Q7 X5 32 128 *) + 0xaa0003e3; (* arm_MOV X3 X0 *) + 0xd2800104; (* arm_MOV X4 (rvalue (word 8)) *) + 0x3cc40420; (* arm_LDR Q0 X1 (Postimmediate_Offset (word 64)) *) + 0x3cdd0021; (* arm_LDR Q1 X1 (Immediate_Offset (word 18446744073709551568)) *) + 0x3cde0022; (* arm_LDR Q2 X1 (Immediate_Offset (word 18446744073709551584)) *) + 0x3cdf0023; (* arm_LDR Q3 X1 (Immediate_Offset (word 18446744073709551600)) *) + 0x3dc0e417; (* arm_LDR Q23 X0 (Immediate_Offset (word 912)) *) + 0x3dc0e00d; (* arm_LDR Q13 X0 (Immediate_Offset (word 896)) *) + 0x3dc02016; (* arm_LDR Q22 X0 (Immediate_Offset (word 128)) *) + 0x3dc0641a; (* arm_LDR Q26 X0 (Immediate_Offset (word 400)) *) + 0x3dc0a008; (* arm_LDR Q8 X0 (Immediate_Offset (word 640)) *) + 0x3dc08406; (* arm_LDR Q6 X0 (Immediate_Offset (word 528)) *) + 0x4f8081aa; (* arm_MUL_VEC Q10 Q13 (Q0 :> LANE_S 0) 32 128 *) + 0x4fa0d1ad; (* arm_SQRDMULH_VEC Q13 Q13 (Q0 :> LANE_S 1) 32 128 *) + 0x4f80810c; (* arm_MUL_VEC Q12 Q8 (Q0 :> LANE_S 0) 32 128 *) + 0x4fa0d11b; (* arm_SQRDMULH_VEC Q27 Q8 (Q0 :> LANE_S 1) 32 128 *) + 0x4f8080c4; (* arm_MUL_VEC Q4 Q6 (Q0 :> LANE_S 0) 32 128 *) + 0x6f8741aa; (* arm_MLS_VEC Q10 Q13 (Q7 :> LANE_S 0) 32 128 *) + 0x3dc0600d; (* arm_LDR Q13 X0 (Immediate_Offset (word 384)) *) + 0x4fa0d2ee; (* arm_SQRDMULH_VEC Q14 Q23 (Q0 :> LANE_S 1) 32 128 *) + 0x6f87436c; (* arm_MLS_VEC Q12 Q27 (Q7 :> LANE_S 0) 32 128 *) + 0x4eaa85bf; (* arm_ADD_VEC Q31 Q13 Q10 32 128 *) + 0x6eaa85ad; (* arm_SUB_VEC Q13 Q13 Q10 32 128 *) + 0x4f8082ea; (* arm_MUL_VEC Q10 Q23 (Q0 :> LANE_S 0) 32 128 *) + 0x4fa1d1a8; (* arm_SQRDMULH_VEC Q8 Q13 (Q1 :> LANE_S 1) 32 128 *) + 0x6eac86d2; (* arm_SUB_VEC Q18 Q22 Q12 32 128 *) + 0x6f8741ca; (* arm_MLS_VEC Q10 Q14 (Q7 :> LANE_S 0) 32 128 *) + 0x4f8181ad; (* arm_MUL_VEC Q13 Q13 (Q1 :> LANE_S 0) 32 128 *) + 0x6f87410d; (* arm_MLS_VEC Q13 Q8 (Q7 :> LANE_S 0) 32 128 *) + 0x6eaa875d; (* arm_SUB_VEC Q29 Q26 Q10 32 128 *) + 0x4eaa8759; (* arm_ADD_VEC Q25 Q26 Q10 32 128 *) + 0x4f808bea; (* arm_MUL_VEC Q10 Q31 (Q0 :> LANE_S 2) 32 128 *) + 0x4f808b2e; (* arm_MUL_VEC Q14 Q25 (Q0 :> LANE_S 2) 32 128 *) + 0x4ead8651; (* arm_ADD_VEC Q17 Q18 Q13 32 128 *) + 0x6ead864f; (* arm_SUB_VEC Q15 Q18 Q13 32 128 *) + 0x4fa0dbed; (* arm_SQRDMULH_VEC Q13 Q31 (Q0 :> LANE_S 3) 32 128 *) + 0x4fa3d1f4; (* arm_SQRDMULH_VEC Q20 Q15 (Q3 :> LANE_S 1) 32 128 *) + 0x4fa2da25; (* arm_SQRDMULH_VEC Q5 Q17 (Q2 :> LANE_S 3) 32 128 *) + 0x6f8741aa; (* arm_MLS_VEC Q10 Q13 (Q7 :> LANE_S 0) 32 128 *) + 0x3dc0c00d; (* arm_LDR Q13 X0 (Immediate_Offset (word 768)) *) + 0x4f828a32; (* arm_MUL_VEC Q18 Q17 (Q2 :> LANE_S 2) 32 128 *) + 0x4eac86df; (* arm_ADD_VEC Q31 Q22 Q12 32 128 *) + 0x4f8381f7; (* arm_MUL_VEC Q23 Q15 (Q3 :> LANE_S 0) 32 128 *) + 0x3dc02411; (* arm_LDR Q17 X0 (Immediate_Offset (word 144)) *) + 0x4eaa87f3; (* arm_ADD_VEC Q19 Q31 Q10 32 128 *) + 0x6eaa87f0; (* arm_SUB_VEC Q16 Q31 Q10 32 128 *) + 0x4f8081aa; (* arm_MUL_VEC Q10 Q13 (Q0 :> LANE_S 0) 32 128 *) + 0x4fa0d1ad; (* arm_SQRDMULH_VEC Q13 Q13 (Q0 :> LANE_S 1) 32 128 *) + 0x4fa2d21b; (* arm_SQRDMULH_VEC Q27 Q16 (Q2 :> LANE_S 1) 32 128 *) + 0x4f82820b; (* arm_MUL_VEC Q11 Q16 (Q2 :> LANE_S 0) 32 128 *) + 0x6f8741aa; (* arm_MLS_VEC Q10 Q13 (Q7 :> LANE_S 0) 32 128 *) + 0x3dc0a40d; (* arm_LDR Q13 X0 (Immediate_Offset (word 656)) *) + 0x3dc04016; (* arm_LDR Q22 X0 (Immediate_Offset (word 256)) *) + 0x6f87436b; (* arm_MLS_VEC Q11 Q27 (Q7 :> LANE_S 0) 32 128 *) + 0x4fa0d1af; (* arm_SQRDMULH_VEC Q15 Q13 (Q0 :> LANE_S 1) 32 128 *) + 0x6eaa86cc; (* arm_SUB_VEC Q12 Q22 Q10 32 128 *) + 0x4eaa86de; (* arm_ADD_VEC Q30 Q22 Q10 32 128 *) + 0x4f8081aa; (* arm_MUL_VEC Q10 Q13 (Q0 :> LANE_S 0) 32 128 *) + 0x3dc0001c; (* arm_LDR Q28 X0 (Immediate_Offset (word 0)) *) + 0x4fa0db2d; (* arm_SQRDMULH_VEC Q13 Q25 (Q0 :> LANE_S 3) 32 128 *) + 0x4fa0dbdb; (* arm_SQRDMULH_VEC Q27 Q30 (Q0 :> LANE_S 3) 32 128 *) + 0x6f8741ea; (* arm_MLS_VEC Q10 Q15 (Q7 :> LANE_S 0) 32 128 *) + 0x6f8741ae; (* arm_MLS_VEC Q14 Q13 (Q7 :> LANE_S 0) 32 128 *) + 0x3dc0800d; (* arm_LDR Q13 X0 (Immediate_Offset (word 512)) *) + 0x4fa1d199; (* arm_SQRDMULH_VEC Q25 Q12 (Q1 :> LANE_S 1) 32 128 *) + 0x4eaa8638; (* arm_ADD_VEC Q24 Q17 Q10 32 128 *) + 0x6eaa8635; (* arm_SUB_VEC Q21 Q17 Q10 32 128 *) + 0x4fa0d1a8; (* arm_SQRDMULH_VEC Q8 Q13 (Q0 :> LANE_S 1) 32 128 *) + 0x6eae8709; (* arm_SUB_VEC Q9 Q24 Q14 32 128 *) + 0x4f81819a; (* arm_MUL_VEC Q26 Q12 (Q1 :> LANE_S 0) 32 128 *) + 0x4f8081ad; (* arm_MUL_VEC Q13 Q13 (Q0 :> LANE_S 0) 32 128 *) + 0x6f87410d; (* arm_MLS_VEC Q13 Q8 (Q7 :> LANE_S 0) 32 128 *) + 0x4f808bc8; (* arm_MUL_VEC Q8 Q30 (Q0 :> LANE_S 2) 32 128 *) + 0x6f874368; (* arm_MLS_VEC Q8 Q27 (Q7 :> LANE_S 0) 32 128 *) + 0x4ead8790; (* arm_ADD_VEC Q16 Q28 Q13 32 128 *) + 0x6ead878a; (* arm_SUB_VEC Q10 Q28 Q13 32 128 *) + 0x6f87433a; (* arm_MLS_VEC Q26 Q25 (Q7 :> LANE_S 0) 32 128 *) + 0x4fa1da6c; (* arm_SQRDMULH_VEC Q12 Q19 (Q1 :> LANE_S 3) 32 128 *) + 0x6ea88619; (* arm_SUB_VEC Q25 Q16 Q8 32 128 *) + 0x6f874297; (* arm_MLS_VEC Q23 Q20 (Q7 :> LANE_S 0) 32 128 *) + 0x6eab8736; (* arm_SUB_VEC Q22 Q25 Q11 32 128 *) + 0x4fa2d134; (* arm_SQRDMULH_VEC Q20 Q9 (Q2 :> LANE_S 1) 32 128 *) + 0x6eba854f; (* arm_SUB_VEC Q15 Q10 Q26 32 128 *) + 0xd1000884; (* arm_SUB X4 X4 (rvalue (word 2)) *) + 0x4eba855f; (* arm_ADD_VEC Q31 Q10 Q26 32 128 *) + 0x4f818a71; (* arm_MUL_VEC Q17 Q19 (Q1 :> LANE_S 2) 32 128 *) + 0x4eb785fa; (* arm_ADD_VEC Q26 Q15 Q23 32 128 *) + 0x3dc0a81e; (* arm_LDR Q30 X0 (Immediate_Offset (word 672)) *) + 0x6eb785ed; (* arm_SUB_VEC Q13 Q15 Q23 32 128 *) + 0x4f8183b7; (* arm_MUL_VEC Q23 Q29 (Q1 :> LANE_S 0) 32 128 *) + 0x4eab8739; (* arm_ADD_VEC Q25 Q25 Q11 32 128 *) + 0x3d806016; (* arm_STR Q22 X0 (Immediate_Offset (word 384)) *) + 0x4f82812b; (* arm_MUL_VEC Q11 Q9 (Q2 :> LANE_S 0) 32 128 *) + 0x3d80e00d; (* arm_STR Q13 X0 (Immediate_Offset (word 896)) *) + 0x3dc0041c; (* arm_LDR Q28 X0 (Immediate_Offset (word 16)) *) + 0x4ea8860a; (* arm_ADD_VEC Q10 Q16 Q8 32 128 *) + 0x6f874191; (* arm_MLS_VEC Q17 Q12 (Q7 :> LANE_S 0) 32 128 *) + 0x3dc0e80d; (* arm_LDR Q13 X0 (Immediate_Offset (word 928)) *) + 0x3d80c01a; (* arm_STR Q26 X0 (Immediate_Offset (word 768)) *) + 0x4fa0d3db; (* arm_SQRDMULH_VEC Q27 Q30 (Q0 :> LANE_S 1) 32 128 *) + 0x6f8740b2; (* arm_MLS_VEC Q18 Q5 (Q7 :> LANE_S 0) 32 128 *) + 0x3dc06809; (* arm_LDR Q9 X0 (Immediate_Offset (word 416)) *) + 0x6eb18550; (* arm_SUB_VEC Q16 Q10 Q17 32 128 *) + 0x4eb1854f; (* arm_ADD_VEC Q15 Q10 Q17 32 128 *) + 0x4fa0d0ca; (* arm_SQRDMULH_VEC Q10 Q6 (Q0 :> LANE_S 1) 32 128 *) + 0x3d802010; (* arm_STR Q16 X0 (Immediate_Offset (word 128)) *) + 0x3c81040f; (* arm_STR Q15 X0 (Postimmediate_Offset (word 16)) *) + 0x4fa0d1b3; (* arm_SQRDMULH_VEC Q19 Q13 (Q0 :> LANE_S 1) 32 128 *) + 0x6eb287ef; (* arm_SUB_VEC Q15 Q31 Q18 32 128 *) + 0x4f8081a8; (* arm_MUL_VEC Q8 Q13 (Q0 :> LANE_S 0) 32 128 *) + 0x4eb287fa; (* arm_ADD_VEC Q26 Q31 Q18 32 128 *) + 0x3d809c0f; (* arm_STR Q15 X0 (Immediate_Offset (word 624)) *) + 0x4fa1d3ad; (* arm_SQRDMULH_VEC Q13 Q29 (Q1 :> LANE_S 1) 32 128 *) + 0x3d807c1a; (* arm_STR Q26 X0 (Immediate_Offset (word 496)) *) + 0x6f874268; (* arm_MLS_VEC Q8 Q19 (Q7 :> LANE_S 0) 32 128 *) + 0x6f87428b; (* arm_MLS_VEC Q11 Q20 (Q7 :> LANE_S 0) 32 128 *) + 0x6f8741b7; (* arm_MLS_VEC Q23 Q13 (Q7 :> LANE_S 0) 32 128 *) + 0x4ea88536; (* arm_ADD_VEC Q22 Q9 Q8 32 128 *) + 0x3dc08406; (* arm_LDR Q6 X0 (Immediate_Offset (word 528)) *) + 0x6ea8853d; (* arm_SUB_VEC Q29 Q9 Q8 32 128 *) + 0x4f8083d1; (* arm_MUL_VEC Q17 Q30 (Q0 :> LANE_S 0) 32 128 *) + 0x3dc0c009; (* arm_LDR Q9 X0 (Immediate_Offset (word 768)) *) + 0x4fa0dacd; (* arm_SQRDMULH_VEC Q13 Q22 (Q0 :> LANE_S 3) 32 128 *) + 0x4eb786b2; (* arm_ADD_VEC Q18 Q21 Q23 32 128 *) + 0x6f874144; (* arm_MLS_VEC Q4 Q10 (Q7 :> LANE_S 0) 32 128 *) + 0x6eb786bf; (* arm_SUB_VEC Q31 Q21 Q23 32 128 *) + 0x4fa3d3f0; (* arm_SQRDMULH_VEC Q16 Q31 (Q3 :> LANE_S 1) 32 128 *) + 0x4eae8713; (* arm_ADD_VEC Q19 Q24 Q14 32 128 *) + 0x4f808ace; (* arm_MUL_VEC Q14 Q22 (Q0 :> LANE_S 2) 32 128 *) + 0x6ea4878a; (* arm_SUB_VEC Q10 Q28 Q4 32 128 *) + 0x6f8741ae; (* arm_MLS_VEC Q14 Q13 (Q7 :> LANE_S 0) 32 128 *) + 0x3dc0400d; (* arm_LDR Q13 X0 (Immediate_Offset (word 256)) *) + 0x4fa0d136; (* arm_SQRDMULH_VEC Q22 Q9 (Q0 :> LANE_S 1) 32 128 *) + 0x4f808128; (* arm_MUL_VEC Q8 Q9 (Q0 :> LANE_S 0) 32 128 *) + 0x4f8383f7; (* arm_MUL_VEC Q23 Q31 (Q3 :> LANE_S 0) 32 128 *) + 0x6f8742c8; (* arm_MLS_VEC Q8 Q22 (Q7 :> LANE_S 0) 32 128 *) + 0x6f874217; (* arm_MLS_VEC Q23 Q16 (Q7 :> LANE_S 0) 32 128 *) + 0x4ea48790; (* arm_ADD_VEC Q16 Q28 Q4 32 128 *) + 0x3dc02416; (* arm_LDR Q22 X0 (Immediate_Offset (word 144)) *) + 0x4f8080c4; (* arm_MUL_VEC Q4 Q6 (Q0 :> LANE_S 0) 32 128 *) + 0x6f874371; (* arm_MLS_VEC Q17 Q27 (Q7 :> LANE_S 0) 32 128 *) + 0x4ea885b5; (* arm_ADD_VEC Q21 Q13 Q8 32 128 *) + 0x6ea885bb; (* arm_SUB_VEC Q27 Q13 Q8 32 128 *) + 0x4fa0dabf; (* arm_SQRDMULH_VEC Q31 Q21 (Q0 :> LANE_S 3) 32 128 *) + 0x3d803c19; (* arm_STR Q25 X0 (Immediate_Offset (word 240)) *) + 0x4f808aa8; (* arm_MUL_VEC Q8 Q21 (Q0 :> LANE_S 2) 32 128 *) + 0x4eb186d8; (* arm_ADD_VEC Q24 Q22 Q17 32 128 *) + 0x6eb186d5; (* arm_SUB_VEC Q21 Q22 Q17 32 128 *) + 0x4fa2da45; (* arm_SQRDMULH_VEC Q5 Q18 (Q2 :> LANE_S 3) 32 128 *) + 0x6f8743e8; (* arm_MLS_VEC Q8 Q31 (Q7 :> LANE_S 0) 32 128 *) + 0x6eae8709; (* arm_SUB_VEC Q9 Q24 Q14 32 128 *) + 0x4fa1d374; (* arm_SQRDMULH_VEC Q20 Q27 (Q1 :> LANE_S 1) 32 128 *) + 0x4f81837a; (* arm_MUL_VEC Q26 Q27 (Q1 :> LANE_S 0) 32 128 *) + 0x6ea88619; (* arm_SUB_VEC Q25 Q16 Q8 32 128 *) + 0x4f828a52; (* arm_MUL_VEC Q18 Q18 (Q2 :> LANE_S 2) 32 128 *) + 0x6eab8736; (* arm_SUB_VEC Q22 Q25 Q11 32 128 *) + 0x6f87429a; (* arm_MLS_VEC Q26 Q20 (Q7 :> LANE_S 0) 32 128 *) + 0x4fa2d134; (* arm_SQRDMULH_VEC Q20 Q9 (Q2 :> LANE_S 1) 32 128 *) + 0x4fa1da6c; (* arm_SQRDMULH_VEC Q12 Q19 (Q1 :> LANE_S 3) 32 128 *) + 0x6eba854f; (* arm_SUB_VEC Q15 Q10 Q26 32 128 *) + 0xf1000484; (* arm_SUBS X4 X4 (rvalue (word 1)) *) + 0xb5fff664; (* arm_CBNZ X4 (word 2096844) *) + 0x4eba854d; (* arm_ADD_VEC Q13 Q10 Q26 32 128 *) + 0x6f8740b2; (* arm_MLS_VEC Q18 Q5 (Q7 :> LANE_S 0) 32 128 *) + 0x3d806016; (* arm_STR Q22 X0 (Immediate_Offset (word 384)) *) + 0x4ea8861b; (* arm_ADD_VEC Q27 Q16 Q8 32 128 *) + 0x4f818a76; (* arm_MUL_VEC Q22 Q19 (Q1 :> LANE_S 2) 32 128 *) + 0x4eae871a; (* arm_ADD_VEC Q26 Q24 Q14 32 128 *) + 0x3dc0441f; (* arm_LDR Q31 X0 (Immediate_Offset (word 272)) *) + 0x6eb785ee; (* arm_SUB_VEC Q14 Q15 Q23 32 128 *) + 0x4eb785f1; (* arm_ADD_VEC Q17 Q15 Q23 32 128 *) + 0x6f874196; (* arm_MLS_VEC Q22 Q12 (Q7 :> LANE_S 0) 32 128 *) + 0x4eb285bc; (* arm_ADD_VEC Q28 Q13 Q18 32 128 *) + 0x3d80e00e; (* arm_STR Q14 X0 (Immediate_Offset (word 896)) *) + 0x4fa0d0d8; (* arm_SQRDMULH_VEC Q24 Q6 (Q0 :> LANE_S 1) 32 128 *) + 0x4eab8725; (* arm_ADD_VEC Q5 Q25 Q11 32 128 *) + 0x6eb285b3; (* arm_SUB_VEC Q19 Q13 Q18 32 128 *) + 0x3d80c011; (* arm_STR Q17 X0 (Immediate_Offset (word 768)) *) + 0x3d804005; (* arm_STR Q5 X0 (Immediate_Offset (word 256)) *) + 0x4f828130; (* arm_MUL_VEC Q16 Q9 (Q2 :> LANE_S 0) 32 128 *) + 0x3dc0c412; (* arm_LDR Q18 X0 (Immediate_Offset (word 784)) *) + 0x3d80a013; (* arm_STR Q19 X0 (Immediate_Offset (word 640)) *) + 0x6f874290; (* arm_MLS_VEC Q16 Q20 (Q7 :> LANE_S 0) 32 128 *) + 0x3d80801c; (* arm_STR Q28 X0 (Immediate_Offset (word 512)) *) + 0x4eb6876d; (* arm_ADD_VEC Q13 Q27 Q22 32 128 *) + 0x3dc0040f; (* arm_LDR Q15 X0 (Immediate_Offset (word 16)) *) + 0x6eb6876a; (* arm_SUB_VEC Q10 Q27 Q22 32 128 *) + 0x6f874304; (* arm_MLS_VEC Q4 Q24 (Q7 :> LANE_S 0) 32 128 *) + 0x3c81040d; (* arm_STR Q13 X0 (Postimmediate_Offset (word 16)) *) + 0x3d801c0a; (* arm_STR Q10 X0 (Immediate_Offset (word 112)) *) + 0x4fa1d3ac; (* arm_SQRDMULH_VEC Q12 Q29 (Q1 :> LANE_S 1) 32 128 *) + 0x4f8183b7; (* arm_MUL_VEC Q23 Q29 (Q1 :> LANE_S 0) 32 128 *) + 0x4f818b48; (* arm_MUL_VEC Q8 Q26 (Q1 :> LANE_S 2) 32 128 *) + 0x4ea485f4; (* arm_ADD_VEC Q20 Q15 Q4 32 128 *) + 0x6ea485e6; (* arm_SUB_VEC Q6 Q15 Q4 32 128 *) + 0x6f874197; (* arm_MLS_VEC Q23 Q12 (Q7 :> LANE_S 0) 32 128 *) + 0x4fa0d256; (* arm_SQRDMULH_VEC Q22 Q18 (Q0 :> LANE_S 1) 32 128 *) + 0x4f808245; (* arm_MUL_VEC Q5 Q18 (Q0 :> LANE_S 0) 32 128 *) + 0x6eb786bc; (* arm_SUB_VEC Q28 Q21 Q23 32 128 *) + 0x4fa1db4a; (* arm_SQRDMULH_VEC Q10 Q26 (Q1 :> LANE_S 3) 32 128 *) + 0x6f8742c5; (* arm_MLS_VEC Q5 Q22 (Q7 :> LANE_S 0) 32 128 *) + 0x4fa3d39e; (* arm_SQRDMULH_VEC Q30 Q28 (Q3 :> LANE_S 1) 32 128 *) + 0x4eb786a4; (* arm_ADD_VEC Q4 Q21 Q23 32 128 *) + 0x6f874148; (* arm_MLS_VEC Q8 Q10 (Q7 :> LANE_S 0) 32 128 *) + 0x4ea587ec; (* arm_ADD_VEC Q12 Q31 Q5 32 128 *) + 0x6ea587e9; (* arm_SUB_VEC Q9 Q31 Q5 32 128 *) + 0x4fa2d899; (* arm_SQRDMULH_VEC Q25 Q4 (Q2 :> LANE_S 3) 32 128 *) + 0x4fa1d12f; (* arm_SQRDMULH_VEC Q15 Q9 (Q1 :> LANE_S 1) 32 128 *) + 0x4fa0d99f; (* arm_SQRDMULH_VEC Q31 Q12 (Q0 :> LANE_S 3) 32 128 *) + 0x4f808992; (* arm_MUL_VEC Q18 Q12 (Q0 :> LANE_S 2) 32 128 *) + 0x4f81812b; (* arm_MUL_VEC Q11 Q9 (Q1 :> LANE_S 0) 32 128 *) + 0x6f8743f2; (* arm_MLS_VEC Q18 Q31 (Q7 :> LANE_S 0) 32 128 *) + 0x4f82889d; (* arm_MUL_VEC Q29 Q4 (Q2 :> LANE_S 2) 32 128 *) + 0x6f87433d; (* arm_MLS_VEC Q29 Q25 (Q7 :> LANE_S 0) 32 128 *) + 0x4eb28697; (* arm_ADD_VEC Q23 Q20 Q18 32 128 *) + 0x6f8741eb; (* arm_MLS_VEC Q11 Q15 (Q7 :> LANE_S 0) 32 128 *) + 0x6eb2869f; (* arm_SUB_VEC Q31 Q20 Q18 32 128 *) + 0x4ea886f1; (* arm_ADD_VEC Q17 Q23 Q8 32 128 *) + 0x4eb087e5; (* arm_ADD_VEC Q5 Q31 Q16 32 128 *) + 0x4f838398; (* arm_MUL_VEC Q24 Q28 (Q3 :> LANE_S 0) 32 128 *) + 0x3c810411; (* arm_STR Q17 X0 (Postimmediate_Offset (word 16)) *) + 0x6eb087f3; (* arm_SUB_VEC Q19 Q31 Q16 32 128 *) + 0x6f8743d8; (* arm_MLS_VEC Q24 Q30 (Q7 :> LANE_S 0) 32 128 *) + 0x3d803c05; (* arm_STR Q5 X0 (Immediate_Offset (word 240)) *) + 0x4eab84df; (* arm_ADD_VEC Q31 Q6 Q11 32 128 *) + 0x6ea886fa; (* arm_SUB_VEC Q26 Q23 Q8 32 128 *) + 0x3d805c13; (* arm_STR Q19 X0 (Immediate_Offset (word 368)) *) + 0x4ebd87e4; (* arm_ADD_VEC Q4 Q31 Q29 32 128 *) + 0x6eab84cd; (* arm_SUB_VEC Q13 Q6 Q11 32 128 *) + 0x3d801c1a; (* arm_STR Q26 X0 (Immediate_Offset (word 112)) *) + 0x6ebd87eb; (* arm_SUB_VEC Q11 Q31 Q29 32 128 *) + 0x6eb885b6; (* arm_SUB_VEC Q22 Q13 Q24 32 128 *) + 0x4eb885b7; (* arm_ADD_VEC Q23 Q13 Q24 32 128 *) + 0x3d807c04; (* arm_STR Q4 X0 (Immediate_Offset (word 496)) *) + 0x3d809c0b; (* arm_STR Q11 X0 (Immediate_Offset (word 624)) *) + 0x3d80bc17; (* arm_STR Q23 X0 (Immediate_Offset (word 752)) *) + 0x3d80dc16; (* arm_STR Q22 X0 (Immediate_Offset (word 880)) *) + 0xaa0303e0; (* arm_MOV X0 X3 *) + 0xd2800104; (* arm_MOV X4 (rvalue (word 8)) *) + 0x3dc01009; (* arm_LDR Q9 X0 (Immediate_Offset (word 64)) *) + 0x3cc40437; (* arm_LDR Q23 X1 (Postimmediate_Offset (word 64)) *) + 0x3dc01855; (* arm_LDR Q21 X2 (Immediate_Offset (word 96)) *) + 0x3dc00801; (* arm_LDR Q1 X0 (Immediate_Offset (word 32)) *) + 0x3cdd002e; (* arm_LDR Q14 X1 (Immediate_Offset (word 18446744073709551568)) *) + 0x3dc0000d; (* arm_LDR Q13 X0 (Immediate_Offset (word 0)) *) + 0x3dc0144b; (* arm_LDR Q11 X2 (Immediate_Offset (word 80)) *) + 0x4fb7d130; (* arm_SQRDMULH_VEC Q16 Q9 (Q23 :> LANE_S 1) 32 128 *) + 0x3dc01411; (* arm_LDR Q17 X0 (Immediate_Offset (word 80)) *) + 0x4f97812f; (* arm_MUL_VEC Q15 Q9 (Q23 :> LANE_S 0) 32 128 *) + 0x3dc01c1e; (* arm_LDR Q30 X0 (Immediate_Offset (word 112)) *) + 0x3dc0181b; (* arm_LDR Q27 X0 (Immediate_Offset (word 96)) *) + 0x3dc00c48; (* arm_LDR Q8 X2 (Immediate_Offset (word 48)) *) + 0x4fb7d22c; (* arm_SQRDMULH_VEC Q12 Q17 (Q23 :> LANE_S 1) 32 128 *) + 0x3dc00c06; (* arm_LDR Q6 X0 (Immediate_Offset (word 48)) *) + 0x6f87420f; (* arm_MLS_VEC Q15 Q16 (Q7 :> LANE_S 0) 32 128 *) + 0x4fb7d372; (* arm_SQRDMULH_VEC Q18 Q27 (Q23 :> LANE_S 1) 32 128 *) + 0x4fb7d3d3; (* arm_SQRDMULH_VEC Q19 Q30 (Q23 :> LANE_S 1) 32 128 *) + 0x4eaf85a5; (* arm_ADD_VEC Q5 Q13 Q15 32 128 *) + 0x4f978379; (* arm_MUL_VEC Q25 Q27 (Q23 :> LANE_S 0) 32 128 *) + 0x6eaf85ba; (* arm_SUB_VEC Q26 Q13 Q15 32 128 *) + 0x6f874259; (* arm_MLS_VEC Q25 Q18 (Q7 :> LANE_S 0) 32 128 *) + 0x4f97822a; (* arm_MUL_VEC Q10 Q17 (Q23 :> LANE_S 0) 32 128 *) + 0x6f87418a; (* arm_MLS_VEC Q10 Q12 (Q7 :> LANE_S 0) 32 128 *) + 0x4f9783c4; (* arm_MUL_VEC Q4 Q30 (Q23 :> LANE_S 0) 32 128 *) + 0x6eb98436; (* arm_SUB_VEC Q22 Q1 Q25 32 128 *) + 0x6f874264; (* arm_MLS_VEC Q4 Q19 (Q7 :> LANE_S 0) 32 128 *) + 0x4eb9843c; (* arm_ADD_VEC Q28 Q1 Q25 32 128 *) + 0x4fb7db93; (* arm_SQRDMULH_VEC Q19 Q28 (Q23 :> LANE_S 3) 32 128 *) + 0x4faed2c9; (* arm_SQRDMULH_VEC Q9 Q22 (Q14 :> LANE_S 1) 32 128 *) + 0x4ea484c2; (* arm_ADD_VEC Q2 Q6 Q4 32 128 *) + 0x4f978b80; (* arm_MUL_VEC Q0 Q28 (Q23 :> LANE_S 2) 32 128 *) + 0x4fb7d85b; (* arm_SQRDMULH_VEC Q27 Q2 (Q23 :> LANE_S 3) 32 128 *) + 0x6ea484d1; (* arm_SUB_VEC Q17 Q6 Q4 32 128 *) + 0x4f978843; (* arm_MUL_VEC Q3 Q2 (Q23 :> LANE_S 2) 32 128 *) + 0x4faed234; (* arm_SQRDMULH_VEC Q20 Q17 (Q14 :> LANE_S 1) 32 128 *) + 0x3dc00401; (* arm_LDR Q1 X0 (Immediate_Offset (word 16)) *) + 0x6f874363; (* arm_MLS_VEC Q3 Q27 (Q7 :> LANE_S 0) 32 128 *) + 0x6f874260; (* arm_MLS_VEC Q0 Q19 (Q7 :> LANE_S 0) 32 128 *) + 0x3cde0030; (* arm_LDR Q16 X1 (Immediate_Offset (word 18446744073709551584)) *) + 0x4eaa843f; (* arm_ADD_VEC Q31 Q1 Q10 32 128 *) + 0x4f8e823e; (* arm_MUL_VEC Q30 Q17 (Q14 :> LANE_S 0) 32 128 *) + 0x6f87429e; (* arm_MLS_VEC Q30 Q20 (Q7 :> LANE_S 0) 32 128 *) + 0x4ea387fb; (* arm_ADD_VEC Q27 Q31 Q3 32 128 *) + 0x6eaa8437; (* arm_SUB_VEC Q23 Q1 Q10 32 128 *) + 0x6ea387f8; (* arm_SUB_VEC Q24 Q31 Q3 32 128 *) + 0x4faedb64; (* arm_SQRDMULH_VEC Q4 Q27 (Q14 :> LANE_S 3) 32 128 *) + 0x4fb0d30a; (* arm_SQRDMULH_VEC Q10 Q24 (Q16 :> LANE_S 1) 32 128 *) + 0x4f908312; (* arm_MUL_VEC Q18 Q24 (Q16 :> LANE_S 0) 32 128 *) + 0x4ebe86ef; (* arm_ADD_VEC Q15 Q23 Q30 32 128 *) + 0x6ebe86f7; (* arm_SUB_VEC Q23 Q23 Q30 32 128 *) + 0x4f8e8b7d; (* arm_MUL_VEC Q29 Q27 (Q14 :> LANE_S 2) 32 128 *) + 0x6ea084a2; (* arm_SUB_VEC Q2 Q5 Q0 32 128 *) + 0x4ea084ac; (* arm_ADD_VEC Q12 Q5 Q0 32 128 *) + 0x6f874152; (* arm_MLS_VEC Q18 Q10 (Q7 :> LANE_S 0) 32 128 *) + 0x3cdf0023; (* arm_LDR Q3 X1 (Immediate_Offset (word 18446744073709551600)) *) + 0x6f87409d; (* arm_MLS_VEC Q29 Q4 (Q7 :> LANE_S 0) 32 128 *) + 0x4f8e82c4; (* arm_MUL_VEC Q4 Q22 (Q14 :> LANE_S 0) 32 128 *) + 0x4eb28441; (* arm_ADD_VEC Q1 Q2 Q18 32 128 *) + 0x6eb28458; (* arm_SUB_VEC Q24 Q2 Q18 32 128 *) + 0x6f874124; (* arm_MLS_VEC Q4 Q9 (Q7 :> LANE_S 0) 32 128 *) + 0x3dc00454; (* arm_LDR Q20 X2 (Immediate_Offset (word 16)) *) + 0x4ebd8599; (* arm_ADD_VEC Q25 Q12 Q29 32 128 *) + 0x4f8382e9; (* arm_MUL_VEC Q9 Q23 (Q3 :> LANE_S 0) 32 128 *) + 0x6ebd8585; (* arm_SUB_VEC Q5 Q12 Q29 32 128 *) + 0x4fa3d2ff; (* arm_SQRDMULH_VEC Q31 Q23 (Q3 :> LANE_S 1) 32 128 *) + 0x4e986826; (* arm_TRN2 Q6 Q1 Q24 32 128 *) + 0x4e856b2a; (* arm_TRN2 Q10 Q25 Q5 32 128 *) + 0x4fb0d9ed; (* arm_SQRDMULH_VEC Q13 Q15 (Q16 :> LANE_S 3) 32 128 *) + 0x4ec6695e; (* arm_TRN2 Q30 Q10 Q6 64 128 *) + 0x3ccc0443; (* arm_LDR Q3 X2 (Postimmediate_Offset (word 192)) *) + 0x4f9089ec; (* arm_MUL_VEC Q12 Q15 (Q16 :> LANE_S 2) 32 128 *) + 0x4ec6295b; (* arm_TRN1 Q27 Q10 Q6 64 128 *) + 0x6f8743e9; (* arm_MLS_VEC Q9 Q31 (Q7 :> LANE_S 0) 32 128 *) + 0x4e852b36; (* arm_TRN1 Q22 Q25 Q5 32 128 *) + 0x6ea48746; (* arm_SUB_VEC Q6 Q26 Q4 32 128 *) + 0x6f8741ac; (* arm_MLS_VEC Q12 Q13 (Q7 :> LANE_S 0) 32 128 *) + 0x4e982821; (* arm_TRN1 Q1 Q1 Q24 32 128 *) + 0x4ea4874d; (* arm_ADD_VEC Q13 Q26 Q4 32 128 *) + 0x4ec16aca; (* arm_TRN2 Q10 Q22 Q1 64 128 *) + 0x4ea39fdc; (* arm_MUL_VEC Q28 Q30 Q3 32 128 *) + 0x6ea984df; (* arm_SUB_VEC Q31 Q6 Q9 32 128 *) + 0xd1000484; (* arm_SUB X4 X4 (rvalue (word 1)) *) + 0x4eac85a2; (* arm_ADD_VEC Q2 Q13 Q12 32 128 *) + 0x6eb4b7c5; (* arm_SQRDMULH_VEC Q5 Q30 Q20 32 128 *) + 0x6eac85b9; (* arm_SUB_VEC Q25 Q13 Q12 32 128 *) + 0x4ea984d1; (* arm_ADD_VEC Q17 Q6 Q9 32 128 *) + 0x4ea39d53; (* arm_MUL_VEC Q19 Q10 Q3 32 128 *) + 0x4e996844; (* arm_TRN2 Q4 Q2 Q25 32 128 *) + 0x3cdb0058; (* arm_LDR Q24 X2 (Immediate_Offset (word 18446744073709551536)) *) + 0x4e9f6a3d; (* arm_TRN2 Q29 Q17 Q31 32 128 *) + 0x6eb4b54f; (* arm_SQRDMULH_VEC Q15 Q10 Q20 32 128 *) + 0x6f8740bc; (* arm_MLS_VEC Q28 Q5 (Q7 :> LANE_S 0) 32 128 *) + 0x4edd6883; (* arm_TRN2 Q3 Q4 Q29 64 128 *) + 0x6eb8b46c; (* arm_SQRDMULH_VEC Q12 Q3 Q24 32 128 *) + 0x4eb59c70; (* arm_MUL_VEC Q16 Q3 Q21 32 128 *) + 0x6f8741f3; (* arm_MLS_VEC Q19 Q15 (Q7 :> LANE_S 0) 32 128 *) + 0x3cd6004a; (* arm_LDR Q10 X2 (Immediate_Offset (word 18446744073709551456)) *) + 0x4ebc876d; (* arm_ADD_VEC Q13 Q27 Q28 32 128 *) + 0x6f874190; (* arm_MLS_VEC Q16 Q12 (Q7 :> LANE_S 0) 32 128 *) + 0x6ea8b5a9; (* arm_SQRDMULH_VEC Q9 Q13 Q8 32 128 *) + 0x6ebc877e; (* arm_SUB_VEC Q30 Q27 Q28 32 128 *) + 0x3cc40432; (* arm_LDR Q18 X1 (Postimmediate_Offset (word 64)) *) + 0x4eaa9da8; (* arm_MUL_VEC Q8 Q13 Q10 32 128 *) + 0x3dc0340a; (* arm_LDR Q10 X0 (Immediate_Offset (word 208)) *) + 0x6eabb7ce; (* arm_SQRDMULH_VEC Q14 Q30 Q11 32 128 *) + 0x3dc03817; (* arm_LDR Q23 X0 (Immediate_Offset (word 224)) *) + 0x4fb2d14d; (* arm_SQRDMULH_VEC Q13 Q10 (Q18 :> LANE_S 1) 32 128 *) + 0x4fb2d2ec; (* arm_SQRDMULH_VEC Q12 Q23 (Q18 :> LANE_S 1) 32 128 *) + 0x3cd80046; (* arm_LDR Q6 X2 (Immediate_Offset (word 18446744073709551488)) *) + 0x4f928143; (* arm_MUL_VEC Q3 Q10 (Q18 :> LANE_S 0) 32 128 *) + 0x6f8741a3; (* arm_MLS_VEC Q3 Q13 (Q7 :> LANE_S 0) 32 128 *) + 0x3dc03c0d; (* arm_LDR Q13 X0 (Immediate_Offset (word 240)) *) + 0x4e99285b; (* arm_TRN1 Q27 Q2 Q25 32 128 *) + 0x4ea69fc2; (* arm_MUL_VEC Q2 Q30 Q6 32 128 *) + 0x4e9f2a34; (* arm_TRN1 Q20 Q17 Q31 32 128 *) + 0x4edd2899; (* arm_TRN1 Q25 Q4 Q29 64 128 *) + 0x4fb2d1aa; (* arm_SQRDMULH_VEC Q10 Q13 (Q18 :> LANE_S 1) 32 128 *) + 0x4ed46b65; (* arm_TRN2 Q5 Q27 Q20 64 128 *) + 0x3cdf0046; (* arm_LDR Q6 X2 (Immediate_Offset (word 18446744073709551600)) *) + 0x6f874128; (* arm_MLS_VEC Q8 Q9 (Q7 :> LANE_S 0) 32 128 *) + 0x6eb0872f; (* arm_SUB_VEC Q15 Q25 Q16 32 128 *) + 0x6eb8b4bf; (* arm_SQRDMULH_VEC Q31 Q5 Q24 32 128 *) + 0x6ea6b5fe; (* arm_SQRDMULH_VEC Q30 Q15 Q6 32 128 *) + 0x3cdd0049; (* arm_LDR Q9 X2 (Immediate_Offset (word 18446744073709551568)) *) + 0x4eb59ca4; (* arm_MUL_VEC Q4 Q5 Q21 32 128 *) + 0x3cdc0055; (* arm_LDR Q21 X2 (Immediate_Offset (word 18446744073709551552)) *) + 0x3cde0046; (* arm_LDR Q6 X2 (Immediate_Offset (word 18446744073709551584)) *) + 0x4eb08725; (* arm_ADD_VEC Q5 Q25 Q16 32 128 *) + 0x6f8743e4; (* arm_MLS_VEC Q4 Q31 (Q7 :> LANE_S 0) 32 128 *) + 0x4eb59ca0; (* arm_MUL_VEC Q0 Q5 Q21 32 128 *) + 0x4f9281b1; (* arm_MUL_VEC Q17 Q13 (Q18 :> LANE_S 0) 32 128 *) + 0x6f874151; (* arm_MLS_VEC Q17 Q10 (Q7 :> LANE_S 0) 32 128 *) + 0x3dc02c1c; (* arm_LDR Q28 X0 (Immediate_Offset (word 176)) *) + 0x6ea9b4ba; (* arm_SQRDMULH_VEC Q26 Q5 Q9 32 128 *) + 0x4ea69de9; (* arm_MUL_VEC Q9 Q15 Q6 32 128 *) + 0x4ec12ac6; (* arm_TRN1 Q6 Q22 Q1 64 128 *) + 0x6f8743c9; (* arm_MLS_VEC Q9 Q30 (Q7 :> LANE_S 0) 32 128 *) + 0x4eb18799; (* arm_ADD_VEC Q25 Q28 Q17 32 128 *) + 0x6f8741c2; (* arm_MLS_VEC Q2 Q14 (Q7 :> LANE_S 0) 32 128 *) + 0x4ed42b65; (* arm_TRN1 Q5 Q27 Q20 64 128 *) + 0x3dc00454; (* arm_LDR Q20 X2 (Immediate_Offset (word 16)) *) + 0x4f928b3d; (* arm_MUL_VEC Q29 Q25 (Q18 :> LANE_S 2) 32 128 *) + 0x4eb384cf; (* arm_ADD_VEC Q15 Q6 Q19 32 128 *) + 0x3dc0301e; (* arm_LDR Q30 X0 (Immediate_Offset (word 192)) *) + 0x6eb384d3; (* arm_SUB_VEC Q19 Q6 Q19 32 128 *) + 0x4ea885ff; (* arm_ADD_VEC Q31 Q15 Q8 32 128 *) + 0x6f874340; (* arm_MLS_VEC Q0 Q26 (Q7 :> LANE_S 0) 32 128 *) + 0x3cdd002e; (* arm_LDR Q14 X1 (Immediate_Offset (word 18446744073709551568)) *) + 0x4ea28675; (* arm_ADD_VEC Q21 Q19 Q2 32 128 *) + 0x6ea28678; (* arm_SUB_VEC Q24 Q19 Q2 32 128 *) + 0x4fb2db3b; (* arm_SQRDMULH_VEC Q27 Q25 (Q18 :> LANE_S 3) 32 128 *) + 0x6ea885fa; (* arm_SUB_VEC Q26 Q15 Q8 32 128 *) + 0x3dc02402; (* arm_LDR Q2 X0 (Immediate_Offset (word 144)) *) + 0x4f9283d0; (* arm_MUL_VEC Q16 Q30 (Q18 :> LANE_S 0) 32 128 *) + 0x6eb18799; (* arm_SUB_VEC Q25 Q28 Q17 32 128 *) + 0x4e9a2beb; (* arm_TRN1 Q11 Q31 Q26 32 128 *) + 0x3dc02801; (* arm_LDR Q1 X0 (Immediate_Offset (word 160)) *) + 0x4e982aa6; (* arm_TRN1 Q6 Q21 Q24 32 128 *) + 0x4faed32d; (* arm_SQRDMULH_VEC Q13 Q25 (Q14 :> LANE_S 1) 32 128 *) + 0x4ea38448; (* arm_ADD_VEC Q8 Q2 Q3 32 128 *) + 0x4ec6697c; (* arm_TRN2 Q28 Q11 Q6 64 128 *) + 0x4fb2d3d3; (* arm_SQRDMULH_VEC Q19 Q30 (Q18 :> LANE_S 1) 32 128 *) + 0x6ea484aa; (* arm_SUB_VEC Q10 Q5 Q4 32 128 *) + 0x3cde0036; (* arm_LDR Q22 X1 (Immediate_Offset (word 18446744073709551584)) *) + 0x3d80081c; (* arm_STR Q28 X0 (Immediate_Offset (word 32)) *) + 0x6f87437d; (* arm_MLS_VEC Q29 Q27 (Q7 :> LANE_S 0) 32 128 *) + 0x4ea9854f; (* arm_ADD_VEC Q15 Q10 Q9 32 128 *) + 0x4f8e8339; (* arm_MUL_VEC Q25 Q25 (Q14 :> LANE_S 0) 32 128 *) + 0x3cdf003b; (* arm_LDR Q27 X1 (Immediate_Offset (word 18446744073709551600)) *) + 0x4e9a6bf1; (* arm_TRN2 Q17 Q31 Q26 32 128 *) + 0x4e986ab5; (* arm_TRN2 Q21 Q21 Q24 32 128 *) + 0x6f874270; (* arm_MLS_VEC Q16 Q19 (Q7 :> LANE_S 0) 32 128 *) + 0x6ebd8518; (* arm_SUB_VEC Q24 Q8 Q29 32 128 *) + 0x6ea9854a; (* arm_SUB_VEC Q10 Q10 Q9 32 128 *) + 0x6f8741b9; (* arm_MLS_VEC Q25 Q13 (Q7 :> LANE_S 0) 32 128 *) + 0x4ec6296d; (* arm_TRN1 Q13 Q11 Q6 64 128 *) + 0x3dc0201c; (* arm_LDR Q28 X0 (Immediate_Offset (word 128)) *) + 0x4fb6d31e; (* arm_SQRDMULH_VEC Q30 Q24 (Q22 :> LANE_S 1) 32 128 *) + 0x4ed56a33; (* arm_TRN2 Q19 Q17 Q21 64 128 *) + 0x4ed52a26; (* arm_TRN1 Q6 Q17 Q21 64 128 *) + 0x4f9282ff; (* arm_MUL_VEC Q31 Q23 (Q18 :> LANE_S 0) 32 128 *) + 0x3c88040d; (* arm_STR Q13 X0 (Postimmediate_Offset (word 128)) *) + 0x3c990006; (* arm_STR Q6 X0 (Immediate_Offset (word 18446744073709551504)) *) + 0x3c9b0013; (* arm_STR Q19 X0 (Immediate_Offset (word 18446744073709551536)) *) + 0x3dc0144b; (* arm_LDR Q11 X2 (Immediate_Offset (word 80)) *) + 0x6f87419f; (* arm_MLS_VEC Q31 Q12 (Q7 :> LANE_S 0) 32 128 *) + 0x3dc01855; (* arm_LDR Q21 X2 (Immediate_Offset (word 96)) *) + 0x4e8a29e9; (* arm_TRN1 Q9 Q15 Q10 32 128 *) + 0x4e8a69e6; (* arm_TRN2 Q6 Q15 Q10 32 128 *) + 0x4f968318; (* arm_MUL_VEC Q24 Q24 (Q22 :> LANE_S 0) 32 128 *) + 0x6ea3844a; (* arm_SUB_VEC Q10 Q2 Q3 32 128 *) + 0x3ccc0443; (* arm_LDR Q3 X2 (Postimmediate_Offset (word 192)) *) + 0x6f8743d8; (* arm_MLS_VEC Q24 Q30 (Q7 :> LANE_S 0) 32 128 *) + 0x4ebd851a; (* arm_ADD_VEC Q26 Q8 Q29 32 128 *) + 0x3cd70048; (* arm_LDR Q8 X2 (Immediate_Offset (word 18446744073709551472)) *) + 0x4ea484b1; (* arm_ADD_VEC Q17 Q5 Q4 32 128 *) + 0x4faedb42; (* arm_SQRDMULH_VEC Q2 Q26 (Q14 :> LANE_S 3) 32 128 *) + 0x6ebf842d; (* arm_SUB_VEC Q13 Q1 Q31 32 128 *) + 0x4ebf843e; (* arm_ADD_VEC Q30 Q1 Q31 32 128 *) + 0x4eb9854f; (* arm_ADD_VEC Q15 Q10 Q25 32 128 *) + 0x4faed1b3; (* arm_SQRDMULH_VEC Q19 Q13 (Q14 :> LANE_S 1) 32 128 *) + 0x6eb98559; (* arm_SUB_VEC Q25 Q10 Q25 32 128 *) + 0x4f8e81bd; (* arm_MUL_VEC Q29 Q13 (Q14 :> LANE_S 0) 32 128 *) + 0x6eb08785; (* arm_SUB_VEC Q5 Q28 Q16 32 128 *) + 0x4fb2dbc4; (* arm_SQRDMULH_VEC Q4 Q30 (Q18 :> LANE_S 3) 32 128 *) + 0x6ea08637; (* arm_SUB_VEC Q23 Q17 Q0 32 128 *) + 0x4ea0863f; (* arm_ADD_VEC Q31 Q17 Q0 32 128 *) + 0x4f928bd2; (* arm_MUL_VEC Q18 Q30 (Q18 :> LANE_S 2) 32 128 *) + 0x4eb08781; (* arm_ADD_VEC Q1 Q28 Q16 32 128 *) + 0x4e976bec; (* arm_TRN2 Q12 Q31 Q23 32 128 *) + 0x6f87427d; (* arm_MLS_VEC Q29 Q19 (Q7 :> LANE_S 0) 32 128 *) + 0x4e972bed; (* arm_TRN1 Q13 Q31 Q23 32 128 *) + 0x4ec6699e; (* arm_TRN2 Q30 Q12 Q6 64 128 *) + 0x6f874092; (* arm_MLS_VEC Q18 Q4 (Q7 :> LANE_S 0) 32 128 *) + 0x4ec969aa; (* arm_TRN2 Q10 Q13 Q9 64 128 *) + 0x4ec929bf; (* arm_TRN1 Q31 Q13 Q9 64 128 *) + 0x4f8e8b53; (* arm_MUL_VEC Q19 Q26 (Q14 :> LANE_S 2) 32 128 *) + 0x4ec6298c; (* arm_TRN1 Q12 Q12 Q6 64 128 *) + 0x6ebd84a6; (* arm_SUB_VEC Q6 Q5 Q29 32 128 *) + 0x6f874053; (* arm_MLS_VEC Q19 Q2 (Q7 :> LANE_S 0) 32 128 *) + 0x4ebd84ad; (* arm_ADD_VEC Q13 Q5 Q29 32 128 *) + 0x3c9e000a; (* arm_STR Q10 X0 (Immediate_Offset (word 18446744073709551584)) *) + 0x6eb2842a; (* arm_SUB_VEC Q10 Q1 Q18 32 128 *) + 0x4eb2843c; (* arm_ADD_VEC Q28 Q1 Q18 32 128 *) + 0x4fbbd325; (* arm_SQRDMULH_VEC Q5 Q25 (Q27 :> LANE_S 1) 32 128 *) + 0x3c9c001f; (* arm_STR Q31 X0 (Immediate_Offset (word 18446744073709551552)) *) + 0x4eb8855a; (* arm_ADD_VEC Q26 Q10 Q24 32 128 *) + 0x6eb8855f; (* arm_SUB_VEC Q31 Q10 Q24 32 128 *) + 0x4f9b8329; (* arm_MUL_VEC Q9 Q25 (Q27 :> LANE_S 0) 32 128 *) + 0x3c9d000c; (* arm_STR Q12 X0 (Immediate_Offset (word 18446744073709551568)) *) + 0x6eb38798; (* arm_SUB_VEC Q24 Q28 Q19 32 128 *) + 0x4fb6d9ea; (* arm_SQRDMULH_VEC Q10 Q15 (Q22 :> LANE_S 3) 32 128 *) + 0x4e9f2b41; (* arm_TRN1 Q1 Q26 Q31 32 128 *) + 0x3c9f001e; (* arm_STR Q30 X0 (Immediate_Offset (word 18446744073709551600)) *) + 0x4eb3879e; (* arm_ADD_VEC Q30 Q28 Q19 32 128 *) + 0x6f8740a9; (* arm_MLS_VEC Q9 Q5 (Q7 :> LANE_S 0) 32 128 *) + 0x4e9f6b59; (* arm_TRN2 Q25 Q26 Q31 32 128 *) + 0x4e986bce; (* arm_TRN2 Q14 Q30 Q24 32 128 *) + 0x4f9689ec; (* arm_MUL_VEC Q12 Q15 (Q22 :> LANE_S 2) 32 128 *) + 0x4e982bd6; (* arm_TRN1 Q22 Q30 Q24 32 128 *) + 0x4ed929db; (* arm_TRN1 Q27 Q14 Q25 64 128 *) + 0x6f87414c; (* arm_MLS_VEC Q12 Q10 (Q7 :> LANE_S 0) 32 128 *) + 0x4ed969de; (* arm_TRN2 Q30 Q14 Q25 64 128 *) + 0x6ea984df; (* arm_SUB_VEC Q31 Q6 Q9 32 128 *) + 0x4ec16aca; (* arm_TRN2 Q10 Q22 Q1 64 128 *) + 0x4ea39fdc; (* arm_MUL_VEC Q28 Q30 Q3 32 128 *) + 0xf1000484; (* arm_SUBS X4 X4 (rvalue (word 1)) *) + 0xb5ffeb64; (* arm_CBNZ X4 (word 2096492) *) + 0x4ea984c9; (* arm_ADD_VEC Q9 Q6 Q9 32 128 *) + 0x6eb4b7c6; (* arm_SQRDMULH_VEC Q6 Q30 Q20 32 128 *) + 0x3cd60058; (* arm_LDR Q24 X2 (Immediate_Offset (word 18446744073709551456)) *) + 0x4eac85b9; (* arm_ADD_VEC Q25 Q13 Q12 32 128 *) + 0x6eac85af; (* arm_SUB_VEC Q15 Q13 Q12 32 128 *) + 0x4ea39d53; (* arm_MUL_VEC Q19 Q10 Q3 32 128 *) + 0x4e9f6925; (* arm_TRN2 Q5 Q9 Q31 32 128 *) + 0x6eb4b543; (* arm_SQRDMULH_VEC Q3 Q10 Q20 32 128 *) + 0x4e8f6b2a; (* arm_TRN2 Q10 Q25 Q15 32 128 *) + 0x6f8740dc; (* arm_MLS_VEC Q28 Q6 (Q7 :> LANE_S 0) 32 128 *) + 0x4ec5694d; (* arm_TRN2 Q13 Q10 Q5 64 128 *) + 0x3cdb005e; (* arm_LDR Q30 X2 (Immediate_Offset (word 18446744073709551536)) *) + 0x4eb59dac; (* arm_MUL_VEC Q12 Q13 Q21 32 128 *) + 0x6f874073; (* arm_MLS_VEC Q19 Q3 (Q7 :> LANE_S 0) 32 128 *) + 0x4ebc8774; (* arm_ADD_VEC Q20 Q27 Q28 32 128 *) + 0x6ebeb5ad; (* arm_SQRDMULH_VEC Q13 Q13 Q30 32 128 *) + 0x6ebc8763; (* arm_SUB_VEC Q3 Q27 Q28 32 128 *) + 0x4eb89e98; (* arm_MUL_VEC Q24 Q20 Q24 32 128 *) + 0x6eabb466; (* arm_SQRDMULH_VEC Q6 Q3 Q11 32 128 *) + 0x3cd8005b; (* arm_LDR Q27 X2 (Immediate_Offset (word 18446744073709551488)) *) + 0x6f8741ac; (* arm_MLS_VEC Q12 Q13 (Q7 :> LANE_S 0) 32 128 *) + 0x4e8f2b39; (* arm_TRN1 Q25 Q25 Q15 32 128 *) + 0x4ebb9c7b; (* arm_MUL_VEC Q27 Q3 Q27 32 128 *) + 0x4e9f293f; (* arm_TRN1 Q31 Q9 Q31 32 128 *) + 0x4ec52943; (* arm_TRN1 Q3 Q10 Q5 64 128 *) + 0x3cdd004d; (* arm_LDR Q13 X2 (Immediate_Offset (word 18446744073709551568)) *) + 0x3cdc004f; (* arm_LDR Q15 X2 (Immediate_Offset (word 18446744073709551552)) *) + 0x6ea8b689; (* arm_SQRDMULH_VEC Q9 Q20 Q8 32 128 *) + 0x4edf6b34; (* arm_TRN2 Q20 Q25 Q31 64 128 *) + 0x3cdf004a; (* arm_LDR Q10 X2 (Immediate_Offset (word 18446744073709551600)) *) + 0x6f8740db; (* arm_MLS_VEC Q27 Q6 (Q7 :> LANE_S 0) 32 128 *) + 0x4eac8465; (* arm_ADD_VEC Q5 Q3 Q12 32 128 *) + 0x6eac8466; (* arm_SUB_VEC Q6 Q3 Q12 32 128 *) + 0x6ebeb683; (* arm_SQRDMULH_VEC Q3 Q20 Q30 32 128 *) + 0x4ec12acc; (* arm_TRN1 Q12 Q22 Q1 64 128 *) + 0x6eaab4ca; (* arm_SQRDMULH_VEC Q10 Q6 Q10 32 128 *) + 0x6f874138; (* arm_MLS_VEC Q24 Q9 (Q7 :> LANE_S 0) 32 128 *) + 0x6eb38589; (* arm_SUB_VEC Q9 Q12 Q19 32 128 *) + 0x4edf2b39; (* arm_TRN1 Q25 Q25 Q31 64 128 *) + 0x6eadb4bf; (* arm_SQRDMULH_VEC Q31 Q5 Q13 32 128 *) + 0x4ebb853e; (* arm_ADD_VEC Q30 Q9 Q27 32 128 *) + 0x4eb3858d; (* arm_ADD_VEC Q13 Q12 Q19 32 128 *) + 0x4eb59e81; (* arm_MUL_VEC Q1 Q20 Q21 32 128 *) + 0x3cde004c; (* arm_LDR Q12 X2 (Immediate_Offset (word 18446744073709551584)) *) + 0x4eb885b5; (* arm_ADD_VEC Q21 Q13 Q24 32 128 *) + 0x6eb885ad; (* arm_SUB_VEC Q13 Q13 Q24 32 128 *) + 0x6f874061; (* arm_MLS_VEC Q1 Q3 (Q7 :> LANE_S 0) 32 128 *) + 0x6ebb8523; (* arm_SUB_VEC Q3 Q9 Q27 32 128 *) + 0x4eac9cc9; (* arm_MUL_VEC Q9 Q6 Q12 32 128 *) + 0x4e8d6aac; (* arm_TRN2 Q12 Q21 Q13 32 128 *) + 0x4e832bc6; (* arm_TRN1 Q6 Q30 Q3 32 128 *) + 0x4e836bde; (* arm_TRN2 Q30 Q30 Q3 32 128 *) + 0x6f874149; (* arm_MLS_VEC Q9 Q10 (Q7 :> LANE_S 0) 32 128 *) + 0x4e8d2aad; (* arm_TRN1 Q13 Q21 Q13 32 128 *) + 0x4eaf9caf; (* arm_MUL_VEC Q15 Q5 Q15 32 128 *) + 0x6ea18723; (* arm_SUB_VEC Q3 Q25 Q1 32 128 *) + 0x4ea18725; (* arm_ADD_VEC Q5 Q25 Q1 32 128 *) + 0x6f8743ef; (* arm_MLS_VEC Q15 Q31 (Q7 :> LANE_S 0) 32 128 *) + 0x4ec629b5; (* arm_TRN1 Q21 Q13 Q6 64 128 *) + 0x4ec669a6; (* arm_TRN2 Q6 Q13 Q6 64 128 *) + 0x4ea9846a; (* arm_ADD_VEC Q10 Q3 Q9 32 128 *) + 0x6ea9846d; (* arm_SUB_VEC Q13 Q3 Q9 32 128 *) + 0x3c880415; (* arm_STR Q21 X0 (Postimmediate_Offset (word 128)) *) + 0x4ede2983; (* arm_TRN1 Q3 Q12 Q30 64 128 *) + 0x4ede699f; (* arm_TRN2 Q31 Q12 Q30 64 128 *) + 0x4e8d2955; (* arm_TRN1 Q21 Q10 Q13 32 128 *) + 0x6eaf84be; (* arm_SUB_VEC Q30 Q5 Q15 32 128 *) + 0x4eaf84ac; (* arm_ADD_VEC Q12 Q5 Q15 32 128 *) + 0x3c990003; (* arm_STR Q3 X0 (Immediate_Offset (word 18446744073709551504)) *) + 0x4e8d694d; (* arm_TRN2 Q13 Q10 Q13 32 128 *) + 0x4e9e2993; (* arm_TRN1 Q19 Q12 Q30 32 128 *) + 0x4e9e698c; (* arm_TRN2 Q12 Q12 Q30 32 128 *) + 0x3c9a0006; (* arm_STR Q6 X0 (Immediate_Offset (word 18446744073709551520)) *) + 0x3c9b001f; (* arm_STR Q31 X0 (Immediate_Offset (word 18446744073709551536)) *) + 0x4ed52a6a; (* arm_TRN1 Q10 Q19 Q21 64 128 *) + 0x4ed56a63; (* arm_TRN2 Q3 Q19 Q21 64 128 *) + 0x4ecd2995; (* arm_TRN1 Q21 Q12 Q13 64 128 *) + 0x4ecd698d; (* arm_TRN2 Q13 Q12 Q13 64 128 *) + 0x3c9c000a; (* arm_STR Q10 X0 (Immediate_Offset (word 18446744073709551552)) *) + 0x3c9e0003; (* arm_STR Q3 X0 (Immediate_Offset (word 18446744073709551584)) *) + 0x3c9f000d; (* arm_STR Q13 X0 (Immediate_Offset (word 18446744073709551600)) *) + 0x3c9d0015; (* arm_STR Q21 X0 (Immediate_Offset (word 18446744073709551568)) *) + 0x6d4027e8; (* arm_LDP D8 D9 SP (Immediate_Offset (iword (&0))) *) + 0x6d412fea; (* arm_LDP D10 D11 SP (Immediate_Offset (iword (&16))) *) + 0x6d4237ec; (* arm_LDP D12 D13 SP (Immediate_Offset (iword (&32))) *) + 0x6d433fee; (* arm_LDP D14 D15 SP (Immediate_Offset (iword (&48))) *) + 0x910103ff; (* arm_ADD SP SP (rvalue (word 64)) *) + 0xd65f03c0 (* arm_RET X30 *) +];; +(*** BYTECODE END ***) + +let MLDSA_NTT_EXEC = ARM_MK_EXEC_RULE mldsa_ntt_mc;; + +(* ------------------------------------------------------------------------- *) +(* Correctness proof. *) +(* ------------------------------------------------------------------------- *) + +let MLDSA_NTT_CORRECT = prove + (`!a z_012345 z_67 (zetas_012345:int32 list) (zetas_67:int32 list) x pc. + ALL (nonoverlapping (a,1024)) + [(word pc,0x920); (z_012345,576); (z_67,1536)] + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mldsa_ntt_mc /\ + read PC s = word (pc + 0x14) /\ + C_ARGUMENTS [a; z_012345; z_67] s /\ + wordlist_from_memory(z_012345,144) s = zetas_012345 /\ + wordlist_from_memory(z_67,384) s = zetas_67 /\ + !i. i < 256 + ==> read(memory :> bytes32(word_add a (word(4 * i)))) s = + x i) + (\s. read PC s = word(pc + 0x908) /\ + (zetas_012345 = MAP iword ntt_zetas_layer012345 /\ + zetas_67 = MAP iword ntt_zetas_layer67 /\ + (!i. i < 256 ==> abs(ival(x i)) <= &8380416) + ==> !i. i < 256 + ==> let zi = + read(memory :> bytes32(word_add a (word(4 * i)))) s in + (ival zi == arm_mldsa_forward_ntt (ival o x) i) (mod &8380417) /\ + abs(ival zi) <= &75423752)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [Q8; Q9; Q10; Q11; Q12; Q13; Q14; Q15] ,, + MAYCHANGE [memory :> bytes(a,1024)])`, + MAP_EVERY X_GEN_TAC + [`a:int64`; `z_012345:int64`; `z_67:int64`; `zetas_012345:int32 list`; + `zetas_67:int32 list`; `x:num->int32`; `pc:num`] THEN + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; C_ARGUMENTS; + NONOVERLAPPING_CLAUSES; ALL] THEN + DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN + + (*** Globalize the assumptions on zeta constants by case splitting ***) + + ASM_CASES_TAC + `zetas_012345:int32 list = MAP iword ntt_zetas_layer012345 /\ + zetas_67:int32 list = MAP iword ntt_zetas_layer67` THEN + ASM_REWRITE_TAC[CONJ_ASSOC] THEN REWRITE_TAC[GSYM CONJ_ASSOC] THENL + [FIRST_X_ASSUM(CONJUNCTS_THEN SUBST1_TAC); + ARM_QUICKSIM_TAC MLDSA_NTT_EXEC + [`read X0 s = a`; `read X1 s = z`; `read X2 s = w`; + `read X3 s = i`; `read X4 s = i`] + (1--1959)] THEN + + (*** Manually expand the cases in the hypotheses ***) + + CONV_TAC(RATOR_CONV(LAND_CONV(ONCE_DEPTH_CONV + (EXPAND_CASES_CONV THENC ONCE_DEPTH_CONV NUM_MULT_CONV)))) THEN + CONV_TAC(ONCE_DEPTH_CONV WORDLIST_FROM_MEMORY_CONV) THEN + REWRITE_TAC[ntt_zetas_layer012345; ntt_zetas_layer67] THEN + REWRITE_TAC[MAP; CONS_11] THEN CONV_TAC(ONCE_DEPTH_CONV WORD_IWORD_CONV) THEN + REWRITE_TAC[WORD_ADD_0] THEN ENSURES_INIT_TAC "s0" THEN + + (*** Manually restructure to match the 128-bit loads. It would be nicer + *** if the simulation machinery did this automatically. + ***) + + MEMORY_128_FROM_32_TAC "a" 0 64 THEN + MEMORY_128_FROM_32_TAC "z_012345" 0 36 THEN + MEMORY_128_FROM_32_TAC "z_67" 0 96 THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN CONV_TAC WORD_REDUCE_CONV THEN + DISCARD_MATCHING_ASSUMPTIONS [`read (memory :> bytes32 a) s = x`] THEN + REPEAT STRIP_TAC THEN + + (*** Simulate all the way to the end, in effect unrolling loops ***) + + MAP_EVERY (fun n -> ARM_STEPS_TAC MLDSA_NTT_EXEC [n] THEN + SIMD_SIMPLIFY_ABBREV_TAC[arm_mldsa_barmul] []) + (1--1959) THEN + + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + + (*** Reverse the restructuring by splitting the 128-bit words up ***) + + REPEAT(FIRST_X_ASSUM(STRIP_ASSUME_TAC o + CONV_RULE(SIMD_SIMPLIFY_CONV[]) o + CONV_RULE(READ_MEMORY_SPLIT_CONV 2) o + check (can (term_match [] `read qqq s:int128 = xxx`) o concl))) THEN + + (*** Expand and substitute in the conclusion we want to prove ***) + + DISCH_TAC THEN + CONV_TAC(ONCE_DEPTH_CONV let_CONV) THEN REWRITE_TAC[INT_ABS_BOUNDS] THEN + GEN_REWRITE_TAC (BINDER_CONV o RAND_CONV) [GSYM I_THM] THEN + CONV_TAC(EXPAND_CASES_CONV THENC ONCE_DEPTH_CONV NUM_MULT_CONV) THEN + ASM_REWRITE_TAC[I_THM; WORD_ADD_0] THEN DISCARD_STATE_TAC "s1959" THEN + + W(fun (asl,w) -> + let lfn = PROCESS_BOUND_ASSUMPTIONS + (CONJUNCTS(tryfind (CONV_RULE EXPAND_CASES_CONV o snd) asl)) + and asms = + map snd (filter (is_local_definition [arm_mldsa_barmul] o concl o snd) asl) in + let lfn' = LOCAL_CONGBOUND_RULE lfn (rev asms) in + + REPEAT(W(fun (asl,w) -> + if length(conjuncts w) > 3 then CONJ_TAC else NO_TAC)) THEN + + W(MP_TAC o ASM_CONGBOUND_RULE lfn' o + rand o lhand o rator o lhand o snd) THEN + (MATCH_MP_TAC MONO_AND THEN CONJ_TAC THENL + [MATCH_MP_TAC(REWRITE_RULE[IMP_CONJ_ALT] INT_CONG_TRANS) THEN + CONV_TAC(ONCE_DEPTH_CONV ARM_MLDSA_FORWARD_NTT_CONV) THEN + REWRITE_TAC[GSYM INT_REM_EQ; o_THM] THEN CONV_TAC INT_REM_DOWN_CONV THEN + REWRITE_TAC[INT_REM_EQ] THEN + REWRITE_TAC[REAL_INT_CONGRUENCE; INT_OF_NUM_EQ; ARITH_EQ] THEN + REWRITE_TAC[GSYM REAL_OF_INT_CLAUSES] THEN + CONV_TAC(RAND_CONV REAL_POLY_CONV) THEN REAL_INTEGER_TAC; + MATCH_MP_TAC(INT_ARITH + `l':int <= l /\ u <= u' + ==> l <= x /\ x <= u ==> l' <= x /\ x <= u'`) THEN + CONV_TAC INT_REDUCE_CONV])));; + +(*** Subroutine form, somewhat messy elaboration of the usual wrapper ***) + +(* NOTE: This must be kept in sync with the CBMC specification + * in mldsa/src/native/aarch64/src/arith_native_aarch64.h *) + +let MLDSA_NTT_SUBROUTINE_CORRECT = prove + (`!a z_012345 z_67 zetas_012345 zetas_67 x pc stackpointer returnaddress. + aligned 16 stackpointer /\ + ALLPAIRS nonoverlapping + [(a,1024); (word_sub stackpointer (word 64),64)] + [(word pc,0x920); (z_012345,576); (z_67,1536)] /\ + nonoverlapping (a,1024) (word_sub stackpointer (word 64),64) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mldsa_ntt_mc /\ + read PC s = word pc /\ + read SP s = stackpointer /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [a; z_012345; z_67] s /\ + wordlist_from_memory(z_012345,144) s:int32 list = zetas_012345 /\ + wordlist_from_memory(z_67,384) s:int32 list = zetas_67 /\ + !i. i < 256 + ==> read(memory :> bytes32(word_add a (word(4 * i)))) s = + x i) + (\s. read PC s = returnaddress /\ + (zetas_012345 = MAP iword ntt_zetas_layer012345 /\ + zetas_67 = MAP iword ntt_zetas_layer67 /\ + (!i. i < 256 ==> abs(ival(x i)) <= &8380416) + ==> !i. i < 256 + ==> let zi = + read(memory :> bytes32(word_add a (word(4 * i)))) s in + (ival zi == arm_mldsa_forward_ntt (ival o x) i) (mod &8380417) /\ + abs(ival zi) <= &75423752)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(a,1024); + memory :> bytes(word_sub stackpointer (word 64),64)])`, + let TWEAK_CONV = + ONCE_DEPTH_CONV let_CONV THENC + ONCE_DEPTH_CONV WORDLIST_FROM_MEMORY_CONV THENC + ONCE_DEPTH_CONV EXPAND_CASES_CONV THENC + ONCE_DEPTH_CONV NUM_MULT_CONV THENC + PURE_REWRITE_CONV [WORD_ADD_0] in + REWRITE_TAC[fst MLDSA_NTT_EXEC] THEN + CONV_TAC TWEAK_CONV THEN + ARM_ADD_RETURN_STACK_TAC ~pre_post_nsteps:(5,5) MLDSA_NTT_EXEC + (REWRITE_RULE[fst MLDSA_NTT_EXEC] (CONV_RULE TWEAK_CONV MLDSA_NTT_CORRECT)) + `[D8; D9; D10; D11; D12; D13; D14; D15]` 64);; + + +(* ------------------------------------------------------------------------- *) +(* Constant-time and memory safety proof. *) +(* ------------------------------------------------------------------------- *) +needs "arm/proofs/consttime.ml";; +needs "aarch64/proofs/subroutine_signatures.ml";; + +let full_spec,public_vars = mk_safety_spec + ~keep_maychanges:false + (assoc "mldsa_ntt_arm" subroutine_signatures) + MLDSA_NTT_SUBROUTINE_CORRECT + MLDSA_NTT_EXEC;; + +let MLDSA_NTT_SUBROUTINE_SAFE = time prove + (`exists f_events. + forall e a z_012345 z_67 pc stackpointer returnaddress. + aligned 16 stackpointer /\ + ALLPAIRS nonoverlapping + [a,1024; word_sub stackpointer (word 64),64] + [word pc,2336; z_012345,576; z_67,1536] /\ + nonoverlapping (a,1024) (word_sub stackpointer (word 64),64) + ==> ensures arm + (\s. + aligned_bytes_loaded s (word pc) mldsa_ntt_mc /\ + read PC s = word pc /\ + read SP s = stackpointer /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [a; z_012345; z_67] s /\ + read events s = e) + (\s. + read PC s = returnaddress /\ + exists e2. + read events s = APPEND e2 e /\ + e2 = + f_events z_012345 z_67 a pc + (word_sub stackpointer (word 64)) + returnaddress /\ + memaccess_inbounds e2 + [a,1024; z_012345,576; z_67,1536; a,1024; + word_sub stackpointer (word 64),64] + [a,1024; word_sub stackpointer (word 64),64]) + (\s s'. true)`, + ASSERT_CONCL_TAC full_spec THEN + PROVE_SAFETY_SPEC_TAC ~public_vars:public_vars MLDSA_NTT_EXEC);; diff --git a/proofs/hol_light/aarch64/proofs/mldsa_rej_uniform.ml b/proofs/hol_light/aarch64/proofs/mldsa_rej_uniform.ml new file mode 100644 index 000000000..1f6c0febf --- /dev/null +++ b/proofs/hol_light/aarch64/proofs/mldsa_rej_uniform.ml @@ -0,0 +1,1687 @@ +(* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + *) + +(* ========================================================================= *) +(* Uniform rejection sampling for ML-DSA, filtering packed numbers < Q. *) +(* *) +(* ML-DSA uses q = 8380417 with 23-bit coefficients packed as 3 bytes each. *) +(* The assembly processes 48 bytes (16 coefficients) per main-loop iteration *) +(* and stores accepted coefficients as 32-bit words. *) +(* ========================================================================= *) + +(* Load base theories for AArch64 from s2n-bignum *) +needs "arm/proofs/base.ml";; + +needs "common/mldsa_specs.ml";; +needs "aarch64/proofs/aarch64_utils.ml";; +needs "aarch64/proofs/mldsa_rej_uniform_table.ml";; + +(**** print_literal_from_elf "aarch64/mldsa/mldsa_rej_uniform.o";; + ****) + +let mldsa_rej_uniform_mc = define_assert_from_elf + "mldsa_rej_uniform_mc" "aarch64/mldsa/mldsa_rej_uniform.o" +(*** BYTECODE START ***) +[ + 0xd11103ff; (* arm_SUB SP SP (rvalue (word 1088)) *) + 0xd2800027; (* arm_MOV X7 (rvalue (word 1)) *) + 0xf2c00047; (* arm_MOVK X7 (word 2) 32 *) + 0x4e081cff; (* arm_INS_GEN Q31 X7 0 64 *) + 0xd2800087; (* arm_MOV X7 (rvalue (word 4)) *) + 0xf2c00107; (* arm_MOVK X7 (word 8) 32 *) + 0x4e181cff; (* arm_INS_GEN Q31 X7 64 64 *) + 0x529c0027; (* arm_MOV W7 (rvalue (word 57345)) *) + 0x72a00fe7; (* arm_MOVK W7 (word 127) 16 *) + 0x4e040cfe; (* arm_DUP_GEN Q30 X7 32 128 *) + 0x910003e8; (* arm_ADD X8 SP (rvalue (word 0)) *) + 0xaa0803e7; (* arm_MOV X7 X8 *) + 0xd280000b; (* arm_MOV X11 (rvalue (word 0)) *) + 0x6e301e10; (* arm_EOR_VEC Q16 Q16 Q16 128 *) + 0x3c8404f0; (* arm_STR Q16 X7 (Postimmediate_Offset (word 64)) *) + 0x3c9d00f0; (* arm_STR Q16 X7 (Immediate_Offset (word 18446744073709551568)) *) + 0x3c9e00f0; (* arm_STR Q16 X7 (Immediate_Offset (word 18446744073709551584)) *) + 0x3c9f00f0; (* arm_STR Q16 X7 (Immediate_Offset (word 18446744073709551600)) *) + 0x9100416b; (* arm_ADD X11 X11 (rvalue (word 16)) *) + 0xf104017f; (* arm_CMP X11 (rvalue (word 256)) *) + 0x54ffff4b; (* arm_BLT (word 2097128) *) + 0xaa0803e7; (* arm_MOV X7 X8 *) + 0xd2800009; (* arm_MOV X9 (rvalue (word 0)) *) + 0xd2802004; (* arm_MOV X4 (rvalue (word 256)) *) + 0xf100c05f; (* arm_CMP X2 (rvalue (word 48)) *) + 0x54000823; (* arm_BCC (word 260) *) + 0xeb04013f; (* arm_CMP X9 X4 *) + 0x54000c62; (* arm_BCS (word 396) *) + 0xd100c042; (* arm_SUB X2 X2 (rvalue (word 48)) *) + 0x4cdf4020; (* arm_LD3 [Q0; Q1; Q2] X1 (Postimmediate_Offset (word 48)) 128 8 *) + 0x4f04e404; (* UNSUPPORTED: movi v4.16b, #0x80 *) + 0x4e641c42; (* arm_BIC_VEC Q2 Q2 Q4 128 *) + 0x4e013804; (* arm_ZIP1 Q4 Q0 Q1 8 128 *) + 0x4e017805; (* arm_ZIP2 Q5 Q0 Q1 8 128 *) + 0x2f08a446; (* UNSUPPORTED: ushll v6.8h, v2.8b, #0 *) + 0x6f08a447; (* UNSUPPORTED: ushll2 v7.8h, v2.16b, #0 *) + 0x4e463890; (* arm_ZIP1 Q16 Q4 Q6 16 128 *) + 0x4e467891; (* arm_ZIP2 Q17 Q4 Q6 16 128 *) + 0x4e4738b2; (* arm_ZIP1 Q18 Q5 Q7 16 128 *) + 0x4e4778b3; (* arm_ZIP2 Q19 Q5 Q7 16 128 *) + 0x6eb037c4; (* arm_CMHI_VEC Q4 Q30 Q16 32 128 *) + 0x6eb137c5; (* arm_CMHI_VEC Q5 Q30 Q17 32 128 *) + 0x6eb237c6; (* arm_CMHI_VEC Q6 Q30 Q18 32 128 *) + 0x6eb337c7; (* arm_CMHI_VEC Q7 Q30 Q19 32 128 *) + 0x4e3f1c84; (* arm_AND_VEC Q4 Q4 Q31 128 *) + 0x4e3f1ca5; (* arm_AND_VEC Q5 Q5 Q31 128 *) + 0x4e3f1cc6; (* arm_AND_VEC Q6 Q6 Q31 128 *) + 0x4e3f1ce7; (* arm_AND_VEC Q7 Q7 Q31 128 *) + 0x6eb03894; (* arm_UADDLV Q20 Q4 32 64 *) + 0x6eb038b5; (* arm_UADDLV Q21 Q5 32 64 *) + 0x6eb038d6; (* arm_UADDLV Q22 Q6 32 64 *) + 0x6eb038f7; (* arm_UADDLV Q23 Q7 32 64 *) + 0x9e66028c; (* UNSUPPORTED: fmov x12, d20 *) + 0x9e6602ad; (* UNSUPPORTED: fmov x13, d21 *) + 0x9e6602ce; (* UNSUPPORTED: fmov x14, d22 *) + 0x9e6602ef; (* UNSUPPORTED: fmov x15, d23 *) + 0x3cec7878; (* arm_LDR Q24 X3 (Shiftreg_Offset X12 4) *) + 0x3ced7879; (* arm_LDR Q25 X3 (Shiftreg_Offset X13 4) *) + 0x3cee787a; (* arm_LDR Q26 X3 (Shiftreg_Offset X14 4) *) + 0x3cef787b; (* arm_LDR Q27 X3 (Shiftreg_Offset X15 4) *) + 0x4e205884; (* arm_CNT Q4 Q4 128 *) + 0x4e2058a5; (* arm_CNT Q5 Q5 128 *) + 0x4e2058c6; (* arm_CNT Q6 Q6 128 *) + 0x4e2058e7; (* arm_CNT Q7 Q7 128 *) + 0x6eb03894; (* arm_UADDLV Q20 Q4 32 64 *) + 0x6eb038b5; (* arm_UADDLV Q21 Q5 32 64 *) + 0x6eb038d6; (* arm_UADDLV Q22 Q6 32 64 *) + 0x6eb038f7; (* arm_UADDLV Q23 Q7 32 64 *) + 0x9e66028c; (* UNSUPPORTED: fmov x12, d20 *) + 0x9e6602ad; (* UNSUPPORTED: fmov x13, d21 *) + 0x9e6602ce; (* UNSUPPORTED: fmov x14, d22 *) + 0x9e6602ef; (* UNSUPPORTED: fmov x15, d23 *) + 0x4e180210; (* arm_TBL Q16 [Q16] Q24 128 *) + 0x4e190231; (* arm_TBL Q17 [Q17] Q25 128 *) + 0x4e1a0252; (* arm_TBL Q18 [Q18] Q26 128 *) + 0x4e1b0273; (* arm_TBL Q19 [Q19] Q27 128 *) + 0x4c0078f0; (* UNSUPPORTED: st1 {v16.4s}, [x7] *) + 0x8b0c08e7; (* arm_ADD X7 X7 (Shiftedreg X12 LSL 2) *) + 0x4c0078f1; (* UNSUPPORTED: st1 {v17.4s}, [x7] *) + 0x8b0d08e7; (* arm_ADD X7 X7 (Shiftedreg X13 LSL 2) *) + 0x4c0078f2; (* UNSUPPORTED: st1 {v18.4s}, [x7] *) + 0x8b0e08e7; (* arm_ADD X7 X7 (Shiftedreg X14 LSL 2) *) + 0x4c0078f3; (* UNSUPPORTED: st1 {v19.4s}, [x7] *) + 0x8b0f08e7; (* arm_ADD X7 X7 (Shiftedreg X15 LSL 2) *) + 0x8b0d018c; (* arm_ADD X12 X12 X13 *) + 0x8b0f01ce; (* arm_ADD X14 X14 X15 *) + 0x8b0c0129; (* arm_ADD X9 X9 X12 *) + 0x8b0e0129; (* arm_ADD X9 X9 X14 *) + 0xf100c05f; (* arm_CMP X2 (rvalue (word 48)) *) + 0x54fff822; (* arm_BCS (word 2096900) *) + 0xeb04013f; (* arm_CMP X9 X4 *) + 0x54000462; (* arm_BCS (word 140) *) + 0xf100605f; (* arm_CMP X2 (rvalue (word 24)) *) + 0x54000423; (* arm_BCC (word 132) *) + 0xd1006042; (* arm_SUB X2 X2 (rvalue (word 24)) *) + 0x0cdf4020; (* arm_LD3 [Q0; Q1; Q2] X1 (Postimmediate_Offset (word 24)) 64 8 *) + 0x4f04e404; (* UNSUPPORTED: movi v4.16b, #0x80 *) + 0x4e641c42; (* arm_BIC_VEC Q2 Q2 Q4 128 *) + 0x4e013804; (* arm_ZIP1 Q4 Q0 Q1 8 128 *) + 0x2f08a446; (* UNSUPPORTED: ushll v6.8h, v2.8b, #0 *) + 0x4e463890; (* arm_ZIP1 Q16 Q4 Q6 16 128 *) + 0x4e467891; (* arm_ZIP2 Q17 Q4 Q6 16 128 *) + 0x6eb037c4; (* arm_CMHI_VEC Q4 Q30 Q16 32 128 *) + 0x6eb137c5; (* arm_CMHI_VEC Q5 Q30 Q17 32 128 *) + 0x4e3f1c84; (* arm_AND_VEC Q4 Q4 Q31 128 *) + 0x4e3f1ca5; (* arm_AND_VEC Q5 Q5 Q31 128 *) + 0x6eb03894; (* arm_UADDLV Q20 Q4 32 64 *) + 0x6eb038b5; (* arm_UADDLV Q21 Q5 32 64 *) + 0x9e66028c; (* UNSUPPORTED: fmov x12, d20 *) + 0x9e6602ad; (* UNSUPPORTED: fmov x13, d21 *) + 0x3cec7878; (* arm_LDR Q24 X3 (Shiftreg_Offset X12 4) *) + 0x3ced7879; (* arm_LDR Q25 X3 (Shiftreg_Offset X13 4) *) + 0x4e205884; (* arm_CNT Q4 Q4 128 *) + 0x4e2058a5; (* arm_CNT Q5 Q5 128 *) + 0x6eb03894; (* arm_UADDLV Q20 Q4 32 64 *) + 0x6eb038b5; (* arm_UADDLV Q21 Q5 32 64 *) + 0x9e66028c; (* UNSUPPORTED: fmov x12, d20 *) + 0x9e6602ad; (* UNSUPPORTED: fmov x13, d21 *) + 0x4e180210; (* arm_TBL Q16 [Q16] Q24 128 *) + 0x4e190231; (* arm_TBL Q17 [Q17] Q25 128 *) + 0x3d8000f0; (* arm_STR Q16 X7 (Immediate_Offset (word 0)) *) + 0x8b0c08e7; (* arm_ADD X7 X7 (Shiftedreg X12 LSL 2) *) + 0x3d8000f1; (* arm_STR Q17 X7 (Immediate_Offset (word 0)) *) + 0x8b0d08e7; (* arm_ADD X7 X7 (Shiftedreg X13 LSL 2) *) + 0x8b0c0129; (* arm_ADD X9 X9 X12 *) + 0x8b0d0129; (* arm_ADD X9 X9 X13 *) + 0xeb04013f; (* arm_CMP X9 X4 *) + 0x9a843129; (* arm_CSEL X9 X9 X4 Condition_CC *) + 0xd280000b; (* arm_MOV X11 (rvalue (word 0)) *) + 0xaa0803e7; (* arm_MOV X7 X8 *) + 0x3cc404f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 64)) *) + 0x3cdd00f1; (* arm_LDR Q17 X7 (Immediate_Offset (word 18446744073709551568)) *) + 0x3cde00f2; (* arm_LDR Q18 X7 (Immediate_Offset (word 18446744073709551584)) *) + 0x3cdf00f3; (* arm_LDR Q19 X7 (Immediate_Offset (word 18446744073709551600)) *) + 0x3c840410; (* arm_STR Q16 X0 (Postimmediate_Offset (word 64)) *) + 0x3c9d0011; (* arm_STR Q17 X0 (Immediate_Offset (word 18446744073709551568)) *) + 0x3c9e0012; (* arm_STR Q18 X0 (Immediate_Offset (word 18446744073709551584)) *) + 0x3c9f0013; (* arm_STR Q19 X0 (Immediate_Offset (word 18446744073709551600)) *) + 0x9100416b; (* arm_ADD X11 X11 (rvalue (word 16)) *) + 0xf104017f; (* arm_CMP X11 (rvalue (word 256)) *) + 0x54fffecb; (* arm_BLT (word 2097112) *) + 0xaa0903e0; (* arm_MOV X0 X9 *) + 0x14000001; (* arm_B (word 4) *) + 0x911103ff; (* arm_ADD SP SP (rvalue (word 1088)) *) + 0xd65f03c0 (* arm_RET X30 *) +];; +(*** BYTECODE END ***) + +let MLDSA_REJ_UNIFORM_EXEC = ARM_MK_EXEC_RULE mldsa_rej_uniform_mc;; + +(* ------------------------------------------------------------------------- *) +(* Code length constants *) +(* ------------------------------------------------------------------------- *) + +let LENGTH_MLDSA_REJ_UNIFORM_MC = + REWRITE_CONV[mldsa_rej_uniform_mc] `LENGTH mldsa_rej_uniform_mc` + |> CONV_RULE (RAND_CONV LENGTH_CONV);; + +let MLDSA_REJ_UNIFORM_PREAMBLE_LENGTH = new_definition + `MLDSA_REJ_UNIFORM_PREAMBLE_LENGTH = 4`;; + +let MLDSA_REJ_UNIFORM_POSTAMBLE_LENGTH = new_definition + `MLDSA_REJ_UNIFORM_POSTAMBLE_LENGTH = 8`;; + +let MLDSA_REJ_UNIFORM_CORE_START = new_definition + `MLDSA_REJ_UNIFORM_CORE_START = MLDSA_REJ_UNIFORM_PREAMBLE_LENGTH`;; + +let MLDSA_REJ_UNIFORM_CORE_END = new_definition + `MLDSA_REJ_UNIFORM_CORE_END = LENGTH mldsa_rej_uniform_mc - MLDSA_REJ_UNIFORM_POSTAMBLE_LENGTH`;; + +let LENGTH_SIMPLIFY_CONV = + REWRITE_CONV[LENGTH_MLDSA_REJ_UNIFORM_MC; + MLDSA_REJ_UNIFORM_CORE_START; MLDSA_REJ_UNIFORM_CORE_END; + MLDSA_REJ_UNIFORM_PREAMBLE_LENGTH; MLDSA_REJ_UNIFORM_POSTAMBLE_LENGTH] THENC + NUM_REDUCE_CONV THENC REWRITE_CONV [ADD_0];; + +(* ------------------------------------------------------------------------- *) +(* General wordlist helpers (same as ML-KEM proof) *) +(* ------------------------------------------------------------------------- *) + +let num_of_wordlist = define + `num_of_wordlist [] = 0 /\ + num_of_wordlist (CONS (h:N word) t) = + val h + 2 EXP dimindex(:N) * num_of_wordlist t`;; + +let NUM_OF_WORDLIST_SING = prove + (`!h:N word. num_of_wordlist [h] = val h`, + REWRITE_TAC[num_of_wordlist; MULT_CLAUSES; ADD_CLAUSES]);; + +let NUM_OF_WORDLIST_APPEND = prove + (`!lis1 lis2:(N word)list. + num_of_wordlist(APPEND lis1 lis2) = + num_of_wordlist lis1 + + 2 EXP (dimindex(:N) * LENGTH lis1) * num_of_wordlist lis2`, + LIST_INDUCT_TAC THEN + ASM_REWRITE_TAC[APPEND; LENGTH; num_of_wordlist] THEN + ASM_REWRITE_TAC[MULT_CLAUSES; EXP; ADD_CLAUSES] THEN + REWRITE_TAC[EXP_ADD] THEN ARITH_TAC);; + +let NUM_OF_WORDLIST_BOUND_LENGTH = prove + (`!l:(N word)list. num_of_wordlist l < 2 EXP (dimindex(:N) * LENGTH l)`, + LIST_INDUCT_TAC THEN REWRITE_TAC[LENGTH; num_of_wordlist] THEN + REWRITE_TAC[MULT_CLAUSES; EXP; EXP_ADD; ARITH] THEN + W(MP_TAC o PART_MATCH lhand VAL_BOUND o lhand o lhand o snd) THEN + MATCH_MP_TAC(ARITH_RULE + `n * (x + 1) <= y ==> h < n ==> h + n * x < y`) THEN + ASM_REWRITE_TAC[LE_MULT_LCANCEL] THEN ASM_ARITH_TAC);; + +let NUM_OF_WORDLIST_BOUND = prove + (`!l:(N word)list n. + LENGTH l <= n ==> num_of_wordlist l < 2 EXP (dimindex(:N) * n)`, + REPEAT STRIP_TAC THEN + TRANS_TAC LTE_TRANS `2 EXP (dimindex(:N) * LENGTH(l:(N word)list))` THEN + ASM_REWRITE_TAC[NUM_OF_WORDLIST_BOUND_LENGTH; LE_EXP; LE_MULT_LCANCEL] THEN + ASM_ARITH_TAC);; + +let NUM_OF_WORDLIST_BOUND_GEN = prove + (`!l:((N word)list) n. + dimindex(:N) * LENGTH l <= n ==> num_of_wordlist l < 2 EXP n`, + REPEAT STRIP_TAC THEN + W(MP_TAC o PART_MATCH lhand NUM_OF_WORDLIST_BOUND_LENGTH o + lhand o snd) THEN + MATCH_MP_TAC(REWRITE_RULE[IMP_CONJ_ALT] LTE_TRANS) THEN + ASM_REWRITE_TAC[LE_EXP] THEN ARITH_TAC);; + +let NUM_OF_WORDLIST_SUB_LIST_0 = prove + (`!(l:(N word)list) n. + num_of_wordlist(SUB_LIST(0,n) l) = + num_of_wordlist l MOD 2 EXP (dimindex(:N) * n)`, + MATCH_MP_TAC list_INDUCT THEN + REWRITE_TAC[SUB_LIST_CLAUSES; num_of_wordlist; DIV_0; MOD_0] THEN + MAP_EVERY X_GEN_TAC [`h:N word`; `t:(N word)list`] THEN + DISCH_TAC THEN MATCH_MP_TAC num_INDUCTION THEN + ASM_REWRITE_TAC[SUB_LIST_CLAUSES; num_of_wordlist] THEN + REWRITE_TAC[MULT_CLAUSES; EXP; MOD_1] THEN + X_GEN_TAC `n:num` THEN DISCH_THEN(K ALL_TAC) THEN + CONV_TAC SYM_CONV THEN REWRITE_TAC[MOD_UNIQUE] THEN + REWRITE_TAC[EXP_ADD] THEN CONJ_TAC THENL + [DISJ2_TAC THEN MATCH_MP_TAC(ARITH_RULE + `h < n /\ n * (t + 1) <= n * e + ==> h + n * t < n * e`) THEN + REWRITE_TAC[VAL_BOUND; LE_MULT_LCANCEL] THEN DISJ2_TAC THEN + REWRITE_TAC[ARITH_RULE `n + 1 <= m <=> n < m`; MOD_LT_EQ] THEN + REWRITE_TAC[EXP_EQ_0; ARITH_EQ]; + MATCH_MP_TAC(NUMBER_RULE + `(t == t') (mod d) + ==> (h + e * t == h + e * t') (mod (e * d))`) THEN + REWRITE_TAC[CONG_RMOD; CONG_REFL]]);; + +let NUM_OF_WORDLIST_SUB_LIST = prove + (`!(l:(N word)list) m n. + num_of_wordlist (SUB_LIST(m,n) l) = + (num_of_wordlist l DIV 2 EXP (dimindex(:N) * m)) MOD + 2 EXP (dimindex(:N) * n)`, + MATCH_MP_TAC list_INDUCT THEN + REWRITE_TAC[SUB_LIST_CLAUSES; num_of_wordlist; DIV_0; MOD_0] THEN + MAP_EVERY X_GEN_TAC [`h:N word`; `t:(N word)list`] THEN + DISCH_TAC THEN MATCH_MP_TAC num_INDUCTION THEN + REWRITE_TAC[NUM_OF_WORDLIST_SUB_LIST_0; GSYM(CONJUNCT2 num_of_wordlist); + EXP; DIV_1; MULT_CLAUSES] THEN + ASM_REWRITE_TAC[SUB_LIST_CLAUSES; num_of_wordlist] THEN + X_GEN_TAC `m:num` THEN DISCH_THEN(K ALL_TAC) THEN X_GEN_TAC `n:num` THEN + SIMP_TAC[EXP_ADD; GSYM DIV_DIV; DIV_MULT_ADD; EXP_EQ_0; ARITH_EQ] THEN + SIMP_TAC[DIV_LT; VAL_BOUND; ADD_CLAUSES]);; + +let SUB_LIST_REFL = prove + (`!(l:A list) n. LENGTH l <= n ==> SUB_LIST(0,n) l = l`, + LIST_INDUCT_TAC THEN ASM_REWRITE_TAC[SUB_LIST_CLAUSES] THEN + INDUCT_TAC THEN ASM_SIMP_TAC[SUB_LIST_CLAUSES; LE_SUC; LENGTH] THEN + ARITH_TAC);; + +let SUB_LIST_1 = prove + (`!(l:A list) n. SUB_LIST(n,1) l = if n < LENGTH l then [EL n l] else []`, + MATCH_MP_TAC list_INDUCT THEN + REWRITE_TAC[LENGTH; CONJUNCT1 LT; SUB_LIST_CLAUSES] THEN + MAP_EVERY X_GEN_TAC [`h:A`; `t:A list`] THEN DISCH_TAC THEN + MATCH_MP_TAC num_INDUCTION THEN + REWRITE_TAC[SUB_LIST_CLAUSES; LT_0; num_CONV `1`; EL; TL] THEN + ASM_REWRITE_TAC[GSYM(num_CONV `1`); LT_SUC; HD]);; + +let SUB_LIST_APPEND_LEFT = prove + (`!(l:A list) m n. + n <= LENGTH l ==> SUB_LIST(0,n) (APPEND l m) = SUB_LIST(0,n) l`, + LIST_INDUCT_TAC THEN + SIMP_TAC[LENGTH; CONJUNCT1 LE; SUB_LIST_CLAUSES] THEN + ONCE_REWRITE_TAC[SWAP_FORALL_THM] THEN + INDUCT_TAC THEN ASM_SIMP_TAC[SUB_LIST_CLAUSES; APPEND; LE_SUC]);; + +let NUM_OF_WORDLIST_EL = prove + (`!(l:(N word)list) m. + (num_of_wordlist l DIV 2 EXP (dimindex(:N) * m)) MOD + 2 EXP (dimindex(:N)) = + if m < LENGTH l then val(EL m l) else 0`, + REPEAT GEN_TAC THEN + MP_TAC(SPECL [`l:(N word)list`; `m:num`; `1`] + NUM_OF_WORDLIST_SUB_LIST) THEN + REWRITE_TAC[SUB_LIST_1; MULT_CLAUSES] THEN + DISCH_THEN(SUBST1_TAC o SYM) THEN COND_CASES_TAC THEN + REWRITE_TAC[NUM_OF_WORDLIST_SING; num_of_wordlist]);; + +let pair_wordlist = define + `(!hi (lo:N word) rest. + pair_wordlist (CONS lo (CONS hi rest)) = + CONS (word_join hi lo:((N)tybit0)word) (pair_wordlist rest)) /\ + (!w. pair_wordlist [w] = [word_join (word 0:N word) w]) /\ + pair_wordlist [] = []`;; + +let NUM_OF_PAIR_WORDLIST = prove + (`!l:(N word)list. num_of_wordlist (pair_wordlist l) = num_of_wordlist l`, + GEN_TAC THEN WF_INDUCT_TAC `LENGTH(l:(N word)list)` THEN + POP_ASSUM MP_TAC THEN SPEC_TAC(`l:(N word)list`,`l:(N word)list`) THEN + MATCH_MP_TAC list_INDUCT THEN + REWRITE_TAC[pair_wordlist; num_of_wordlist] THEN + MAP_EVERY X_GEN_TAC [`lo:N word`; `med:(N word)list`] THEN + DISCH_THEN(K ALL_TAC) THEN + SPEC_TAC(`med:(N word)list`,`med:(N word)list`) THEN + MATCH_MP_TAC list_INDUCT THEN + REWRITE_TAC[pair_wordlist; num_of_wordlist] THEN + SIMP_TAC[MULT_CLAUSES; ADD_CLAUSES; VAL_WORD_JOIN_SIMPLE; DIMINDEX_TYBIT0; + VAL_WORD_0; GSYM MULT_2; LENGTH; ARITH_RULE `n < SUC(SUC n)`] THEN + REWRITE_TAC[MULT_2; EXP_ADD] THEN ARITH_TAC);; + +let BYTES_EQ_NUM_OF_WORDLIST_EXPAND = prove + (`!m (a:int64) len (s:S) (h:((((N)tybit0)tybit0)tybit0) word) t. + dimindex(:N) <= len + ==> (read (m :> bytes(a,len)) s = num_of_wordlist (CONS h t) <=> + read (m :> wbytes a) s = h /\ + read (m :> bytes(word_add a (word(dimindex(:N))),len-dimindex(:N))) s = + num_of_wordlist t)`, + REPEAT STRIP_TAC THEN + REWRITE_TAC[GSYM VAL_EQ; VAL_READ_WBYTES; READ_COMPONENT_COMPOSE] THEN + REWRITE_TAC[num_of_wordlist; DIMINDEX_TYBIT0] THEN + REWRITE_TAC[ARITH_RULE `2 * 2 * 2 * n = 8 * n`] THEN + REWRITE_TAC[ARITH_RULE `(8 * n) DIV 8 = n`] THEN + FIRST_ASSUM(SUBST1_TAC o MATCH_MP (ARITH_RULE + `d:num <= l ==> l = d + (l - d)`)) THEN + REWRITE_TAC[READ_BYTES_COMBINE; ADD_SUB2] THEN + ONCE_REWRITE_TAC[ADD_SYM] THEN ONCE_REWRITE_TAC[CONJ_SYM] THEN + MATCH_MP_TAC LEXICOGRAPHIC_EQ THEN REWRITE_TAC[READ_BYTES_BOUND] THEN + W(MP_TAC o PART_MATCH lhand VAL_BOUND o lhand o snd) THEN + REWRITE_TAC[DIMINDEX_TYBIT0; ARITH_RULE `2 * 2 * 2 * n = 8 * n`]);; + +let BYTES_EQ_NUM_OF_WORDLIST_EXPAND_CONV = + let pth = prove + (`!m (a:int64) len (s:S) (h:((((N)tybit0)tybit0)tybit0) word). + dimindex(:N) = len + ==> (read (m :> bytes(a,len)) s = num_of_wordlist [h] <=> + read (m :> wbytes a) s = h)`, + SIMP_TAC[BYTES_EQ_NUM_OF_WORDLIST_EXPAND; LE_REFL] THEN + REWRITE_TAC[READ_COMPONENT_COMPOSE; SUB_REFL; READ_BYTES_TRIVIAL] THEN + REWRITE_TAC[num_of_wordlist]) in + let frule = PART_MATCH (lhand o rand) pth + and brule = PART_MATCH (lhand o rand) BYTES_EQ_NUM_OF_WORDLIST_EXPAND in + let baseconv tm = + let ith = frule tm in + let sth = (LAND_CONV DIMINDEX_CONV THENC NUM_EQ_CONV) + (lhand(concl ith)) in + MP ith (EQT_ELIM sth) in + let rec conv tm = + try baseconv tm with Failure _ -> + let ith = brule tm in + let dth = DIMINDEX_CONV(lhand(lhand(concl ith))) in + let ith' = SUBS[dth] ith in + let ath = MP ith' (EQT_ELIM(NUM_LE_CONV(lhand(concl ith')))) in + let bth = CONV_RULE(RAND_CONV(RAND_CONV(LAND_CONV(LAND_CONV(RAND_CONV + (RAND_CONV + (BINOP2_CONV (TRY_CONV NORMALIZE_RELATIVE_ADDRESS_CONV) + NUM_SUB_CONV))))))) ath in + CONV_RULE(RAND_CONV(RAND_CONV conv)) bth in + conv;; + +let BYTES_EQ_NUM_OF_WORDLIST_APPEND = prove + (`!m (a:int64) (s:S) lis1 (lis2:(N word)list) len1 len2. + dimindex(:N) * LENGTH lis1 = 8 * len1 + ==> (read (m :> bytes(a,len1+len2)) s = + num_of_wordlist(APPEND lis1 lis2) <=> + read (m :> bytes(a,len1)) s = num_of_wordlist lis1 /\ + read (m :> bytes(word_add a (word len1),len2)) s = + num_of_wordlist lis2)`, + REPEAT STRIP_TAC THEN + REWRITE_TAC[READ_COMPONENT_COMPOSE; READ_BYTES_COMBINE] THEN + ASM_REWRITE_TAC[NUM_OF_WORDLIST_APPEND] THEN + ONCE_REWRITE_TAC[ADD_SYM] THEN ONCE_REWRITE_TAC[CONJ_SYM] THEN + MATCH_MP_TAC LEXICOGRAPHIC_EQ THEN REWRITE_TAC[READ_BYTES_BOUND] THEN + MATCH_MP_TAC NUM_OF_WORDLIST_BOUND_GEN THEN ASM_REWRITE_TAC[LE_REFL]);; + +(* ------------------------------------------------------------------------- *) +(* Memory read merging and splitting conversions *) +(* ------------------------------------------------------------------------- *) + +let READ_MEMORY_MERGE_CONV = + let baseconv = + GEN_REWRITE_CONV I [READ_MEMORY_BYTESIZED_SPLIT] THENC + LAND_CONV(LAND_CONV(RAND_CONV(RAND_CONV + (TRY_CONV(GEN_REWRITE_CONV I [GSYM WORD_ADD_ASSOC] THENC + RAND_CONV WORD_ADD_CONV))))) in + let rec conv n tm = + if n = 0 then REFL tm else + (baseconv THENC BINOP_CONV (conv(n - 1))) tm in + conv;; + +let MEMORY_128_FROM_64_TAC = + let a_tm = `a:int64` and n_tm = `n:num` and i64_ty = `:int64` + and pat = `read (memory :> bytes128(word_add a (word n))) s0` in + fun v boff n -> + let pat' = subst[mk_var(v,i64_ty),a_tm] pat in + let f i = + let itm = mk_small_numeral(boff + 16*i) in + READ_MEMORY_MERGE_CONV 1 (subst[itm,n_tm] pat') in + MP_TAC(end_itlist CONJ (map f (0--(n-1))));; + +let READ_MEMORY_SPLIT_CONV = + let baseconv = + GEN_REWRITE_CONV I [READ_MEMORY_BYTESIZED_UNSPLIT] THENC + BINOP_CONV(LAND_CONV(LAND_CONV(RAND_CONV(RAND_CONV + (TRY_CONV(GEN_REWRITE_CONV I [GSYM WORD_ADD_ASSOC] THENC + RAND_CONV WORD_ADD_CONV)))))) in + let rec conv n tm = + if n = 0 then REFL tm else + (baseconv THENC BINOP_CONV (conv(n - 1))) tm in + conv;; + +(* ------------------------------------------------------------------------- *) +(* Filter length bound *) +(* ------------------------------------------------------------------------- *) + +let LENGTH_FILTER = prove + (`!P l:A list. LENGTH(FILTER P l) <= LENGTH l`, + GEN_TAC THEN LIST_INDUCT_TAC THEN ASM_REWRITE_TAC[FILTER; LE_REFL] THEN + COND_CASES_TAC THEN REWRITE_TAC[LENGTH] THEN ASM_ARITH_TAC);; + +(* ------------------------------------------------------------------------- *) +(* Helper: val(w:24 word) MOD 2^23 as pure word operation *) +(* ------------------------------------------------------------------------- *) + +let VAL_MOD_23_EQ_AND = prove + (`!w:24 word. (word(val w MOD 2 EXP 23):int32) = + word_and (word_zx w:int32) (word 8388607)`, + GEN_TAC THEN CONV_TAC WORD_BLAST);; + +(* ------------------------------------------------------------------------- *) +(* ML-DSA rejection sampling specification *) +(* ------------------------------------------------------------------------- *) + +let REJ_SAMPLE = define + `REJ_SAMPLE l = FILTER (\x:int32. val x < 8380417) + (MAP (\x:24 word. word(val x MOD 2 EXP 23):int32) l)`;; + +let REJ_SAMPLE_EMPTY = prove + (`REJ_SAMPLE [] = []`, + REWRITE_TAC[REJ_SAMPLE; FILTER; MAP]);; + +let REJ_SAMPLE_APPEND = prove + (`!l1 l2. REJ_SAMPLE(APPEND l1 l2) = + APPEND (REJ_SAMPLE l1) (REJ_SAMPLE l2)`, + REWRITE_TAC[REJ_SAMPLE; MAP_APPEND; FILTER_APPEND]);; + +let DIMINDEX_24 = DIMINDEX_CONV `dimindex(:24)`;; +let DIMINDEX_192 = DIMINDEX_CONV `dimindex(:192)`;; +let DIMINDEX_384 = DIMINDEX_CONV `dimindex(:384)`;; + +(* ------------------------------------------------------------------------- *) +(* The main correctness theorem. *) +(* ------------------------------------------------------------------------- *) + +let MLDSA_REJ_UNIFORM_CORRECT = prove + (`!res buf buflen table (inlist:(24 word)list) pc stackpointer. + 24 divides val buflen /\ + 8 * val buflen = 24 * LENGTH inlist /\ + ALL (nonoverlapping (stackpointer,1088)) + [(word pc,LENGTH mldsa_rej_uniform_mc); (buf,val buflen); (table,256)] /\ + ALL (nonoverlapping (res,1024)) + [(word pc,LENGTH mldsa_rej_uniform_mc); (stackpointer,1088)] + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mldsa_rej_uniform_mc /\ + read PC s = word(pc + MLDSA_REJ_UNIFORM_CORE_START) /\ + read SP s = stackpointer /\ + C_ARGUMENTS [res;buf;buflen;table] s /\ + read(memory :> bytes(table,256)) s = + num_of_wordlist mldsa_rej_uniform_table /\ + read(memory :> bytes(buf,val buflen)) s = + num_of_wordlist inlist) + (\s. read PC s = word(pc + MLDSA_REJ_UNIFORM_CORE_END) /\ + let outlist = SUB_LIST(0,256) (REJ_SAMPLE inlist) in + let outlen = LENGTH outlist in + C_RETURN s = word outlen /\ + read(memory :> bytes(res,4 * outlen)) s = + num_of_wordlist outlist) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(res,1024); + memory :> bytes(stackpointer,1088)])`, + CONV_TAC LENGTH_SIMPLIFY_CONV THEN + MAP_EVERY X_GEN_TAC [`res:int64`; `buf:int64`] THEN + W64_GEN_TAC `buflen:num` THEN + MAP_EVERY X_GEN_TAC + [`table:int64`; `inlist:(24 word)list`; `pc:num`; `stackpointer:int64`] THEN + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; C_ARGUMENTS; + ALL; C_RETURN; NONOVERLAPPING_CLAUSES] THEN + DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN + + (*** First split off and handle the writeback tail once and for all ***) + + ENSURES_SEQUENCE_TAC `pc + 0x1f8` + `\s. read X0 s = res /\ + read X4 s = word 256 /\ + read X8 s = stackpointer /\ + ?n. let outlist = REJ_SAMPLE (SUB_LIST (0,8 * n) inlist) in + let outlen = LENGTH outlist in + outlen < 272 /\ + read X9 s = word outlen /\ + (buflen < 24 * (n + 1) \/ 256 <= outlen) /\ + read (memory :> bytes (stackpointer,4 * outlen)) s = + num_of_wordlist outlist` THEN + CONJ_TAC THENL + [ALL_TAC; + + (*** Writeback: unroll the copy loop ***) + + ENSURES_INIT_TAC "s0" THEN + FIRST_X_ASSUM(X_CHOOSE_THEN `n:num` MP_TAC) THEN + CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN + ABBREV_TAC `outlist = REJ_SAMPLE (SUB_LIST (0,8 * n) inlist)` THEN + ABBREV_TAC `outlen = LENGTH(outlist:int32 list)` THEN + DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN + VAL_INT64_TAC `outlen:num` THEN + BIGNUM_LDIGITIZE_TAC "b_" + `read (memory :> bytes(stackpointer,8 * 128)) s0` THEN + MEMORY_128_FROM_64_TAC "stackpointer" 0 64 THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN STRIP_TAC THEN + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (1--182) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + SUBGOAL_THEN + `read (memory :> bytes (res,4 * MIN 256 outlen)) s182 = + num_of_wordlist (SUB_LIST (0,256) outlist:int32 list)` + ASSUME_TAC THENL + [REWRITE_TAC[NUM_OF_WORDLIST_SUB_LIST_0; DIMINDEX_32] THEN + FIRST_X_ASSUM(fun th -> + GEN_REWRITE_TAC (RAND_CONV o LAND_CONV) [SYM th]) THEN + REWRITE_TAC[ARITH_RULE `4 * MIN 256 l = MIN (4 * l) 1024`] THEN + REWRITE_TAC[ARITH_RULE `32 * 256 = 8 * 1024`] THEN + REWRITE_TAC[READ_COMPONENT_COMPOSE; READ_BYTES_MOD] THEN + ONCE_REWRITE_TAC[ARITH_RULE `MIN a b = MIN b a`] THEN + REWRITE_TAC[GSYM READ_BYTES_MOD] THEN + AP_THM_TAC THEN AP_TERM_TAC THEN + REWRITE_TAC[GSYM READ_COMPONENT_COMPOSE] THEN + REWRITE_TAC[ARITH_RULE `1024 = 8 * 128`] THEN + CONV_TAC(ONCE_DEPTH_CONV BIGNUM_LEXPAND_CONV) THEN + RULE_ASSUM_TAC(CONV_RULE(ONCE_DEPTH_CONV(READ_MEMORY_SPLIT_CONV 1))) THEN + ASM_REWRITE_TAC[]; + ALL_TAC] THEN + FIRST_X_ASSUM DISJ_CASES_TAC THENL + [SUBGOAL_THEN `SUB_LIST (0,8 * n) (inlist:(24 word)list) = inlist` + SUBST_ALL_TAC THENL + [MATCH_MP_TAC SUB_LIST_REFL THEN FIRST_X_ASSUM(MATCH_MP_TAC o + MATCH_MP + (ARITH_RULE `8 * b = 24 * l ==> b <= 24 * n ==> l <= 8 * n`)) THEN + UNDISCH_TAC `buflen < 24 * (n + 1)` THEN + FIRST_X_ASSUM(MP_TAC o GEN_REWRITE_RULE I [divides]) THEN + SIMP_TAC[LEFT_IMP_EXISTS_THM; LE_MULT_LCANCEL; LT_MULT_LCANCEL] THEN + ARITH_TAC; + ASM_REWRITE_TAC[LENGTH_SUB_LIST; SUB_0]] THEN + GEN_REWRITE_TAC LAND_CONV [GSYM COND_RAND] THEN + REWRITE_TAC[ARITH_RULE `(if l < p then l else p) = MIN p l`]; + ASM_REWRITE_TAC[GSYM NOT_LE; LENGTH_SUB_LIST; SUB_0] THEN + MATCH_MP_TAC(MESON[] + `y = x /\ (y = x ==> P) ==> word x = word y /\ P`) THEN + CONJ_TAC THENL + [REWRITE_TAC[ARITH_RULE `MIN a b = a <=> a <= b`] THEN + TRANS_TAC LE_TRANS `outlen:num` THEN ASM_REWRITE_TAC[] THEN + SUBST1_TAC(SYM(ASSUME `LENGTH(outlist:int32 list) = outlen`)) THEN + EXPAND_TAC "outlist" THEN + MP_TAC(ISPECL [`inlist:(24 word)list`; `8 * n`] + SUB_LIST_TOPSPLIT) THEN + DISCH_THEN(fun th -> + GEN_REWRITE_TAC (funpow 3 RAND_CONV) [SYM th]) THEN + REWRITE_TAC[REJ_SAMPLE_APPEND; LENGTH_APPEND; LE_ADD]; + DISCH_THEN SUBST1_TAC] THEN + SUBGOAL_THEN + `SUB_LIST (0,256) (REJ_SAMPLE inlist) = SUB_LIST (0,256) outlist` + SUBST1_TAC THENL + [MP_TAC(ISPECL [`inlist:(24 word)list`; `8 * n`] + SUB_LIST_TOPSPLIT) THEN + DISCH_THEN(fun th -> ONCE_REWRITE_TAC[SYM th]) THEN + ASM_SIMP_TAC[REJ_SAMPLE_APPEND; SUB_LIST_APPEND_LEFT]; + ALL_TAC] THEN + FIRST_ASSUM(SUBST1_TAC o MATCH_MP (ARITH_RULE + `256 <= l ==> 4 * 256 = 4 * MIN 256 l`)) THEN + FIRST_ASSUM ACCEPT_TAC]] THEN + + (*** The computation half: main loop + tail ***) + + (*** Characterize the number of times round the main loop ***) + (*** ML-DSA processes 16 coefficients (48 bytes) per iteration ***) + + SUBGOAL_THEN + `?i. buflen < 48 * (i + 1) \/ + 256 <= LENGTH(REJ_SAMPLE(SUB_LIST(0,16 * i) inlist))` + MP_TAC THENL + [EXISTS_TAC `buflen:num` THEN ARITH_TAC; + GEN_REWRITE_TAC LAND_CONV [num_WOP]] THEN + + DISCH_THEN(X_CHOOSE_THEN `N:num` (CONJUNCTS_THEN2 ASSUME_TAC MP_TAC)) THEN + REWRITE_TAC[DE_MORGAN_THM; NOT_LE] THEN STRIP_TAC THEN + + (*** Slightly elaborated sequencing to handle N = 0 case as well ***) + (*** Intermediate state P' at pc + 0x168 (loop48_end: CMP X9,X4) ***) + + MATCH_MP_TAC(MESON[] + `!P'. (ensures arm P' Q R ==> ensures arm P Q R) /\ ensures arm P' Q R + ==> ensures arm P Q R`) THEN + EXISTS_TAC + `\s. aligned_bytes_loaded s (word pc) mldsa_rej_uniform_mc /\ + read PC s = word (pc + 0x168) /\ + read (memory :> bytes (table,256)) s = + num_of_wordlist mldsa_rej_uniform_table /\ + read (memory :> bytes (buf,buflen)) s = num_of_wordlist inlist /\ + read Q30 s = word 663965040167895004928633208018296833 /\ + read Q31 s = word 633825300187901677051779743745 /\ + let outlist = REJ_SAMPLE (SUB_LIST (0,16 * N) inlist) in + let outlen = LENGTH outlist in + read X0 s = res /\ + read X1 s = word_add buf (word (48 * N)) /\ + read X2 s = word_sub (word buflen) (word (48 * N)) /\ + read X3 s = table /\ + read X4 s = word 256 /\ + read X7 s = word_add stackpointer (word (4 * outlen)) /\ + read X8 s = stackpointer /\ + read X9 s = word outlen /\ + read (memory :> bytes (stackpointer,4 * outlen)) s = + num_of_wordlist outlist` THEN + CONJ_TAC THENL + [ASM_CASES_TAC `N = 0` THENL + [(*** N = 0 case: execute preamble + init loop to reach P' ***) + MATCH_MP_TAC(ONCE_REWRITE_RULE[IMP_CONJ] + (REWRITE_RULE[CONJ_ASSOC] ENSURES_TRANS_SIMPLE)) THEN + CONJ_TAC THENL [MAYCHANGE_IDEMPOT_TAC; ALL_TAC] THEN + FIRST_X_ASSUM(DISJ_CASES_THEN MP_TAC) THENL + [ASM_REWRITE_TAC[ADD_CLAUSES; MULT_CLAUSES] THEN DISCH_TAC; + ASM_REWRITE_TAC[MULT_CLAUSES; SUB_LIST_CLAUSES] THEN + REWRITE_TAC[REJ_SAMPLE_EMPTY; LENGTH] THEN + CONV_TAC NUM_REDUCE_CONV] THEN + GHOST_INTRO_TAC `q31_init:int128` `read Q31` THEN + ENSURES_INIT_TAC "s0" THEN + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (1--130) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + CONJ_TAC THENL [CONV_TAC WORD_BLAST; ALL_TAC] THEN + REWRITE_TAC[MULT_CLAUSES; SUB_LIST_CLAUSES; REJ_SAMPLE_EMPTY] THEN + CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN REWRITE_TAC[LENGTH] THEN + REWRITE_TAC[MULT_CLAUSES; WORD_ADD_0; WORD_SUB_0] THEN + REWRITE_TAC[READ_COMPONENT_COMPOSE; READ_BYTES_TRIVIAL; num_of_wordlist]; + + FIRST_ASSUM(ASSUME_TAC o MATCH_MP (ARITH_RULE `~(N = 0) ==> 0 < N`)) THEN + DISCH_TAC] THEN + + (*** Set up the loop invariant ***) + + ENSURES_WHILE_UP_TAC `N:num` `pc + 0x070` `pc + 0x160` + `\i s. read (memory :> bytes (table,256)) s = + num_of_wordlist mldsa_rej_uniform_table /\ + read (memory :> bytes (buf,buflen)) s = num_of_wordlist inlist /\ + read Q30 s = word 663965040167895004928633208018296833 /\ + read Q31 s = word 633825300187901677051779743745 /\ + let outlist = REJ_SAMPLE(SUB_LIST(0,16 * i) inlist) in + let outlen = LENGTH outlist in + read X0 s = res /\ + read X1 s = word_add buf (word(48 * i)) /\ + read X2 s = word_sub (word buflen) (word(48 * i)) /\ + read X3 s = table /\ + read X4 s = word 256 /\ + read X7 s = word_add stackpointer (word(4 * outlen)) /\ + read X8 s = stackpointer /\ + read X9 s = word outlen /\ + read (memory :> bytes (stackpointer,4*outlen)) s = + num_of_wordlist outlist` THEN + REPEAT CONJ_TAC THENL + [(*** 0 < N ***) + ASM_ARITH_TAC; + + (*** Pre-loop initialization: reach invariant at i=0 ***) + FIRST_X_ASSUM(MP_TAC o SPEC `0`) THEN + ASM_REWRITE_TAC[ADD_CLAUSES; MULT_CLAUSES] THEN STRIP_TAC THEN + GHOST_INTRO_TAC `q31_init:int128` `read Q31` THEN + ENSURES_INIT_TAC "s0" THEN + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (1--132) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + CONJ_TAC THENL [CONV_TAC WORD_BLAST; ALL_TAC] THEN + REWRITE_TAC[MULT_CLAUSES; SUB_LIST_CLAUSES; REJ_SAMPLE_EMPTY] THEN + CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN REWRITE_TAC[LENGTH] THEN + REWRITE_TAC[MULT_CLAUSES; WORD_ADD_0; WORD_SUB_0] THEN + REWRITE_TAC[READ_COMPONENT_COMPOSE; READ_BYTES_TRIVIAL; num_of_wordlist]; + + (*** Main loop body preservation: invariant i ==> invariant i+1 ***) + + X_GEN_TAC `i:num` THEN STRIP_TAC THEN + ABBREV_TAC `cur:int64 = word_add buf (word (48 * i))` THEN + ABBREV_TAC `curlist = REJ_SAMPLE (SUB_LIST(0,16 * i) inlist)` THEN + ABBREV_TAC `curlen = LENGTH(curlist:int32 list)` THEN + CONV_TAC(RATOR_CONV(LAND_CONV(TOP_DEPTH_CONV let_CONV))) THEN + ASM_REWRITE_TAC[] THEN ENSURES_INIT_TAC "s0" THEN + MAP_EVERY ABBREV_TAC + [`q0 = read (memory :> bytes128 cur) s0`; + `q1 = read (memory :> bytes128 (word_add cur (word 16))) s0`; + `q2 = read (memory :> bytes128 (word_add cur (word 32))) s0`] THEN + + (*** Abbreviate the 16 coefficients as 32-bit words ***) + + ABBREV_TAC + `(x:num->int32) j = + word_and (word_zx(word_subword + (read (memory :> wbytes cur) s0:384 word) + (24 * j,24):24 word):int32) (word 8388607)` THEN + + FIRST_X_ASSUM(MP_TAC o C MATCH_MP (ASSUME `i:num < N`)) THEN + ASM_REWRITE_TAC[] THEN STRIP_TAC THEN + + (*** Steps 1-12: LD3 + BIC + ZIP + USHLL byte extraction ***) + + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (1--12) THEN + RULE_ASSUM_TAC(CONV_RULE(TOP_DEPTH_CONV WORD_SIMPLE_SUBWORD_CONV)) THEN + + (*** Prove Q16-Q19 contain the correct coefficients ***) + + SUBGOAL_THEN + `read Q16 s12 = + word_join (word_join ((x:num->int32) 3) (x 2):int64) + (word_join (x 1) (x 0):int64) /\ + read Q17 s12 = + word_join (word_join ((x:num->int32) 7) (x 6):int64) + (word_join (x 5) (x 4):int64) /\ + read Q18 s12 = + word_join (word_join ((x:num->int32) 11) (x 10):int64) + (word_join (x 9) (x 8):int64) /\ + read Q19 s12 = + word_join (word_join ((x:num->int32) 15) (x 14):int64) + (word_join (x 13) (x 12):int64)` + MP_TAC THENL + [FIRST_X_ASSUM(MP_TAC o check (is_forall o concl)) THEN + DISCH_THEN(fun th -> REWRITE_TAC[GSYM th]) THEN + ASM_REWRITE_TAC[READ_MEMORY_TRIPLES_SPLIT] THEN REPEAT CONJ_TAC THEN + GEN_REWRITE_TAC I [WORD_EQ_BITS_ALT] THEN + REWRITE_TAC[DIMINDEX_128] THEN CONV_TAC EXPAND_CASES_CONV THEN + CONV_TAC NUM_REDUCE_CONV THEN REPEAT CONJ_TAC THEN + CONV_TAC(TOP_DEPTH_CONV BIT_WORD_CONV) THEN + REWRITE_TAC[word_subdeinterleave; BIT_WORD_OF_BITS; IN_ELIM_THM] THEN + CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN + CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV) THEN + CONV_TAC(TOP_DEPTH_CONV BIT_WORD_CONV) THEN + REWRITE_TAC[] THEN NO_TAC; + DISCARD_MATCHING_ASSUMPTIONS + [`read Q16 s = x`; `read Q17 s = x`; + `read Q18 s = x`; `read Q19 s = x`] THEN + STRIP_TAC] THEN + + (*** Connect x to EL of SUB_LIST (needed for postcondition) ***) + + SUBGOAL_THEN + `!j. j < 16 + ==> EL j (MAP (\x:24 word. word(val x MOD 2 EXP 23):int32) + (SUB_LIST(16 * i,16) inlist)) = (x:num->int32) j` + ASSUME_TAC THENL + [UNDISCH_THEN + `read (memory :> bytes (buf,buflen)) s12 = + num_of_wordlist(inlist:(24 word)list)` + (MP_TAC o AP_TERM + `\x. x DIV 2 EXP (8 * 48 * i) MOD 2 EXP (8 * 48)`) THEN + REWRITE_TAC[READ_COMPONENT_COMPOSE; READ_BYTES_DIV] THEN + REWRITE_TAC[READ_BYTES_MOD] THEN + REWRITE_TAC[GSYM READ_COMPONENT_COMPOSE] THEN + REWRITE_TAC[ARITH_RULE `8 * 48 = 24 * 16 * 1`] THEN + REWRITE_TAC[ARITH_RULE `8 * 48 * x = 24 * 16 * x`] THEN + REWRITE_TAC[GSYM DIMINDEX_24; GSYM NUM_OF_WORDLIST_SUB_LIST] THEN + SUBGOAL_THEN + `MIN (buflen - 48 * i) 48 = dimindex(:384) DIV 8` + SUBST1_TAC THENL + [REWRITE_TAC[DIMINDEX_384] THEN CONV_TAC NUM_REDUCE_CONV THEN + MATCH_MP_TAC(ARITH_RULE + `~(b < 48 * (i + 1)) ==> MIN (b - 48 * i) 48 = 48`) THEN + ASM_REWRITE_TAC[]; + REWRITE_TAC[MULT_CLAUSES] THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[READ_COMPONENT_COMPOSE; GSYM VAL_READ_WBYTES] THEN + ASM_REWRITE_TAC[GSYM READ_COMPONENT_COMPOSE]] THEN + DISCH_TAC THEN X_GEN_TAC `j:num` THEN DISCH_TAC THEN + SUBGOAL_THEN + `j < LENGTH(SUB_LIST(16 * i,16) (inlist:(24 word)list))` + ASSUME_TAC THENL + [REWRITE_TAC[LENGTH_SUB_LIST] THEN + MAP_EVERY UNDISCH_TAC + [`j:num < 16`; `~(buflen < 48 * (i + 1))`; + `8 * buflen = 24 * LENGTH(inlist:(24 word)list)`] THEN + ARITH_TAC; + ALL_TAC] THEN + ASM_SIMP_TAC[EL_MAP] THEN REWRITE_TAC[VAL_MOD_23_EQ_AND] THEN + FIRST_X_ASSUM(fun th -> + GEN_REWRITE_TAC RAND_CONV [SYM(SPEC `j:num` th)]) THEN + AP_THM_TAC THEN AP_TERM_TAC THEN AP_TERM_TAC THEN + REWRITE_TAC[GSYM VAL_EQ; VAL_WORD_SUBWORD; DIMINDEX_24; + ARITH_RULE `MIN 24 24 = 24`] THEN + ASM_REWRITE_TAC[GSYM DIMINDEX_24; NUM_OF_WORDLIST_EL; + LENGTH_SUB_LIST] THEN + COND_CASES_TAC THENL [REFL_TAC; ASM_MESON_TAC[]]; + ALL_TAC] THEN + + (*** Steps 13-28: CMHI + AND + UADDLV + FMOV ***) + + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (13--28) THEN + RULE_ASSUM_TAC(CONV_RULE(TOP_DEPTH_CONV WORD_SIMPLE_SUBWORD_CONV)) THEN + RULE_ASSUM_TAC(CONV_RULE WORD_REDUCE_CONV) THEN + RULE_ASSUM_TAC(REWRITE_RULE + [word_ugt; relational2; GT; WORD_AND_MASK]) THEN + RULE_ASSUM_TAC(ONCE_REWRITE_RULE[COND_RAND]) THEN + RULE_ASSUM_TAC(CONV_RULE WORD_REDUCE_CONV) THEN + + (*** Abbreviate table indices and entries ***) + + MAP_EVERY REABBREV_TAC + [`idx0 = read X12 s28`; `idx1 = read X13 s28`; + `idx2 = read X14 s28`; `idx3 = read X15 s28`] THEN + MAP_EVERY ABBREV_TAC + [`tab0 = read(memory :> bytes128(word_add table + (word(16 * val(idx0:int64))))) s28`; + `tab1 = read(memory :> bytes128(word_add table + (word(16 * val(idx1:int64))))) s28`; + `tab2 = read(memory :> bytes128(word_add table + (word(16 * val(idx2:int64))))) s28`; + `tab3 = read(memory :> bytes128(word_add table + (word(16 * val(idx3:int64))))) s28`] THEN + + (*** Steps 29-48: table loads + CNT + UADDLV + FMOV + TBL ***) + + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (29--48) THEN + RULE_ASSUM_TAC(REWRITE_RULE[WORD_SUBWORD_AND]) THEN + RULE_ASSUM_TAC(CONV_RULE(TOP_DEPTH_CONV WORD_SIMPLE_SUBWORD_CONV)) THEN + RULE_ASSUM_TAC(CONV_RULE WORD_REDUCE_CONV) THEN + RULE_ASSUM_TAC(REWRITE_RULE + [word_ugt; relational2; GT; WORD_AND_MASK]) THEN + RULE_ASSUM_TAC(ONCE_REWRITE_RULE[COND_RAND]) THEN + RULE_ASSUM_TAC(CONV_RULE WORD_REDUCE_CONV) THEN + + (*** TBL correctness: Q16-Q19 = word(num_of_wordlist(FILTER ...)) ***) + (*** TODO: 16-case brute force per group ***) + + SUBGOAL_THEN + `read Q16 s48 = word(num_of_wordlist (FILTER (\x:int32. val x < 8380417) + [(x:num->int32) 0; x 1; x 2; x 3])) /\ + read Q17 s48 = word(num_of_wordlist (FILTER (\x. val x < 8380417) + [x 4; x 5; x 6; x 7])) /\ + read Q18 s48 = word(num_of_wordlist (FILTER (\x. val x < 8380417) + [x 8; x 9; x 10; x 11])) /\ + read Q19 s48 = word(num_of_wordlist (FILTER (\x. val x < 8380417) + [x 12; x 13; x 14; x 15]))` + MP_TAC THENL + [UNDISCH_TAC + `read(memory :> bytes(table,256)) s48 = + num_of_wordlist mldsa_rej_uniform_table` THEN + REPLICATE_TAC 4 + (GEN_REWRITE_TAC (LAND_CONV o ONCE_DEPTH_CONV) + [GSYM NUM_OF_PAIR_WORDLIST]) THEN + REWRITE_TAC[mldsa_rej_uniform_table; pair_wordlist] THEN + CONV_TAC WORD_REDUCE_CONV THEN + CONV_TAC(LAND_CONV BYTES_EQ_NUM_OF_WORDLIST_EXPAND_CONV) THEN + REWRITE_TAC[GSYM BYTES128_WBYTES] THEN REPEAT STRIP_TAC THEN + DISCARD_MATCHING_ASSUMPTIONS + [`read Q24 s = x`; `read Q25 s = x`; + `read Q26 s = x`; `read Q27 s = x`] THEN + REPEAT(FIRST_X_ASSUM(SUBST_ALL_TAC o SYM o check + (fun th -> is_var(rhs(concl th)) && + let n = fst(dest_var(rhs(concl th))) in + n = "tab0" || n = "tab1" || n = "tab2" || n = "tab3"))) THEN + DISCARD_MATCHING_ASSUMPTIONS + [`read X12 s = x`; `read X13 s = x`; + `read X14 s = x`; `read X15 s = x`] THEN + REPEAT(FIRST_X_ASSUM(SUBST_ALL_TAC o SYM o check + (fun th -> is_var(rhs(concl th)) && + let n = fst(dest_var(rhs(concl th))) in + n = "idx0" || n = "idx1" || n = "idx2" || n = "idx3"))) THEN + ASM_REWRITE_TAC[] THEN + DISCARD_MATCHING_ASSUMPTIONS + [`read Q16 s = x`; `read Q17 s = x`; + `read Q18 s = x`; `read Q19 s = x`] THEN + ASM_REWRITE_TAC[FILTER] THEN + REPEAT(COND_CASES_TAC THEN ASM_REWRITE_TAC[]) THEN + ASM_REWRITE_TAC[bitval] THEN + CONV_TAC WORD_REDUCE_CONV THEN + CONV_TAC(TOP_DEPTH_CONV WORD_SIMPLE_SUBWORD_CONV) THEN + CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV) THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN + CONV_TAC(TOP_DEPTH_CONV WORD_SIMPLE_SUBWORD_CONV) THEN + CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV) THEN + REPLICATE_TAC 2 + (ONCE_REWRITE_TAC[GSYM NUM_OF_PAIR_WORDLIST]) THEN + REWRITE_TAC[pair_wordlist; NUM_OF_WORDLIST_SING; WORD_VAL] THEN + REWRITE_TAC[num_of_wordlist] THEN CONV_TAC WORD_BLAST; + DISCARD_MATCHING_ASSUMPTIONS + [`read Q16 s = x`; `read Q17 s = x`; + `read Q18 s = x`; `read Q19 s = x`] THEN + STRIP_TAC] THEN + + (*** Counting: X12-X15 = word(LENGTH(FILTER ...)) ***) + + SUBGOAL_THEN + `read X12 s48 = word(LENGTH (FILTER (\x:int32. val x < 8380417) + [(x:num->int32) 0; x 1; x 2; x 3])) /\ + read X13 s48 = word(LENGTH (FILTER (\x. val x < 8380417) + [x 4; x 5; x 6; x 7])) /\ + read X14 s48 = word(LENGTH (FILTER (\x. val x < 8380417) + [x 8; x 9; x 10; x 11])) /\ + read X15 s48 = word(LENGTH (FILTER (\x. val x < 8380417) + [x 12; x 13; x 14; x 15]))` + MP_TAC THENL + [ASM_REWRITE_TAC[WORD_AND_0; WORD_POPCOUNT_0; BITBLAST_RULE + `word_popcount(word_and (word 1) x:byte) = bitval(bit 0 x) /\ + word_popcount(word_and (word 2) x:byte) = bitval(bit 1 x) /\ + word_popcount(word_and (word 4) x:byte) = bitval(bit 2 x) /\ + word_popcount(word_and (word 8) x:byte) = bitval(bit 3 x) /\ + word_popcount(word_and (word 16) x:byte) = bitval(bit 4 x) /\ + word_popcount(word_and (word 32) x:byte) = bitval(bit 5 x) /\ + word_popcount(word_and (word 64) x:byte) = bitval(bit 6 x) /\ + word_popcount(word_and (word 128) x:byte) = bitval(bit 7 x)`] THEN + DISCARD_STATE_TAC "s48" THEN REPEAT CONJ_TAC THEN + REWRITE_TAC[WORD_MASK_ALT] THEN + REPEAT(COND_CASES_TAC THEN ASM_REWRITE_TAC[]) THEN + ASM_REWRITE_TAC[FILTER] THEN CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV) THEN + REWRITE_TAC[LENGTH] THEN CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV); + DISCARD_MATCHING_ASSUMPTIONS + [`read X12 s = x`; `read X13 s = x`; + `read X14 s = x`; `read X15 s = x`] THEN + STRIP_TAC] THEN + + (*** Abbreviate filtered lists and their lengths ***) + + MAP_EVERY ABBREV_TAC + [`lis0 = FILTER (\x:int32. val x < 8380417) + [(x:num->int32) 0; x 1; x 2; x 3]`; + `lis1 = FILTER (\x:int32. val x < 8380417) + [(x:num->int32) 4; x 5; x 6; x 7]`; + `lis2 = FILTER (\x:int32. val x < 8380417) + [(x:num->int32) 8; x 9; x 10; x 11]`; + `lis3 = FILTER (\x:int32. val x < 8380417) + [(x:num->int32) 12; x 13; x 14; x 15]`; + `len0 = LENGTH(lis0:int32 list)`; + `len1 = LENGTH(lis1:int32 list)`; + `len2 = LENGTH(lis2:int32 list)`; + `len3 = LENGTH(lis3:int32 list)`] THEN + + SUBGOAL_THEN `len0 <= 4 /\ len1 <= 4 /\ len2 <= 4 /\ len3 <= 4` + STRIP_ASSUME_TAC THENL + [MAP_EVERY EXPAND_TAC ["len0"; "len1"; "len2"; "len3"] THEN + MAP_EVERY EXPAND_TAC ["lis0"; "lis1"; "lis2"; "lis3"] THEN + REPEAT CONJ_TAC THEN + W(MP_TAC o PART_MATCH lhand LENGTH_FILTER o lhand o snd) THEN + REWRITE_TAC[LENGTH] THEN ARITH_TAC; + ALL_TAC] THEN + + (*** First writeback: ST1 Q16 + ADD X7 ***) + + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (49--50) THEN + ABBREV_TAC `curlist1:int32 list = APPEND curlist lis0` THEN + ABBREV_TAC `curlen1:num = curlen + len0` THEN + + SUBGOAL_THEN + `curlen1 < 260 /\ + read (memory :> bytes (stackpointer,4*curlen1)) s50 = + num_of_wordlist(curlist1:int32 list)` + STRIP_ASSUME_TAC THENL + [MAP_EVERY EXPAND_TAC ["curlen1"; "curlist1"] THEN CONJ_TAC THENL + [MAP_EVERY UNDISCH_TAC [`curlen < 256`; `len0 <= 4`] THEN ARITH_TAC; + REWRITE_TAC[LEFT_ADD_DISTRIB]] THEN + W(MP_TAC o PART_MATCH (lhand o rand) + BYTES_EQ_NUM_OF_WORDLIST_APPEND o snd) THEN + ASM_REWRITE_TAC[DIMINDEX_32; ARITH_RULE `8 * 4 * l = 32 * l`] THEN + DISCH_THEN SUBST1_TAC THEN + SUBGOAL_THEN + `read (memory :> bytes128 + (word_add stackpointer (word (4 * curlen)))) s50 = + word (num_of_wordlist(lis0:int32 list))` + MP_TAC THENL [ASM_REWRITE_TAC[]; ALL_TAC] THEN + DISCH_THEN(MP_TAC o AP_TERM `val:int128->num`) THEN + REWRITE_TAC[READ_COMPONENT_COMPOSE; BYTES128_WBYTES; + VAL_READ_WBYTES; + DIMINDEX_128; ARITH_RULE `128 DIV 8 = 16`] THEN + SUBGOAL_THEN `4 * len0 = MIN 16 (4 * len0)` SUBST1_TAC THENL + [UNDISCH_TAC `len0 <= 4` THEN ARITH_TAC; + REWRITE_TAC[GSYM READ_BYTES_MOD]] THEN + DISCH_THEN SUBST1_TAC THEN REWRITE_TAC[VAL_WORD] THEN + REWRITE_TAC[DIMINDEX_128; MOD_MOD_EXP_MIN] THEN + MATCH_MP_TAC MOD_LT THEN + MATCH_MP_TAC NUM_OF_WORDLIST_BOUND_GEN THEN + ASM_REWRITE_TAC[DIMINDEX_32] THEN + UNDISCH_TAC `len0 <= 4` THEN ARITH_TAC; + FIRST_X_ASSUM(MP_TAC o GEN_REWRITE_RULE RAND_CONV + [WORD_RULE `word_add (word_add a (word (4 * l0))) + (word_shl (word l1) 2) = + word_add a (word(4 * (l0 + l1)))`]) THEN + ASM_REWRITE_TAC[] THEN DISCH_TAC] THEN + SUBGOAL_THEN `LENGTH(curlist1:int32 list) = curlen1` ASSUME_TAC THENL + [MAP_EVERY EXPAND_TAC ["curlist1"; "curlen1"] THEN + REWRITE_TAC[LENGTH_APPEND] THEN ASM_REWRITE_TAC[]; + ALL_TAC] THEN + + (*** Second writeback ***) + + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (51--52) THEN + ABBREV_TAC `curlist2:int32 list = APPEND curlist1 lis1` THEN + ABBREV_TAC `curlen2:num = curlen1 + len1` THEN + SUBGOAL_THEN + `curlen2 < 264 /\ + read (memory :> bytes (stackpointer,4*curlen2)) s52 = + num_of_wordlist(curlist2:int32 list)` + STRIP_ASSUME_TAC THENL + [MAP_EVERY EXPAND_TAC ["curlen2"; "curlist2"] THEN CONJ_TAC THENL + [MAP_EVERY UNDISCH_TAC [`curlen1 < 260`; `len1 <= 4`] THEN ARITH_TAC; + REWRITE_TAC[LEFT_ADD_DISTRIB]] THEN + W(MP_TAC o PART_MATCH (lhand o rand) + BYTES_EQ_NUM_OF_WORDLIST_APPEND o snd) THEN + ASM_REWRITE_TAC[DIMINDEX_32; ARITH_RULE `8 * 4 * l = 32 * l`] THEN + DISCH_THEN SUBST1_TAC THEN + UNDISCH_THEN + `read (memory :> bytes128 + (word_add stackpointer (word (4 * curlen1)))) s52 = + word (num_of_wordlist(lis1:int32 list))` + (MP_TAC o AP_TERM `val:int128->num`) THEN + REWRITE_TAC[READ_COMPONENT_COMPOSE; BYTES128_WBYTES; + VAL_READ_WBYTES; + DIMINDEX_128; ARITH_RULE `128 DIV 8 = 16`] THEN + SUBGOAL_THEN `4 * len1 = MIN 16 (4 * len1)` SUBST1_TAC THENL + [UNDISCH_TAC `len1 <= 4` THEN ARITH_TAC; + REWRITE_TAC[GSYM READ_BYTES_MOD]] THEN + DISCH_THEN SUBST1_TAC THEN REWRITE_TAC[VAL_WORD] THEN + REWRITE_TAC[DIMINDEX_128; MOD_MOD_EXP_MIN] THEN + MATCH_MP_TAC MOD_LT THEN + MATCH_MP_TAC NUM_OF_WORDLIST_BOUND_GEN THEN + ASM_REWRITE_TAC[DIMINDEX_32] THEN + UNDISCH_TAC `len1 <= 4` THEN ARITH_TAC; + FIRST_X_ASSUM(MP_TAC o GEN_REWRITE_RULE RAND_CONV + [WORD_RULE `word_add (word_add a (word (4 * l0))) + (word_shl (word l1) 2) = + word_add a (word(4 * (l0 + l1)))`]) THEN + ASM_REWRITE_TAC[] THEN DISCH_TAC] THEN + SUBGOAL_THEN `LENGTH(curlist2:int32 list) = curlen2` ASSUME_TAC THENL + [MAP_EVERY EXPAND_TAC ["curlist2"; "curlen2"] THEN + REWRITE_TAC[LENGTH_APPEND] THEN ASM_REWRITE_TAC[]; + ALL_TAC] THEN + + (*** Third writeback ***) + + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (53--54) THEN + ABBREV_TAC `curlist3:int32 list = APPEND curlist2 lis2` THEN + ABBREV_TAC `curlen3:num = curlen2 + len2` THEN + SUBGOAL_THEN + `curlen3 < 268 /\ + read (memory :> bytes (stackpointer,4*curlen3)) s54 = + num_of_wordlist(curlist3:int32 list)` + STRIP_ASSUME_TAC THENL + [MAP_EVERY EXPAND_TAC ["curlen3"; "curlist3"] THEN CONJ_TAC THENL + [MAP_EVERY UNDISCH_TAC [`curlen2 < 264`; `len2 <= 4`] THEN ARITH_TAC; + REWRITE_TAC[LEFT_ADD_DISTRIB]] THEN + W(MP_TAC o PART_MATCH (lhand o rand) + BYTES_EQ_NUM_OF_WORDLIST_APPEND o snd) THEN + ASM_REWRITE_TAC[DIMINDEX_32; ARITH_RULE `8 * 4 * l = 32 * l`] THEN + DISCH_THEN SUBST1_TAC THEN + UNDISCH_THEN + `read (memory :> bytes128 + (word_add stackpointer (word (4 * curlen2)))) s54 = + word (num_of_wordlist(lis2:int32 list))` + (MP_TAC o AP_TERM `val:int128->num`) THEN + REWRITE_TAC[READ_COMPONENT_COMPOSE; BYTES128_WBYTES; + VAL_READ_WBYTES; + DIMINDEX_128; ARITH_RULE `128 DIV 8 = 16`] THEN + SUBGOAL_THEN `4 * len2 = MIN 16 (4 * len2)` SUBST1_TAC THENL + [UNDISCH_TAC `len2 <= 4` THEN ARITH_TAC; + REWRITE_TAC[GSYM READ_BYTES_MOD]] THEN + DISCH_THEN SUBST1_TAC THEN REWRITE_TAC[VAL_WORD] THEN + REWRITE_TAC[DIMINDEX_128; MOD_MOD_EXP_MIN] THEN + MATCH_MP_TAC MOD_LT THEN + MATCH_MP_TAC NUM_OF_WORDLIST_BOUND_GEN THEN + ASM_REWRITE_TAC[DIMINDEX_32] THEN + UNDISCH_TAC `len2 <= 4` THEN ARITH_TAC; + FIRST_X_ASSUM(MP_TAC o GEN_REWRITE_RULE RAND_CONV + [WORD_RULE `word_add (word_add a (word (4 * l0))) + (word_shl (word l1) 2) = + word_add a (word(4 * (l0 + l1)))`]) THEN + ASM_REWRITE_TAC[] THEN DISCH_TAC] THEN + SUBGOAL_THEN `LENGTH(curlist3:int32 list) = curlen3` ASSUME_TAC THENL + [MAP_EVERY EXPAND_TAC ["curlist3"; "curlen3"] THEN + REWRITE_TAC[LENGTH_APPEND] THEN ASM_REWRITE_TAC[]; + ALL_TAC] THEN + + (*** Fourth writeback ***) + + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (55--56) THEN + ABBREV_TAC `curlist4:int32 list = APPEND curlist3 lis3` THEN + ABBREV_TAC `curlen4:num = curlen3 + len3` THEN + SUBGOAL_THEN + `curlen4 < 272 /\ + read (memory :> bytes (stackpointer,4*curlen4)) s56 = + num_of_wordlist(curlist4:int32 list)` + STRIP_ASSUME_TAC THENL + [MAP_EVERY EXPAND_TAC ["curlen4"; "curlist4"] THEN CONJ_TAC THENL + [MAP_EVERY UNDISCH_TAC [`curlen3 < 268`; `len3 <= 4`] THEN ARITH_TAC; + REWRITE_TAC[LEFT_ADD_DISTRIB]] THEN + W(MP_TAC o PART_MATCH (lhand o rand) + BYTES_EQ_NUM_OF_WORDLIST_APPEND o snd) THEN + ASM_REWRITE_TAC[DIMINDEX_32; ARITH_RULE `8 * 4 * l = 32 * l`] THEN + DISCH_THEN SUBST1_TAC THEN + UNDISCH_THEN + `read (memory :> bytes128 + (word_add stackpointer (word (4 * curlen3)))) s56 = + word (num_of_wordlist(lis3:int32 list))` + (MP_TAC o AP_TERM `val:int128->num`) THEN + REWRITE_TAC[READ_COMPONENT_COMPOSE; BYTES128_WBYTES; + VAL_READ_WBYTES; + DIMINDEX_128; ARITH_RULE `128 DIV 8 = 16`] THEN + SUBGOAL_THEN `4 * len3 = MIN 16 (4 * len3)` SUBST1_TAC THENL + [UNDISCH_TAC `len3 <= 4` THEN ARITH_TAC; + REWRITE_TAC[GSYM READ_BYTES_MOD]] THEN + DISCH_THEN SUBST1_TAC THEN REWRITE_TAC[VAL_WORD] THEN + REWRITE_TAC[DIMINDEX_128; MOD_MOD_EXP_MIN] THEN + MATCH_MP_TAC MOD_LT THEN + MATCH_MP_TAC NUM_OF_WORDLIST_BOUND_GEN THEN + ASM_REWRITE_TAC[DIMINDEX_32] THEN + UNDISCH_TAC `len3 <= 4` THEN ARITH_TAC; + FIRST_X_ASSUM(MP_TAC o GEN_REWRITE_RULE RAND_CONV + [WORD_RULE `word_add (word_add a (word (4 * l0))) + (word_shl (word l1) 2) = + word_add a (word(4 * (l0 + l1)))`]) THEN + ASM_REWRITE_TAC[] THEN DISCH_TAC] THEN + SUBGOAL_THEN `LENGTH(curlist4:int32 list) = curlen4` ASSUME_TAC THENL + [MAP_EVERY EXPAND_TAC ["curlist4"; "curlen4"] THEN + REWRITE_TAC[LENGTH_APPEND] THEN ASM_REWRITE_TAC[]; + ALL_TAC] THEN + + (*** Steps 57-60: accumulation (use MAP_UNTIL_TARGET_PC) ***) + + MAP_UNTIL_TARGET_PC + (fun n -> ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC [n]) 57 THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + + (*** Postcondition: connect to loop invariant at i+1 ***) + + REWRITE_TAC[ARITH_RULE `16 * (i + 1) = 16 * i + 16`] THEN + ASM_REWRITE_TAC[SUB_LIST_SPLIT; REJ_SAMPLE_APPEND; ADD_CLAUSES] THEN + SUBGOAL_THEN + `REJ_SAMPLE (SUB_LIST (16 * i,16) inlist) = + APPEND (APPEND (APPEND lis0 lis1) lis2) lis3` + SUBST1_TAC THENL + [MAP_EVERY EXPAND_TAC ["lis0"; "lis1"; "lis2"; "lis3"] THEN + REWRITE_TAC[GSYM APPEND_ASSOC; GSYM FILTER_APPEND] THEN + REWRITE_TAC[APPEND; REJ_SAMPLE] THEN AP_TERM_TAC THEN + REWRITE_TAC[LIST_EQ] THEN CONV_TAC(ONCE_DEPTH_CONV LENGTH_CONV) THEN + REWRITE_TAC[LENGTH_MAP] THEN + MATCH_MP_TAC(TAUT `p /\ (p ==> q) ==> p /\ q`) THEN CONJ_TAC THENL + [REWRITE_TAC[LENGTH_SUB_LIST] THEN MATCH_MP_TAC(ARITH_RULE + `16 * (i + 1) <= l ==> MIN 16 (l - 16 * i) = 16`) THEN + MAP_EVERY UNDISCH_TAC + [`~(buflen < 48 * (i + 1))`; + `8 * buflen = 24 * LENGTH(inlist:(24 word)list)`] THEN + ARITH_TAC; + ASM_SIMP_TAC[EL_MAP] THEN DISCH_THEN(K ALL_TAC) THEN + CONV_TAC EXPAND_CASES_CONV THEN + CONV_TAC(ONCE_DEPTH_CONV EL_CONV) THEN REWRITE_TAC[]]; + ASM_REWRITE_TAC[APPEND_ASSOC] THEN + CONV_TAC(TOP_DEPTH_CONV let_CONV)] THEN + ASM_REWRITE_TAC[] THEN EXPAND_TAC "cur" THEN + REPEAT(CONJ_TAC THENL [CONV_TAC WORD_RULE; ALL_TAC]) THEN + SUBST1_TAC(SYM(ASSUME `curlen3 + len3:num = curlen4`)) THEN + SUBST1_TAC(SYM(ASSUME `curlen2 + len2:num = curlen3`)) THEN + SUBST1_TAC(SYM(ASSUME `curlen1 + len1:num = curlen2`)) THEN + SUBST1_TAC(SYM(ASSUME `curlen + len0:num = curlen1`)) THEN + CONV_TAC WORD_RULE; + + (*** Loop back: from end_pc (pc+0x160) to start_pc (pc+0x070) ***) + X_GEN_TAC `i:num` THEN STRIP_TAC THEN + FIRST_X_ASSUM(MP_TAC o SPEC `i:num`) THEN + ASM_REWRITE_TAC[] THEN STRIP_TAC THEN + CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN + ENSURES_INIT_TAC "s0" THEN + SUBGOAL_THEN + `48 <= val(word_sub (word buflen:int64) (word (48 * i)))` + ASSUME_TAC THENL + [ASM_REWRITE_TAC[VAL_WORD_SUB_CASES] THEN + VAL_INT64_TAC `48 * i` THEN ASM_REWRITE_TAC[] THEN + UNDISCH_TAC `~(buflen < 48 * (i + 1))` THEN ARITH_TAC; + ALL_TAC] THEN + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (1--4) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + MATCH_MP_TAC(MESON[] `~p ==> (if p then x else y) = y`) THEN + ASM_SIMP_TAC[NOT_LE; VAL_WORD_LT]; + + (*** Post-loop exit: from end_pc (invariant N) to split point ***) + (*** Handle cases: outlen >= 256 direct, or fall through to P' ***) + SUBGOAL_THEN + `LENGTH (REJ_SAMPLE (SUB_LIST (0,16 * N) inlist)) < 272` + ASSUME_TAC THENL + [ASM_CASES_TAC `N = 0` THENL + [ASM_REWRITE_TAC[MULT_CLAUSES; SUB_LIST_CLAUSES; REJ_SAMPLE_EMPTY] THEN + REWRITE_TAC[LENGTH] THEN CONV_TAC NUM_REDUCE_CONV; + FIRST_X_ASSUM(MP_TAC o SPEC `N - 1`)] THEN + ASM_REWRITE_TAC[ARITH_RULE `n - 1 < n <=> ~(n = 0)`] THEN + MATCH_MP_TAC(ARITH_RULE + `l' <= l + 16 ==> ~(b < x) /\ l < 256 ==> l' < 272`) THEN + MP_TAC(ISPECL [`inlist:(24 word)list`; `16 * (N - 1)`; `16`; `0`] + SUB_LIST_SPLIT) THEN + ASM_SIMP_TAC[ARITH_RULE `~(N = 0) ==> 16 * (N - 1) + 16 = 16 * N`] THEN + DISCH_THEN SUBST1_TAC THEN REWRITE_TAC[REJ_SAMPLE_APPEND] THEN + REWRITE_TAC[LENGTH_APPEND; LE_ADD_LCANCEL; ADD_CLAUSES] THEN + REWRITE_TAC[REJ_SAMPLE] THEN + W(MP_TAC o PART_MATCH lhand LENGTH_FILTER o lhand o snd) THEN + MATCH_MP_TAC(REWRITE_RULE[IMP_CONJ_ALT] LE_TRANS) THEN + REWRITE_TAC[LENGTH_MAP; LENGTH_SUB_LIST] THEN ARITH_TAC; + ALL_TAC] THEN + VAL_INT64_TAC `LENGTH (REJ_SAMPLE (SUB_LIST (0,16 * N) inlist))` THEN + SUBGOAL_THEN + `48 <= val(word_sub (word buflen:int64) (word (48 * N))) <=> + 48 * (N + 1) <= buflen` + ASSUME_TAC THENL + [SUBGOAL_THEN `48 * N < 2 EXP 64` ASSUME_TAC THENL + [FIRST_X_ASSUM(MP_TAC o SPEC `N - 1`) THEN SIMPLE_ARITH_TAC; + MAP_EVERY VAL_INT64_TAC [`48 * N`; `buflen:num`]] THEN + ASM_REWRITE_TAC[VAL_WORD_SUB_CASES] THEN + FIRST_X_ASSUM(MP_TAC o SPEC `N - 1`) THEN SIMPLE_ARITH_TAC; + ALL_TAC] THEN + CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN + ASM_CASES_TAC `48 * (N + 1) <= buflen` THENL + [ENSURES_INIT_TAC "s0" THEN + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (1--2) THEN + FIRST_X_ASSUM(MP_TAC o check (is_disj o concl)) THEN + ASM_REWRITE_TAC[GSYM NOT_LE] THEN DISCH_TAC THEN + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (3--4) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + EXISTS_TAC `2 * N` THEN + ASM_REWRITE_TAC[ARITH_RULE `8 * 2 * n = 16 * n`]; + ALL_TAC] THEN + RULE_ASSUM_TAC(CONV_RULE(TOP_DEPTH_CONV let_CONV)) THEN + FIRST_X_ASSUM(MATCH_MP_TAC o MATCH_MP + (ONCE_REWRITE_RULE[IMP_CONJ_ALT] + (REWRITE_RULE[CONJ_ASSOC] ENSURES_TRANS_SIMPLE))) THEN + CONJ_TAC THENL [MAYCHANGE_IDEMPOT_TAC; ALL_TAC] THEN + ENSURES_INIT_TAC "s0" THEN + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (1--2) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[]]; + + ALL_TAC] THEN + + (*** The tail computation from P' at pc+0x168 to split at pc+0x1f8 ***) + (*** Handles: outlen >= 256 direct, or 24-byte tail body ***) + + CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN + SUBGOAL_THEN + `LENGTH (REJ_SAMPLE (SUB_LIST (0,16 * N) inlist)) < 272` + ASSUME_TAC THENL + [ASM_CASES_TAC `N = 0` THENL + [ASM_REWRITE_TAC[MULT_CLAUSES; SUB_LIST_CLAUSES; REJ_SAMPLE_EMPTY] THEN + REWRITE_TAC[LENGTH] THEN CONV_TAC NUM_REDUCE_CONV; + FIRST_X_ASSUM(MP_TAC o SPEC `N - 1`)] THEN + ASM_REWRITE_TAC[ARITH_RULE `n - 1 < n <=> ~(n = 0)`] THEN + MATCH_MP_TAC(ARITH_RULE + `l' <= l + 16 ==> ~(b < x) /\ l < 256 ==> l' < 272`) THEN + MP_TAC(ISPECL [`inlist:(24 word)list`; `16 * (N - 1)`; `16`; `0`] + SUB_LIST_SPLIT) THEN + ASM_SIMP_TAC[ARITH_RULE `~(N = 0) ==> 16 * (N - 1) + 16 = 16 * N`] THEN + DISCH_THEN SUBST1_TAC THEN REWRITE_TAC[REJ_SAMPLE_APPEND] THEN + REWRITE_TAC[LENGTH_APPEND; LE_ADD_LCANCEL; ADD_CLAUSES] THEN + REWRITE_TAC[REJ_SAMPLE] THEN + W(MP_TAC o PART_MATCH lhand LENGTH_FILTER o lhand o snd) THEN + MATCH_MP_TAC(REWRITE_RULE[IMP_CONJ_ALT] LE_TRANS) THEN + REWRITE_TAC[LENGTH_MAP; LENGTH_SUB_LIST] THEN ARITH_TAC; + ALL_TAC] THEN + VAL_INT64_TAC `LENGTH (REJ_SAMPLE (SUB_LIST (0,16 * N) inlist))` THEN + SUBGOAL_THEN + `24 <= val(word_sub (word buflen:int64) (word (48 * N))) <=> + 48 * N + 24 <= buflen` + ASSUME_TAC THENL + [SUBGOAL_THEN `48 * N < 2 EXP 64` ASSUME_TAC THENL + [FIRST_X_ASSUM(MP_TAC o SPEC `N - 1`) THEN SIMPLE_ARITH_TAC; + MAP_EVERY VAL_INT64_TAC [`48 * N`; `buflen:num`]] THEN + ASM_REWRITE_TAC[VAL_WORD_SUB_CASES] THEN + FIRST_X_ASSUM(MP_TAC o SPEC `N - 1`) THEN SIMPLE_ARITH_TAC; + ALL_TAC] THEN + ENSURES_INIT_TAC "s0" THEN + ASM_CASES_TAC `256 <= LENGTH (REJ_SAMPLE (SUB_LIST (0,16 * N) inlist))` THEN + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (1--2) THENL + [ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + EXISTS_TAC `2 * N` THEN + ASM_REWRITE_TAC[ARITH_RULE `8 * 2 * n = 16 * n`]; + FIRST_X_ASSUM(MP_TAC o check (is_disj o concl)) THEN + ASM_REWRITE_TAC[] THEN DISCH_TAC] THEN + ASM_CASES_TAC `48 * N + 24 <= buflen` THEN + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (3--4) THENL + [RULE_ASSUM_TAC(REWRITE_RULE[NOT_LE]); + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + EXISTS_TAC `2 * N` THEN + ASM_REWRITE_TAC[ARITH_RULE `8 * 2 * n = 16 * n`] THEN + UNDISCH_TAC `~(48 * N + 24 <= buflen)` THEN ARITH_TAC] THEN + + (*** Because of divisibility, exactly 24 buffer elements left ***) + + FIRST_X_ASSUM(K ALL_TAC o check (is_forall o concl)) THEN + SUBGOAL_THEN `48 * N + 24 = buflen` ASSUME_TAC THENL + [MAP_EVERY UNDISCH_TAC + [`48 * N + 24 <= buflen`; `buflen < 48 * (N + 1)`] THEN + FIRST_X_ASSUM(MP_TAC o GEN_REWRITE_RULE I [divides]) THEN + SIMP_TAC[LEFT_IMP_EXISTS_THM; ARITH_RULE `48 = 24 * 2`] THEN + REWRITE_TAC[GSYM MULT_ASSOC; ARITH_RULE `24 * n + 24 = 24 * (n + 1)`] THEN + REWRITE_TAC[LE_MULT_LCANCEL; LT_MULT_LCANCEL] THEN ARITH_TAC; + ALL_TAC] THEN + + (*** The tail computation body: 8 coefficients in 2 groups ***) + (*** TODO: Half-sized version of main loop body. Key differences: + - 192-bit wbytes (24 bytes) instead of 384-bit + - bytes64 reads for q0,q1,q2 instead of bytes128 + - 2 groups of 4 coefficients (Q16-Q17 only) + - Steps start at 5 (s4 from ENSURES_INIT_TAC at line 1306) + - EXISTS witness is 2*N+1 + - SUB_LIST(16*N,8) instead of SUB_LIST(16*i,16) + - 48*N+24=buflen (exact) instead of ~(buflen<48*(i+1)) + ***) + ABBREV_TAC `cur:int64 = word_add buf (word (48 * N))` THEN + ABBREV_TAC `curlist = REJ_SAMPLE (SUB_LIST(0,16 * N) inlist)` THEN + ABBREV_TAC `curlen = LENGTH(curlist:int32 list)` THEN + + MAP_EVERY ABBREV_TAC + [`q0 = read (memory :> bytes64 cur) s4`; + `q1 = read (memory :> bytes64 (word_add cur (word 8))) s4`; + `q2 = read (memory :> bytes64 (word_add cur (word 16))) s4`] THEN + + ABBREV_TAC + `(x:num->int32) j = + word_and (word_zx(word_subword + (read (memory :> wbytes cur) s4:192 word) + (24 * j,24):24 word):int32) (word 8388607)` THEN + + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (5--12) THEN + + SUBGOAL_THEN + `read Q16 s12 = + word_join (word_join ((x:num->int32) 3) (x 2):int64) + (word_join (x 1) (x 0):int64) /\ + read Q17 s12 = + word_join (word_join ((x:num->int32) 7) (x 6):int64) + (word_join (x 5) (x 4):int64)` + MP_TAC THENL + [FIRST_X_ASSUM(MP_TAC o check (is_forall o concl)) THEN + DISCH_THEN(fun th -> REWRITE_TAC[GSYM th]) THEN + ASM_REWRITE_TAC[READ_MEMORY_TRIPLES_SPLIT] THEN REPEAT CONJ_TAC THEN + GEN_REWRITE_TAC I [WORD_EQ_BITS_ALT] THEN + REWRITE_TAC[DIMINDEX_128] THEN CONV_TAC EXPAND_CASES_CONV THEN + CONV_TAC NUM_REDUCE_CONV THEN REPEAT CONJ_TAC THEN + CONV_TAC(TOP_DEPTH_CONV BIT_WORD_CONV) THEN + REWRITE_TAC[word_subdeinterleave; BIT_WORD_OF_BITS; IN_ELIM_THM] THEN + CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN + CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV) THEN + CONV_TAC(TOP_DEPTH_CONV BIT_WORD_CONV) THEN + REWRITE_TAC[] THEN NO_TAC; + DISCARD_MATCHING_ASSUMPTIONS + [`read Q16 s = x`; `read Q17 s = x`] THEN + STRIP_TAC] THEN + + SUBGOAL_THEN + `!j. j < 8 + ==> EL j (MAP (\w:24 word. word(val w MOD 2 EXP 23):int32) + (SUB_LIST(16 * N,8) inlist)) = (x:num->int32) j` + ASSUME_TAC THENL + [UNDISCH_THEN + `read (memory :> bytes (buf,buflen)) s12 = + num_of_wordlist(inlist:(24 word)list)` + (MP_TAC o AP_TERM + `\n:num. n DIV 2 EXP (8 * 48 * N) MOD 2 EXP (8 * 24)`) THEN + CONV_TAC(DEPTH_CONV BETA_CONV) THEN + REWRITE_TAC[READ_COMPONENT_COMPOSE; READ_BYTES_DIV; READ_BYTES_MOD] THEN + REWRITE_TAC[GSYM READ_COMPONENT_COMPOSE] THEN + REWRITE_TAC[ARITH_RULE `8 * 24 = 24 * 8 * 1`; + ARITH_RULE `8 * 48 * n = 24 * 16 * n`] THEN + REWRITE_TAC[GSYM DIMINDEX_24; GSYM NUM_OF_WORDLIST_SUB_LIST] THEN + SUBGOAL_THEN `MIN (buflen - 48 * N) (dimindex(:24)) = dimindex(:192) DIV 8` + SUBST1_TAC THENL + [REWRITE_TAC[DIMINDEX_24; DIMINDEX_192] THEN + UNDISCH_TAC `48 * N + 24 = buflen` THEN ARITH_TAC; + REWRITE_TAC[MULT_CLAUSES] THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[READ_COMPONENT_COMPOSE; GSYM VAL_READ_WBYTES] THEN + ASM_REWRITE_TAC[GSYM READ_COMPONENT_COMPOSE]] THEN + DISCH_TAC THEN X_GEN_TAC `j:num` THEN DISCH_TAC THEN + SUBGOAL_THEN `j < LENGTH(SUB_LIST(16 * N,8) (inlist:(24 word)list))` + ASSUME_TAC THENL + [REWRITE_TAC[LENGTH_SUB_LIST] THEN + MAP_EVERY UNDISCH_TAC [`j:num < 8`; `48 * N + 24 = buflen`; + `8 * buflen = 24 * LENGTH(inlist:(24 word)list)`] THEN + ARITH_TAC; ALL_TAC] THEN + ASM_SIMP_TAC[EL_MAP] THEN REWRITE_TAC[VAL_MOD_23_EQ_AND] THEN + FIRST_X_ASSUM(fun th -> + GEN_REWRITE_TAC RAND_CONV [SYM(SPEC `j:num` th)]) THEN + AP_THM_TAC THEN AP_TERM_TAC THEN AP_TERM_TAC THEN + REWRITE_TAC[GSYM VAL_EQ; VAL_WORD_SUBWORD; DIMINDEX_24; + ARITH_RULE `MIN 24 24 = 24`] THEN + ASM_REWRITE_TAC[GSYM DIMINDEX_24; NUM_OF_WORDLIST_EL; + LENGTH_SUB_LIST] THEN + COND_CASES_TAC THENL [REFL_TAC; ASM_MESON_TAC[]]; ALL_TAC] THEN + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (13--20) THEN + RULE_ASSUM_TAC(CONV_RULE(TOP_DEPTH_CONV WORD_SIMPLE_SUBWORD_CONV)) THEN + RULE_ASSUM_TAC(CONV_RULE WORD_REDUCE_CONV) THEN + RULE_ASSUM_TAC(REWRITE_RULE[word_ugt; relational2; GT; WORD_AND_MASK]) THEN + RULE_ASSUM_TAC(ONCE_REWRITE_RULE[COND_RAND]) THEN + RULE_ASSUM_TAC(CONV_RULE WORD_REDUCE_CONV) THEN + MAP_EVERY REABBREV_TAC + [`idx0 = read X12 s20`; `idx1 = read X13 s20`] THEN + MAP_EVERY ABBREV_TAC + [`tab0 = read(memory :> bytes128(word_add table + (word(16 * val(idx0:int64))))) s20`; + `tab1 = read(memory :> bytes128(word_add table + (word(16 * val(idx1:int64))))) s20`] THEN + + (*** Steps 21-28: LDR Q24/Q25 + CNT + UADDLV + FMOV ***) + + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (21--28) THEN + + RULE_ASSUM_TAC(REWRITE_RULE[WORD_SUBWORD_AND]) THEN + RULE_ASSUM_TAC(CONV_RULE(TOP_DEPTH_CONV WORD_SIMPLE_SUBWORD_CONV)) THEN + RULE_ASSUM_TAC(CONV_RULE WORD_REDUCE_CONV) THEN + RULE_ASSUM_TAC(REWRITE_RULE[word_ugt; relational2; GT; WORD_AND_MASK]) THEN + RULE_ASSUM_TAC(ONCE_REWRITE_RULE[COND_RAND]) THEN + RULE_ASSUM_TAC(CONV_RULE WORD_REDUCE_CONV) THEN + + (*** Steps 29-30: TBL Q16/Q17 ***) + + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (29--30) THEN + + (*** TBL correctness: Q16/Q17 = word(num_of_wordlist(FILTER ...)) ***) + + SUBGOAL_THEN + `read Q16 s30 = word(num_of_wordlist (FILTER (\x:int32. val x < 8380417) + [(x:num->int32) 0; x 1; x 2; x 3])) /\ + read Q17 s30 = word(num_of_wordlist (FILTER (\x. val x < 8380417) + [x 4; x 5; x 6; x 7]))` + MP_TAC THENL + [UNDISCH_TAC + `read(memory :> bytes(table,256)) s30 = + num_of_wordlist mldsa_rej_uniform_table` THEN + REPLICATE_TAC 4 + (GEN_REWRITE_TAC (LAND_CONV o ONCE_DEPTH_CONV) + [GSYM NUM_OF_PAIR_WORDLIST]) THEN + REWRITE_TAC[mldsa_rej_uniform_table; pair_wordlist] THEN + CONV_TAC WORD_REDUCE_CONV THEN + CONV_TAC(LAND_CONV BYTES_EQ_NUM_OF_WORDLIST_EXPAND_CONV) THEN + REWRITE_TAC[GSYM BYTES128_WBYTES] THEN REPEAT STRIP_TAC THEN + DISCARD_MATCHING_ASSUMPTIONS + [`read Q24 s = x`; `read Q25 s = x`] THEN + REPEAT(FIRST_X_ASSUM(SUBST_ALL_TAC o SYM o check + (fun th -> is_var(rhs(concl th)) && + let n = fst(dest_var(rhs(concl th))) in + n = "tab0" || n = "tab1"))) THEN + DISCARD_MATCHING_ASSUMPTIONS + [`read X12 s = x`; `read X13 s = x`] THEN + REPEAT(FIRST_X_ASSUM(SUBST_ALL_TAC o SYM o check + (fun th -> is_var(rhs(concl th)) && + let n = fst(dest_var(rhs(concl th))) in + n = "idx0" || n = "idx1"))) THEN + ASM_REWRITE_TAC[] THEN + DISCARD_MATCHING_ASSUMPTIONS + [`read Q16 s = x`; `read Q17 s = x`] THEN + ASM_REWRITE_TAC[FILTER] THEN + REPEAT(COND_CASES_TAC THEN ASM_REWRITE_TAC[]) THEN + ASM_REWRITE_TAC[bitval] THEN + CONV_TAC WORD_REDUCE_CONV THEN + CONV_TAC(TOP_DEPTH_CONV WORD_SIMPLE_SUBWORD_CONV) THEN + CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV) THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN + CONV_TAC(TOP_DEPTH_CONV WORD_SIMPLE_SUBWORD_CONV) THEN + CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV) THEN + REPLICATE_TAC 2 + (ONCE_REWRITE_TAC[GSYM NUM_OF_PAIR_WORDLIST]) THEN + REWRITE_TAC[pair_wordlist; NUM_OF_WORDLIST_SING; WORD_VAL] THEN + REWRITE_TAC[num_of_wordlist] THEN CONV_TAC WORD_BLAST; + DISCARD_MATCHING_ASSUMPTIONS + [`read Q16 s = x`; `read Q17 s = x`] THEN + STRIP_TAC] THEN + + (*** Counting: X12/X13 = word(LENGTH(FILTER ...)) ***) + + SUBGOAL_THEN + `read X12 s30 = word(LENGTH (FILTER (\x:int32. val x < 8380417) + [(x:num->int32) 0; x 1; x 2; x 3])) /\ + read X13 s30 = word(LENGTH (FILTER (\x. val x < 8380417) + [x 4; x 5; x 6; x 7]))` + MP_TAC THENL + [ASM_REWRITE_TAC[WORD_AND_0; WORD_POPCOUNT_0; BITBLAST_RULE + `word_popcount(word_and (word 1) x:byte) = bitval(bit 0 x) /\ + word_popcount(word_and (word 2) x:byte) = bitval(bit 1 x) /\ + word_popcount(word_and (word 4) x:byte) = bitval(bit 2 x) /\ + word_popcount(word_and (word 8) x:byte) = bitval(bit 3 x) /\ + word_popcount(word_and (word 16) x:byte) = bitval(bit 4 x) /\ + word_popcount(word_and (word 32) x:byte) = bitval(bit 5 x) /\ + word_popcount(word_and (word 64) x:byte) = bitval(bit 6 x) /\ + word_popcount(word_and (word 128) x:byte) = bitval(bit 7 x)`] THEN + DISCARD_STATE_TAC "s30" THEN REPEAT CONJ_TAC THEN + REWRITE_TAC[WORD_MASK_ALT] THEN + REPEAT(COND_CASES_TAC THEN ASM_REWRITE_TAC[]) THEN + ASM_REWRITE_TAC[FILTER] THEN CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV) THEN + REWRITE_TAC[LENGTH] THEN CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV); + DISCARD_MATCHING_ASSUMPTIONS + [`read X12 s = x`; `read X13 s = x`] THEN + STRIP_TAC] THEN + + (*** Abbreviate filtered lists and their lengths ***) + + MAP_EVERY ABBREV_TAC + [`lis0 = FILTER (\x:int32. val x < 8380417) + [(x:num->int32) 0; x 1; x 2; x 3]`; + `lis1 = FILTER (\x:int32. val x < 8380417) + [(x:num->int32) 4; x 5; x 6; x 7]`; + `len0 = LENGTH(lis0:int32 list)`; + `len1 = LENGTH(lis1:int32 list)`] THEN + + SUBGOAL_THEN `len0 <= 4 /\ len1 <= 4` + STRIP_ASSUME_TAC THENL + [MAP_EVERY EXPAND_TAC ["len0"; "len1"] THEN + MAP_EVERY EXPAND_TAC ["lis0"; "lis1"] THEN + REPEAT CONJ_TAC THEN + W(MP_TAC o PART_MATCH lhand LENGTH_FILTER o lhand o snd) THEN + REWRITE_TAC[LENGTH] THEN ARITH_TAC; + ALL_TAC] THEN + + (*** First writeback: STR Q16 + ADD X7 ***) + + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (31--32) THEN + ABBREV_TAC `curlist1:int32 list = APPEND curlist lis0` THEN + ABBREV_TAC `curlen1:num = curlen + len0` THEN + + SUBGOAL_THEN + `curlen1 < 260 /\ + read (memory :> bytes (stackpointer,4*curlen1)) s32 = + num_of_wordlist(curlist1:int32 list)` + STRIP_ASSUME_TAC THENL + [MAP_EVERY EXPAND_TAC ["curlen1"; "curlist1"] THEN CONJ_TAC THENL + [MAP_EVERY UNDISCH_TAC [`curlen < 256`; `len0 <= 4`] THEN ARITH_TAC; + REWRITE_TAC[LEFT_ADD_DISTRIB]] THEN + W(MP_TAC o PART_MATCH (lhand o rand) + BYTES_EQ_NUM_OF_WORDLIST_APPEND o snd) THEN + ASM_REWRITE_TAC[DIMINDEX_32; ARITH_RULE `8 * 4 * l = 32 * l`] THEN + DISCH_THEN SUBST1_TAC THEN + SUBGOAL_THEN + `read (memory :> bytes128 + (word_add stackpointer (word (4 * curlen)))) s32 = + word (num_of_wordlist(lis0:int32 list))` + MP_TAC THENL [ASM_REWRITE_TAC[]; ALL_TAC] THEN + DISCH_THEN(MP_TAC o AP_TERM `val:int128->num`) THEN + REWRITE_TAC[READ_COMPONENT_COMPOSE; BYTES128_WBYTES; + VAL_READ_WBYTES; + DIMINDEX_128; ARITH_RULE `128 DIV 8 = 16`] THEN + SUBGOAL_THEN `4 * len0 = MIN 16 (4 * len0)` SUBST1_TAC THENL + [UNDISCH_TAC `len0 <= 4` THEN ARITH_TAC; + REWRITE_TAC[GSYM READ_BYTES_MOD]] THEN + DISCH_THEN SUBST1_TAC THEN REWRITE_TAC[VAL_WORD] THEN + REWRITE_TAC[DIMINDEX_128; MOD_MOD_EXP_MIN] THEN + MATCH_MP_TAC MOD_LT THEN + MATCH_MP_TAC NUM_OF_WORDLIST_BOUND_GEN THEN + ASM_REWRITE_TAC[DIMINDEX_32] THEN + UNDISCH_TAC `len0 <= 4` THEN ARITH_TAC; + FIRST_X_ASSUM(MP_TAC o GEN_REWRITE_RULE RAND_CONV + [WORD_RULE `word_add (word_add a (word (4 * l0))) + (word_shl (word l1) 2) = + word_add a (word(4 * (l0 + l1)))`]) THEN + ASM_REWRITE_TAC[] THEN DISCH_TAC] THEN + SUBGOAL_THEN `LENGTH(curlist1:int32 list) = curlen1` ASSUME_TAC THENL + [MAP_EVERY EXPAND_TAC ["curlist1"; "curlen1"] THEN + REWRITE_TAC[LENGTH_APPEND] THEN ASM_REWRITE_TAC[]; + ALL_TAC] THEN + + (*** Second writeback: STR Q17 + ADD X7 ***) + + ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC (33--34) THEN + ABBREV_TAC `curlist2:int32 list = APPEND curlist1 lis1` THEN + ABBREV_TAC `curlen2:num = curlen1 + len1` THEN + + SUBGOAL_THEN + `curlen2 < 264 /\ + read (memory :> bytes (stackpointer,4*curlen2)) s34 = + num_of_wordlist(curlist2:int32 list)` + STRIP_ASSUME_TAC THENL + [MAP_EVERY EXPAND_TAC ["curlen2"; "curlist2"] THEN CONJ_TAC THENL + [MAP_EVERY UNDISCH_TAC [`curlen1 < 260`; `len1 <= 4`] THEN ARITH_TAC; + REWRITE_TAC[LEFT_ADD_DISTRIB]] THEN + W(MP_TAC o PART_MATCH (lhand o rand) + BYTES_EQ_NUM_OF_WORDLIST_APPEND o snd) THEN + ASM_REWRITE_TAC[DIMINDEX_32; ARITH_RULE `8 * 4 * l = 32 * l`] THEN + DISCH_THEN SUBST1_TAC THEN + SUBGOAL_THEN + `read (memory :> bytes128 + (word_add stackpointer (word (4 * curlen1)))) s34 = + word (num_of_wordlist(lis1:int32 list))` + MP_TAC THENL [ASM_REWRITE_TAC[]; ALL_TAC] THEN + DISCH_THEN(MP_TAC o AP_TERM `val:int128->num`) THEN + REWRITE_TAC[READ_COMPONENT_COMPOSE; BYTES128_WBYTES; + VAL_READ_WBYTES; + DIMINDEX_128; ARITH_RULE `128 DIV 8 = 16`] THEN + SUBGOAL_THEN `4 * len1 = MIN 16 (4 * len1)` SUBST1_TAC THENL + [UNDISCH_TAC `len1 <= 4` THEN ARITH_TAC; + REWRITE_TAC[GSYM READ_BYTES_MOD]] THEN + DISCH_THEN SUBST1_TAC THEN REWRITE_TAC[VAL_WORD] THEN + REWRITE_TAC[DIMINDEX_128; MOD_MOD_EXP_MIN] THEN + MATCH_MP_TAC MOD_LT THEN + MATCH_MP_TAC NUM_OF_WORDLIST_BOUND_GEN THEN + ASM_REWRITE_TAC[DIMINDEX_32] THEN + UNDISCH_TAC `len1 <= 4` THEN ARITH_TAC; + FIRST_X_ASSUM(MP_TAC o GEN_REWRITE_RULE RAND_CONV + [WORD_RULE `word_add (word_add a (word (4 * l0))) + (word_shl (word l1) 2) = + word_add a (word(4 * (l0 + l1)))`]) THEN + ASM_REWRITE_TAC[] THEN DISCH_TAC] THEN + SUBGOAL_THEN `LENGTH(curlist2:int32 list) = curlen2` ASSUME_TAC THENL + [MAP_EVERY EXPAND_TAC ["curlist2"; "curlen2"] THEN + REWRITE_TAC[LENGTH_APPEND] THEN ASM_REWRITE_TAC[]; + ALL_TAC] THEN + + (*** REJ_SAMPLE connection ***) + + SUBGOAL_THEN + `REJ_SAMPLE (SUB_LIST (0,16 * N + 8) inlist) = curlist2:int32 list` + ASSUME_TAC THENL + [ASM_REWRITE_TAC[SUB_LIST_SPLIT; REJ_SAMPLE_APPEND; ADD_CLAUSES] THEN + SUBGOAL_THEN + `REJ_SAMPLE (SUB_LIST (16 * N,8) inlist) = APPEND lis0 lis1` + SUBST1_TAC THENL + [MAP_EVERY EXPAND_TAC ["lis0"; "lis1"] THEN + REWRITE_TAC[GSYM FILTER_APPEND; APPEND; REJ_SAMPLE] THEN + AP_TERM_TAC THEN + REWRITE_TAC[LIST_EQ] THEN CONV_TAC(ONCE_DEPTH_CONV LENGTH_CONV) THEN + REWRITE_TAC[LENGTH_MAP] THEN + MATCH_MP_TAC(TAUT `p /\ (p ==> q) ==> p /\ q`) THEN CONJ_TAC THENL + [REWRITE_TAC[LENGTH_SUB_LIST] THEN MATCH_MP_TAC(ARITH_RULE + `16 * N + 8 <= l ==> MIN 8 (l - 16 * N) = 8`) THEN + MAP_EVERY UNDISCH_TAC + [`48 * N + 24 = buflen`; + `8 * buflen = 24 * LENGTH(inlist:(24 word)list)`] THEN + ARITH_TAC; + ASM_SIMP_TAC[EL_MAP] THEN DISCH_THEN(K ALL_TAC) THEN + CONV_TAC EXPAND_CASES_CONV THEN + CONV_TAC(ONCE_DEPTH_CONV EL_CONV) THEN REWRITE_TAC[]]; + MAP_EVERY EXPAND_TAC ["curlist2"; "curlist1"] THEN + ASM_REWRITE_TAC[APPEND_ASSOC]]; + ALL_TAC] THEN + + (*** Steps 35-36: accumulation (ADD X9 + ADD X9) + final steps ***) + + MAP_UNTIL_TARGET_PC + (fun n -> ARM_STEPS_TAC MLDSA_REJ_UNIFORM_EXEC [n]) 35 THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + + (*** Postcondition: exists n with witness 2*N+1 ***) + + EXISTS_TAC `2 * N + 1` THEN + REWRITE_TAC[ARITH_RULE `8 * (2 * N + 1) = 16 * N + 8`; + ARITH_RULE `24 * (2 * N + 1 + 1) = 48 * N + 48`] THEN + ASM_REWRITE_TAC[] THEN + CONJ_TAC THENL + [UNDISCH_TAC `curlen2 < 264` THEN ARITH_TAC; ALL_TAC] THEN + CONJ_TAC THENL + [SUBST1_TAC(SYM(ASSUME `curlen1 + len1:num = curlen2`)) THEN + SUBST1_TAC(SYM(ASSUME `curlen + len0:num = curlen1`)) THEN + CONV_TAC WORD_RULE; ALL_TAC] THEN + DISJ1_TAC THEN UNDISCH_TAC `48 * N + 24 = buflen` THEN ARITH_TAC);; + diff --git a/proofs/hol_light/aarch64/proofs/mldsa_rej_uniform_table.ml b/proofs/hol_light/aarch64/proofs/mldsa_rej_uniform_table.ml new file mode 100644 index 000000000..689b8a97f --- /dev/null +++ b/proofs/hol_light/aarch64/proofs/mldsa_rej_uniform_table.ml @@ -0,0 +1,38 @@ +(* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + *) + +(* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + *) + +(* + * Constant table values used in the AArch64 rejection sampling. + * Each entry is 16 bytes. There are 16 entries (one per 4-bit mask), + * for a total of 256 bytes. Entries use 4-byte (32-bit) coefficient + * indices since ML-DSA coefficients are 32-bit. + * See autogen for details. + *) + +let mldsa_rej_uniform_table = (REWRITE_RULE[MAP] o define) + `mldsa_rej_uniform_table:byte list = MAP word [ + 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; + 0; 1; 2; 3; 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; + 4; 5; 6; 7; 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; + 0; 1; 2; 3; 4; 5; 6; 7; 255; 255; 255; 255; 255; 255; 255; 255; + 8; 9; 10; 11; 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; + 0; 1; 2; 3; 8; 9; 10; 11; 255; 255; 255; 255; 255; 255; 255; 255; + 4; 5; 6; 7; 8; 9; 10; 11; 255; 255; 255; 255; 255; 255; 255; 255; + 0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 255; 255; 255; 255; + 12; 13; 14; 15; 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; 255; + 0; 1; 2; 3; 12; 13; 14; 15; 255; 255; 255; 255; 255; 255; 255; 255; + 4; 5; 6; 7; 12; 13; 14; 15; 255; 255; 255; 255; 255; 255; 255; 255; + 0; 1; 2; 3; 4; 5; 6; 7; 12; 13; 14; 15; 255; 255; 255; 255; + 8; 9; 10; 11; 12; 13; 14; 15; 255; 255; 255; 255; 255; 255; 255; 255; + 0; 1; 2; 3; 8; 9; 10; 11; 12; 13; 14; 15; 255; 255; 255; 255; + 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15; 255; 255; 255; 255; + 0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15 + ]`;; diff --git a/proofs/hol_light/aarch64/proofs/mldsa_zetas.ml b/proofs/hol_light/aarch64/proofs/mldsa_zetas.ml new file mode 100644 index 000000000..66cb8614e --- /dev/null +++ b/proofs/hol_light/aarch64/proofs/mldsa_zetas.ml @@ -0,0 +1,87 @@ +(* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + *) + +(* + * Table of zeta values used in the AArch64 NTTs + * See autogen for details. + *) + +let ntt_zetas_layer012345 = define `ntt_zetas_layer012345:int list = [ + -- &3572223; -- &915382907; &3765607; &964937599; &3761513; &963888510; -- &3201494; -- &820383522; + -- &2883726; -- &738955404; -- &3145678; -- &806080660; -- &3201430; -- &820367122; &0; &0; + -- &601683; -- &154181397; -- &3370349; -- &863652652; -- &4063053; -- &1041158200; &3602218; &923069133; + &3182878; &815613168; &2740543; &702264730; -- &3586446; -- &919027554; &0; &0; + &3542485; &907762539; &2663378; &682491182; -- &1674615; -- &429120452; -- &3110818; -- &797147778; + &2101410; &538486762; &3704823; &949361686; &1159875; &297218217; &0; &0; + &2682288; &687336873; -- &3524442; -- &903139016; -- &434125; -- &111244624; &394148; &101000509; + &928749; &237992130; &1095468; &280713909; -- &3506380; -- &898510625; &0; &0; + &2129892; &545785280; &676590; &173376332; -- &1335936; -- &342333886; &2071829; &530906624; + -- &4018989; -- &1029866791; &3241972; &830756018; &2156050; &552488273; &0; &0; + &3764867; &964747974; -- &3227876; -- &827143915; &1714295; &439288460; &3415069; &875112161; + &1759347; &450833045; -- &817536; -- &209493775; -- &3574466; -- &915957677; &0; &0; + -- &1005239; -- &257592709; &2453983; &628833668; &1460718; &374309300; &3756790; &962678241; + -- &1935799; -- &496048908; -- &1716988; -- &439978542; -- &3950053; -- &1012201926; &0; &0; + &557458; &142848732; -- &642628; -- &164673562; -- &3585098; -- &918682129; -- &2897314; -- &742437332; + &3192354; &818041395; &556856; &142694469; &3870317; &991769559; &0; &0; + -- &1221177; -- &312926867; &2815639; &721508096; &2283733; &585207070; &2917338; &747568486; + &1853806; &475038184; &3345963; &857403734; &1858416; &476219497; &0; &0 +]`;; + +let ntt_zetas_layer67 = define `ntt_zetas_layer67:int list = [ + &3073009; &1277625; -- &2635473; &3852015; &787459213; &327391679; -- &675340520; &987079667; + &1753; -- &2659525; &2660408; -- &59148; &449207; -- &681503850; &681730119; -- &15156688; + -- &1935420; -- &1455890; -- &1780227; &2772600; -- &495951789; -- &373072124; -- &456183549; &710479343; + &4183372; -- &3222807; -- &3121440; -- &274060; &1071989969; -- &825844983; -- &799869667; -- &70227934; + &1182243; &636927; -- &3956745; -- &3284915; &302950022; &163212680; -- &1013916752; -- &841760171; + &87208; -- &3965306; -- &2296397; -- &3716946; &22347069; -- &1016110510; -- &588452222; -- &952468207; + &2508980; &2028118; &1937570; -- &3815725; &642926661; &519705671; &496502727; -- &977780347; + -- &27812; &1009365; -- &1979497; -- &3956944; -- &7126831; &258649997; -- &507246529; -- &1013967746; + &822541; -- &2454145; &1596822; -- &3759465; &210776307; -- &628875181; &409185979; -- &963363710; + &2811291; -- &2983781; -- &1109516; &4158088; &720393920; -- &764594519; -- &284313712; &1065510939; + -- &1685153; &2678278; -- &3551006; -- &250446; -- &431820817; &686309310; -- &909946047; -- &64176841; + -- &3410568; -- &3768948; &635956; -- &2455377; -- &873958779; -- &965793731; &162963861; -- &629190881; + &1528066; &482649; &1148858; -- &2962264; &391567239; &123678909; &294395108; -- &759080783; + -- &4146264; &2192938; &2387513; -- &268456; -- &1062481036; &561940831; &611800717; -- &68791907; + -- &1772588; -- &1727088; -- &3611750; -- &3180456; -- &454226054; -- &442566669; -- &925511710; -- &814992530; + -- &565603; &169688; &2462444; -- &3334383; -- &144935890; &43482586; &631001801; -- &854436357; + &3747250; &1239911; &3195676; &1254190; &960233614; &317727459; &818892658; &321386456; + &2296099; -- &3838479; &2642980; -- &12417; &588375860; -- &983611064; &677264190; -- &3181859; + -- &4166425; -- &3488383; &1987814; -- &3197248; -- &1067647297; -- &893898890; &509377762; -- &819295484; + &2998219; -- &89301; -- &1354892; -- &1310261; &768294260; -- &22883400; -- &347191365; -- &335754661; + &141835; &2513018; &613238; -- &2218467; &36345249; &643961400; &157142369; -- &568482643; + &1736313; &235407; -- &3250154; &3258457; &444930577; &60323094; -- &832852657; &834980303; + -- &458740; &4040196; &2039144; -- &818761; -- &117552223; &1035301089; &522531086; -- &209807681; + -- &1921994; -- &3472069; -- &1879878; -- &2178965; -- &492511373; -- &889718424; -- &481719139; -- &558360247; + -- &2579253; &1787943; -- &2391089; -- &2254727; -- &660934133; &458160776; -- &612717067; -- &577774276; + -- &1623354; -- &2374402; &586241; &527981; -- &415984810; -- &608441020; &150224382; &135295244; + &2105286; -- &2033807; -- &1179613; -- &2743411; &539479988; -- &521163479; -- &302276083; -- &702999655; + &3482206; -- &4182915; -- &1300016; -- &2362063; &892316032; -- &1071872863; -- &333129378; -- &605279149; + -- &1476985; &2491325; &507927; -- &724804; -- &378477722; &638402564; &130156402; -- &185731180; + &1994046; -- &1393159; -- &1187885; -- &1834526; &510974714; -- &356997292; -- &304395785; -- &470097680; + -- &1317678; &2461387; &3035980; &621164; -- &337655269; &630730945; &777970524; &159173408; + -- &3033742; &2647994; -- &2612853; &749577; -- &777397036; &678549029; -- &669544140; &192079267; + -- &338420; &3009748; &4148469; -- &4022750; -- &86720197; &771248568; &1063046068; -- &1030830548; + &3901472; -- &1226661; &2925816; &3374250; &999753034; -- &314332144; &749740976; &864652284; + &3980599; -- &1615530; &1665318; &1163598; &1020029345; -- &413979908; &426738094; &298172236; + &2569011; &1723229; &2028038; -- &3369273; &658309618; &441577800; &519685171; -- &863376927; + &1356448; -- &2775755; &2683270; -- &2778788; &347590090; -- &711287812; &687588511; -- &712065019; + &3994671; -- &1370517; &3363542; &545376; &1023635298; -- &351195274; &861908357; &139752717; + -- &11879; &3020393; &214880; -- &770441; -- &3043996; &773976352; &55063046; -- &197425671; + -- &3467665; &2312838; -- &653275; -- &459163; -- &888589898; &592665232; -- &167401858; -- &117660617; + &3105558; &508145; &860144; &140244; &795799901; &130212265; &220412084; &35937555; + -- &1103344; -- &553718; &3430436; -- &1514152; -- &282732136; -- &141890356; &879049958; -- &388001774; + &348812; -- &327848; &1011223; -- &2354215; &89383150; -- &84011120; &259126110; -- &603268097; + -- &2185084; &2358373; -- &3014420; &2926054; -- &559928242; &604333585; -- &772445769; &749801963; + &3123762; -- &2193087; -- &1716814; -- &392707; &800464680; -- &561979013; -- &439933955; -- &100631253; + -- &3818627; -- &1922253; -- &2236726; &1744507; -- &978523985; -- &492577742; -- &573161516; &447030292; + -- &303005; -- &3974485; &1900052; &1054478; -- &77645096; -- &1018462631; &486888731; &270210213; + &3531229; -- &3773731; -- &781875; -- &731434; &904878186; -- &967019376; -- &200355636; -- &187430119 +]`;; diff --git a/proofs/hol_light/aarch64/proofs/subroutine_signatures.ml b/proofs/hol_light/aarch64/proofs/subroutine_signatures.ml new file mode 100644 index 000000000..ad53750b2 --- /dev/null +++ b/proofs/hol_light/aarch64/proofs/subroutine_signatures.ml @@ -0,0 +1,31 @@ +(* + * Copyright (c) The mldsa-native project authors + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* ML-DSA subroutine signatures for constant-time proofs. *) +(* Trimmed version of s2n-bignum's arm/proofs/subroutine_signatures.ml. *) +(* ========================================================================= *) + +let subroutine_signatures = [ +("mldsa_ntt_arm", + ([(*args*) + ("a", "int32_t[static 256]", (*is const?*)"false"); + ("z_012345", "int32_t[144]", (*is const?*)"true"); + ("z_67", "int32_t[384]", (*is const?*)"true"); + ], + "void", + [(* input buffers *) + ("a", "256"(* num elems *), 4(* elem bytesize *)); + ("z_012345", "144"(* num elems *), 4(* elem bytesize *)); + ("z_67", "384"(* num elems *), 4(* elem bytesize *)); + ], + [(* output buffers *) + ("a", "256"(* num elems *), 4(* elem bytesize *)); + ], + [(* temporary buffers *) + ]) +); +];; diff --git a/proofs/hol_light/x86_64/proofs/mldsa_specs.ml b/proofs/hol_light/common/mldsa_specs.ml similarity index 84% rename from proofs/hol_light/x86_64/proofs/mldsa_specs.ml rename to proofs/hol_light/common/mldsa_specs.ml index d706cc826..661c42f33 100644 --- a/proofs/hol_light/x86_64/proofs/mldsa_specs.ml +++ b/proofs/hol_light/common/mldsa_specs.ml @@ -1,10 +1,12 @@ (* + * Copyright (c) The mldsa-native project authors * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 *) (* ========================================================================= *) (* Common specifications and tactics for ML-DSA, mainly related to the NTT. *) +(* Covers both AArch64 and x86_64 proofs. *) (* ========================================================================= *) needs "Library/words.ml";; @@ -27,6 +29,9 @@ let pure_forward_ntt_mldsa = define let bitreverse8 = define `bitreverse8(n) = val(word_reversefields 1 (word n:8 word))`;; +let reorder = define + `reorder p (a:num->int) = \i. a(p i)`;; + (* ------------------------------------------------------------------------- *) (* AVX2-optimized ordering for ML-DSA NTT (swaps bit fields then reverses) *) (* ------------------------------------------------------------------------- *) @@ -144,9 +149,53 @@ let MLDSA_INVERSE_NTT_CONV = GEN_REWRITE_CONV DEPTH_CONV [INT_OF_NUM_POW; INT_OF_NUM_REM] THENC ONCE_DEPTH_CONV EXP_MOD_CONV THENC INT_REDUCE_CONV;; +(* ------------------------------------------------------------------------- *) +(* The precise specs of the actual ARM code for ML-DSA. *) +(* ------------------------------------------------------------------------- *) + +let arm_mldsa_pure_forward_ntt = define + `arm_mldsa_pure_forward_ntt f k = + isum (0..255) (\j. f j * &1753 pow ((2 * k + 1) * j)) + rem &8380417`;; + +let arm_mldsa_forward_ntt = define + `arm_mldsa_forward_ntt f k = + isum (0..255) (\j. f j * &1753 pow ((2 * bitreverse8 k + 1) * j)) + rem &8380417`;; + +let ARM_MLDSA_FORWARD_NTT = prove + (`arm_mldsa_forward_ntt = reorder bitreverse8 o arm_mldsa_pure_forward_ntt`, + REWRITE_TAC[FUN_EQ_THM; o_DEF; reorder] THEN + REWRITE_TAC[arm_mldsa_forward_ntt; arm_mldsa_pure_forward_ntt]);; + +let ARM_MLDSA_FORWARD_NTT_ALT = prove + (`arm_mldsa_forward_ntt f k = + isum (0..255) + (\j. f j * + (&1753 pow ((2 * bitreverse8 k + 1) * j)) rem &8380417) + rem &8380417`, + REWRITE_TAC[arm_mldsa_forward_ntt] THEN MATCH_MP_TAC + (REWRITE_RULE[] (ISPEC + `(\x y. x rem &8380417 = y rem &8380417)` ISUM_RELATED)) THEN + REWRITE_TAC[INT_REM_EQ; FINITE_NUMSEG; INT_CONG_ADD] THEN + X_GEN_TAC `i:num` THEN DISCH_TAC THEN + REWRITE_TAC[GSYM INT_OF_NUM_REM; GSYM INT_OF_NUM_CLAUSES; + GSYM INT_REM_EQ] THEN + CONV_TAC INT_REM_DOWN_CONV THEN + AP_THM_TAC THEN AP_TERM_TAC THEN CONV_TAC INT_ARITH);; + +let ARM_MLDSA_FORWARD_NTT_CONV = + GEN_REWRITE_CONV I [ARM_MLDSA_FORWARD_NTT_ALT] THENC + LAND_CONV EXPAND_ISUM_CONV THENC + DEPTH_CONV NUM_RED_CONV THENC + GEN_REWRITE_CONV ONCE_DEPTH_CONV [BITREVERSE8_CLAUSES] THENC + DEPTH_CONV NUM_RED_CONV THENC + GEN_REWRITE_CONV DEPTH_CONV [INT_OF_NUM_POW; INT_OF_NUM_REM] THENC + ONCE_DEPTH_CONV EXP_MOD_CONV THENC INT_REDUCE_CONV;; + (* ------------------------------------------------------------------------- *) (* Abbreviate the Barrett reduction and multiplication and Montgomery *) -(* reduction patterns in the code. *) +(* reduction patterns in the x86 code. *) (* ------------------------------------------------------------------------- *) let mldsa_barred = define @@ -597,6 +646,73 @@ let CONGBOUND_MLDSA_MONTMUL = prove MATCH_MP lemma o CONJUNCT2) THEN INT_ARITH_TAC);; +(* ------------------------------------------------------------------------- *) +(* Abbreviate the Barrett multiplication pattern in the ARM code. *) +(* ------------------------------------------------------------------------- *) + +let arm_mldsa_barmul = define + `arm_mldsa_barmul (k,b) (a:int32):int32 = + word_sub (word_mul a b) + (word_mul (iword_saturate((&2 * ival a * k + &2147483648) div &4294967296)) + (word 8380417))`;; + +let CONGBOUND_ARM_MLDSA_BARMUL = prove + (`!a a' l u. + ((ival a == a') (mod &8380417) /\ l <= ival a /\ ival a <= u) + ==> !k b. abs(k) <= &2147483647 /\ + (max (abs l) (abs u) * + abs(&4294967296 * ival b - &16760834 * k) + &17996812765888511) div &4294967296 + <= &2147483647 + ==> (ival(arm_mldsa_barmul(k,b) a) == a' * ival b) (mod &8380417) /\ + --(max (abs l) (abs u) * + abs(&4294967296 * ival b - &16760834 * k) + &17996808470921216) + div &4294967296 + <= ival(arm_mldsa_barmul(k,b) a) /\ + ival(arm_mldsa_barmul(k,b) a) <= + (max (abs l) (abs u) * abs(&4294967296 * ival b - &16760834 * k) + + &17996812765888511) div &4294967296`, + REPEAT GEN_TAC THEN STRIP_TAC THEN REWRITE_TAC[INT_ABS_BOUNDS] THEN + REPEAT GEN_TAC THEN STRIP_TAC THEN REWRITE_TAC[arm_mldsa_barmul] THEN + REWRITE_TAC[iword_saturate; word_INT_MIN; word_INT_MAX; DIMINDEX_32] THEN + CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV) THEN + REPEAT(COND_CASES_TAC THENL + [FIRST_X_ASSUM(MATCH_MP_TAC o MATCH_MP (MESON[] `p ==> ~p ==> q`)) THEN + REWRITE_TAC[INT_GT; INT_NOT_LT] THEN ASM BOUNDER_TAC[]; + ASM_REWRITE_TAC[]]) THEN + REWRITE_TAC[WORD_RULE + `word_sub (word_mul a b) (word_mul (iword k) (word c)) = + iword(ival a * ival b - &c * k)`] THEN + MATCH_MP_TAC(MESON[] + `(x == k) (mod n) /\ + (a <= x /\ x <= b ==> ival(iword x:int32) = x) /\ + (a <= x /\ x <= b) + ==> (ival(iword x:int32) == k) (mod n) /\ + a <= ival(iword x:int32) /\ ival(iword x:int32) <= b`) THEN + ASM_SIMP_TAC[INTEGER_RULE + `(a:int == a') (mod n) ==> (a * b - n * c == a' * b) (mod n)`] THEN + CONJ_TAC THENL + [REPEAT STRIP_TAC THEN MATCH_MP_TAC IVAL_IWORD THEN + REWRITE_TAC[DIMINDEX_32; ARITH] THEN ASM_INT_ARITH_TAC; + ALL_TAC] THEN + MATCH_MP_TAC(INT_ARITH + `&4294967296 * l + &17996808470921216 <= a * (&4294967296 * b - &16760834 * k) /\ + a * (&4294967296 * b - &16760834 * k) <= &4294967296 * u - &17996808470921216 + ==> l <= a * b - &8380417 * (&2 * a * k + &2147483648) div &4294967296 /\ + a * b - &8380417 * (&2 * a * k + &2147483648) div &4294967296 <= u`) THEN + CONJ_TAC THENL + [MATCH_MP_TAC(INT_ARITH `abs(y):int <= --x ==> x <= y`); + MATCH_MP_TAC(INT_ARITH `abs(y):int <= x ==> y <= x`)] THEN + REWRITE_TAC[INT_ABS_MUL] THEN + TRANS_TAC INT_LE_TRANS + `max (abs l) (abs u) * abs(&4294967296 * ival(b:int32) - &16760834 * k)` THEN + ASM_SIMP_TAC[INT_LE_RMUL; INT_ABS_POS; INT_ARITH + `l:int <= x /\ x <= u ==> abs x <= max (abs l) (abs u)`] THEN + CONV_TAC INT_ARITH);; + +(* ------------------------------------------------------------------------- *) +(* Bound propagation rules and congruence-bound propagation engine. *) +(* ------------------------------------------------------------------------- *) + let CONCL_BOUNDS_RULE = CONV_RULE(BINOP2_CONV (LAND_CONV(RAND_CONV DIMINDEX_INT_REDUCE_CONV)) @@ -632,6 +748,11 @@ let rec ASM_CONGBOUND_RULE lfn tm = let th0' = WEAKEN_INTCONG_RULE (num 8380417) th0 in let th1 = SPECL [atm;btm] (MATCH_MP CONGBOUND_MLDSA_MONTMUL th0') in CONCL_BOUNDS_RULE(SIDE_ELIM_RULE th1) + | Comb(Comb(Const("arm_mldsa_barmul",_),kb),t) -> + let ktm,btm = dest_pair kb and th0 = ASM_CONGBOUND_RULE lfn t in + let th0' = WEAKEN_INTCONG_RULE (num 8380417) th0 in + let th1 = SPECL [ktm;btm] (MATCH_MP CONGBOUND_ARM_MLDSA_BARMUL th0') in + CONCL_BOUNDS_RULE(SIDE_ELIM_RULE th1) | Comb(Const("word_sx",_),t) -> let th0 = ASM_CONGBOUND_RULE lfn t in let tyin = type_match diff --git a/proofs/hol_light/x86_64/proofs/mldsa_intt.ml b/proofs/hol_light/x86_64/proofs/mldsa_intt.ml index 997571305..620df6f8f 100644 --- a/proofs/hol_light/x86_64/proofs/mldsa_intt.ml +++ b/proofs/hol_light/x86_64/proofs/mldsa_intt.ml @@ -8,7 +8,7 @@ (* ========================================================================= *) needs "x86/proofs/base.ml";; -needs "x86_64/proofs/mldsa_specs.ml";; +needs "common/mldsa_specs.ml";; needs "x86_64/proofs/mldsa_utils.ml";; needs "x86_64/proofs/mldsa_zetas.ml";; diff --git a/proofs/hol_light/x86_64/proofs/mldsa_ntt.ml b/proofs/hol_light/x86_64/proofs/mldsa_ntt.ml index 755df0425..efe96c74c 100644 --- a/proofs/hol_light/x86_64/proofs/mldsa_ntt.ml +++ b/proofs/hol_light/x86_64/proofs/mldsa_ntt.ml @@ -8,7 +8,7 @@ (* ========================================================================= *) needs "x86/proofs/base.ml";; -needs "x86_64/proofs/mldsa_specs.ml";; +needs "common/mldsa_specs.ml";; needs "x86_64/proofs/mldsa_utils.ml";; needs "x86_64/proofs/mldsa_zetas.ml";; diff --git a/scripts/autogen b/scripts/autogen index 890418a29..21642f65f 100755 --- a/scripts/autogen +++ b/scripts/autogen @@ -1290,6 +1290,26 @@ def gen_avx2_zetas_qdata(): return constants, offsets +def gen_aarch64_hol_light_zeta_file(): + def gen(): + yield from gen_hol_light_header() + yield "(*" + yield " * Table of zeta values used in the AArch64 NTTs" + yield " * See autogen for details." + yield " *)" + yield "" + yield "let ntt_zetas_layer012345 = define `ntt_zetas_layer012345:int list = [" + yield from print_hol_light_array(gen_aarch64_fwd_ntt_zetas_layer123456()) + yield "]`;;" + yield "" + yield "let ntt_zetas_layer67 = define `ntt_zetas_layer67:int list = [" + yield from print_hol_light_array(gen_aarch64_fwd_ntt_zetas_layer78()) + yield "]`;;" + yield "" + + update_file("proofs/hol_light/aarch64/proofs/mldsa_zetas.ml", "\n".join(gen())) + + def gen_avx2_hol_light_zeta_file(): def gen(): yield from gen_hol_light_header() @@ -2271,6 +2291,13 @@ def gen_hol_light_asm(): f"-Imldsa/src/native/aarch64/src {aarch64_flags}", "aarch64", ), + ( + "rej_uniform_asm.S", + "mldsa_rej_uniform.S", + "dev/aarch64_clean/src", + f"-Imldsa/src/native/aarch64/src {aarch64_flags}", + "aarch64", + ), ] x86_64_flags = "-mavx2 -mbmi2 -msse4 -fcf-protection=full" @@ -3301,6 +3328,7 @@ def _main(): def gen_zeta_tables(): gen_c_zeta_file() gen_aarch64_zeta_file() + gen_aarch64_hol_light_zeta_file() gen_aarch64_rej_uniform_table() gen_aarch64_rej_uniform_eta_table() gen_aarch64_polyz_unpack_table()