diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S index 2112845ce06..7650a653bc1 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S @@ -8367,37 +8367,36 @@ L_AES_set_encrypt_key_loop_256: add r2, r2, #16 stm r2, {r4, r5, r6, r7} sub r2, r2, #16 - mov r3, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) - lsl r4, r3, #16 + lsl r4, r7, #16 lsr r4, r4, #24 #else - uxtb r4, r3, ror #8 + uxtb r4, r7, ror #8 #endif #else - ubfx r4, r3, #8, #8 + ubfx r4, r7, #8, #8 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) - lsl r5, r3, #8 + lsl r5, r7, #8 lsr r5, r5, #24 #else - uxtb r5, r3, ror #16 + uxtb r5, r7, ror #16 #endif #else - ubfx r5, r3, #16, #8 + ubfx r5, r7, #16, #8 #endif - lsr r6, r3, #24 + lsr r6, r7, #24 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) - lsl r3, r3, #24 + lsl r3, r7, #24 lsr r3, r3, #24 #else - uxtb r3, r3 + uxtb r3, r7 #endif #else - ubfx r3, r3, #0, #8 + ubfx r3, r7, #0, #8 #endif ldrb r4, [r8, r4, lsl #2] ldrb r6, [r8, r6, lsl #2] @@ -23252,14 +23251,22 @@ L_GCM_gmult_len_start_block: ldr r12, [r0, #12] ldr r3, [r2, #12] eor r12, r12, r3 - lsr r3, r12, #24 - and r3, r3, #15 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r3, r12, #4 + lsr r3, r3, #28 +#else + ubfx r3, r12, #24, #4 +#endif add r3, r1, r3, lsl #4 ldm r3, {r8, r9, r10, r11} lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) lsr r4, r12, #28 +#else + ubfx r4, r12, #28, #4 +#endif eor r11, r11, r10, lsl #28 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 @@ -23275,9 +23282,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #12 + lsr r4, r4, #28 +#else + ubfx r4, r12, #16, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23292,9 +23303,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #20 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #8 + lsr r4, r4, #28 +#else + ubfx r4, r12, #20, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23309,9 +23324,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #20 + lsr r4, r4, #28 +#else + ubfx r4, r12, #8, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23326,9 +23345,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #16 + lsr r4, r4, #28 +#else + ubfx r4, r12, #12, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23359,9 +23382,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #24 + lsr r4, r4, #28 +#else + ubfx r4, r12, #4, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23385,8 +23412,12 @@ L_GCM_gmult_len_start_block: ldr r12, [r0, #8] ldr r3, [r2, #8] eor r12, r12, r3 - lsr r3, r12, #24 - and r3, r3, #15 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r3, r12, #4 + lsr r3, r3, #28 +#else + ubfx r3, r12, #24, #4 +#endif add r3, r1, r3, lsl #4 ldm r3, {r4, r5, r6, r7} eor r8, r8, r4 @@ -23396,7 +23427,11 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) lsr r4, r12, #28 +#else + ubfx r4, r12, #28, #4 +#endif eor r11, r11, r10, lsl #28 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 @@ -23412,9 +23447,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #12 + lsr r4, r4, #28 +#else + ubfx r4, r12, #16, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23429,9 +23468,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #20 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #8 + lsr r4, r4, #28 +#else + ubfx r4, r12, #20, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23446,9 +23489,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #20 + lsr r4, r4, #28 +#else + ubfx r4, r12, #8, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23463,9 +23510,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #16 + lsr r4, r4, #28 +#else + ubfx r4, r12, #12, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23496,9 +23547,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #24 + lsr r4, r4, #28 +#else + ubfx r4, r12, #4, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23522,8 +23577,12 @@ L_GCM_gmult_len_start_block: ldr r12, [r0, #4] ldr r3, [r2, #4] eor r12, r12, r3 - lsr r3, r12, #24 - and r3, r3, #15 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r3, r12, #4 + lsr r3, r3, #28 +#else + ubfx r3, r12, #24, #4 +#endif add r3, r1, r3, lsl #4 ldm r3, {r4, r5, r6, r7} eor r8, r8, r4 @@ -23533,7 +23592,11 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) lsr r4, r12, #28 +#else + ubfx r4, r12, #28, #4 +#endif eor r11, r11, r10, lsl #28 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 @@ -23549,9 +23612,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #12 + lsr r4, r4, #28 +#else + ubfx r4, r12, #16, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23566,9 +23633,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #20 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #8 + lsr r4, r4, #28 +#else + ubfx r4, r12, #20, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23583,9 +23654,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #20 + lsr r4, r4, #28 +#else + ubfx r4, r12, #8, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23600,9 +23675,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #16 + lsr r4, r4, #28 +#else + ubfx r4, r12, #12, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23633,9 +23712,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #24 + lsr r4, r4, #28 +#else + ubfx r4, r12, #4, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23659,8 +23742,12 @@ L_GCM_gmult_len_start_block: ldr r12, [r0] ldr r3, [r2] eor r12, r12, r3 - lsr r3, r12, #24 - and r3, r3, #15 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r3, r12, #4 + lsr r3, r3, #28 +#else + ubfx r3, r12, #24, #4 +#endif add r3, r1, r3, lsl #4 ldm r3, {r4, r5, r6, r7} eor r8, r8, r4 @@ -23670,7 +23757,11 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) lsr r4, r12, #28 +#else + ubfx r4, r12, #28, #4 +#endif eor r11, r11, r10, lsl #28 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 @@ -23686,9 +23777,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #16 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #12 + lsr r4, r4, #28 +#else + ubfx r4, r12, #16, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23703,9 +23798,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #20 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #8 + lsr r4, r4, #28 +#else + ubfx r4, r12, #20, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23720,9 +23819,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #8 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #20 + lsr r4, r4, #28 +#else + ubfx r4, r12, #8, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23737,9 +23840,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #12 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #16 + lsr r4, r4, #28 +#else + ubfx r4, r12, #12, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 @@ -23770,9 +23877,13 @@ L_GCM_gmult_len_start_block: lsr r6, r10, #4 and r3, r11, #15 lsr r11, r11, #4 - lsr r4, r12, #4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + lsl r4, r12, #24 + lsr r4, r4, #28 +#else + ubfx r4, r12, #4, #4 +#endif eor r11, r11, r10, lsl #28 - and r4, r4, #15 ldr r3, [lr, r3, lsl #2] add r4, r1, r4, lsl #4 eor r10, r6, r9, lsl #28 diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c index 52adcfc5f24..4e26c59de46 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c @@ -8993,37 +8993,36 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key, "add %[ks], %[ks], #16\n\t" "stm %[ks], {r4, r5, r6, r7}\n\t" "sub %[ks], %[ks], #16\n\t" - "mov r3, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) - "lsl r4, r3, #16\n\t" + "lsl r4, r7, #16\n\t" "lsr r4, r4, #24\n\t" #else - "uxtb r4, r3, ror #8\n\t" + "uxtb r4, r7, ror #8\n\t" #endif #else - "ubfx r4, r3, #8, #8\n\t" + "ubfx r4, r7, #8, #8\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) - "lsl r5, r3, #8\n\t" + "lsl r5, r7, #8\n\t" "lsr r5, r5, #24\n\t" #else - "uxtb r5, r3, ror #16\n\t" + "uxtb r5, r7, ror #16\n\t" #endif #else - "ubfx r5, r3, #16, #8\n\t" + "ubfx r5, r7, #16, #8\n\t" #endif - "lsr r6, r3, #24\n\t" + "lsr r6, r7, #24\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) - "lsl r3, r3, #24\n\t" + "lsl r3, r7, #24\n\t" "lsr r3, r3, #24\n\t" #else - "uxtb r3, r3\n\t" + "uxtb r3, r7\n\t" #endif #else - "ubfx r3, r3, #0, #8\n\t" + "ubfx r3, r7, #0, #8\n\t" #endif "ldrb r4, [r8, r4, lsl #2]\n\t" "ldrb r6, [r8, r6, lsl #2]\n\t" @@ -24111,14 +24110,22 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "ldr r12, [r0, #12]\n\t" "ldr %[len], [r2, #12]\n\t" "eor r12, r12, %[len]\n\t" - "lsr %[len], r12, #24\n\t" - "and %[len], %[len], #15\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl %[len], r12, #4\n\t" + "lsr %[len], %[len], #28\n\t" +#else + "ubfx %[len], r12, #24, #4\n\t" +#endif "add %[len], %[m], %[len], lsl #4\n\t" "ldm %[len], {r8, r9, r10, r11}\n\t" "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsr r4, r12, #28\n\t" +#else + "ubfx r4, r12, #28, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" @@ -24134,9 +24141,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #12\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #16, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24151,9 +24162,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #20\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #8\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #20, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24168,9 +24183,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #20\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #8, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24185,9 +24204,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #16\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #12, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24218,9 +24241,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #24\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #4, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24244,8 +24271,12 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "ldr r12, [r0, #8]\n\t" "ldr %[len], [r2, #8]\n\t" "eor r12, r12, %[len]\n\t" - "lsr %[len], r12, #24\n\t" - "and %[len], %[len], #15\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl %[len], r12, #4\n\t" + "lsr %[len], %[len], #28\n\t" +#else + "ubfx %[len], r12, #24, #4\n\t" +#endif "add %[len], %[m], %[len], lsl #4\n\t" "ldm %[len], {r4, r5, r6, r7}\n\t" "eor r8, r8, r4\n\t" @@ -24255,7 +24286,11 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsr r4, r12, #28\n\t" +#else + "ubfx r4, r12, #28, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" @@ -24271,9 +24306,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #12\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #16, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24288,9 +24327,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #20\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #8\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #20, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24305,9 +24348,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #20\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #8, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24322,9 +24369,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #16\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #12, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24355,9 +24406,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #24\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #4, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24381,8 +24436,12 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "ldr r12, [r0, #4]\n\t" "ldr %[len], [r2, #4]\n\t" "eor r12, r12, %[len]\n\t" - "lsr %[len], r12, #24\n\t" - "and %[len], %[len], #15\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl %[len], r12, #4\n\t" + "lsr %[len], %[len], #28\n\t" +#else + "ubfx %[len], r12, #24, #4\n\t" +#endif "add %[len], %[m], %[len], lsl #4\n\t" "ldm %[len], {r4, r5, r6, r7}\n\t" "eor r8, r8, r4\n\t" @@ -24392,7 +24451,11 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsr r4, r12, #28\n\t" +#else + "ubfx r4, r12, #28, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" @@ -24408,9 +24471,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #12\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #16, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24425,9 +24492,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #20\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #8\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #20, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24442,9 +24513,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #20\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #8, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24459,9 +24534,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #16\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #12, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24492,9 +24571,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #24\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #4, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24518,8 +24601,12 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "ldr r12, [r0]\n\t" "ldr %[len], [r2]\n\t" "eor r12, r12, %[len]\n\t" - "lsr %[len], r12, #24\n\t" - "and %[len], %[len], #15\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl %[len], r12, #4\n\t" + "lsr %[len], %[len], #28\n\t" +#else + "ubfx %[len], r12, #24, #4\n\t" +#endif "add %[len], %[m], %[len], lsl #4\n\t" "ldm %[len], {r4, r5, r6, r7}\n\t" "eor r8, r8, r4\n\t" @@ -24529,7 +24616,11 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsr r4, r12, #28\n\t" +#else + "ubfx r4, r12, #28, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" @@ -24545,9 +24636,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #16\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #12\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #16, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24562,9 +24657,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #20\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #8\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #20, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24579,9 +24678,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #8\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #20\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #8, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24596,9 +24699,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #12\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #16\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #12, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" @@ -24629,9 +24736,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "lsr r6, r10, #4\n\t" "and %[len], r11, #15\n\t" "lsr r11, r11, #4\n\t" - "lsr r4, r12, #4\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "lsl r4, r12, #24\n\t" + "lsr r4, r4, #28\n\t" +#else + "ubfx r4, r12, #4, #4\n\t" +#endif "eor r11, r11, r10, lsl #28\n\t" - "and r4, r4, #15\n\t" "ldr %[len], [lr, r3, lsl #2]\n\t" "add r4, %[m], r4, lsl #4\n\t" "eor r10, r6, r9, lsl #28\n\t" diff --git a/wolfcrypt/src/port/arm/armv8-32-curve25519.S b/wolfcrypt/src/port/arm/armv8-32-curve25519.S index 7171e8c060c..539f3ac2191 100644 --- a/wolfcrypt/src/port/arm/armv8-32-curve25519.S +++ b/wolfcrypt/src/port/arm/armv8-32-curve25519.S @@ -155,8 +155,7 @@ fe_add_sub_op: #endif # Sub sbcs r10, r4, r6 - sbcs r11, r5, r7 - sbc lr, lr, lr + sbc r11, r5, r7 # Add subs r12, r12, #1 adcs r8, r4, r6 @@ -222,12 +221,9 @@ fe_add_sub_op: #else strd r8, r9, [r0, #24] #endif - # Multiply -modulus by underflow - lsl r3, lr, #1 - mvn lr, #18 - orr r3, r3, r11, lsr #31 - mul lr, r3, lr - # Sub -x*modulus (if overflow) + # Add -modulus on underflow + mov lr, #19 + and lr, lr, r11, asr #31 ldm r1, {r4, r5, r6, r7, r8, r9} subs r4, r4, lr sbcs r5, r5, #0 @@ -263,12 +259,9 @@ fe_sub_op: sbcs r10, r2, r10 sbcs r11, r3, r11 sbcs r12, r4, r12 - sbcs lr, r5, lr - sbc r3, r3, r3 - mvn r2, #18 - lsl r3, r3, #1 - orr r3, r3, lr, lsr #31 - mul r2, r3, r2 + sbc lr, r5, lr + mov r2, #19 + and r2, r2, lr, asr #31 subs r6, r6, r2 sbcs r7, r7, #0 sbcs r8, r8, #0 @@ -312,13 +305,9 @@ fe_add_op: adcs r10, r2, r10 adcs r11, r3, r11 adcs r12, r4, r12 - mov r3, #0 - adcs lr, r5, lr - adc r3, r3, #0 + adc lr, r5, lr mov r2, #19 - lsl r3, r3, #1 - orr r3, r3, lr, lsr #31 - mul r2, r3, r2 + and r2, r2, lr, asr #31 adds r6, r6, r2 adcs r7, r7, #0 adcs r8, r8, #0 @@ -575,6 +564,7 @@ fe_isnonzero: fe_isnegative: push {r4, r5, lr} ldm r0!, {r2, r3, r4, r5} + and r12, r2, #1 adds r1, r2, #19 adcs r1, r3, #0 adcs r1, r4, #0 @@ -583,11 +573,9 @@ fe_isnegative: adcs r1, r2, #0 adcs r1, r3, #0 adcs r1, r4, #0 - ldr r2, [r0, #-16] adc r1, r5, #0 - and r0, r2, #1 lsr r1, r1, #31 - eor r0, r0, r1 + eor r0, r12, r1 pop {r4, r5, pc} .size fe_isnegative,.-fe_isnegative #if defined(HAVE_ED25519_MAKE_KEY) || defined(HAVE_ED25519_SIGN) || defined(WOLFSSL_CURVE25519_USE_ED25519) diff --git a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c index a758fbd3ba6..7b4925b8933 100644 --- a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c @@ -59,9 +59,9 @@ #if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL) #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_init(void) +WC_OMIT_FRAME_POINTER void fe_init() #else -WC_OMIT_FRAME_POINTER void fe_init(void) +WC_OMIT_FRAME_POINTER void fe_init() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -81,9 +81,9 @@ WC_OMIT_FRAME_POINTER void fe_init(void) void fe_add_sub_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_add_sub_op(void) +WC_OMIT_FRAME_POINTER void fe_add_sub_op() #else -WC_OMIT_FRAME_POINTER void fe_add_sub_op(void) +WC_OMIT_FRAME_POINTER void fe_add_sub_op() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -198,8 +198,7 @@ WC_OMIT_FRAME_POINTER void fe_add_sub_op(void) #endif /* Sub */ "sbcs r10, r4, r6\n\t" - "sbcs r11, r5, r7\n\t" - "sbc lr, lr, lr\n\t" + "sbc r11, r5, r7\n\t" /* Add */ "subs r12, r12, #1\n\t" "adcs r8, r4, r6\n\t" @@ -265,12 +264,9 @@ WC_OMIT_FRAME_POINTER void fe_add_sub_op(void) #else "strd r8, r9, [r0, #24]\n\t" #endif - /* Multiply -modulus by underflow */ - "lsl r3, lr, #1\n\t" - "mvn lr, #18\n\t" - "orr r3, r3, r11, lsr #31\n\t" - "mul lr, r3, lr\n\t" - /* Sub -x*modulus (if overflow) */ + /* Add -modulus on underflow */ + "mov lr, #19\n\t" + "and lr, lr, r11, asr #31\n\t" "ldm r1, {r4, r5, r6, r7, r8, r9}\n\t" "subs r4, r4, lr\n\t" "sbcs r5, r5, #0\n\t" @@ -300,9 +296,9 @@ WC_OMIT_FRAME_POINTER void fe_add_sub_op(void) void fe_sub_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_sub_op(void) +WC_OMIT_FRAME_POINTER void fe_sub_op() #else -WC_OMIT_FRAME_POINTER void fe_sub_op(void) +WC_OMIT_FRAME_POINTER void fe_sub_op() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -319,12 +315,9 @@ WC_OMIT_FRAME_POINTER void fe_sub_op(void) "sbcs r10, r2, r10\n\t" "sbcs r11, r3, r11\n\t" "sbcs r12, r4, r12\n\t" - "sbcs lr, r5, lr\n\t" - "sbc r3, r3, r3\n\t" - "mvn r2, #18\n\t" - "lsl r3, r3, #1\n\t" - "orr r3, r3, lr, lsr #31\n\t" - "mul r2, r3, r2\n\t" + "sbc lr, r5, lr\n\t" + "mov r2, #19\n\t" + "and r2, r2, lr, asr #31\n\t" "subs r6, r6, r2\n\t" "sbcs r7, r7, #0\n\t" "sbcs r8, r8, #0\n\t" @@ -379,9 +372,9 @@ WC_OMIT_FRAME_POINTER void fe_sub(fe r, const fe a, const fe b) void fe_add_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_add_op(void) +WC_OMIT_FRAME_POINTER void fe_add_op() #else -WC_OMIT_FRAME_POINTER void fe_add_op(void) +WC_OMIT_FRAME_POINTER void fe_add_op() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -398,13 +391,9 @@ WC_OMIT_FRAME_POINTER void fe_add_op(void) "adcs r10, r2, r10\n\t" "adcs r11, r3, r11\n\t" "adcs r12, r4, r12\n\t" - "mov r3, #0\n\t" - "adcs lr, r5, lr\n\t" - "adc r3, r3, #0\n\t" + "adc lr, r5, lr\n\t" "mov r2, #19\n\t" - "lsl r3, r3, #1\n\t" - "orr r3, r3, lr, lsr #31\n\t" - "mul r2, r3, r2\n\t" + "and r2, r2, lr, asr #31\n\t" "adds r6, r6, r2\n\t" "adcs r7, r7, #0\n\t" "adcs r8, r8, #0\n\t" @@ -797,6 +786,7 @@ WC_OMIT_FRAME_POINTER int fe_isnegative(const fe a) __asm__ __volatile__ ( "ldm %[a]!, {r2, r3, r4, r5}\n\t" + "and r12, r2, #1\n\t" "adds r1, r2, #19\n\t" "adcs r1, r3, #0\n\t" "adcs r1, r4, #0\n\t" @@ -805,11 +795,9 @@ WC_OMIT_FRAME_POINTER int fe_isnegative(const fe a) "adcs r1, r2, #0\n\t" "adcs r1, r3, #0\n\t" "adcs r1, r4, #0\n\t" - "ldr r2, [%[a], #-16]\n\t" "adc r1, r5, #0\n\t" - "and %[a], r2, #1\n\t" "lsr r1, r1, #31\n\t" - "eor %[a], %[a], r1\n\t" + "eor %[a], r12, r1\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a) : @@ -817,7 +805,7 @@ WC_OMIT_FRAME_POINTER int fe_isnegative(const fe a) : : [a] "r" (a) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ - : "memory", "cc", "r1", "r2", "r3", "r4", "r5" + : "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r12" ); return (word32)(size_t)a; } @@ -2510,9 +2498,9 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) void fe_mul_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_mul_op(void) +WC_OMIT_FRAME_POINTER void fe_mul_op() #else -WC_OMIT_FRAME_POINTER void fe_mul_op(void) +WC_OMIT_FRAME_POINTER void fe_mul_op() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -2905,9 +2893,9 @@ WC_OMIT_FRAME_POINTER void fe_mul_op(void) #else void fe_mul_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_mul_op(void) +WC_OMIT_FRAME_POINTER void fe_mul_op() #else -WC_OMIT_FRAME_POINTER void fe_mul_op(void) +WC_OMIT_FRAME_POINTER void fe_mul_op() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -3086,9 +3074,9 @@ WC_OMIT_FRAME_POINTER void fe_mul(fe r, const fe a, const fe b) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) void fe_sq_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_sq_op(void) +WC_OMIT_FRAME_POINTER void fe_sq_op() #else -WC_OMIT_FRAME_POINTER void fe_sq_op(void) +WC_OMIT_FRAME_POINTER void fe_sq_op() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -3374,9 +3362,9 @@ WC_OMIT_FRAME_POINTER void fe_sq_op(void) #else void fe_sq_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_sq_op(void) +WC_OMIT_FRAME_POINTER void fe_sq_op() #else -WC_OMIT_FRAME_POINTER void fe_sq_op(void) +WC_OMIT_FRAME_POINTER void fe_sq_op() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG diff --git a/wolfcrypt/src/port/arm/armv8-aes-asm.S b/wolfcrypt/src/port/arm/armv8-aes-asm.S index 7a3d2ef74bc..fa48e67b178 100644 --- a/wolfcrypt/src/port/arm/armv8-aes-asm.S +++ b/wolfcrypt/src/port/arm/armv8-aes-asm.S @@ -5736,7 +5736,7 @@ L_aes_gcm_encrypt_arm64_crypto_nonce_end_bytes: # Done GHASH L_aes_gcm_encrypt_arm64_crypto_nonce_partial_done: eor x14, x14, x14 - ubfiz x24, x4, #3, #32 + lsl x24, x4, #3 mov v28.d[0], x14 mov v28.d[1], x24 rev64 v28.16b, v28.16b @@ -7099,10 +7099,10 @@ L_aes_gcm_encrypt_arm64_crypto_192_start_zero: # Done GHASH L_aes_gcm_encrypt_arm64_crypto_192_partial_done: ld1 {v14.2d}, [x12] - ubfiz x8, x8, #3, #32 + lsl x8, x8, #3 rbit x8, x8 mov v28.d[0], x8 - ubfiz x2, x2, #3, #32 + lsl x2, x2, #3 rbit x2, x2 mov v28.d[1], x2 eor v26.16b, v26.16b, v28.16b @@ -8637,10 +8637,10 @@ L_aes_gcm_encrypt_arm64_crypto_256_start_zero: # Done GHASH L_aes_gcm_encrypt_arm64_crypto_256_partial_done: ld1 {v14.2d}, [x12] - ubfiz x8, x8, #3, #32 + lsl x8, x8, #3 rbit x8, x8 mov v28.d[0], x8 - ubfiz x2, x2, #3, #32 + lsl x2, x2, #3 rbit x2, x2 mov v28.d[1], x2 aese v14.16b, v0.16b @@ -9944,10 +9944,10 @@ L_aes_gcm_encrypt_arm64_crypto_128_start_zero: # Done GHASH L_aes_gcm_encrypt_arm64_crypto_128_partial_done: ld1 {v14.2d}, [x12] - ubfiz x8, x8, #3, #32 + lsl x8, x8, #3 rbit x8, x8 mov v28.d[0], x8 - ubfiz x2, x2, #3, #32 + lsl x2, x2, #3 rbit x2, x2 mov v28.d[1], x2 eor v26.16b, v26.16b, v28.16b @@ -10537,7 +10537,7 @@ L_aes_gcm_decrypt_arm64_crypto_nonce_end_bytes: # Done GHASH L_aes_gcm_decrypt_arm64_crypto_nonce_partial_done: eor x14, x14, x14 - ubfiz x24, x4, #3, #32 + lsl x24, x4, #3 mov v28.d[0], x14 mov v28.d[1], x24 rev64 v28.16b, v28.16b @@ -11893,10 +11893,10 @@ L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte: L_aes_gcm_decrypt_arm64_crypto_192_out_end_bytes: L_aes_gcm_decrypt_arm64_crypto_192_partial_done: ld1 {v14.2d}, [x12] - ubfiz x8, x8, #3, #32 + lsl x8, x8, #3 rbit x8, x8 mov v28.d[0], x8 - ubfiz x2, x2, #3, #32 + lsl x2, x2, #3 rbit x2, x2 mov v28.d[1], x2 eor v26.16b, v26.16b, v28.16b @@ -11945,7 +11945,6 @@ L_aes_gcm_decrypt_arm64_crypto_192_partial_done: ld1 {v28.16b}, [x5] b L_aes_gcm_decrypt_arm64_crypto_192_tag_loaded L_aes_gcm_decrypt_arm64_crypto_192_part_tag: - ubfiz x6, x6, #0, #32 eor v28.16b, v28.16b, v28.16b mov x17, x6 st1 {v28.2d}, [x11] @@ -13447,10 +13446,10 @@ L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte: L_aes_gcm_decrypt_arm64_crypto_256_out_end_bytes: L_aes_gcm_decrypt_arm64_crypto_256_partial_done: ld1 {v14.2d}, [x12] - ubfiz x8, x8, #3, #32 + lsl x8, x8, #3 rbit x8, x8 mov v28.d[0], x8 - ubfiz x2, x2, #3, #32 + lsl x2, x2, #3 rbit x2, x2 mov v28.d[1], x2 aese v14.16b, v0.16b @@ -13507,7 +13506,6 @@ L_aes_gcm_decrypt_arm64_crypto_256_partial_done: ld1 {v28.16b}, [x5] b L_aes_gcm_decrypt_arm64_crypto_256_tag_loaded L_aes_gcm_decrypt_arm64_crypto_256_part_tag: - ubfiz x6, x6, #0, #32 eor v28.16b, v28.16b, v28.16b mov x17, x6 st1 {v28.2d}, [x11] @@ -14770,10 +14768,10 @@ L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte: L_aes_gcm_decrypt_arm64_crypto_128_out_end_bytes: L_aes_gcm_decrypt_arm64_crypto_128_partial_done: ld1 {v14.2d}, [x12] - ubfiz x8, x8, #3, #32 + lsl x8, x8, #3 rbit x8, x8 mov v28.d[0], x8 - ubfiz x2, x2, #3, #32 + lsl x2, x2, #3 rbit x2, x2 mov v28.d[1], x2 eor v26.16b, v26.16b, v28.16b @@ -14818,7 +14816,6 @@ L_aes_gcm_decrypt_arm64_crypto_128_partial_done: ld1 {v28.16b}, [x5] b L_aes_gcm_decrypt_arm64_crypto_128_tag_loaded L_aes_gcm_decrypt_arm64_crypto_128_part_tag: - ubfiz x6, x6, #0, #32 eor v28.16b, v28.16b, v28.16b mov x17, x6 st1 {v28.2d}, [x11] @@ -15367,7 +15364,7 @@ L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_end_bytes: # Done GHASH L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_partial_done: eor x14, x14, x14 - ubfiz x24, x4, #3, #32 + lsl x24, x4, #3 mov v28.d[0], x14 mov v28.d[1], x24 rev64 v28.16b, v28.16b @@ -16701,10 +16698,10 @@ L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_zero: # Done GHASH L_aes_gcm_encrypt_arm64_crypto_eor3_192_partial_done: ld1 {v14.2d}, [x12] - ubfiz x8, x8, #3, #32 + lsl x8, x8, #3 rbit x8, x8 mov v28.d[0], x8 - ubfiz x2, x2, #3, #32 + lsl x2, x2, #3 rbit x2, x2 mov v28.d[1], x2 eor v26.16b, v26.16b, v28.16b @@ -18210,10 +18207,10 @@ L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_zero: # Done GHASH L_aes_gcm_encrypt_arm64_crypto_eor3_256_partial_done: ld1 {v14.2d}, [x12] - ubfiz x8, x8, #3, #32 + lsl x8, x8, #3 rbit x8, x8 mov v28.d[0], x8 - ubfiz x2, x2, #3, #32 + lsl x2, x2, #3 rbit x2, x2 mov v28.d[1], x2 aese v14.16b, v0.16b @@ -19488,10 +19485,10 @@ L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_zero: # Done GHASH L_aes_gcm_encrypt_arm64_crypto_eor3_128_partial_done: ld1 {v14.2d}, [x12] - ubfiz x8, x8, #3, #32 + lsl x8, x8, #3 rbit x8, x8 mov v28.d[0], x8 - ubfiz x2, x2, #3, #32 + lsl x2, x2, #3 rbit x2, x2 mov v28.d[1], x2 eor v26.16b, v26.16b, v28.16b @@ -20059,7 +20056,7 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_end_bytes: # Done GHASH L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_partial_done: eor x14, x14, x14 - ubfiz x24, x4, #3, #32 + lsl x24, x4, #3 mov v28.d[0], x14 mov v28.d[1], x24 rev64 v28.16b, v28.16b @@ -21386,10 +21383,10 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte: L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_end_bytes: L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done: ld1 {v14.2d}, [x12] - ubfiz x8, x8, #3, #32 + lsl x8, x8, #3 rbit x8, x8 mov v28.d[0], x8 - ubfiz x2, x2, #3, #32 + lsl x2, x2, #3 rbit x2, x2 mov v28.d[1], x2 eor v26.16b, v26.16b, v28.16b @@ -21437,7 +21434,6 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done: ld1 {v28.16b}, [x5] b L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_loaded L_aes_gcm_decrypt_arm64_crypto_eor3_192_part_tag: - ubfiz x6, x6, #0, #32 eor v28.16b, v28.16b, v28.16b mov x17, x6 st1 {v28.2d}, [x11] @@ -22911,10 +22907,10 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte: L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_end_bytes: L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done: ld1 {v14.2d}, [x12] - ubfiz x8, x8, #3, #32 + lsl x8, x8, #3 rbit x8, x8 mov v28.d[0], x8 - ubfiz x2, x2, #3, #32 + lsl x2, x2, #3 rbit x2, x2 mov v28.d[1], x2 aese v14.16b, v0.16b @@ -22970,7 +22966,6 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done: ld1 {v28.16b}, [x5] b L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_loaded L_aes_gcm_decrypt_arm64_crypto_eor3_256_part_tag: - ubfiz x6, x6, #0, #32 eor v28.16b, v28.16b, v28.16b mov x17, x6 st1 {v28.2d}, [x11] @@ -24205,10 +24200,10 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte: L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_end_bytes: L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done: ld1 {v14.2d}, [x12] - ubfiz x8, x8, #3, #32 + lsl x8, x8, #3 rbit x8, x8 mov v28.d[0], x8 - ubfiz x2, x2, #3, #32 + lsl x2, x2, #3 rbit x2, x2 mov v28.d[1], x2 eor v26.16b, v26.16b, v28.16b @@ -24252,7 +24247,6 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done: ld1 {v28.16b}, [x5] b L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_loaded L_aes_gcm_decrypt_arm64_crypto_eor3_128_part_tag: - ubfiz x6, x6, #0, #32 eor v28.16b, v28.16b, v28.16b mov x17, x6 st1 {v28.2d}, [x11] @@ -24426,7 +24420,7 @@ L_aes_gcm_init_arm64_crypto_end_bytes: # Done GHASH L_aes_gcm_init_arm64_crypto_partial_done: eor x7, x7, x7 - ubfiz x13, x3, #3, #32 + lsl x13, x3, #3 mov v7.d[0], x7 mov v7.d[1], x13 rev64 v7.16b, v7.16b @@ -28788,10 +28782,10 @@ _AES_GCM_encrypt_final_AARCH64: ld1 {v4.2d}, [x5] ushr v6.2d, v6.2d, #56 ld1 {v7.2d}, [x6] - ubfiz x4, x4, #3, #32 + lsl x4, x4, #3 rbit x4, x4 mov v0.d[0], x4 - ubfiz x3, x3, #3, #32 + lsl x3, x3, #3 rbit x3, x3 mov v0.d[1], x3 eor v5.16b, v5.16b, v0.16b @@ -32674,10 +32668,10 @@ _AES_GCM_decrypt_final_AARCH64: ld1 {v4.2d}, [x5] ushr v6.2d, v6.2d, #56 ld1 {v7.2d}, [x6] - ubfiz x4, x4, #3, #32 + lsl x4, x4, #3 rbit x4, x4 mov v0.d[0], x4 - ubfiz x3, x3, #3, #32 + lsl x3, x3, #3 rbit x3, x3 mov v0.d[1], x3 eor v5.16b, v5.16b, v0.16b @@ -32702,7 +32696,6 @@ _AES_GCM_decrypt_final_AARCH64: ld1 {v0.16b}, [x1] b L_aes_gcm_decrypt_final_arm64_crypto_tag_loaded L_aes_gcm_decrypt_final_arm64_crypto_part_tag: - ubfiz x2, x2, #0, #32 eor v0.16b, v0.16b, v0.16b mov x10, x2 st1 {v0.2d}, [x0] @@ -32863,7 +32856,7 @@ L_aes_gcm_init_arm64_crypto_eor3_end_bytes: # Done GHASH L_aes_gcm_init_arm64_crypto_eor3_partial_done: eor x7, x7, x7 - ubfiz x13, x3, #3, #32 + lsl x13, x3, #3 mov v7.d[0], x7 mov v7.d[1], x13 rev64 v7.16b, v7.16b @@ -37121,10 +37114,10 @@ _AES_GCM_encrypt_final_AARCH64_EOR3: ld1 {v4.2d}, [x5] ushr v6.2d, v6.2d, #56 ld1 {v7.2d}, [x6] - ubfiz x4, x4, #3, #32 + lsl x4, x4, #3 rbit x4, x4 mov v0.d[0], x4 - ubfiz x3, x3, #3, #32 + lsl x3, x3, #3 rbit x3, x3 mov v0.d[1], x3 eor v5.16b, v5.16b, v0.16b @@ -40922,10 +40915,10 @@ _AES_GCM_decrypt_final_AARCH64_EOR3: ld1 {v4.2d}, [x5] ushr v6.2d, v6.2d, #56 ld1 {v7.2d}, [x6] - ubfiz x4, x4, #3, #32 + lsl x4, x4, #3 rbit x4, x4 mov v0.d[0], x4 - ubfiz x3, x3, #3, #32 + lsl x3, x3, #3 rbit x3, x3 mov v0.d[1], x3 eor v5.16b, v5.16b, v0.16b @@ -40949,7 +40942,6 @@ _AES_GCM_decrypt_final_AARCH64_EOR3: ld1 {v0.16b}, [x1] b L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_loaded L_aes_gcm_decrypt_final_arm64_crypto_eor3_part_tag: - ubfiz x2, x2, #0, #32 eor v0.16b, v0.16b, v0.16b mov x10, x2 st1 {v0.2d}, [x0] diff --git a/wolfcrypt/src/port/arm/armv8-aes-asm_c.c b/wolfcrypt/src/port/arm/armv8-aes-asm_c.c index 291d3d1214c..7c6e43e9729 100644 --- a/wolfcrypt/src/port/arm/armv8-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-aes-asm_c.c @@ -3540,6 +3540,8 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, byte* key, byte* tmp, word32* left, word32 nr) { __asm__ __volatile__ ( + "stp x29, x30, [sp, #-32]!\n\t" + "add x29, sp, #0\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v15.2d}, [%x[reg]]\n\t" @@ -5189,6 +5191,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "rev x11, x10\n\t" "rev x12, x9\n\t" "stp x11, x12, [%x[reg]]\n\t" + "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [reg] "+r" (reg), [key] "+r" (key), [tmp] "+r" (tmp), [left] "+r" (left), [nr] "+r" (nr) : [in] "r" (in) @@ -5261,11 +5264,19 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, word32 aadSz, byte* key, byte* gcm_h, byte* tmp, byte* reg, int nr) { __asm__ __volatile__ ( + "stp x29, x30, [sp, #-80]!\n\t" + "add x29, sp, #0\n\t" + "str %w[nr], [sp, #72]\n\t" + "str %x[reg], [sp, #64]\n\t" + "str %x[tmp], [sp, #56]\n\t" + "str %x[gcm_h], [sp, #48]\n\t" + "str %x[key], [sp, #40]\n\t" + "str %w[aadSz], [sp, #32]\n\t" "movi v27.16b, #0x87\n\t" "eor v26.16b, v26.16b, v26.16b\n\t" "ushr v27.2d, v27.2d, #56\n\t" - "ld1 {v22.2d}, [%x[gcm_h]]\n\t" - "cmp %w[aadSz], #0x40\n\t" + "ld1 {v22.2d}, [x10]\n\t" + "cmp w8, #0x40\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #32\n\t" "csetm x17, lt\n\t" @@ -5280,7 +5291,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v23.16b, v30.16b, v31.16b\n\t" - "cmp %w[aadSz], #0x100\n\t" + "cmp w8, #0x100\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x40\n\t" "csetm x17, lt\n\t" @@ -5311,7 +5322,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v30.d[1], v29.d[0]\n\t" "eor v25.16b, v30.16b, v31.16b\n\t" /* Done */ - "cmp %w[aadSz], #0x400\n\t" + "cmp w8, #0x400\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x200\n\t" "csetm x17, lt\n\t" @@ -5368,7 +5379,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done */ "\n" "L_aes_gcm_encrypt_arm64_crypto_h_done_%=:\n\t" - "lsr w14, %w[aadSz], #4\n\t" + "lsr w14, w8, #4\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_1_%=\n\t" "cmp w14, #16\n\t" @@ -5606,41 +5617,41 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.ne L_aes_gcm_encrypt_arm64_crypto_aad_both_1_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_done_%=:\n\t" - "and w14, %w[aadSz], #15\n\t" + "and w14, w8, #15\n\t" "cbz w14, L_aes_gcm_encrypt_arm64_crypto_aad_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w14\n\t" - "st1 {v28.2d}, [%x[tmp]]\n\t" + "st1 {v28.2d}, [x11]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_dw_%=\n\t" "ldr x19, [%x[aad]], #8\n\t" "sub w20, w20, #8\n\t" - "str x19, [%x[tmp]], #8\n\t" + "str x19, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_sw_%=\n\t" "ldr w19, [%x[aad]], #4\n\t" "sub w20, w20, #4\n\t" - "str w19, [%x[tmp]], #4\n\t" + "str w19, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_byte_%=\n\t" "ldrh w19, [%x[aad]], #2\n\t" "sub w20, w20, #2\n\t" - "strh w19, [%x[tmp]], #2\n\t" + "strh w19, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_encrypt_arm64_crypto_aad_end_bytes_%=\n\t" "ldrb w19, [%x[aad]], #1\n\t" "subs w20, w20, #1\n\t" - "strb w19, [%x[tmp]], #1\n\t" + "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_aad_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x14\n\t" - "ld1 {v18.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x14\n\t" + "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ @@ -5705,37 +5716,37 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "cbz x24, L_aes_gcm_encrypt_arm64_crypto_nonce_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w24\n\t" - "st1 {v28.2d}, [%x[tmp]]\n\t" + "st1 {v28.2d}, [x11]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_nonce_start_dw_%=\n\t" "ldr x19, [%x[nonce]], #8\n\t" "sub w20, w20, #8\n\t" - "str x19, [%x[tmp]], #8\n\t" + "str x19, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_nonce_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_nonce_start_sw_%=\n\t" "ldr w19, [%x[nonce]], #4\n\t" "sub w20, w20, #4\n\t" - "str w19, [%x[tmp]], #4\n\t" + "str w19, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_nonce_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_nonce_start_byte_%=\n\t" "ldrh w19, [%x[nonce]], #2\n\t" "sub w20, w20, #2\n\t" - "strh w19, [%x[tmp]], #2\n\t" + "strh w19, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_nonce_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_encrypt_arm64_crypto_nonce_end_bytes_%=\n\t" "ldrb w19, [%x[nonce]], #1\n\t" "subs w20, w20, #1\n\t" - "strb w19, [%x[tmp]], #1\n\t" + "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_nonce_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_nonce_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x24\n\t" - "ld1 {v18.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x24\n\t" + "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" /* X = C * H^1 */ @@ -5757,7 +5768,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_encrypt_arm64_crypto_nonce_partial_done_%=:\n\t" "eor x14, x14, x14\n\t" - "ubfiz x24, %x[nonceSz], #3, #32\n\t" + "lsl x24, %x[nonceSz], #3\n\t" "mov v28.d[0], x14\n\t" "mov v28.d[1], x24\n\t" "rev64 v28.16b, v28.16b\n\t" @@ -5781,9 +5792,9 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "rev w15, w15\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_done_nonce_%=:\n\t" - "st1 {v13.2d}, [%x[reg]]\n\t" + "st1 {v13.2d}, [x12]\n\t" "lsr w14, %w[sz], #4\n\t" - "cmp %w[nr], #12\n\t" + "cmp w13, #12\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_start_128_%=\n\t" "b.gt L_aes_gcm_encrypt_arm64_crypto_start_256_%=\n\t" /* AES_GCM_192 */ @@ -5792,7 +5803,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_encrypt_arm64_crypto_192_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -5825,7 +5836,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -5842,7 +5853,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -5859,7 +5870,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -5876,7 +5887,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -5893,7 +5904,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -5910,7 +5921,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -5927,7 +5938,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -5945,7 +5956,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -5962,7 +5973,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -5983,7 +5994,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -6004,7 +6015,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #176]\n\t" + "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -6021,7 +6032,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #192]\n\t" + "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -6046,14 +6057,14 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_both_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -6094,7 +6105,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" @@ -6124,7 +6135,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" @@ -6152,7 +6163,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" @@ -6181,7 +6192,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" @@ -6209,7 +6220,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" @@ -6237,7 +6248,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ @@ -6267,7 +6278,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -6295,7 +6306,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" @@ -6320,7 +6331,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" @@ -6343,7 +6354,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -6360,7 +6371,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #176]\n\t" + "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -6377,7 +6388,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #192]\n\t" + "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -6394,7 +6405,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" @@ -6506,10 +6517,10 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" - "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" - "ld1 {v12.2d}, [%x[key]]\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" + "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" + "ld1 {v12.2d}, [x9]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_192_start_1_%=\n\t" @@ -7020,37 +7031,37 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_encrypt_arm64_crypto_192_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" "mov w19, w14\n\t" - "st1 {v16.2d}, [%x[tmp]]\n\t" + "st1 {v16.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [%x[tmp]], #8\n\t" + "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [%x[tmp]], #4\n\t" + "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [%x[tmp]], #2\n\t" + "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_192_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [%x[tmp]], #1\n\t" + "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_192_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x14\n\t" - "ld1 {v16.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x14\n\t" + "ld1 {v16.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -7080,31 +7091,31 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" - "st1 {v16.2d}, [%x[tmp]]\n\t" + "st1 {v16.2d}, [x11]\n\t" "mov w19, w14\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_out_start_dw_%=\n\t" - "ldr x17, [%x[tmp]], #8\n\t" + "ldr x17, [x11], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_out_start_sw_%=\n\t" - "ldr w17, [%x[tmp]], #4\n\t" + "ldr w17, [x11], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_out_start_byte_%=\n\t" - "ldrh w17, [%x[tmp]], #2\n\t" + "ldrh w17, [x11], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_192_out_end_bytes_%=\n\t" - "ldrb w17, [%x[tmp]], #1\n\t" + "ldrb w17, [x11], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_192_out_start_byte_%=\n\t" @@ -7115,10 +7126,10 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" - "strb wzr, [%x[tmp]], #1\n\t" + "strb wzr, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_192_start_zero_%=\n\t" - "sub %x[tmp], %x[tmp], #16\n\t" - "ld1 {v14.2d}, [%x[tmp]]\n\t" + "sub x11, x11, #16\n\t" + "ld1 {v14.2d}, [x11]\n\t" "rbit v14.16b, v14.16b\n\t" "eor v15.16b, v26.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -7139,11 +7150,11 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_192_partial_done_%=:\n\t" - "ld1 {v14.2d}, [%x[reg]]\n\t" - "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" - "rbit %x[aadSz], %x[aadSz]\n\t" - "mov v28.d[0], %x[aadSz]\n\t" - "ubfiz %x[sz], %x[sz], #3, #32\n\t" + "ld1 {v14.2d}, [x12]\n\t" + "lsl x8, x8, #3\n\t" + "rbit x8, x8\n\t" + "mov v28.d[0], x8\n\t" + "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" @@ -7193,30 +7204,30 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_tag_partial_%=:\n\t" - "st1 {v26.16b}, [%x[tmp]]\n\t" + "st1 {v26.16b}, [x11]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_tag_start_dw_%=\n\t" - "ldr x16, [%x[tmp]], #8\n\t" + "ldr x16, [x11], #8\n\t" "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_tag_start_sw_%=\n\t" - "ldr w16, [%x[tmp]], #4\n\t" + "ldr w16, [x11], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_tag_start_byte_%=\n\t" - "ldrh w16, [%x[tmp]], #2\n\t" + "ldrh w16, [x11], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_192_tag_end_bytes_%=\n\t" - "ldrb w16, [%x[tmp]], #1\n\t" + "ldrb w16, [x11], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_192_tag_start_byte_%=\n\t" @@ -7232,7 +7243,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_encrypt_arm64_crypto_256_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -7265,7 +7276,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -7282,7 +7293,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7299,7 +7310,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -7316,7 +7327,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7333,7 +7344,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -7350,7 +7361,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7367,7 +7378,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -7385,7 +7396,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7402,7 +7413,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -7423,7 +7434,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7444,7 +7455,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #176]\n\t" + "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -7461,7 +7472,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #192]\n\t" + "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7478,7 +7489,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #208]\n\t" + "ldr q13, [x9, #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -7495,7 +7506,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #224]\n\t" + "ldr q12, [x9, #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7520,14 +7531,14 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_both_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -7568,7 +7579,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" @@ -7598,7 +7609,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" @@ -7626,7 +7637,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" @@ -7655,7 +7666,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" @@ -7683,7 +7694,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" @@ -7711,7 +7722,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ @@ -7741,7 +7752,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -7769,7 +7780,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" @@ -7794,7 +7805,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" @@ -7817,7 +7828,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7834,7 +7845,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #176]\n\t" + "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -7851,7 +7862,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #192]\n\t" + "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7868,7 +7879,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #208]\n\t" + "ldr q13, [x9, #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -7885,7 +7896,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #224]\n\t" + "ldr q12, [x9, #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7902,7 +7913,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" @@ -8014,10 +8025,10 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" - "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" - "ld1 {v12.2d}, [%x[key]], #16\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" + "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" + "ld1 {v12.2d}, [x9], #16\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_256_start_1_%=\n\t" @@ -8140,7 +8151,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" - "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" + "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -8331,7 +8342,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" - "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" + "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -8472,7 +8483,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" - "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" + "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -8545,10 +8556,10 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [%x[key]]\n\t" + "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q30, [%x[key], #16]\n\t" + "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" @@ -8577,37 +8588,37 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_encrypt_arm64_crypto_256_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" "mov w19, w14\n\t" - "st1 {v16.2d}, [%x[tmp]]\n\t" + "st1 {v16.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [%x[tmp]], #8\n\t" + "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [%x[tmp]], #4\n\t" + "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [%x[tmp]], #2\n\t" + "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_256_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [%x[tmp]], #1\n\t" + "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_256_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x14\n\t" - "ld1 {v16.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x14\n\t" + "ld1 {v16.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -8636,38 +8647,38 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [%x[key]]\n\t" + "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q30, [%x[key], #16]\n\t" + "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" - "st1 {v16.2d}, [%x[tmp]]\n\t" + "st1 {v16.2d}, [x11]\n\t" "mov w19, w14\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_out_start_dw_%=\n\t" - "ldr x17, [%x[tmp]], #8\n\t" + "ldr x17, [x11], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_out_start_sw_%=\n\t" - "ldr w17, [%x[tmp]], #4\n\t" + "ldr w17, [x11], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_out_start_byte_%=\n\t" - "ldrh w17, [%x[tmp]], #2\n\t" + "ldrh w17, [x11], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_256_out_end_bytes_%=\n\t" - "ldrb w17, [%x[tmp]], #1\n\t" + "ldrb w17, [x11], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_256_out_start_byte_%=\n\t" @@ -8678,10 +8689,10 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" - "strb wzr, [%x[tmp]], #1\n\t" + "strb wzr, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_256_start_zero_%=\n\t" - "sub %x[tmp], %x[tmp], #16\n\t" - "ld1 {v14.2d}, [%x[tmp]]\n\t" + "sub x11, x11, #16\n\t" + "ld1 {v14.2d}, [x11]\n\t" "rbit v14.16b, v14.16b\n\t" "eor v15.16b, v26.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -8702,11 +8713,11 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_256_partial_done_%=:\n\t" - "ld1 {v14.2d}, [%x[reg]]\n\t" - "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" - "rbit %x[aadSz], %x[aadSz]\n\t" - "mov v28.d[0], %x[aadSz]\n\t" - "ubfiz %x[sz], %x[sz], #3, #32\n\t" + "ld1 {v14.2d}, [x12]\n\t" + "lsl x8, x8, #3\n\t" + "rbit x8, x8\n\t" + "mov v28.d[0], x8\n\t" + "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "aese v14.16b, v0.16b\n\t" @@ -8742,19 +8753,19 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" - "ldr q11, [%x[key], #-32]\n\t" + "ldr q11, [x9, #-32]\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" - "ldr q12, [%x[key], #-16]\n\t" + "ldr q12, [x9, #-16]\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" - "ldr q29, [%x[key]]\n\t" + "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" - "ldr q30, [%x[key], #16]\n\t" + "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" @@ -8764,30 +8775,30 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_tag_partial_%=:\n\t" - "st1 {v26.16b}, [%x[tmp]]\n\t" + "st1 {v26.16b}, [x11]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_tag_start_dw_%=\n\t" - "ldr x16, [%x[tmp]], #8\n\t" + "ldr x16, [x11], #8\n\t" "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_tag_start_sw_%=\n\t" - "ldr w16, [%x[tmp]], #4\n\t" + "ldr w16, [x11], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_tag_start_byte_%=\n\t" - "ldrh w16, [%x[tmp]], #2\n\t" + "ldrh w16, [x11], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_256_tag_end_bytes_%=\n\t" - "ldrb w16, [%x[tmp]], #1\n\t" + "ldrb w16, [x11], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_256_tag_start_byte_%=\n\t" @@ -8803,7 +8814,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_encrypt_arm64_crypto_128_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -8836,7 +8847,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -8853,7 +8864,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -8870,7 +8881,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -8887,7 +8898,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -8904,7 +8915,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -8921,7 +8932,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -8938,7 +8949,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -8956,7 +8967,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -8973,7 +8984,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -8994,7 +9005,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -9023,14 +9034,14 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_both_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -9071,7 +9082,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" @@ -9101,7 +9112,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" @@ -9129,7 +9140,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" @@ -9158,7 +9169,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" @@ -9186,7 +9197,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" @@ -9214,7 +9225,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ @@ -9244,7 +9255,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -9272,7 +9283,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" @@ -9297,7 +9308,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" @@ -9320,7 +9331,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -9337,7 +9348,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" @@ -9449,10 +9460,10 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" - "ld1 {v8.2d, v9.2d}, [%x[key]], #32\n\t" - "ld1 {v10.2d}, [%x[key]]\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" + "ld1 {v8.2d, v9.2d}, [x9], #32\n\t" + "ld1 {v10.2d}, [x9]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_128_start_1_%=\n\t" @@ -9919,37 +9930,37 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_encrypt_arm64_crypto_128_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" "mov w19, w14\n\t" - "st1 {v16.2d}, [%x[tmp]]\n\t" + "st1 {v16.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [%x[tmp]], #8\n\t" + "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [%x[tmp]], #4\n\t" + "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [%x[tmp]], #2\n\t" + "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_128_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [%x[tmp]], #1\n\t" + "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_128_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x14\n\t" - "ld1 {v16.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x14\n\t" + "ld1 {v16.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -9975,31 +9986,31 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" - "st1 {v16.2d}, [%x[tmp]]\n\t" + "st1 {v16.2d}, [x11]\n\t" "mov w19, w14\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_out_start_dw_%=\n\t" - "ldr x17, [%x[tmp]], #8\n\t" + "ldr x17, [x11], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_out_start_sw_%=\n\t" - "ldr w17, [%x[tmp]], #4\n\t" + "ldr w17, [x11], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_out_start_byte_%=\n\t" - "ldrh w17, [%x[tmp]], #2\n\t" + "ldrh w17, [x11], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_128_out_end_bytes_%=\n\t" - "ldrb w17, [%x[tmp]], #1\n\t" + "ldrb w17, [x11], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_128_out_start_byte_%=\n\t" @@ -10010,10 +10021,10 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" - "strb wzr, [%x[tmp]], #1\n\t" + "strb wzr, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_128_start_zero_%=\n\t" - "sub %x[tmp], %x[tmp], #16\n\t" - "ld1 {v14.2d}, [%x[tmp]]\n\t" + "sub x11, x11, #16\n\t" + "ld1 {v14.2d}, [x11]\n\t" "rbit v14.16b, v14.16b\n\t" "eor v15.16b, v26.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -10034,11 +10045,11 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_128_partial_done_%=:\n\t" - "ld1 {v14.2d}, [%x[reg]]\n\t" - "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" - "rbit %x[aadSz], %x[aadSz]\n\t" - "mov v28.d[0], %x[aadSz]\n\t" - "ubfiz %x[sz], %x[sz], #3, #32\n\t" + "ld1 {v14.2d}, [x12]\n\t" + "lsl x8, x8, #3\n\t" + "rbit x8, x8\n\t" + "mov v28.d[0], x8\n\t" + "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" @@ -10084,30 +10095,30 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_tag_partial_%=:\n\t" - "st1 {v26.16b}, [%x[tmp]]\n\t" + "st1 {v26.16b}, [x11]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_tag_start_dw_%=\n\t" - "ldr x16, [%x[tmp]], #8\n\t" + "ldr x16, [x11], #8\n\t" "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_tag_start_sw_%=\n\t" - "ldr w16, [%x[tmp]], #4\n\t" + "ldr w16, [x11], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_tag_start_byte_%=\n\t" - "ldrh w16, [%x[tmp]], #2\n\t" + "ldrh w16, [x11], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_128_tag_end_bytes_%=\n\t" - "ldrb w16, [%x[tmp]], #1\n\t" + "ldrb w16, [x11], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_128_tag_start_byte_%=\n\t" @@ -10116,6 +10127,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, #endif /* !NO_AES_128 */ "\n" "L_aes_gcm_encrypt_arm64_crypto_done_%=:\n\t" + "ldp x29, x30, [sp], #0x50\n\t" : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz), [tag] "+r" (tag), [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), [key] "+r" (key), [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp), @@ -10136,11 +10148,19 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, int nr) { __asm__ __volatile__ ( + "stp x29, x30, [sp, #-80]!\n\t" + "add x29, sp, #0\n\t" + "str %w[nr], [sp, #72]\n\t" + "str %x[reg], [sp, #64]\n\t" + "str %x[tmp], [sp, #56]\n\t" + "str %x[gcm_h], [sp, #48]\n\t" + "str %x[key], [sp, #40]\n\t" + "str %w[aadSz], [sp, #32]\n\t" "movi v27.16b, #0x87\n\t" "eor v26.16b, v26.16b, v26.16b\n\t" "ushr v27.2d, v27.2d, #56\n\t" - "ld1 {v22.2d}, [%x[gcm_h]]\n\t" - "cmp %w[aadSz], #0x40\n\t" + "ld1 {v22.2d}, [x10]\n\t" + "cmp w8, #0x40\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #32\n\t" "csetm x17, lt\n\t" @@ -10155,7 +10175,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v23.16b, v30.16b, v31.16b\n\t" - "cmp %w[aadSz], #0x100\n\t" + "cmp w8, #0x100\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x40\n\t" "csetm x17, lt\n\t" @@ -10186,7 +10206,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v30.d[1], v29.d[0]\n\t" "eor v25.16b, v30.16b, v31.16b\n\t" /* Done */ - "cmp %w[aadSz], #0x400\n\t" + "cmp w8, #0x400\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x200\n\t" "csetm x17, lt\n\t" @@ -10243,7 +10263,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done */ "\n" "L_aes_gcm_decrypt_arm64_crypto_h_done_%=:\n\t" - "lsr w14, %w[aadSz], #4\n\t" + "lsr w14, w8, #4\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_1_%=\n\t" "cmp w14, #16\n\t" @@ -10481,41 +10501,41 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.ne L_aes_gcm_decrypt_arm64_crypto_aad_both_1_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_done_%=:\n\t" - "and w14, %w[aadSz], #15\n\t" + "and w14, w8, #15\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_aad_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w14\n\t" - "st1 {v28.2d}, [%x[tmp]]\n\t" + "st1 {v28.2d}, [x11]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_dw_%=\n\t" "ldr x19, [%x[aad]], #8\n\t" "sub w20, w20, #8\n\t" - "str x19, [%x[tmp]], #8\n\t" + "str x19, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_sw_%=\n\t" "ldr w19, [%x[aad]], #4\n\t" "sub w20, w20, #4\n\t" - "str w19, [%x[tmp]], #4\n\t" + "str w19, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_byte_%=\n\t" "ldrh w19, [%x[aad]], #2\n\t" "sub w20, w20, #2\n\t" - "strh w19, [%x[tmp]], #2\n\t" + "strh w19, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_decrypt_arm64_crypto_aad_end_bytes_%=\n\t" "ldrb w19, [%x[aad]], #1\n\t" "subs w20, w20, #1\n\t" - "strb w19, [%x[tmp]], #1\n\t" + "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_aad_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x14\n\t" - "ld1 {v18.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x14\n\t" + "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ @@ -10580,37 +10600,37 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "cbz x24, L_aes_gcm_decrypt_arm64_crypto_nonce_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w24\n\t" - "st1 {v28.2d}, [%x[tmp]]\n\t" + "st1 {v28.2d}, [x11]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_nonce_start_dw_%=\n\t" "ldr x19, [%x[nonce]], #8\n\t" "sub w20, w20, #8\n\t" - "str x19, [%x[tmp]], #8\n\t" + "str x19, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_nonce_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_nonce_start_sw_%=\n\t" "ldr w19, [%x[nonce]], #4\n\t" "sub w20, w20, #4\n\t" - "str w19, [%x[tmp]], #4\n\t" + "str w19, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_nonce_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_nonce_start_byte_%=\n\t" "ldrh w19, [%x[nonce]], #2\n\t" "sub w20, w20, #2\n\t" - "strh w19, [%x[tmp]], #2\n\t" + "strh w19, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_nonce_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_decrypt_arm64_crypto_nonce_end_bytes_%=\n\t" "ldrb w19, [%x[nonce]], #1\n\t" "subs w20, w20, #1\n\t" - "strb w19, [%x[tmp]], #1\n\t" + "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_nonce_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_nonce_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x24\n\t" - "ld1 {v18.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x24\n\t" + "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" /* X = C * H^1 */ @@ -10632,7 +10652,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_decrypt_arm64_crypto_nonce_partial_done_%=:\n\t" "eor x14, x14, x14\n\t" - "ubfiz x24, %x[nonceSz], #3, #32\n\t" + "lsl x24, %x[nonceSz], #3\n\t" "mov v28.d[0], x14\n\t" "mov v28.d[1], x24\n\t" "rev64 v28.16b, v28.16b\n\t" @@ -10656,9 +10676,9 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "rev w15, w15\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_done_nonce_%=:\n\t" - "st1 {v13.2d}, [%x[reg]]\n\t" + "st1 {v13.2d}, [x12]\n\t" "lsr w14, %w[sz], #4\n\t" - "cmp %w[nr], #12\n\t" + "cmp w13, #12\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_start_128_%=\n\t" "b.gt L_aes_gcm_decrypt_arm64_crypto_start_256_%=\n\t" /* AES_GCM_192 */ @@ -10667,7 +10687,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_decrypt_arm64_crypto_192_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_start_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -10700,7 +10720,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -10717,7 +10737,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -10734,7 +10754,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -10751,7 +10771,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -10768,7 +10788,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -10785,7 +10805,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -10802,7 +10822,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -10820,7 +10840,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -10837,7 +10857,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -10858,7 +10878,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -10879,7 +10899,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #176]\n\t" + "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -10896,7 +10916,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #192]\n\t" + "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -10921,14 +10941,14 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_both_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -10970,7 +10990,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "rev w16, w15\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -11000,7 +11020,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" @@ -11029,7 +11049,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" @@ -11057,7 +11077,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" @@ -11085,7 +11105,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ @@ -11115,7 +11135,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -11144,7 +11164,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" @@ -11171,7 +11191,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" @@ -11196,7 +11216,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" @@ -11218,7 +11238,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -11235,7 +11255,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #176]\n\t" + "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -11252,7 +11272,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #192]\n\t" + "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -11269,7 +11289,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" @@ -11381,10 +11401,10 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_192_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" - "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" - "ld1 {v12.2d}, [%x[key]]\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" + "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" + "ld1 {v12.2d}, [x9]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_192_start_1_%=\n\t" @@ -11896,37 +11916,37 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_decrypt_arm64_crypto_192_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" "mov w19, w14\n\t" - "st1 {v15.2d}, [%x[tmp]]\n\t" + "st1 {v15.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [%x[tmp]], #8\n\t" + "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [%x[tmp]], #4\n\t" + "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [%x[tmp]], #2\n\t" + "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_192_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [%x[tmp]], #1\n\t" + "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_192_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x14\n\t" - "ld1 {v15.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x14\n\t" + "ld1 {v15.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" @@ -11975,30 +11995,30 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" - "st1 {v14.2d}, [%x[tmp]]\n\t" + "st1 {v14.2d}, [x11]\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_out_start_dw_%=\n\t" - "ldr x17, [%x[tmp]], #8\n\t" + "ldr x17, [x11], #8\n\t" "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_out_start_sw_%=\n\t" - "ldr w17, [%x[tmp]], #4\n\t" + "ldr w17, [x11], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte_%=\n\t" - "ldrh w17, [%x[tmp]], #2\n\t" + "ldrh w17, [x11], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_192_out_end_bytes_%=\n\t" - "ldrb w17, [%x[tmp]], #1\n\t" + "ldrb w17, [x11], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte_%=\n\t" @@ -12006,11 +12026,11 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "L_aes_gcm_decrypt_arm64_crypto_192_out_end_bytes_%=:\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_partial_done_%=:\n\t" - "ld1 {v14.2d}, [%x[reg]]\n\t" - "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" - "rbit %x[aadSz], %x[aadSz]\n\t" - "mov v28.d[0], %x[aadSz]\n\t" - "ubfiz %x[sz], %x[sz], #3, #32\n\t" + "ld1 {v14.2d}, [x12]\n\t" + "lsl x8, x8, #3\n\t" + "rbit x8, x8\n\t" + "mov v28.d[0], x8\n\t" + "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" @@ -12060,51 +12080,50 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b L_aes_gcm_decrypt_arm64_crypto_192_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_part_tag_%=:\n\t" - "ubfiz %x[tagSz], %x[tagSz], #0, #32\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" - "st1 {v28.2d}, [%x[tmp]]\n\t" + "st1 {v28.2d}, [x11]\n\t" "cmp x17, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_tag_start_dw_%=\n\t" "ldr x16, [%x[tag]], #8\n\t" "sub x17, x17, #8\n\t" - "str x16, [%x[tmp]], #8\n\t" + "str x16, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" - "str w16, [%x[tmp]], #4\n\t" + "str w16, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" - "strh w16, [%x[tmp]], #2\n\t" + "strh w16, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_192_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" - "strb w16, [%x[tmp]], #1\n\t" + "strb w16, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_192_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_tag_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], %x[tagSz]\n\t" - "ld1 {v28.2d}, [%x[tmp]]\n\t" + "sub x11, x11, %x[tagSz]\n\t" + "ld1 {v28.2d}, [x11]\n\t" "mov x17, #16\n\t" - "st1 {v26.2d}, [%x[tmp]]\n\t" + "st1 {v26.2d}, [x11]\n\t" "sub x17, x17, %x[tagSz]\n\t" - "add %x[tmp], %x[tmp], %x[tagSz]\n\t" + "add x11, x11, %x[tagSz]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_calc_tag_byte_%=:\n\t" - "strb wzr, [%x[tmp]], #1\n\t" + "strb wzr, [x11], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_192_calc_tag_byte_%=\n\t" - "subs %x[tmp], %x[tmp], #16\n\t" - "ld1 {v26.2d}, [%x[tmp]]\n\t" + "subs x11, x11, #16\n\t" + "ld1 {v26.2d}, [x11]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" @@ -12125,7 +12144,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_decrypt_arm64_crypto_256_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_start_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -12158,7 +12177,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -12175,7 +12194,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12192,7 +12211,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -12209,7 +12228,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12226,7 +12245,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -12243,7 +12262,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12260,7 +12279,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -12278,7 +12297,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12295,7 +12314,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -12316,7 +12335,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12337,7 +12356,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #176]\n\t" + "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -12354,7 +12373,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #192]\n\t" + "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12371,7 +12390,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #208]\n\t" + "ldr q13, [x9, #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -12388,7 +12407,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #224]\n\t" + "ldr q12, [x9, #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12413,14 +12432,14 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_both_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -12462,7 +12481,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "rev w16, w15\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -12492,7 +12511,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" @@ -12521,7 +12540,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" @@ -12549,7 +12568,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" @@ -12577,7 +12596,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ @@ -12607,7 +12626,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -12636,7 +12655,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" @@ -12663,7 +12682,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" @@ -12688,7 +12707,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" @@ -12710,7 +12729,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12727,7 +12746,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #176]\n\t" + "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -12744,7 +12763,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #192]\n\t" + "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12761,7 +12780,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #208]\n\t" + "ldr q13, [x9, #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -12778,7 +12797,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #224]\n\t" + "ldr q12, [x9, #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12795,7 +12814,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" @@ -12907,10 +12926,10 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_256_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" - "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" - "ld1 {v12.2d}, [%x[key]], #16\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" + "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" + "ld1 {v12.2d}, [x9], #16\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_256_start_1_%=\n\t" @@ -13033,7 +13052,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" - "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" + "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -13224,7 +13243,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" - "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" + "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -13365,7 +13384,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" - "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" + "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -13438,10 +13457,10 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [%x[key]]\n\t" + "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q30, [%x[key], #16]\n\t" + "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" @@ -13470,37 +13489,37 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_decrypt_arm64_crypto_256_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" "mov w19, w14\n\t" - "st1 {v15.2d}, [%x[tmp]]\n\t" + "st1 {v15.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [%x[tmp]], #8\n\t" + "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [%x[tmp]], #4\n\t" + "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [%x[tmp]], #2\n\t" + "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_256_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [%x[tmp]], #1\n\t" + "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_256_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x14\n\t" - "ld1 {v15.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x14\n\t" + "ld1 {v15.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" @@ -13545,40 +13564,40 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [%x[key]]\n\t" + "ldr q29, [x9]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q30, [%x[key], #16]\n\t" + "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" - "st1 {v14.2d}, [%x[tmp]]\n\t" + "st1 {v14.2d}, [x11]\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_out_start_dw_%=\n\t" - "ldr x17, [%x[tmp]], #8\n\t" + "ldr x17, [x11], #8\n\t" "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_out_start_sw_%=\n\t" - "ldr w17, [%x[tmp]], #4\n\t" + "ldr w17, [x11], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte_%=\n\t" - "ldrh w17, [%x[tmp]], #2\n\t" + "ldrh w17, [x11], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_256_out_end_bytes_%=\n\t" - "ldrb w17, [%x[tmp]], #1\n\t" + "ldrb w17, [x11], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte_%=\n\t" @@ -13586,11 +13605,11 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "L_aes_gcm_decrypt_arm64_crypto_256_out_end_bytes_%=:\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_partial_done_%=:\n\t" - "ld1 {v14.2d}, [%x[reg]]\n\t" - "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" - "rbit %x[aadSz], %x[aadSz]\n\t" - "mov v28.d[0], %x[aadSz]\n\t" - "ubfiz %x[sz], %x[sz], #3, #32\n\t" + "ld1 {v14.2d}, [x12]\n\t" + "lsl x8, x8, #3\n\t" + "rbit x8, x8\n\t" + "mov v28.d[0], x8\n\t" + "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "aese v14.16b, v0.16b\n\t" @@ -13626,19 +13645,19 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" - "ldr q11, [%x[key], #-32]\n\t" + "ldr q11, [x9, #-32]\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" - "ldr q12, [%x[key], #-16]\n\t" + "ldr q12, [x9, #-16]\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" - "ldr q29, [%x[key]]\n\t" + "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" - "ldr q30, [%x[key], #16]\n\t" + "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" @@ -13648,51 +13667,50 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b L_aes_gcm_decrypt_arm64_crypto_256_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_part_tag_%=:\n\t" - "ubfiz %x[tagSz], %x[tagSz], #0, #32\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" - "st1 {v28.2d}, [%x[tmp]]\n\t" + "st1 {v28.2d}, [x11]\n\t" "cmp x17, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_tag_start_dw_%=\n\t" "ldr x16, [%x[tag]], #8\n\t" "sub x17, x17, #8\n\t" - "str x16, [%x[tmp]], #8\n\t" + "str x16, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" - "str w16, [%x[tmp]], #4\n\t" + "str w16, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" - "strh w16, [%x[tmp]], #2\n\t" + "strh w16, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_256_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" - "strb w16, [%x[tmp]], #1\n\t" + "strb w16, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_256_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_tag_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], %x[tagSz]\n\t" - "ld1 {v28.2d}, [%x[tmp]]\n\t" + "sub x11, x11, %x[tagSz]\n\t" + "ld1 {v28.2d}, [x11]\n\t" "mov x17, #16\n\t" - "st1 {v26.2d}, [%x[tmp]]\n\t" + "st1 {v26.2d}, [x11]\n\t" "sub x17, x17, %x[tagSz]\n\t" - "add %x[tmp], %x[tmp], %x[tagSz]\n\t" + "add x11, x11, %x[tagSz]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_calc_tag_byte_%=:\n\t" - "strb wzr, [%x[tmp]], #1\n\t" + "strb wzr, [x11], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_256_calc_tag_byte_%=\n\t" - "subs %x[tmp], %x[tmp], #16\n\t" - "ld1 {v26.2d}, [%x[tmp]]\n\t" + "subs x11, x11, #16\n\t" + "ld1 {v26.2d}, [x11]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" @@ -13713,7 +13731,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_decrypt_arm64_crypto_128_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_start_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -13746,7 +13764,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -13763,7 +13781,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -13780,7 +13798,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -13797,7 +13815,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -13814,7 +13832,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -13831,7 +13849,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -13848,7 +13866,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -13866,7 +13884,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -13883,7 +13901,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -13904,7 +13922,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -13933,14 +13951,14 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_both_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -13982,7 +14000,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "rev w16, w15\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -14012,7 +14030,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" @@ -14041,7 +14059,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" @@ -14069,7 +14087,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" @@ -14097,7 +14115,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ @@ -14127,7 +14145,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -14156,7 +14174,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" @@ -14183,7 +14201,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" @@ -14208,7 +14226,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" @@ -14230,7 +14248,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -14247,7 +14265,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" @@ -14359,10 +14377,10 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_128_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" - "ld1 {v8.2d, v9.2d}, [%x[key]], #32\n\t" - "ld1 {v10.2d}, [%x[key]]\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" + "ld1 {v8.2d, v9.2d}, [x9], #32\n\t" + "ld1 {v10.2d}, [x9]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_128_start_1_%=\n\t" @@ -14829,37 +14847,37 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_decrypt_arm64_crypto_128_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" "mov w19, w14\n\t" - "st1 {v15.2d}, [%x[tmp]]\n\t" + "st1 {v15.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [%x[tmp]], #8\n\t" + "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [%x[tmp]], #4\n\t" + "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [%x[tmp]], #2\n\t" + "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_128_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [%x[tmp]], #1\n\t" + "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_128_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x14\n\t" - "ld1 {v15.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x14\n\t" + "ld1 {v15.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" @@ -14904,30 +14922,30 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" - "st1 {v14.2d}, [%x[tmp]]\n\t" + "st1 {v14.2d}, [x11]\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_out_start_dw_%=\n\t" - "ldr x17, [%x[tmp]], #8\n\t" + "ldr x17, [x11], #8\n\t" "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_out_start_sw_%=\n\t" - "ldr w17, [%x[tmp]], #4\n\t" + "ldr w17, [x11], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte_%=\n\t" - "ldrh w17, [%x[tmp]], #2\n\t" + "ldrh w17, [x11], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_128_out_end_bytes_%=\n\t" - "ldrb w17, [%x[tmp]], #1\n\t" + "ldrb w17, [x11], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte_%=\n\t" @@ -14935,11 +14953,11 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "L_aes_gcm_decrypt_arm64_crypto_128_out_end_bytes_%=:\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_partial_done_%=:\n\t" - "ld1 {v14.2d}, [%x[reg]]\n\t" - "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" - "rbit %x[aadSz], %x[aadSz]\n\t" - "mov v28.d[0], %x[aadSz]\n\t" - "ubfiz %x[sz], %x[sz], #3, #32\n\t" + "ld1 {v14.2d}, [x12]\n\t" + "lsl x8, x8, #3\n\t" + "rbit x8, x8\n\t" + "mov v28.d[0], x8\n\t" + "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" @@ -14985,51 +15003,50 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b L_aes_gcm_decrypt_arm64_crypto_128_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_part_tag_%=:\n\t" - "ubfiz %x[tagSz], %x[tagSz], #0, #32\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" - "st1 {v28.2d}, [%x[tmp]]\n\t" + "st1 {v28.2d}, [x11]\n\t" "cmp x17, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_tag_start_dw_%=\n\t" "ldr x16, [%x[tag]], #8\n\t" "sub x17, x17, #8\n\t" - "str x16, [%x[tmp]], #8\n\t" + "str x16, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" - "str w16, [%x[tmp]], #4\n\t" + "str w16, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" - "strh w16, [%x[tmp]], #2\n\t" + "strh w16, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_128_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" - "strb w16, [%x[tmp]], #1\n\t" + "strb w16, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_128_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_tag_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], %x[tagSz]\n\t" - "ld1 {v28.2d}, [%x[tmp]]\n\t" + "sub x11, x11, %x[tagSz]\n\t" + "ld1 {v28.2d}, [x11]\n\t" "mov x17, #16\n\t" - "st1 {v26.2d}, [%x[tmp]]\n\t" + "st1 {v26.2d}, [x11]\n\t" "sub x17, x17, %x[tagSz]\n\t" - "add %x[tmp], %x[tmp], %x[tagSz]\n\t" + "add x11, x11, %x[tagSz]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_calc_tag_byte_%=:\n\t" - "strb wzr, [%x[tmp]], #1\n\t" + "strb wzr, [x11], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_128_calc_tag_byte_%=\n\t" - "subs %x[tmp], %x[tmp], #16\n\t" - "ld1 {v26.2d}, [%x[tmp]]\n\t" + "subs x11, x11, #16\n\t" + "ld1 {v26.2d}, [x11]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" @@ -15043,11 +15060,12 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, #endif /* !NO_AES_128 */ "\n" "L_aes_gcm_decrypt_arm64_crypto_done_%=:\n\t" - : [in] "+r" (in), [out] "+r" (out), [sz] "+r" (sz), - [nonceSz] "+r" (nonceSz), [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), - [key] "+r" (key), [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp), - [reg] "+r" (reg), [nr] "+r" (nr) - : [nonce] "r" (nonce), [tag] "r" (tag), [aad] "r" (aad) + "ldp x29, x30, [sp], #0x50\n\t" + : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz), + [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), [key] "+r" (key), + [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp), [reg] "+r" (reg), + [nr] "+r" (nr) + : [in] "r" (in), [nonce] "r" (nonce), [tag] "r" (tag), [aad] "r" (aad) : "memory", "cc", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", @@ -15064,11 +15082,19 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, word32 aadSz, byte* key, byte* gcm_h, byte* tmp, byte* reg, int nr) { __asm__ __volatile__ ( + "stp x29, x30, [sp, #-80]!\n\t" + "add x29, sp, #0\n\t" + "str %w[nr], [sp, #72]\n\t" + "str %x[reg], [sp, #64]\n\t" + "str %x[tmp], [sp, #56]\n\t" + "str %x[gcm_h], [sp, #48]\n\t" + "str %x[key], [sp, #40]\n\t" + "str %w[aadSz], [sp, #32]\n\t" "movi v27.16b, #0x87\n\t" "eor v26.16b, v26.16b, v26.16b\n\t" "ushr v27.2d, v27.2d, #56\n\t" - "ld1 {v22.2d}, [%x[gcm_h]]\n\t" - "cmp %w[aadSz], #0x40\n\t" + "ld1 {v22.2d}, [x10]\n\t" + "cmp w8, #0x40\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #32\n\t" "csetm x17, lt\n\t" @@ -15083,7 +15109,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v23.16b, v30.16b, v31.16b\n\t" - "cmp %w[aadSz], #0x100\n\t" + "cmp w8, #0x100\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x40\n\t" "csetm x17, lt\n\t" @@ -15113,7 +15139,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v30.d[1], v29.d[0]\n\t" "eor v25.16b, v30.16b, v31.16b\n\t" /* Done */ - "cmp %w[aadSz], #0x400\n\t" + "cmp w8, #0x400\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x200\n\t" "csetm x17, lt\n\t" @@ -15168,7 +15194,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_h_done_%=:\n\t" - "lsr w14, %w[aadSz], #4\n\t" + "lsr w14, w8, #4\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_1_%=\n\t" "cmp w14, #16\n\t" @@ -15391,41 +15417,41 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_aad_both_1_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_done_%=:\n\t" - "and w14, %w[aadSz], #15\n\t" + "and w14, w8, #15\n\t" "cbz w14, L_aes_gcm_encrypt_arm64_crypto_eor3_aad_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w14\n\t" - "st1 {v28.2d}, [%x[tmp]]\n\t" + "st1 {v28.2d}, [x11]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_dw_%=\n\t" "ldr x19, [%x[aad]], #8\n\t" "sub w20, w20, #8\n\t" - "str x19, [%x[tmp]], #8\n\t" + "str x19, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_sw_%=\n\t" "ldr w19, [%x[aad]], #4\n\t" "sub w20, w20, #4\n\t" - "str w19, [%x[tmp]], #4\n\t" + "str w19, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t" "ldrh w19, [%x[aad]], #2\n\t" "sub w20, w20, #2\n\t" - "strh w19, [%x[tmp]], #2\n\t" + "strh w19, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_encrypt_arm64_crypto_eor3_aad_end_bytes_%=\n\t" "ldrb w19, [%x[aad]], #1\n\t" "subs w20, w20, #1\n\t" - "strb w19, [%x[tmp]], #1\n\t" + "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x14\n\t" - "ld1 {v18.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x14\n\t" + "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ @@ -15488,37 +15514,37 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cbz x24, L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w24\n\t" - "st1 {v28.2d}, [%x[tmp]]\n\t" + "st1 {v28.2d}, [x11]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_dw_%=\n\t" "ldr x19, [%x[nonce]], #8\n\t" "sub w20, w20, #8\n\t" - "str x19, [%x[tmp]], #8\n\t" + "str x19, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_sw_%=\n\t" "ldr w19, [%x[nonce]], #4\n\t" "sub w20, w20, #4\n\t" - "str w19, [%x[tmp]], #4\n\t" + "str w19, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t" "ldrh w19, [%x[nonce]], #2\n\t" "sub w20, w20, #2\n\t" - "strh w19, [%x[tmp]], #2\n\t" + "strh w19, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_end_bytes_%=\n\t" "ldrb w19, [%x[nonce]], #1\n\t" "subs w20, w20, #1\n\t" - "strb w19, [%x[tmp]], #1\n\t" + "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x24\n\t" - "ld1 {v18.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x24\n\t" + "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" /* X = C * H^1 */ @@ -15539,7 +15565,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_partial_done_%=:\n\t" "eor x14, x14, x14\n\t" - "ubfiz x24, %x[nonceSz], #3, #32\n\t" + "lsl x24, %x[nonceSz], #3\n\t" "mov v28.d[0], x14\n\t" "mov v28.d[1], x24\n\t" "rev64 v28.16b, v28.16b\n\t" @@ -15562,9 +15588,9 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "rev w15, w15\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_done_nonce_%=:\n\t" - "st1 {v13.2d}, [%x[reg]]\n\t" + "st1 {v13.2d}, [x12]\n\t" "lsr w14, %w[sz], #4\n\t" - "cmp %w[nr], #12\n\t" + "cmp w13, #12\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_start_128_%=\n\t" "b.gt L_aes_gcm_encrypt_arm64_crypto_eor3_start_256_%=\n\t" /* AES_GCM_192 */ @@ -15573,7 +15599,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -15606,7 +15632,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -15623,7 +15649,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -15640,7 +15666,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -15657,7 +15683,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -15674,7 +15700,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -15691,7 +15717,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -15708,7 +15734,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -15726,7 +15752,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -15743,7 +15769,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -15764,7 +15790,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -15785,7 +15811,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #176]\n\t" + "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -15802,7 +15828,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #192]\n\t" + "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -15827,14 +15853,14 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_both_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -15875,7 +15901,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" @@ -15905,7 +15931,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" @@ -15932,7 +15958,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" @@ -15960,7 +15986,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" @@ -15987,7 +16013,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -16013,7 +16039,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ @@ -16042,7 +16068,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -16069,7 +16095,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" @@ -16093,7 +16119,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" @@ -16116,7 +16142,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -16133,7 +16159,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #176]\n\t" + "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -16150,7 +16176,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #192]\n\t" + "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -16167,7 +16193,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" @@ -16271,10 +16297,10 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" - "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" - "ld1 {v12.2d}, [%x[key]]\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" + "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" + "ld1 {v12.2d}, [x9]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_1_%=\n\t" @@ -16774,37 +16800,37 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_192_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" "mov w19, w14\n\t" - "st1 {v16.2d}, [%x[tmp]]\n\t" + "st1 {v16.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [%x[tmp]], #8\n\t" + "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [%x[tmp]], #4\n\t" + "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [%x[tmp]], #2\n\t" + "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [%x[tmp]], #1\n\t" + "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x14\n\t" - "ld1 {v16.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x14\n\t" + "ld1 {v16.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -16834,31 +16860,31 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" - "st1 {v16.2d}, [%x[tmp]]\n\t" + "st1 {v16.2d}, [x11]\n\t" "mov w19, w14\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_dw_%=\n\t" - "ldr x17, [%x[tmp]], #8\n\t" + "ldr x17, [x11], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_sw_%=\n\t" - "ldr w17, [%x[tmp]], #4\n\t" + "ldr w17, [x11], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t" - "ldrh w17, [%x[tmp]], #2\n\t" + "ldrh w17, [x11], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_end_bytes_%=\n\t" - "ldrb w17, [%x[tmp]], #1\n\t" + "ldrb w17, [x11], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t" @@ -16869,10 +16895,10 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" - "strb wzr, [%x[tmp]], #1\n\t" + "strb wzr, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_zero_%=\n\t" - "sub %x[tmp], %x[tmp], #16\n\t" - "ld1 {v14.2d}, [%x[tmp]]\n\t" + "sub x11, x11, #16\n\t" + "ld1 {v14.2d}, [x11]\n\t" "rbit v14.16b, v14.16b\n\t" "eor v15.16b, v26.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -16892,11 +16918,11 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_partial_done_%=:\n\t" - "ld1 {v14.2d}, [%x[reg]]\n\t" - "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" - "rbit %x[aadSz], %x[aadSz]\n\t" - "mov v28.d[0], %x[aadSz]\n\t" - "ubfiz %x[sz], %x[sz], #3, #32\n\t" + "ld1 {v14.2d}, [x12]\n\t" + "lsl x8, x8, #3\n\t" + "rbit x8, x8\n\t" + "mov v28.d[0], x8\n\t" + "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" @@ -16945,30 +16971,30 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_partial_%=:\n\t" - "st1 {v26.16b}, [%x[tmp]]\n\t" + "st1 {v26.16b}, [x11]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_dw_%=\n\t" - "ldr x16, [%x[tmp]], #8\n\t" + "ldr x16, [x11], #8\n\t" "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_sw_%=\n\t" - "ldr w16, [%x[tmp]], #4\n\t" + "ldr w16, [x11], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t" - "ldrh w16, [%x[tmp]], #2\n\t" + "ldrh w16, [x11], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_end_bytes_%=\n\t" - "ldrb w16, [%x[tmp]], #1\n\t" + "ldrb w16, [x11], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t" @@ -16984,7 +17010,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -17017,7 +17043,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17034,7 +17060,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17051,7 +17077,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17068,7 +17094,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17085,7 +17111,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17102,7 +17128,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17119,7 +17145,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17137,7 +17163,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17154,7 +17180,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17175,7 +17201,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17196,7 +17222,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #176]\n\t" + "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17213,7 +17239,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #192]\n\t" + "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17230,7 +17256,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #208]\n\t" + "ldr q13, [x9, #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17247,7 +17273,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #224]\n\t" + "ldr q12, [x9, #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17272,14 +17298,14 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_both_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -17320,7 +17346,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" @@ -17350,7 +17376,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" @@ -17377,7 +17403,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" @@ -17405,7 +17431,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" @@ -17432,7 +17458,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17458,7 +17484,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ @@ -17487,7 +17513,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -17514,7 +17540,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" @@ -17538,7 +17564,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" @@ -17561,7 +17587,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17578,7 +17604,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #176]\n\t" + "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17595,7 +17621,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #192]\n\t" + "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17612,7 +17638,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #208]\n\t" + "ldr q13, [x9, #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17629,7 +17655,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #224]\n\t" + "ldr q12, [x9, #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17646,7 +17672,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" @@ -17750,10 +17776,10 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" - "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" - "ld1 {v12.2d}, [%x[key]], #16\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" + "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" + "ld1 {v12.2d}, [x9], #16\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_1_%=\n\t" @@ -17876,7 +17902,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" - "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" + "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -18063,7 +18089,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" - "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" + "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -18200,7 +18226,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" - "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" + "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -18271,10 +18297,10 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [%x[key]]\n\t" + "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q30, [%x[key], #16]\n\t" + "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" @@ -18302,37 +18328,37 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_256_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" "mov w19, w14\n\t" - "st1 {v16.2d}, [%x[tmp]]\n\t" + "st1 {v16.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [%x[tmp]], #8\n\t" + "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [%x[tmp]], #4\n\t" + "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [%x[tmp]], #2\n\t" + "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [%x[tmp]], #1\n\t" + "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x14\n\t" - "ld1 {v16.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x14\n\t" + "ld1 {v16.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -18361,38 +18387,38 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [%x[key]]\n\t" + "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q30, [%x[key], #16]\n\t" + "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" - "st1 {v16.2d}, [%x[tmp]]\n\t" + "st1 {v16.2d}, [x11]\n\t" "mov w19, w14\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_dw_%=\n\t" - "ldr x17, [%x[tmp]], #8\n\t" + "ldr x17, [x11], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_sw_%=\n\t" - "ldr w17, [%x[tmp]], #4\n\t" + "ldr w17, [x11], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t" - "ldrh w17, [%x[tmp]], #2\n\t" + "ldrh w17, [x11], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_end_bytes_%=\n\t" - "ldrb w17, [%x[tmp]], #1\n\t" + "ldrb w17, [x11], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t" @@ -18403,10 +18429,10 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" - "strb wzr, [%x[tmp]], #1\n\t" + "strb wzr, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_zero_%=\n\t" - "sub %x[tmp], %x[tmp], #16\n\t" - "ld1 {v14.2d}, [%x[tmp]]\n\t" + "sub x11, x11, #16\n\t" + "ld1 {v14.2d}, [x11]\n\t" "rbit v14.16b, v14.16b\n\t" "eor v15.16b, v26.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -18426,11 +18452,11 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_partial_done_%=:\n\t" - "ld1 {v14.2d}, [%x[reg]]\n\t" - "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" - "rbit %x[aadSz], %x[aadSz]\n\t" - "mov v28.d[0], %x[aadSz]\n\t" - "ubfiz %x[sz], %x[sz], #3, #32\n\t" + "ld1 {v14.2d}, [x12]\n\t" + "lsl x8, x8, #3\n\t" + "rbit x8, x8\n\t" + "mov v28.d[0], x8\n\t" + "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "aese v14.16b, v0.16b\n\t" @@ -18466,18 +18492,18 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" - "ldr q11, [%x[key], #-32]\n\t" + "ldr q11, [x9, #-32]\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" - "ldr q12, [%x[key], #-16]\n\t" + "ldr q12, [x9, #-16]\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [%x[key]]\n\t" + "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" - "ldr q30, [%x[key], #16]\n\t" + "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" @@ -18487,30 +18513,30 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_partial_%=:\n\t" - "st1 {v26.16b}, [%x[tmp]]\n\t" + "st1 {v26.16b}, [x11]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_dw_%=\n\t" - "ldr x16, [%x[tmp]], #8\n\t" + "ldr x16, [x11], #8\n\t" "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_sw_%=\n\t" - "ldr w16, [%x[tmp]], #4\n\t" + "ldr w16, [x11], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t" - "ldrh w16, [%x[tmp]], #2\n\t" + "ldrh w16, [x11], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_end_bytes_%=\n\t" - "ldrb w16, [%x[tmp]], #1\n\t" + "ldrb w16, [x11], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t" @@ -18526,7 +18552,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -18559,7 +18585,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -18576,7 +18602,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -18593,7 +18619,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -18610,7 +18636,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -18627,7 +18653,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -18644,7 +18670,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -18661,7 +18687,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -18679,7 +18705,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -18696,7 +18722,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -18717,7 +18743,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -18746,14 +18772,14 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_both_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -18794,7 +18820,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" @@ -18824,7 +18850,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" @@ -18851,7 +18877,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" @@ -18879,7 +18905,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" @@ -18906,7 +18932,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -18932,7 +18958,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ @@ -18961,7 +18987,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -18988,7 +19014,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" @@ -19012,7 +19038,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" @@ -19035,7 +19061,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -19052,7 +19078,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" @@ -19156,10 +19182,10 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" - "ld1 {v8.2d, v9.2d}, [%x[key]], #32\n\t" - "ld1 {v10.2d}, [%x[key]]\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" + "ld1 {v8.2d, v9.2d}, [x9], #32\n\t" + "ld1 {v10.2d}, [x9]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_1_%=\n\t" @@ -19615,37 +19641,37 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_128_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" "mov w19, w14\n\t" - "st1 {v16.2d}, [%x[tmp]]\n\t" + "st1 {v16.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [%x[tmp]], #8\n\t" + "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [%x[tmp]], #4\n\t" + "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [%x[tmp]], #2\n\t" + "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [%x[tmp]], #1\n\t" + "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x14\n\t" - "ld1 {v16.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x14\n\t" + "ld1 {v16.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -19671,31 +19697,31 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" - "st1 {v16.2d}, [%x[tmp]]\n\t" + "st1 {v16.2d}, [x11]\n\t" "mov w19, w14\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_dw_%=\n\t" - "ldr x17, [%x[tmp]], #8\n\t" + "ldr x17, [x11], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_sw_%=\n\t" - "ldr w17, [%x[tmp]], #4\n\t" + "ldr w17, [x11], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t" - "ldrh w17, [%x[tmp]], #2\n\t" + "ldrh w17, [x11], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_end_bytes_%=\n\t" - "ldrb w17, [%x[tmp]], #1\n\t" + "ldrb w17, [x11], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t" @@ -19706,10 +19732,10 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" - "strb wzr, [%x[tmp]], #1\n\t" + "strb wzr, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_zero_%=\n\t" - "sub %x[tmp], %x[tmp], #16\n\t" - "ld1 {v14.2d}, [%x[tmp]]\n\t" + "sub x11, x11, #16\n\t" + "ld1 {v14.2d}, [x11]\n\t" "rbit v14.16b, v14.16b\n\t" "eor v15.16b, v26.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -19729,11 +19755,11 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_partial_done_%=:\n\t" - "ld1 {v14.2d}, [%x[reg]]\n\t" - "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" - "rbit %x[aadSz], %x[aadSz]\n\t" - "mov v28.d[0], %x[aadSz]\n\t" - "ubfiz %x[sz], %x[sz], #3, #32\n\t" + "ld1 {v14.2d}, [x12]\n\t" + "lsl x8, x8, #3\n\t" + "rbit x8, x8\n\t" + "mov v28.d[0], x8\n\t" + "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" @@ -19778,30 +19804,30 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_partial_%=:\n\t" - "st1 {v26.16b}, [%x[tmp]]\n\t" + "st1 {v26.16b}, [x11]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_dw_%=\n\t" - "ldr x16, [%x[tmp]], #8\n\t" + "ldr x16, [x11], #8\n\t" "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_sw_%=\n\t" - "ldr w16, [%x[tmp]], #4\n\t" + "ldr w16, [x11], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t" - "ldrh w16, [%x[tmp]], #2\n\t" + "ldrh w16, [x11], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_end_bytes_%=\n\t" - "ldrb w16, [%x[tmp]], #1\n\t" + "ldrb w16, [x11], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t" @@ -19810,6 +19836,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, #endif /* !NO_AES_128 */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=:\n\t" + "ldp x29, x30, [sp], #0x50\n\t" : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz), [tag] "+r" (tag), [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), [key] "+r" (key), [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp), @@ -19830,11 +19857,19 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, int nr) { __asm__ __volatile__ ( + "stp x29, x30, [sp, #-80]!\n\t" + "add x29, sp, #0\n\t" + "str %w[nr], [sp, #72]\n\t" + "str %x[reg], [sp, #64]\n\t" + "str %x[tmp], [sp, #56]\n\t" + "str %x[gcm_h], [sp, #48]\n\t" + "str %x[key], [sp, #40]\n\t" + "str %w[aadSz], [sp, #32]\n\t" "movi v27.16b, #0x87\n\t" "eor v26.16b, v26.16b, v26.16b\n\t" "ushr v27.2d, v27.2d, #56\n\t" - "ld1 {v22.2d}, [%x[gcm_h]]\n\t" - "cmp %w[aadSz], #0x40\n\t" + "ld1 {v22.2d}, [x10]\n\t" + "cmp w8, #0x40\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #32\n\t" "csetm x17, lt\n\t" @@ -19849,7 +19884,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v23.16b, v30.16b, v31.16b\n\t" - "cmp %w[aadSz], #0x100\n\t" + "cmp w8, #0x100\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x40\n\t" "csetm x17, lt\n\t" @@ -19879,7 +19914,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v30.d[1], v29.d[0]\n\t" "eor v25.16b, v30.16b, v31.16b\n\t" /* Done */ - "cmp %w[aadSz], #0x400\n\t" + "cmp w8, #0x400\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x200\n\t" "csetm x17, lt\n\t" @@ -19934,7 +19969,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_h_done_%=:\n\t" - "lsr w14, %w[aadSz], #4\n\t" + "lsr w14, w8, #4\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_1_%=\n\t" "cmp w14, #16\n\t" @@ -20157,41 +20192,41 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_aad_both_1_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_done_%=:\n\t" - "and w14, %w[aadSz], #15\n\t" + "and w14, w8, #15\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_aad_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w14\n\t" - "st1 {v28.2d}, [%x[tmp]]\n\t" + "st1 {v28.2d}, [x11]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_dw_%=\n\t" "ldr x19, [%x[aad]], #8\n\t" "sub w20, w20, #8\n\t" - "str x19, [%x[tmp]], #8\n\t" + "str x19, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_sw_%=\n\t" "ldr w19, [%x[aad]], #4\n\t" "sub w20, w20, #4\n\t" - "str w19, [%x[tmp]], #4\n\t" + "str w19, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t" "ldrh w19, [%x[aad]], #2\n\t" "sub w20, w20, #2\n\t" - "strh w19, [%x[tmp]], #2\n\t" + "strh w19, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_decrypt_arm64_crypto_eor3_aad_end_bytes_%=\n\t" "ldrb w19, [%x[aad]], #1\n\t" "subs w20, w20, #1\n\t" - "strb w19, [%x[tmp]], #1\n\t" + "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x14\n\t" - "ld1 {v18.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x14\n\t" + "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ @@ -20254,37 +20289,37 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cbz x24, L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w24\n\t" - "st1 {v28.2d}, [%x[tmp]]\n\t" + "st1 {v28.2d}, [x11]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_dw_%=\n\t" "ldr x19, [%x[nonce]], #8\n\t" "sub w20, w20, #8\n\t" - "str x19, [%x[tmp]], #8\n\t" + "str x19, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_sw_%=\n\t" "ldr w19, [%x[nonce]], #4\n\t" "sub w20, w20, #4\n\t" - "str w19, [%x[tmp]], #4\n\t" + "str w19, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t" "ldrh w19, [%x[nonce]], #2\n\t" "sub w20, w20, #2\n\t" - "strh w19, [%x[tmp]], #2\n\t" + "strh w19, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_end_bytes_%=\n\t" "ldrb w19, [%x[nonce]], #1\n\t" "subs w20, w20, #1\n\t" - "strb w19, [%x[tmp]], #1\n\t" + "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x24\n\t" - "ld1 {v18.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x24\n\t" + "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" /* X = C * H^1 */ @@ -20305,7 +20340,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_partial_done_%=:\n\t" "eor x14, x14, x14\n\t" - "ubfiz x24, %x[nonceSz], #3, #32\n\t" + "lsl x24, %x[nonceSz], #3\n\t" "mov v28.d[0], x14\n\t" "mov v28.d[1], x24\n\t" "rev64 v28.16b, v28.16b\n\t" @@ -20328,9 +20363,9 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "rev w15, w15\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_done_nonce_%=:\n\t" - "st1 {v13.2d}, [%x[reg]]\n\t" + "st1 {v13.2d}, [x12]\n\t" "lsr w14, %w[sz], #4\n\t" - "cmp %w[nr], #12\n\t" + "cmp w13, #12\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_start_128_%=\n\t" "b.gt L_aes_gcm_decrypt_arm64_crypto_eor3_start_256_%=\n\t" /* AES_GCM_192 */ @@ -20339,7 +20374,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -20372,7 +20407,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -20389,7 +20424,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -20406,7 +20441,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -20423,7 +20458,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -20440,7 +20475,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -20457,7 +20492,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -20474,7 +20509,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -20492,7 +20527,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -20509,7 +20544,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -20530,7 +20565,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -20551,7 +20586,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #176]\n\t" + "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -20568,7 +20603,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #192]\n\t" + "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -20593,14 +20628,14 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_both_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -20642,7 +20677,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "rev w16, w15\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -20672,7 +20707,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" @@ -20700,7 +20735,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" @@ -20727,7 +20762,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -20753,7 +20788,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ @@ -20782,7 +20817,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -20810,7 +20845,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" @@ -20836,7 +20871,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" @@ -20860,7 +20895,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" @@ -20882,7 +20917,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -20899,7 +20934,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #176]\n\t" + "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -20916,7 +20951,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #192]\n\t" + "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -20933,7 +20968,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" @@ -21037,10 +21072,10 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" - "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" - "ld1 {v12.2d}, [%x[key]]\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" + "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" + "ld1 {v12.2d}, [x9]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_1_%=\n\t" @@ -21541,37 +21576,37 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" "mov w19, w14\n\t" - "st1 {v15.2d}, [%x[tmp]]\n\t" + "st1 {v15.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [%x[tmp]], #8\n\t" + "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [%x[tmp]], #4\n\t" + "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [%x[tmp]], #2\n\t" + "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [%x[tmp]], #1\n\t" + "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x14\n\t" - "ld1 {v15.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x14\n\t" + "ld1 {v15.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" @@ -21619,30 +21654,30 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" - "st1 {v14.2d}, [%x[tmp]]\n\t" + "st1 {v14.2d}, [x11]\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_dw_%=\n\t" - "ldr x17, [%x[tmp]], #8\n\t" + "ldr x17, [x11], #8\n\t" "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_sw_%=\n\t" - "ldr w17, [%x[tmp]], #4\n\t" + "ldr w17, [x11], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t" - "ldrh w17, [%x[tmp]], #2\n\t" + "ldrh w17, [x11], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_end_bytes_%=\n\t" - "ldrb w17, [%x[tmp]], #1\n\t" + "ldrb w17, [x11], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t" @@ -21650,11 +21685,11 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_end_bytes_%=:\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done_%=:\n\t" - "ld1 {v14.2d}, [%x[reg]]\n\t" - "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" - "rbit %x[aadSz], %x[aadSz]\n\t" - "mov v28.d[0], %x[aadSz]\n\t" - "ubfiz %x[sz], %x[sz], #3, #32\n\t" + "ld1 {v14.2d}, [x12]\n\t" + "lsl x8, x8, #3\n\t" + "rbit x8, x8\n\t" + "mov v28.d[0], x8\n\t" + "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" @@ -21703,51 +21738,50 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_part_tag_%=:\n\t" - "ubfiz %x[tagSz], %x[tagSz], #0, #32\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" - "st1 {v28.2d}, [%x[tmp]]\n\t" + "st1 {v28.2d}, [x11]\n\t" "cmp x17, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_dw_%=\n\t" "ldr x16, [%x[tag]], #8\n\t" "sub x17, x17, #8\n\t" - "str x16, [%x[tmp]], #8\n\t" + "str x16, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" - "str w16, [%x[tmp]], #4\n\t" + "str w16, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" - "strh w16, [%x[tmp]], #2\n\t" + "strh w16, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" - "strb w16, [%x[tmp]], #1\n\t" + "strb w16, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], %x[tagSz]\n\t" - "ld1 {v28.2d}, [%x[tmp]]\n\t" + "sub x11, x11, %x[tagSz]\n\t" + "ld1 {v28.2d}, [x11]\n\t" "mov x17, #16\n\t" - "st1 {v26.2d}, [%x[tmp]]\n\t" + "st1 {v26.2d}, [x11]\n\t" "sub x17, x17, %x[tagSz]\n\t" - "add %x[tmp], %x[tmp], %x[tagSz]\n\t" + "add x11, x11, %x[tagSz]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_calc_tag_byte_%=:\n\t" - "strb wzr, [%x[tmp]], #1\n\t" + "strb wzr, [x11], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_192_calc_tag_byte_%=\n\t" - "subs %x[tmp], %x[tmp], #16\n\t" - "ld1 {v26.2d}, [%x[tmp]]\n\t" + "subs x11, x11, #16\n\t" + "ld1 {v26.2d}, [x11]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" @@ -21768,7 +21802,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -21801,7 +21835,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -21818,7 +21852,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -21835,7 +21869,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -21852,7 +21886,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -21869,7 +21903,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -21886,7 +21920,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -21903,7 +21937,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -21921,7 +21955,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -21938,7 +21972,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -21959,7 +21993,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -21980,7 +22014,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #176]\n\t" + "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -21997,7 +22031,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #192]\n\t" + "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -22014,7 +22048,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #208]\n\t" + "ldr q13, [x9, #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -22031,7 +22065,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #224]\n\t" + "ldr q12, [x9, #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -22056,14 +22090,14 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_both_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -22105,7 +22139,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "rev w16, w15\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -22135,7 +22169,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" @@ -22163,7 +22197,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" @@ -22190,7 +22224,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -22216,7 +22250,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ @@ -22245,7 +22279,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -22273,7 +22307,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" @@ -22299,7 +22333,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" @@ -22323,7 +22357,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" @@ -22345,7 +22379,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -22362,7 +22396,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #176]\n\t" + "ldr q13, [x9, #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -22379,7 +22413,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #192]\n\t" + "ldr q12, [x9, #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -22396,7 +22430,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #208]\n\t" + "ldr q13, [x9, #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -22413,7 +22447,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #224]\n\t" + "ldr q12, [x9, #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -22430,7 +22464,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" @@ -22534,10 +22568,10 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" - "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" - "ld1 {v12.2d}, [%x[key]], #16\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" + "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" + "ld1 {v12.2d}, [x9], #16\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_1_%=\n\t" @@ -22660,7 +22694,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" - "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" + "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -22847,7 +22881,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" - "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" + "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -22984,7 +23018,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" - "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" + "ld1 {v29.2d, v30.2d}, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -23055,10 +23089,10 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [%x[key]]\n\t" + "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q30, [%x[key], #16]\n\t" + "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" @@ -23086,37 +23120,37 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" "mov w19, w14\n\t" - "st1 {v15.2d}, [%x[tmp]]\n\t" + "st1 {v15.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [%x[tmp]], #8\n\t" + "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [%x[tmp]], #4\n\t" + "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [%x[tmp]], #2\n\t" + "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [%x[tmp]], #1\n\t" + "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x14\n\t" - "ld1 {v15.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x14\n\t" + "ld1 {v15.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" @@ -23161,39 +23195,39 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [%x[key]]\n\t" + "ldr q29, [x9]\n\t" /* Done GHASH */ "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q30, [%x[key], #16]\n\t" + "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" - "st1 {v14.2d}, [%x[tmp]]\n\t" + "st1 {v14.2d}, [x11]\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_dw_%=\n\t" - "ldr x17, [%x[tmp]], #8\n\t" + "ldr x17, [x11], #8\n\t" "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_sw_%=\n\t" - "ldr w17, [%x[tmp]], #4\n\t" + "ldr w17, [x11], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t" - "ldrh w17, [%x[tmp]], #2\n\t" + "ldrh w17, [x11], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_end_bytes_%=\n\t" - "ldrb w17, [%x[tmp]], #1\n\t" + "ldrb w17, [x11], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t" @@ -23201,11 +23235,11 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_end_bytes_%=:\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done_%=:\n\t" - "ld1 {v14.2d}, [%x[reg]]\n\t" - "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" - "rbit %x[aadSz], %x[aadSz]\n\t" - "mov v28.d[0], %x[aadSz]\n\t" - "ubfiz %x[sz], %x[sz], #3, #32\n\t" + "ld1 {v14.2d}, [x12]\n\t" + "lsl x8, x8, #3\n\t" + "rbit x8, x8\n\t" + "mov v28.d[0], x8\n\t" + "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "aese v14.16b, v0.16b\n\t" @@ -23241,18 +23275,18 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" - "ldr q11, [%x[key], #-32]\n\t" + "ldr q11, [x9, #-32]\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" - "ldr q12, [%x[key], #-16]\n\t" + "ldr q12, [x9, #-16]\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [%x[key]]\n\t" + "ldr q29, [x9]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" - "ldr q30, [%x[key], #16]\n\t" + "ldr q30, [x9, #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" @@ -23262,51 +23296,50 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_part_tag_%=:\n\t" - "ubfiz %x[tagSz], %x[tagSz], #0, #32\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" - "st1 {v28.2d}, [%x[tmp]]\n\t" + "st1 {v28.2d}, [x11]\n\t" "cmp x17, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_dw_%=\n\t" "ldr x16, [%x[tag]], #8\n\t" "sub x17, x17, #8\n\t" - "str x16, [%x[tmp]], #8\n\t" + "str x16, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" - "str w16, [%x[tmp]], #4\n\t" + "str w16, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" - "strh w16, [%x[tmp]], #2\n\t" + "strh w16, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" - "strb w16, [%x[tmp]], #1\n\t" + "strb w16, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], %x[tagSz]\n\t" - "ld1 {v28.2d}, [%x[tmp]]\n\t" + "sub x11, x11, %x[tagSz]\n\t" + "ld1 {v28.2d}, [x11]\n\t" "mov x17, #16\n\t" - "st1 {v26.2d}, [%x[tmp]]\n\t" + "st1 {v26.2d}, [x11]\n\t" "sub x17, x17, %x[tagSz]\n\t" - "add %x[tmp], %x[tmp], %x[tagSz]\n\t" + "add x11, x11, %x[tagSz]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_calc_tag_byte_%=:\n\t" - "strb wzr, [%x[tmp]], #1\n\t" + "strb wzr, [x11], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_256_calc_tag_byte_%=\n\t" - "subs %x[tmp], %x[tmp], #16\n\t" - "ld1 {v26.2d}, [%x[tmp]]\n\t" + "subs x11, x11, #16\n\t" + "ld1 {v26.2d}, [x11]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" @@ -23327,7 +23360,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -23360,7 +23393,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -23377,7 +23410,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -23394,7 +23427,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -23411,7 +23444,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -23428,7 +23461,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -23445,7 +23478,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -23462,7 +23495,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -23480,7 +23513,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -23497,7 +23530,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -23518,7 +23551,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -23547,14 +23580,14 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_both_8_%=:\n\t" - "ldr q12, [%x[key]]\n\t" + "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -23596,7 +23629,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "rev w16, w15\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [%x[key], #16]\n\t" + "ldr q13, [x9, #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -23626,7 +23659,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #32]\n\t" + "ldr q12, [x9, #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" @@ -23654,7 +23687,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #48]\n\t" + "ldr q13, [x9, #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" @@ -23681,7 +23714,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #64]\n\t" + "ldr q12, [x9, #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -23707,7 +23740,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #80]\n\t" + "ldr q13, [x9, #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ @@ -23736,7 +23769,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #96]\n\t" + "ldr q12, [x9, #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -23764,7 +23797,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #112]\n\t" + "ldr q13, [x9, #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" @@ -23790,7 +23823,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [%x[key], #128]\n\t" + "ldr q12, [x9, #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" @@ -23814,7 +23847,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [%x[key], #144]\n\t" + "ldr q13, [x9, #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" @@ -23836,7 +23869,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [%x[key], #160]\n\t" + "ldr q12, [x9, #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -23853,7 +23886,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [%x[reg]]\n\t" + "ld1 {v13.2d}, [x12]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" @@ -23957,10 +23990,10 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" - "ld1 {v8.2d, v9.2d}, [%x[key]], #32\n\t" - "ld1 {v10.2d}, [%x[key]]\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" + "ld1 {v8.2d, v9.2d}, [x9], #32\n\t" + "ld1 {v10.2d}, [x9]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_1_%=\n\t" @@ -24416,37 +24449,37 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" "mov w19, w14\n\t" - "st1 {v15.2d}, [%x[tmp]]\n\t" + "st1 {v15.2d}, [x11]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [%x[tmp]], #8\n\t" + "str x17, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [%x[tmp]], #4\n\t" + "str w17, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [%x[tmp]], #2\n\t" + "strh w17, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [%x[tmp]], #1\n\t" + "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], x14\n\t" - "ld1 {v15.2d}, [%x[tmp]]\n\t" + "sub x11, x11, x14\n\t" + "ld1 {v15.2d}, [x11]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" @@ -24490,30 +24523,30 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" - "st1 {v14.2d}, [%x[tmp]]\n\t" + "st1 {v14.2d}, [x11]\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_dw_%=\n\t" - "ldr x17, [%x[tmp]], #8\n\t" + "ldr x17, [x11], #8\n\t" "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_sw_%=\n\t" - "ldr w17, [%x[tmp]], #4\n\t" + "ldr w17, [x11], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t" - "ldrh w17, [%x[tmp]], #2\n\t" + "ldrh w17, [x11], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_end_bytes_%=\n\t" - "ldrb w17, [%x[tmp]], #1\n\t" + "ldrb w17, [x11], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t" @@ -24521,11 +24554,11 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_end_bytes_%=:\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done_%=:\n\t" - "ld1 {v14.2d}, [%x[reg]]\n\t" - "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" - "rbit %x[aadSz], %x[aadSz]\n\t" - "mov v28.d[0], %x[aadSz]\n\t" - "ubfiz %x[sz], %x[sz], #3, #32\n\t" + "ld1 {v14.2d}, [x12]\n\t" + "lsl x8, x8, #3\n\t" + "rbit x8, x8\n\t" + "mov v28.d[0], x8\n\t" + "lsl %x[sz], %x[sz], #3\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" @@ -24570,51 +24603,50 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_part_tag_%=:\n\t" - "ubfiz %x[tagSz], %x[tagSz], #0, #32\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" - "st1 {v28.2d}, [%x[tmp]]\n\t" + "st1 {v28.2d}, [x11]\n\t" "cmp x17, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_dw_%=\n\t" "ldr x16, [%x[tag]], #8\n\t" "sub x17, x17, #8\n\t" - "str x16, [%x[tmp]], #8\n\t" + "str x16, [x11], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" - "str w16, [%x[tmp]], #4\n\t" + "str w16, [x11], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" - "strh w16, [%x[tmp]], #2\n\t" + "strh w16, [x11], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" - "strb w16, [%x[tmp]], #1\n\t" + "strb w16, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_end_bytes_%=:\n\t" - "sub %x[tmp], %x[tmp], %x[tagSz]\n\t" - "ld1 {v28.2d}, [%x[tmp]]\n\t" + "sub x11, x11, %x[tagSz]\n\t" + "ld1 {v28.2d}, [x11]\n\t" "mov x17, #16\n\t" - "st1 {v26.2d}, [%x[tmp]]\n\t" + "st1 {v26.2d}, [x11]\n\t" "sub x17, x17, %x[tagSz]\n\t" - "add %x[tmp], %x[tmp], %x[tagSz]\n\t" + "add x11, x11, %x[tagSz]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_calc_tag_byte_%=:\n\t" - "strb wzr, [%x[tmp]], #1\n\t" + "strb wzr, [x11], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_128_calc_tag_byte_%=\n\t" - "subs %x[tmp], %x[tmp], #16\n\t" - "ld1 {v26.2d}, [%x[tmp]]\n\t" + "subs x11, x11, #16\n\t" + "ld1 {v26.2d}, [x11]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" @@ -24628,11 +24660,12 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, #endif /* !NO_AES_128 */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_done_%=:\n\t" - : [in] "+r" (in), [out] "+r" (out), [sz] "+r" (sz), - [nonceSz] "+r" (nonceSz), [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), - [key] "+r" (key), [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp), - [reg] "+r" (reg), [nr] "+r" (nr) - : [nonce] "r" (nonce), [tag] "r" (tag), [aad] "r" (aad) + "ldp x29, x30, [sp], #0x50\n\t" + : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz), + [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), [key] "+r" (key), + [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp), [reg] "+r" (reg), + [nr] "+r" (nr) + : [in] "r" (in), [nonce] "r" (nonce), [tag] "r" (tag), [aad] "r" (aad) : "memory", "cc", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", @@ -24748,7 +24781,7 @@ void AES_GCM_init_AARCH64(byte* key, int nr, const byte* nonce, word32 nonceSz, "\n" "L_aes_gcm_init_arm64_crypto_partial_done_%=:\n\t" "eor x7, x7, x7\n\t" - "ubfiz x13, %x[nonceSz], #3, #32\n\t" + "lsl x13, %x[nonceSz], #3\n\t" "mov v7.d[0], x7\n\t" "mov v7.d[1], x13\n\t" "rev64 v7.16b, v7.16b\n\t" @@ -25266,6 +25299,8 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, const byte* in, word32 nbytes, byte* tag, byte* h, byte* counter) { __asm__ __volatile__ ( + "stp x29, x30, [sp, #-32]!\n\t" + "add x29, sp, #0\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "movi v27.16b, #0x87\n\t" "ld1 {v26.2d}, [%x[tag]]\n\t" @@ -29078,6 +29113,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "mov v13.s[3], w9\n\t" "st1 {v26.2d}, [%x[tag]]\n\t" "st1 {v13.2d}, [%x[counter]]\n\t" + "ldp x29, x30, [sp], #32\n\t" : [nr] "+r" (nr), [out] "+r" (out), [nbytes] "+r" (nbytes), [tag] "+r" (tag), [h] "+r" (h), [counter] "+r" (counter) : [key] "r" (key), [in] "r" (in) @@ -29098,10 +29134,10 @@ void AES_GCM_encrypt_final_AARCH64(byte* tag, byte* authTag, word32 tbytes, "ld1 {v4.2d}, [%x[h]]\n\t" "ushr v6.2d, v6.2d, #56\n\t" "ld1 {v7.2d}, [%x[initCtr]]\n\t" - "ubfiz %x[abytes], %x[abytes], #3, #32\n\t" + "lsl %x[abytes], %x[abytes], #3\n\t" "rbit %x[abytes], %x[abytes]\n\t" "mov v0.d[0], %x[abytes]\n\t" - "ubfiz %x[nbytes], %x[nbytes], #3, #32\n\t" + "lsl %x[nbytes], %x[nbytes], #3\n\t" "rbit %x[nbytes], %x[nbytes]\n\t" "mov v0.d[1], %x[nbytes]\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" @@ -29171,6 +29207,8 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, const byte* in, word32 nbytes, byte* tag, byte* h, byte* counter) { __asm__ __volatile__ ( + "stp x29, x30, [sp, #-32]!\n\t" + "add x29, sp, #0\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "movi v27.16b, #0x87\n\t" "ld1 {v26.2d}, [%x[tag]]\n\t" @@ -32984,6 +33022,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "mov v13.s[3], w9\n\t" "st1 {v26.2d}, [%x[tag]]\n\t" "st1 {v13.2d}, [%x[counter]]\n\t" + "ldp x29, x30, [sp], #32\n\t" : [nr] "+r" (nr), [out] "+r" (out), [nbytes] "+r" (nbytes), [tag] "+r" (tag), [h] "+r" (h), [counter] "+r" (counter) : [key] "r" (key), [in] "r" (in) @@ -33000,15 +33039,17 @@ void AES_GCM_decrypt_final_AARCH64(byte* tag, const byte* authTag, int* res) { __asm__ __volatile__ ( + "stp x29, x30, [sp, #-32]!\n\t" + "add x29, sp, #0\n\t" "ld1 {v5.2d}, [%x[tag]]\n\t" "movi v6.16b, #0x87\n\t" "ld1 {v4.2d}, [%x[h]]\n\t" "ushr v6.2d, v6.2d, #56\n\t" "ld1 {v7.2d}, [%x[initCtr]]\n\t" - "ubfiz %x[abytes], %x[abytes], #3, #32\n\t" + "lsl %x[abytes], %x[abytes], #3\n\t" "rbit %x[abytes], %x[abytes]\n\t" "mov v0.d[0], %x[abytes]\n\t" - "ubfiz %x[nbytes], %x[nbytes], #3, #32\n\t" + "lsl %x[nbytes], %x[nbytes], #3\n\t" "rbit %x[nbytes], %x[nbytes]\n\t" "mov v0.d[1], %x[nbytes]\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" @@ -33034,7 +33075,6 @@ void AES_GCM_decrypt_final_AARCH64(byte* tag, const byte* authTag, "b L_aes_gcm_decrypt_final_arm64_crypto_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_final_arm64_crypto_part_tag_%=:\n\t" - "ubfiz %x[tbytes], %x[tbytes], #0, #32\n\t" "eor v0.16b, v0.16b, v0.16b\n\t" "mov x10, %x[tbytes]\n\t" "st1 {v0.2d}, [%x[tag]]\n\t" @@ -33091,6 +33131,7 @@ void AES_GCM_decrypt_final_AARCH64(byte* tag, const byte* authTag, "and x8, x8, x11\n\t" "add w8, w8, #0xb4\n\t" "str w8, [%x[res]]\n\t" + "ldp x29, x30, [sp], #32\n\t" : [tag] "+r" (tag), [tbytes] "+r" (tbytes), [nbytes] "+r" (nbytes), [abytes] "+r" (abytes), [h] "+r" (h), [initCtr] "+r" (initCtr), [res] "+r" (res) @@ -33202,7 +33243,7 @@ void AES_GCM_init_AARCH64_EOR3(byte* key, int nr, const byte* nonce, "\n" "L_aes_gcm_init_arm64_crypto_eor3_partial_done_%=:\n\t" "eor x7, x7, x7\n\t" - "ubfiz x13, %x[nonceSz], #3, #32\n\t" + "lsl x13, %x[nonceSz], #3\n\t" "mov v7.d[0], x7\n\t" "mov v7.d[1], x13\n\t" "rev64 v7.16b, v7.16b\n\t" @@ -33700,6 +33741,8 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, const byte* in, word32 nbytes, byte* tag, byte* h, byte* counter) { __asm__ __volatile__ ( + "stp x29, x30, [sp, #-32]!\n\t" + "add x29, sp, #0\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "movi v27.16b, #0x87\n\t" "ld1 {v26.2d}, [%x[tag]]\n\t" @@ -37428,6 +37471,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "mov v13.s[3], w9\n\t" "st1 {v26.2d}, [%x[tag]]\n\t" "st1 {v13.2d}, [%x[counter]]\n\t" + "ldp x29, x30, [sp], #32\n\t" : [nr] "+r" (nr), [out] "+r" (out), [nbytes] "+r" (nbytes), [tag] "+r" (tag), [h] "+r" (h), [counter] "+r" (counter) : [key] "r" (key), [in] "r" (in) @@ -37448,10 +37492,10 @@ void AES_GCM_encrypt_final_AARCH64_EOR3(byte* tag, byte* authTag, word32 tbytes, "ld1 {v4.2d}, [%x[h]]\n\t" "ushr v6.2d, v6.2d, #56\n\t" "ld1 {v7.2d}, [%x[initCtr]]\n\t" - "ubfiz %x[abytes], %x[abytes], #3, #32\n\t" + "lsl %x[abytes], %x[abytes], #3\n\t" "rbit %x[abytes], %x[abytes]\n\t" "mov v0.d[0], %x[abytes]\n\t" - "ubfiz %x[nbytes], %x[nbytes], #3, #32\n\t" + "lsl %x[nbytes], %x[nbytes], #3\n\t" "rbit %x[nbytes], %x[nbytes]\n\t" "mov v0.d[1], %x[nbytes]\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" @@ -37520,6 +37564,8 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, const byte* in, word32 nbytes, byte* tag, byte* h, byte* counter) { __asm__ __volatile__ ( + "stp x29, x30, [sp, #-32]!\n\t" + "add x29, sp, #0\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "movi v27.16b, #0x87\n\t" "ld1 {v26.2d}, [%x[tag]]\n\t" @@ -41249,6 +41295,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "mov v13.s[3], w9\n\t" "st1 {v26.2d}, [%x[tag]]\n\t" "st1 {v13.2d}, [%x[counter]]\n\t" + "ldp x29, x30, [sp], #32\n\t" : [nr] "+r" (nr), [out] "+r" (out), [nbytes] "+r" (nbytes), [tag] "+r" (tag), [h] "+r" (h), [counter] "+r" (counter) : [key] "r" (key), [in] "r" (in) @@ -41265,15 +41312,17 @@ void AES_GCM_decrypt_final_AARCH64_EOR3(byte* tag, const byte* authTag, int* res) { __asm__ __volatile__ ( + "stp x29, x30, [sp, #-32]!\n\t" + "add x29, sp, #0\n\t" "ld1 {v5.2d}, [%x[tag]]\n\t" "movi v6.16b, #0x87\n\t" "ld1 {v4.2d}, [%x[h]]\n\t" "ushr v6.2d, v6.2d, #56\n\t" "ld1 {v7.2d}, [%x[initCtr]]\n\t" - "ubfiz %x[abytes], %x[abytes], #3, #32\n\t" + "lsl %x[abytes], %x[abytes], #3\n\t" "rbit %x[abytes], %x[abytes]\n\t" "mov v0.d[0], %x[abytes]\n\t" - "ubfiz %x[nbytes], %x[nbytes], #3, #32\n\t" + "lsl %x[nbytes], %x[nbytes], #3\n\t" "rbit %x[nbytes], %x[nbytes]\n\t" "mov v0.d[1], %x[nbytes]\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" @@ -41298,7 +41347,6 @@ void AES_GCM_decrypt_final_AARCH64_EOR3(byte* tag, const byte* authTag, "b L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_final_arm64_crypto_eor3_part_tag_%=:\n\t" - "ubfiz %x[tbytes], %x[tbytes], #0, #32\n\t" "eor v0.16b, v0.16b, v0.16b\n\t" "mov x10, %x[tbytes]\n\t" "st1 {v0.2d}, [%x[tag]]\n\t" @@ -41355,6 +41403,7 @@ void AES_GCM_decrypt_final_AARCH64_EOR3(byte* tag, const byte* authTag, "and x8, x8, x11\n\t" "add w8, w8, #0xb4\n\t" "str w8, [%x[res]]\n\t" + "ldp x29, x30, [sp], #32\n\t" : [tag] "+r" (tag), [tbytes] "+r" (tbytes), [nbytes] "+r" (nbytes), [abytes] "+r" (abytes), [h] "+r" (h), [initCtr] "+r" (initCtr), [res] "+r" (res) @@ -41372,6 +41421,8 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp, int nr) { __asm__ __volatile__ ( + "stp x29, x30, [sp, #-32]!\n\t" + "add x29, sp, #0\n\t" "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key2]], #0x40\n\t" "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key2]], #0x40\n\t" "ld1 {v4.16b}, [%x[i]]\n\t" @@ -42381,6 +42432,7 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, #endif /* !NO_AES_128 */ "\n" "L_aes_xts_encrypt_arm64_crypto_done_%=:\n\t" + "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) : [in] "r" (in), [i] "r" (i) @@ -42396,6 +42448,8 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp, int nr) { __asm__ __volatile__ ( + "stp x29, x30, [sp, #-32]!\n\t" + "add x29, sp, #0\n\t" "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key2]], #0x40\n\t" "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key2]], #0x40\n\t" "ld1 {v4.16b}, [%x[i]]\n\t" @@ -43500,6 +43554,7 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, #endif /* !NO_AES_128 */ "\n" "L_aes_xts_decrypt_arm64_crypto_done_%=:\n\t" + "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) : [in] "r" (in), [i] "r" (i) @@ -48851,6 +48906,8 @@ void AES_XTS_encrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, const word8* te = L_AES_ARM64_NEON_te; const word8* shuffle = L_AES_ARM64_NEON_shift_rows_shuffle; __asm__ __volatile__ ( + "stp x29, x30, [sp, #-32]!\n\t" + "add x29, sp, #0\n\t" "ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [%[te]], #0x40\n\t" "ld1 {v20.16b, v21.16b, v22.16b, v23.16b}, [%[te]], #0x40\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%[te]], #0x40\n\t" @@ -49896,6 +49953,7 @@ void AES_XTS_encrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "st1 {v0.16b}, [%x[out]]\n\t" "\n" "L_AES_XTS_encrypt_NEON_data_done_%=:\n\t" + "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) : [in] "r" (in), [i] "r" (i), [te] "r" (te), [shuffle] "r" (shuffle) @@ -49916,6 +49974,8 @@ void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, const word8* shuffle = L_AES_ARM64_NEON_shift_rows_shuffle; const word8* invshuffle = L_AES_ARM64_NEON_shift_rows_invshuffle; __asm__ __volatile__ ( + "stp x29, x30, [sp, #-32]!\n\t" + "add x29, sp, #0\n\t" "ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [%[te]], #0x40\n\t" "ld1 {v20.16b, v21.16b, v22.16b, v23.16b}, [%[te]], #0x40\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%[te]], #0x40\n\t" @@ -51425,6 +51485,7 @@ void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "st1 {v0.16b}, [%x[out]]\n\t" "\n" "L_AES_XTS_decrypt_NEON_data_done_%=:\n\t" + "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) : [in] "r" (in), [i] "r" (i), [te] "r" (te), [td] "r" (td), @@ -54610,6 +54671,8 @@ void AES_XTS_encrypt(const byte* in, byte* out, word32 sz, const byte* i, { const word32* te = L_AES_ARM64_te; __asm__ __volatile__ ( + "stp x29, x30, [sp, #-32]!\n\t" + "add x29, sp, #0\n\t" "mov x9, #0x87\n\t" "mov x26, %x[key2]\n\t" "ldp x21, x22, [%x[i]]\n\t" @@ -55534,6 +55597,7 @@ void AES_XTS_encrypt(const byte* in, byte* out, word32 sz, const byte* i, "stp x10, x11, [%x[out]]\n\t" "\n" "L_AES_XTS_encrypt_done_data_%=:\n\t" + "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) : [in] "r" (in), [i] "r" (i), [te] "r" (te) @@ -55550,6 +55614,8 @@ void AES_XTS_decrypt(const byte* in, byte* out, word32 sz, const byte* i, const word8* td4 = L_AES_ARM64_td4; const word32* te = L_AES_ARM64_te; __asm__ __volatile__ ( + "stp x29, x30, [sp, #-32]!\n\t" + "add x29, sp, #0\n\t" "ands w11, %w[sz], #15\n\t" "cset w11, ne\n\t" "lsl w11, w11, #4\n\t" @@ -56699,6 +56765,7 @@ void AES_XTS_decrypt(const byte* in, byte* out, word32 sz, const byte* i, "stp x12, x13, [%x[out]]\n\t" "\n" "L_AES_XTS_decrypt_done_data_%=:\n\t" + "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) : [in] "r" (in), [i] "r" (i), [td] "r" (td), [td4] "r" (td4), diff --git a/wolfcrypt/src/port/arm/armv8-curve25519_c.c b/wolfcrypt/src/port/arm/armv8-curve25519_c.c index 7f1b14a4d62..b8515b00549 100644 --- a/wolfcrypt/src/port/arm/armv8-curve25519_c.c +++ b/wolfcrypt/src/port/arm/armv8-curve25519_c.c @@ -38,7 +38,7 @@ #if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL) #include -void fe_init(void) +void fe_init() { __asm__ __volatile__ ( "\n\t" @@ -229,8 +229,8 @@ int fe_isnonzero(const fe a) "orr %x[a], x1, x2\n\t" "orr x3, x3, x4\n\t" "orr %x[a], %x[a], x3\n\t" - : [a] "+r" (a) : + : [a] "r" (a) : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6" ); return (word32)(size_t)a; @@ -248,8 +248,8 @@ int fe_isnegative(const fe a) "adc x5, x4, xzr\n\t" "and %x[a], x1, #1\n\t" "eor %x[a], %x[a], x5, lsr 63\n\t" - : [a] "+r" (a) : + : [a] "r" (a) : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6" ); return (word32)(size_t)a; @@ -4362,7 +4362,7 @@ int curve25519_base(byte* r, const byte* n) /* Store */ "stp x14, x15, [%x[r]]\n\t" "stp x16, x17, [%x[r], #16]\n\t" - "mov %x[r], xzr\n\t" + "mov x0, xzr\n\t" "ldp x29, x30, [sp], #0xb0\n\t" : [r] "+r" (r) : [n] "r" (n), [x2] "r" (x2) @@ -6969,7 +6969,7 @@ int curve25519(byte* r, const byte* n, const byte* a) /* Store */ "stp x14, x15, [%x[r]]\n\t" "stp x16, x17, [%x[r], #16]\n\t" - "mov %x[r], xzr\n\t" + "mov x0, xzr\n\t" "ldp x29, x30, [sp], #0xc0\n\t" : [r] "+r" (r) : [n] "r" (n), [a] "r" (a) diff --git a/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c b/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c index 0c00ffbb48d..54a92c47fc1 100644 --- a/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c @@ -8406,11 +8406,11 @@ int mlkem_cmp_neon(const byte* a, const byte* b, int sz) "orr v8.16b, v8.16b, v10.16b\n\t" "ext v9.16b, v8.16b, v8.16b, #8\n\t" "orr v8.16b, v8.16b, v9.16b\n\t" - "mov %x[a], v8.d[0]\n\t" - "subs %x[a], %x[a], xzr\n\t" - "csetm %w[a], ne\n\t" - : [a] "+r" (a), [sz] "+r" (sz) - : [b] "r" (b) + "mov x0, v8.d[0]\n\t" + "subs x0, x0, xzr\n\t" + "csetm w0, ne\n\t" + : [sz] "+r" (sz) + : [a] "r" (a), [b] "r" (b) : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11" ); @@ -9089,7 +9089,7 @@ unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r, "b L_mlkem_rej_uniform_loop_lt_4_%=\n\t" "\n" "L_mlkem_rej_uniform_done_%=:\n\t" - "mov %x[p], x12\n\t" + "mov x0, x12\n\t" : [p] "+r" (p), [len] "+r" (len), [rLen] "+r" (rLen) : [r] "r" (r), [mask] "r" (mask), [q] "r" (q), [bits] "r" (bits), [indices] "r" (indices) diff --git a/wolfcrypt/src/port/arm/thumb2-aes-asm.S b/wolfcrypt/src/port/arm/thumb2-aes-asm.S index 2275959d2ad..cbc829e0017 100644 --- a/wolfcrypt/src/port/arm/thumb2-aes-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-aes-asm.S @@ -249,16 +249,16 @@ L_AES_invert_key_loop: LDM r10, {r6, r7, r8, r9} STM r10, {r2, r3, r4, r5} STM r0!, {r6, r7, r8, r9} - SUBS r11, r11, #0x2 - SUB r10, r10, #0x10 + SUBS r11, r11, #2 + SUB r10, r10, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_invert_key_loop #else BNE.N L_AES_invert_key_loop #endif SUB r0, r0, r1, LSL #3 - ADD r0, r0, #0x10 - SUB r11, r1, #0x1 + ADD r0, r0, #16 + SUB r11, r1, #1 L_AES_invert_key_mix_loop: LDM r0, {r2, r3, r4, r5} UBFX r6, r2, #0, #8 @@ -325,7 +325,7 @@ L_AES_invert_key_mix_loop: EOR r8, r8, r7, ROR #8 EOR r8, r8, r9, ROR #24 STR r8, [r0], #4 - SUBS r11, r11, #0x1 + SUBS r11, r11, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_invert_key_mix_loop #else @@ -390,8 +390,8 @@ AES_set_encrypt_key: REV r6, r6 REV r7, r7 STM r2, {r4, r5, r6, r7} - SUB r2, r2, #0x10 - MOV r12, #0x6 + SUB r2, r2, #16 + MOV r12, #6 L_AES_set_encrypt_key_loop_256: UBFX r4, r7, #0, #8 UBFX r5, r7, #8, #8 @@ -411,14 +411,13 @@ L_AES_set_encrypt_key_loop_256: EOR r5, r5, r4 EOR r6, r6, r5 EOR r7, r7, r6 - ADD r2, r2, #0x10 + ADD r2, r2, #16 STM r2, {r4, r5, r6, r7} - SUB r2, r2, #0x10 - MOV r3, r7 - UBFX r4, r3, #8, #8 - UBFX r5, r3, #16, #8 - LSR r6, r3, #24 - UBFX r3, r3, #0, #8 + SUB r2, r2, #16 + UBFX r4, r7, #8, #8 + UBFX r5, r7, #16, #8 + LSR r6, r7, #24 + UBFX r3, r7, #0, #8 LDRB r4, [r10, r4, LSL #2] LDRB r6, [r10, r6, LSL #2] LDRB r5, [r10, r5, LSL #2] @@ -431,10 +430,10 @@ L_AES_set_encrypt_key_loop_256: EOR r5, r5, r4 EOR r6, r6, r5 EOR r7, r7, r6 - ADD r2, r2, #0x10 + ADD r2, r2, #16 STM r2, {r4, r5, r6, r7} - SUB r2, r2, #0x10 - SUBS r12, r12, #0x1 + SUB r2, r2, #16 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_set_encrypt_key_loop_256 #else @@ -458,9 +457,9 @@ L_AES_set_encrypt_key_loop_256: EOR r5, r5, r4 EOR r6, r6, r5 EOR r7, r7, r6 - ADD r2, r2, #0x10 + ADD r2, r2, #16 STM r2, {r4, r5, r6, r7} - SUB r2, r2, #0x10 + SUB r2, r2, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) B L_AES_set_encrypt_key_end #else @@ -482,7 +481,7 @@ L_AES_set_encrypt_key_start_192: STM r2, {r4, r5, r6, r7} STRD r8, r9, [r2, #16] MOV r7, r9 - MOV r12, #0x7 + MOV r12, #7 L_AES_set_encrypt_key_loop_192: UBFX r4, r9, #0, #8 UBFX r5, r9, #8, #8 @@ -505,7 +504,7 @@ L_AES_set_encrypt_key_loop_192: EOR r8, r8, r7 EOR r9, r9, r8 STM r2, {r4, r5, r6, r7, r8, r9} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_set_encrypt_key_loop_192 #else @@ -545,7 +544,7 @@ L_AES_set_encrypt_key_start_128: REV r6, r6 REV r7, r7 STM r2, {r4, r5, r6, r7} - MOV r12, #0xa + MOV r12, #10 L_AES_set_encrypt_key_loop_128: UBFX r4, r7, #0, #8 UBFX r5, r7, #8, #8 @@ -566,7 +565,7 @@ L_AES_set_encrypt_key_loop_128: EOR r6, r6, r5 EOR r7, r7, r6 STM r2, {r4, r5, r6, r7} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_set_encrypt_key_loop_128 #else @@ -574,7 +573,7 @@ L_AES_set_encrypt_key_loop_128: #endif L_AES_set_encrypt_key_end: POP {r4, r5, r6, r7, r8, r9, r10, pc} - /* Cycle Count = 340 */ + /* Cycle Count = 339 */ .size AES_set_encrypt_key,.-AES_set_encrypt_key #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE .text @@ -684,7 +683,7 @@ L_AES_encrypt_block_nr: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_encrypt_block_nr #else @@ -826,13 +825,13 @@ AES_ECB_encrypt: LDR r0, L_AES_Thumb2_te_ecb LDR r12, [sp, #36] PUSH {r3} - CMP r12, #0xa + CMP r12, #10 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_AES_ECB_encrypt_start_block_128 #else BEQ.W L_AES_ECB_encrypt_start_block_128 #endif - CMP r12, #0xc + CMP r12, #12 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_AES_ECB_encrypt_start_block_192 #else @@ -854,7 +853,7 @@ L_AES_ECB_encrypt_loop_block_256: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x6 + MOV r1, #6 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_encrypt_block #else @@ -959,7 +958,7 @@ L_AES_ECB_encrypt_block_nr_256: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_ECB_encrypt_block_nr_256 #else @@ -1076,9 +1075,9 @@ L_AES_ECB_encrypt_block_nr_256: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r2, r2, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r2, r2, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_ECB_encrypt_loop_block_256 #else @@ -1106,7 +1105,7 @@ L_AES_ECB_encrypt_loop_block_192: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x5 + MOV r1, #5 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_encrypt_block #else @@ -1211,7 +1210,7 @@ L_AES_ECB_encrypt_block_nr_192: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_ECB_encrypt_block_nr_192 #else @@ -1328,9 +1327,9 @@ L_AES_ECB_encrypt_block_nr_192: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r2, r2, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r2, r2, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_ECB_encrypt_loop_block_192 #else @@ -1358,7 +1357,7 @@ L_AES_ECB_encrypt_loop_block_128: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x4 + MOV r1, #4 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_encrypt_block #else @@ -1463,7 +1462,7 @@ L_AES_ECB_encrypt_block_nr_128: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_ECB_encrypt_block_nr_128 #else @@ -1580,9 +1579,9 @@ L_AES_ECB_encrypt_block_nr_128: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r2, r2, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r2, r2, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_ECB_encrypt_loop_block_128 #else @@ -1608,13 +1607,13 @@ AES_CBC_encrypt: LDR r0, L_AES_Thumb2_te_ecb LDM r9, {r4, r5, r6, r7} PUSH {r3, r9} - CMP r8, #0xa + CMP r8, #10 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_AES_CBC_encrypt_start_block_128 #else BEQ.W L_AES_CBC_encrypt_start_block_128 #endif - CMP r8, #0xc + CMP r8, #12 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_AES_CBC_encrypt_start_block_192 #else @@ -1640,7 +1639,7 @@ L_AES_CBC_encrypt_loop_block_256: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x6 + MOV r1, #6 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_encrypt_block #else @@ -1745,7 +1744,7 @@ L_AES_CBC_encrypt_block_nr_256: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CBC_encrypt_block_nr_256 #else @@ -1862,9 +1861,9 @@ L_AES_CBC_encrypt_block_nr_256: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r2, r2, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r2, r2, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CBC_encrypt_loop_block_256 #else @@ -1896,7 +1895,7 @@ L_AES_CBC_encrypt_loop_block_192: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x5 + MOV r1, #5 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_encrypt_block #else @@ -2001,7 +2000,7 @@ L_AES_CBC_encrypt_block_nr_192: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CBC_encrypt_block_nr_192 #else @@ -2118,9 +2117,9 @@ L_AES_CBC_encrypt_block_nr_192: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r2, r2, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r2, r2, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CBC_encrypt_loop_block_192 #else @@ -2152,7 +2151,7 @@ L_AES_CBC_encrypt_loop_block_128: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x4 + MOV r1, #4 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_encrypt_block #else @@ -2257,7 +2256,7 @@ L_AES_CBC_encrypt_block_nr_128: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CBC_encrypt_block_nr_128 #else @@ -2374,9 +2373,9 @@ L_AES_CBC_encrypt_block_nr_128: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r2, r2, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r2, r2, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CBC_encrypt_loop_block_128 #else @@ -2422,13 +2421,13 @@ AES_CTR_encrypt: REV r7, r7 STM r8, {r4, r5, r6, r7} PUSH {r3, r8} - CMP r12, #0xa + CMP r12, #10 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_AES_CTR_encrypt_start_block_128 #else BEQ.W L_AES_CTR_encrypt_start_block_128 #endif - CMP r12, #0xc + CMP r12, #12 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_AES_CTR_encrypt_start_block_192 #else @@ -2437,10 +2436,10 @@ AES_CTR_encrypt: L_AES_CTR_encrypt_loop_block_256: PUSH {r1, r2, lr} LDR lr, [sp, #16] - ADDS r11, r7, #0x1 - ADCS r10, r6, #0x0 - ADCS r9, r5, #0x0 - ADC r8, r4, #0x0 + ADDS r11, r7, #1 + ADCS r10, r6, #0 + ADCS r9, r5, #0 + ADC r8, r4, #0 STM lr, {r8, r9, r10, r11} LDM r3!, {r8, r9, r10, r11} /* Round: 0 - XOR in key schedule */ @@ -2448,7 +2447,7 @@ L_AES_CTR_encrypt_loop_block_256: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x6 + MOV r1, #6 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_encrypt_block #else @@ -2553,7 +2552,7 @@ L_AES_CTR_encrypt_block_nr_256: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CTR_encrypt_block_nr_256 #else @@ -2680,9 +2679,9 @@ L_AES_CTR_encrypt_block_nr_256: STR r6, [r1, #8] STR r7, [r1, #12] LDM r8, {r4, r5, r6, r7} - SUBS r2, r2, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r2, r2, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CTR_encrypt_loop_block_256 #else @@ -2697,10 +2696,10 @@ L_AES_CTR_encrypt_start_block_192: L_AES_CTR_encrypt_loop_block_192: PUSH {r1, r2, lr} LDR lr, [sp, #16] - ADDS r11, r7, #0x1 - ADCS r10, r6, #0x0 - ADCS r9, r5, #0x0 - ADC r8, r4, #0x0 + ADDS r11, r7, #1 + ADCS r10, r6, #0 + ADCS r9, r5, #0 + ADC r8, r4, #0 STM lr, {r8, r9, r10, r11} LDM r3!, {r8, r9, r10, r11} /* Round: 0 - XOR in key schedule */ @@ -2708,7 +2707,7 @@ L_AES_CTR_encrypt_loop_block_192: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x5 + MOV r1, #5 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_encrypt_block #else @@ -2813,7 +2812,7 @@ L_AES_CTR_encrypt_block_nr_192: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CTR_encrypt_block_nr_192 #else @@ -2940,9 +2939,9 @@ L_AES_CTR_encrypt_block_nr_192: STR r6, [r1, #8] STR r7, [r1, #12] LDM r8, {r4, r5, r6, r7} - SUBS r2, r2, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r2, r2, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CTR_encrypt_loop_block_192 #else @@ -2957,10 +2956,10 @@ L_AES_CTR_encrypt_start_block_128: L_AES_CTR_encrypt_loop_block_128: PUSH {r1, r2, lr} LDR lr, [sp, #16] - ADDS r11, r7, #0x1 - ADCS r10, r6, #0x0 - ADCS r9, r5, #0x0 - ADC r8, r4, #0x0 + ADDS r11, r7, #1 + ADCS r10, r6, #0 + ADCS r9, r5, #0 + ADC r8, r4, #0 STM lr, {r8, r9, r10, r11} LDM r3!, {r8, r9, r10, r11} /* Round: 0 - XOR in key schedule */ @@ -2968,7 +2967,7 @@ L_AES_CTR_encrypt_loop_block_128: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x4 + MOV r1, #4 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_encrypt_block #else @@ -3073,7 +3072,7 @@ L_AES_CTR_encrypt_block_nr_128: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CTR_encrypt_block_nr_128 #else @@ -3200,9 +3199,9 @@ L_AES_CTR_encrypt_block_nr_128: STR r6, [r1, #8] STR r7, [r1, #12] LDM r8, {r4, r5, r6, r7} - SUBS r2, r2, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r2, r2, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CTR_encrypt_loop_block_128 #else @@ -3330,7 +3329,7 @@ L_AES_decrypt_block_nr: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_decrypt_block_nr #else @@ -3513,13 +3512,13 @@ AES_ECB_decrypt: LDR r0, L_AES_Thumb2_td_ecb MOV r12, r2 ADR r2, L_AES_Thumb2_td4 - CMP r8, #0xa + CMP r8, #10 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_AES_ECB_decrypt_start_block_128 #else BEQ.W L_AES_ECB_decrypt_start_block_128 #endif - CMP r8, #0xc + CMP r8, #12 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_AES_ECB_decrypt_start_block_192 #else @@ -3541,7 +3540,7 @@ L_AES_ECB_decrypt_loop_block_256: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x6 + MOV r1, #6 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_decrypt_block #else @@ -3646,7 +3645,7 @@ L_AES_ECB_decrypt_block_nr_256: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_ECB_decrypt_block_nr_256 #else @@ -3762,9 +3761,9 @@ L_AES_ECB_decrypt_block_nr_256: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r12, r12, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r12, r12, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_ECB_decrypt_loop_block_256 #else @@ -3792,7 +3791,7 @@ L_AES_ECB_decrypt_loop_block_192: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x5 + MOV r1, #5 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_decrypt_block #else @@ -3897,7 +3896,7 @@ L_AES_ECB_decrypt_block_nr_192: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_ECB_decrypt_block_nr_192 #else @@ -4013,9 +4012,9 @@ L_AES_ECB_decrypt_block_nr_192: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r12, r12, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r12, r12, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_ECB_decrypt_loop_block_192 #else @@ -4043,7 +4042,7 @@ L_AES_ECB_decrypt_loop_block_128: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x4 + MOV r1, #4 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_decrypt_block #else @@ -4148,7 +4147,7 @@ L_AES_ECB_decrypt_block_nr_128: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_ECB_decrypt_block_nr_128 #else @@ -4264,9 +4263,9 @@ L_AES_ECB_decrypt_block_nr_128: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r12, r12, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r12, r12, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_ECB_decrypt_loop_block_128 #else @@ -4291,13 +4290,13 @@ AES_CBC_decrypt: LDR r8, [sp, #36] LDR r4, [sp, #40] PUSH {r3, r4} - CMP r8, #0xa + CMP r8, #10 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_AES_CBC_decrypt_loop_block_128 #else BEQ.W L_AES_CBC_decrypt_loop_block_128 #endif - CMP r8, #0xc + CMP r8, #12 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_AES_CBC_decrypt_loop_block_192 #else @@ -4322,7 +4321,7 @@ L_AES_CBC_decrypt_loop_block_256: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x6 + MOV r1, #6 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_decrypt_block #else @@ -4427,7 +4426,7 @@ L_AES_CBC_decrypt_block_nr_256_odd: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CBC_decrypt_block_nr_256_odd #else @@ -4550,9 +4549,9 @@ L_AES_CBC_decrypt_block_nr_256_odd: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r12, r12, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r12, r12, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_AES_CBC_decrypt_end_odd #else @@ -4576,7 +4575,7 @@ L_AES_CBC_decrypt_block_nr_256_odd: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x6 + MOV r1, #6 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_decrypt_block #else @@ -4681,7 +4680,7 @@ L_AES_CBC_decrypt_block_nr_256_even: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CBC_decrypt_block_nr_256_even #else @@ -4805,9 +4804,9 @@ L_AES_CBC_decrypt_block_nr_256_even: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r12, r12, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r12, r12, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CBC_decrypt_loop_block_256 #else @@ -4837,7 +4836,7 @@ L_AES_CBC_decrypt_loop_block_192: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x5 + MOV r1, #5 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_decrypt_block #else @@ -4942,7 +4941,7 @@ L_AES_CBC_decrypt_block_nr_192_odd: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CBC_decrypt_block_nr_192_odd #else @@ -5065,9 +5064,9 @@ L_AES_CBC_decrypt_block_nr_192_odd: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r12, r12, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r12, r12, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_AES_CBC_decrypt_end_odd #else @@ -5091,7 +5090,7 @@ L_AES_CBC_decrypt_block_nr_192_odd: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x5 + MOV r1, #5 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_decrypt_block #else @@ -5196,7 +5195,7 @@ L_AES_CBC_decrypt_block_nr_192_even: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CBC_decrypt_block_nr_192_even #else @@ -5320,9 +5319,9 @@ L_AES_CBC_decrypt_block_nr_192_even: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r12, r12, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r12, r12, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CBC_decrypt_loop_block_192 #else @@ -5352,7 +5351,7 @@ L_AES_CBC_decrypt_loop_block_128: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x4 + MOV r1, #4 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_decrypt_block #else @@ -5457,7 +5456,7 @@ L_AES_CBC_decrypt_block_nr_128_odd: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CBC_decrypt_block_nr_128_odd #else @@ -5580,9 +5579,9 @@ L_AES_CBC_decrypt_block_nr_128_odd: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r12, r12, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r12, r12, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_AES_CBC_decrypt_end_odd #else @@ -5606,7 +5605,7 @@ L_AES_CBC_decrypt_block_nr_128_odd: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x4 + MOV r1, #4 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_decrypt_block #else @@ -5711,7 +5710,7 @@ L_AES_CBC_decrypt_block_nr_128_even: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CBC_decrypt_block_nr_128_even #else @@ -5835,9 +5834,9 @@ L_AES_CBC_decrypt_block_nr_128_even: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r12, r12, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r12, r12, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_CBC_decrypt_loop_block_128 #else @@ -5894,14 +5893,13 @@ L_GCM_gmult_len_start_block: LDR r12, [r0, #12] LDR r3, [r2, #12] EOR r12, r12, r3 - LSR r3, r12, #24 - AND r3, r3, #0xf + UBFX r3, r12, #24, #4 ADD r3, r1, r3, LSL #4 LDM r3, {r8, r9, r10, r11} LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #28 + UBFX r4, r12, #28, #4 EOR r11, r11, r10, LSL #28 LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 @@ -5915,11 +5913,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #16 + UBFX r4, r12, #16, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -5932,11 +5929,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #20 + UBFX r4, r12, #20, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -5949,11 +5945,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #8 + UBFX r4, r12, #8, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -5966,11 +5961,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #12 + UBFX r4, r12, #12, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -5983,9 +5977,9 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - AND r4, r12, #0xf + AND r4, r12, #15 EOR r11, r11, r10, LSL #28 LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 @@ -5999,11 +5993,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #4 + UBFX r4, r12, #4, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -6016,7 +6009,7 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 EOR r11, r11, r10, LSL #28 LDR r3, [lr, r3, LSL #2] @@ -6027,8 +6020,7 @@ L_GCM_gmult_len_start_block: LDR r12, [r0, #8] LDR r3, [r2, #8] EOR r12, r12, r3 - LSR r3, r12, #24 - AND r3, r3, #0xf + UBFX r3, r12, #24, #4 ADD r3, r1, r3, LSL #4 LDM r3, {r4, r5, r6, r7} EOR r8, r8, r4 @@ -6036,9 +6028,9 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #28 + UBFX r4, r12, #28, #4 EOR r11, r11, r10, LSL #28 LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 @@ -6052,11 +6044,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #16 + UBFX r4, r12, #16, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -6069,11 +6060,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #20 + UBFX r4, r12, #20, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -6086,11 +6076,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #8 + UBFX r4, r12, #8, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -6103,11 +6092,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #12 + UBFX r4, r12, #12, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -6120,9 +6108,9 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - AND r4, r12, #0xf + AND r4, r12, #15 EOR r11, r11, r10, LSL #28 LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 @@ -6136,11 +6124,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #4 + UBFX r4, r12, #4, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -6153,7 +6140,7 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 EOR r11, r11, r10, LSL #28 LDR r3, [lr, r3, LSL #2] @@ -6164,8 +6151,7 @@ L_GCM_gmult_len_start_block: LDR r12, [r0, #4] LDR r3, [r2, #4] EOR r12, r12, r3 - LSR r3, r12, #24 - AND r3, r3, #0xf + UBFX r3, r12, #24, #4 ADD r3, r1, r3, LSL #4 LDM r3, {r4, r5, r6, r7} EOR r8, r8, r4 @@ -6173,9 +6159,9 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #28 + UBFX r4, r12, #28, #4 EOR r11, r11, r10, LSL #28 LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 @@ -6189,11 +6175,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #16 + UBFX r4, r12, #16, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -6206,11 +6191,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #20 + UBFX r4, r12, #20, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -6223,11 +6207,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #8 + UBFX r4, r12, #8, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -6240,11 +6223,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #12 + UBFX r4, r12, #12, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -6257,9 +6239,9 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - AND r4, r12, #0xf + AND r4, r12, #15 EOR r11, r11, r10, LSL #28 LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 @@ -6273,11 +6255,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #4 + UBFX r4, r12, #4, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -6290,7 +6271,7 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 EOR r11, r11, r10, LSL #28 LDR r3, [lr, r3, LSL #2] @@ -6301,8 +6282,7 @@ L_GCM_gmult_len_start_block: LDR r12, [r0] LDR r3, [r2] EOR r12, r12, r3 - LSR r3, r12, #24 - AND r3, r3, #0xf + UBFX r3, r12, #24, #4 ADD r3, r1, r3, LSL #4 LDM r3, {r4, r5, r6, r7} EOR r8, r8, r4 @@ -6310,9 +6290,9 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #28 + UBFX r4, r12, #28, #4 EOR r11, r11, r10, LSL #28 LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 @@ -6326,11 +6306,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #16 + UBFX r4, r12, #16, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -6343,11 +6322,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #20 + UBFX r4, r12, #20, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -6360,11 +6338,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #8 + UBFX r4, r12, #8, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -6377,11 +6354,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #12 + UBFX r4, r12, #12, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -6394,9 +6370,9 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - AND r4, r12, #0xf + AND r4, r12, #15 EOR r11, r11, r10, LSL #28 LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 @@ -6410,11 +6386,10 @@ L_GCM_gmult_len_start_block: EOR r10, r10, r6 EOR r11, r11, r7 LSR r6, r10, #4 - AND r3, r11, #0xf + AND r3, r11, #15 LSR r11, r11, #4 - LSR r4, r12, #4 + UBFX r4, r12, #4, #4 EOR r11, r11, r10, LSL #28 - AND r4, r4, #0xf LDR r3, [lr, r3, LSL #2] ADD r4, r1, r4, LSL #4 EOR r10, r6, r9, LSL #28 @@ -6432,15 +6407,15 @@ L_GCM_gmult_len_start_block: REV r11, r11 STM r0, {r8, r9, r10, r11} POP {r3} - SUBS r3, r3, #0x10 - ADD r2, r2, #0x10 + SUBS r3, r3, #16 + ADD r2, r2, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_GCM_gmult_len_start_block #else BNE.W L_GCM_gmult_len_start_block #endif POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - /* Cycle Count = 742 */ + /* Cycle Count = 718 */ .size GCM_gmult_len,.-GCM_gmult_len #ifndef __APPLE__ .text @@ -6474,13 +6449,13 @@ AES_GCM_encrypt: REV r7, r7 STM r8, {r4, r5, r6, r7} PUSH {r3, r8} - CMP r12, #0xa + CMP r12, #10 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_AES_GCM_encrypt_start_block_128 #else BEQ.W L_AES_GCM_encrypt_start_block_128 #endif - CMP r12, #0xc + CMP r12, #12 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_AES_GCM_encrypt_start_block_192 #else @@ -6489,7 +6464,7 @@ AES_GCM_encrypt: L_AES_GCM_encrypt_loop_block_256: PUSH {r1, r2, lr} LDR lr, [sp, #16] - ADD r7, r7, #0x1 + ADD r7, r7, #1 LDM r3!, {r8, r9, r10, r11} STR r7, [lr, #12] /* Round: 0 - XOR in key schedule */ @@ -6497,7 +6472,7 @@ L_AES_GCM_encrypt_loop_block_256: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x6 + MOV r1, #6 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_encrypt_block #else @@ -6602,7 +6577,7 @@ L_AES_GCM_encrypt_block_nr_256: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_GCM_encrypt_block_nr_256 #else @@ -6729,9 +6704,9 @@ L_AES_GCM_encrypt_block_nr_256: STR r6, [r1, #8] STR r7, [r1, #12] LDM r8, {r4, r5, r6, r7} - SUBS r2, r2, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r2, r2, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_GCM_encrypt_loop_block_256 #else @@ -6746,7 +6721,7 @@ L_AES_GCM_encrypt_start_block_192: L_AES_GCM_encrypt_loop_block_192: PUSH {r1, r2, lr} LDR lr, [sp, #16] - ADD r7, r7, #0x1 + ADD r7, r7, #1 LDM r3!, {r8, r9, r10, r11} STR r7, [lr, #12] /* Round: 0 - XOR in key schedule */ @@ -6754,7 +6729,7 @@ L_AES_GCM_encrypt_loop_block_192: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x5 + MOV r1, #5 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_encrypt_block #else @@ -6859,7 +6834,7 @@ L_AES_GCM_encrypt_block_nr_192: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_GCM_encrypt_block_nr_192 #else @@ -6986,9 +6961,9 @@ L_AES_GCM_encrypt_block_nr_192: STR r6, [r1, #8] STR r7, [r1, #12] LDM r8, {r4, r5, r6, r7} - SUBS r2, r2, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r2, r2, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_GCM_encrypt_loop_block_192 #else @@ -7003,7 +6978,7 @@ L_AES_GCM_encrypt_start_block_128: L_AES_GCM_encrypt_loop_block_128: PUSH {r1, r2, lr} LDR lr, [sp, #16] - ADD r7, r7, #0x1 + ADD r7, r7, #1 LDM r3!, {r8, r9, r10, r11} STR r7, [lr, #12] /* Round: 0 - XOR in key schedule */ @@ -7011,7 +6986,7 @@ L_AES_GCM_encrypt_loop_block_128: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - MOV r1, #0x4 + MOV r1, #4 #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE BL AES_encrypt_block #else @@ -7116,7 +7091,7 @@ L_AES_GCM_encrypt_block_nr_128: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_GCM_encrypt_block_nr_128 #else @@ -7243,9 +7218,9 @@ L_AES_GCM_encrypt_block_nr_128: STR r6, [r1, #8] STR r7, [r1, #12] LDM r8, {r4, r5, r6, r7} - SUBS r2, r2, #0x10 - ADD lr, lr, #0x10 - ADD r1, r1, #0x10 + SUBS r2, r2, #16 + ADD lr, lr, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_AES_GCM_encrypt_loop_block_128 #else diff --git a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c index 6d332507a92..6ff491d27fc 100644 --- a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c @@ -235,8 +235,8 @@ WC_OMIT_FRAME_POINTER void AES_invert_key(unsigned char* ks, word32 rounds) "LDM r10, {r6, r7, r8, r9}\n\t" "STM r10, {r2, r3, r4, r5}\n\t" "STM %[ks]!, {r6, r7, r8, r9}\n\t" - "SUBS r11, r11, #0x2\n\t" - "SUB r10, r10, #0x10\n\t" + "SUBS r11, r11, #2\n\t" + "SUB r10, r10, #16\n\t" #if defined(__GNUC__) "BNE L_AES_invert_key_loop_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -245,8 +245,8 @@ WC_OMIT_FRAME_POINTER void AES_invert_key(unsigned char* ks, word32 rounds) "BNE.N L_AES_invert_key_loop_%=\n\t" #endif "SUB %[ks], %[ks], %[rounds], LSL #3\n\t" - "ADD %[ks], %[ks], #0x10\n\t" - "SUB r11, %[rounds], #0x1\n\t" + "ADD %[ks], %[ks], #16\n\t" + "SUB r11, %[rounds], #1\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_AES_invert_key_mix_loop:\n\t" @@ -318,7 +318,7 @@ WC_OMIT_FRAME_POINTER void AES_invert_key(unsigned char* ks, word32 rounds) "EOR r8, r8, r7, ROR #8\n\t" "EOR r8, r8, r9, ROR #24\n\t" "STR r8, [%[ks]], #4\n\t" - "SUBS r11, r11, #0x1\n\t" + "SUBS r11, r11, #1\n\t" #if defined(__GNUC__) "BNE L_AES_invert_key_mix_loop_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -410,8 +410,8 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key, "REV r6, r6\n\t" "REV r7, r7\n\t" "STM %[ks], {r4, r5, r6, r7}\n\t" - "SUB %[ks], %[ks], #0x10\n\t" - "MOV r12, #0x6\n\t" + "SUB %[ks], %[ks], #16\n\t" + "MOV r12, #6\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_AES_set_encrypt_key_loop_256:\n\t" @@ -436,14 +436,13 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key, "EOR r5, r5, r4\n\t" "EOR r6, r6, r5\n\t" "EOR r7, r7, r6\n\t" - "ADD %[ks], %[ks], #0x10\n\t" + "ADD %[ks], %[ks], #16\n\t" "STM %[ks], {r4, r5, r6, r7}\n\t" - "SUB %[ks], %[ks], #0x10\n\t" - "MOV r3, r7\n\t" - "UBFX r4, r3, #8, #8\n\t" - "UBFX r5, r3, #16, #8\n\t" - "LSR r6, r3, #24\n\t" - "UBFX r3, r3, #0, #8\n\t" + "SUB %[ks], %[ks], #16\n\t" + "UBFX r4, r7, #8, #8\n\t" + "UBFX r5, r7, #16, #8\n\t" + "LSR r6, r7, #24\n\t" + "UBFX r3, r7, #0, #8\n\t" "LDRB r4, [r10, r4, LSL #2]\n\t" "LDRB r6, [r10, r6, LSL #2]\n\t" "LDRB r5, [r10, r5, LSL #2]\n\t" @@ -456,10 +455,10 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key, "EOR r5, r5, r4\n\t" "EOR r6, r6, r5\n\t" "EOR r7, r7, r6\n\t" - "ADD %[ks], %[ks], #0x10\n\t" + "ADD %[ks], %[ks], #16\n\t" "STM %[ks], {r4, r5, r6, r7}\n\t" - "SUB %[ks], %[ks], #0x10\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUB %[ks], %[ks], #16\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_AES_set_encrypt_key_loop_256_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -485,9 +484,9 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key, "EOR r5, r5, r4\n\t" "EOR r6, r6, r5\n\t" "EOR r7, r7, r6\n\t" - "ADD %[ks], %[ks], #0x10\n\t" + "ADD %[ks], %[ks], #16\n\t" "STM %[ks], {r4, r5, r6, r7}\n\t" - "SUB %[ks], %[ks], #0x10\n\t" + "SUB %[ks], %[ks], #16\n\t" #if defined(__GNUC__) "B L_AES_set_encrypt_key_end_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -516,7 +515,7 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key, "STM %[ks], {r4, r5, r6, r7}\n\t" "STRD r8, r9, [%[ks], #16]\n\t" "MOV r7, r9\n\t" - "MOV r12, #0x7\n\t" + "MOV r12, #7\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_AES_set_encrypt_key_loop_192:\n\t" @@ -544,7 +543,7 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key, "EOR r8, r8, r7\n\t" "EOR r9, r9, r8\n\t" "STM %[ks], {r4, r5, r6, r7, r8, r9}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_AES_set_encrypt_key_loop_192_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -593,7 +592,7 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key, "REV r6, r6\n\t" "REV r7, r7\n\t" "STM %[ks], {r4, r5, r6, r7}\n\t" - "MOV r12, #0xa\n\t" + "MOV r12, #10\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_AES_set_encrypt_key_loop_128:\n\t" @@ -619,7 +618,7 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key, "EOR r6, r6, r5\n\t" "EOR r7, r7, r6\n\t" "STM %[ks], {r4, r5, r6, r7}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_AES_set_encrypt_key_loop_128_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -773,7 +772,7 @@ WC_OMIT_FRAME_POINTER void AES_encrypt_block(const word32* te, int nr, int len, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS %[nr], %[nr], #0x1\n\t" + "SUBS %[nr], %[nr], #1\n\t" #if defined(__GNUC__) "BNE L_AES_encrypt_block_nr_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -935,7 +934,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, "MOV r12, %[nr]\n\t" #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ "PUSH {%[ks]}\n\t" - "CMP r12, #0xa\n\t" + "CMP r12, #10\n\t" #if defined(__GNUC__) "BEQ L_AES_ECB_encrypt_start_block_128_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -943,7 +942,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, #else "BEQ.W L_AES_ECB_encrypt_start_block_128_%=\n\t" #endif - "CMP r12, #0xc\n\t" + "CMP r12, #12\n\t" #if defined(__GNUC__) "BEQ L_AES_ECB_encrypt_start_block_192_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -972,7 +971,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x6\n\t" + "MOV r1, #6\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_encrypt_block\n\t" #else @@ -1082,7 +1081,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_ECB_encrypt_block_nr_256_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1201,9 +1200,9 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS %[len], %[len], #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS %[len], %[len], #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BNE L_AES_ECB_encrypt_loop_block_256_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1245,7 +1244,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x5\n\t" + "MOV r1, #5\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_encrypt_block\n\t" #else @@ -1355,7 +1354,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_ECB_encrypt_block_nr_192_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1474,9 +1473,9 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS %[len], %[len], #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS %[len], %[len], #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BNE L_AES_ECB_encrypt_loop_block_192_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1518,7 +1517,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x4\n\t" + "MOV r1, #4\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_encrypt_block\n\t" #else @@ -1628,7 +1627,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_ECB_encrypt_block_nr_128_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1747,9 +1746,9 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS %[len], %[len], #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS %[len], %[len], #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BNE L_AES_ECB_encrypt_loop_block_128_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1823,7 +1822,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "MOV r0, %[L_AES_Thumb2_te_ecb]\n\t" "LDM r9, {r4, r5, r6, r7}\n\t" "PUSH {%[ks], r9}\n\t" - "CMP r8, #0xa\n\t" + "CMP r8, #10\n\t" #if defined(__GNUC__) "BEQ L_AES_CBC_encrypt_start_block_128_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1831,7 +1830,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, #else "BEQ.W L_AES_CBC_encrypt_start_block_128_%=\n\t" #endif - "CMP r8, #0xc\n\t" + "CMP r8, #12\n\t" #if defined(__GNUC__) "BEQ L_AES_CBC_encrypt_start_block_192_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1864,7 +1863,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x6\n\t" + "MOV r1, #6\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_encrypt_block\n\t" #else @@ -1974,7 +1973,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_CBC_encrypt_block_nr_256_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2093,9 +2092,9 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS %[len], %[len], #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS %[len], %[len], #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BNE L_AES_CBC_encrypt_loop_block_256_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2141,7 +2140,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x5\n\t" + "MOV r1, #5\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_encrypt_block\n\t" #else @@ -2251,7 +2250,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_CBC_encrypt_block_nr_192_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2370,9 +2369,9 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS %[len], %[len], #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS %[len], %[len], #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BNE L_AES_CBC_encrypt_loop_block_192_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2418,7 +2417,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x4\n\t" + "MOV r1, #4\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_encrypt_block\n\t" #else @@ -2528,7 +2527,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_CBC_encrypt_block_nr_128_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2647,9 +2646,9 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS %[len], %[len], #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS %[len], %[len], #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BNE L_AES_CBC_encrypt_loop_block_128_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2731,7 +2730,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, "REV r7, r7\n\t" "STM r8, {r4, r5, r6, r7}\n\t" "PUSH {%[ks], r8}\n\t" - "CMP r12, #0xa\n\t" + "CMP r12, #10\n\t" #if defined(__GNUC__) "BEQ L_AES_CTR_encrypt_start_block_128_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2739,7 +2738,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, #else "BEQ.W L_AES_CTR_encrypt_start_block_128_%=\n\t" #endif - "CMP r12, #0xc\n\t" + "CMP r12, #12\n\t" #if defined(__GNUC__) "BEQ L_AES_CTR_encrypt_start_block_192_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2755,10 +2754,10 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, #endif "PUSH {r1, %[len], lr}\n\t" "LDR lr, [sp, #16]\n\t" - "ADDS r11, r7, #0x1\n\t" - "ADCS r10, r6, #0x0\n\t" - "ADCS r9, r5, #0x0\n\t" - "ADC r8, r4, #0x0\n\t" + "ADDS r11, r7, #1\n\t" + "ADCS r10, r6, #0\n\t" + "ADCS r9, r5, #0\n\t" + "ADC r8, r4, #0\n\t" "STM lr, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t" /* Round: 0 - XOR in key schedule */ @@ -2766,7 +2765,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x6\n\t" + "MOV r1, #6\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_encrypt_block\n\t" #else @@ -2876,7 +2875,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_CTR_encrypt_block_nr_256_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3005,9 +3004,9 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" "LDM r8, {r4, r5, r6, r7}\n\t" - "SUBS %[len], %[len], #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS %[len], %[len], #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BNE L_AES_CTR_encrypt_loop_block_256_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3036,10 +3035,10 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, #endif "PUSH {r1, %[len], lr}\n\t" "LDR lr, [sp, #16]\n\t" - "ADDS r11, r7, #0x1\n\t" - "ADCS r10, r6, #0x0\n\t" - "ADCS r9, r5, #0x0\n\t" - "ADC r8, r4, #0x0\n\t" + "ADDS r11, r7, #1\n\t" + "ADCS r10, r6, #0\n\t" + "ADCS r9, r5, #0\n\t" + "ADC r8, r4, #0\n\t" "STM lr, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t" /* Round: 0 - XOR in key schedule */ @@ -3047,7 +3046,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x5\n\t" + "MOV r1, #5\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_encrypt_block\n\t" #else @@ -3157,7 +3156,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_CTR_encrypt_block_nr_192_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3286,9 +3285,9 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" "LDM r8, {r4, r5, r6, r7}\n\t" - "SUBS %[len], %[len], #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS %[len], %[len], #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BNE L_AES_CTR_encrypt_loop_block_192_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3317,10 +3316,10 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, #endif "PUSH {r1, %[len], lr}\n\t" "LDR lr, [sp, #16]\n\t" - "ADDS r11, r7, #0x1\n\t" - "ADCS r10, r6, #0x0\n\t" - "ADCS r9, r5, #0x0\n\t" - "ADC r8, r4, #0x0\n\t" + "ADDS r11, r7, #1\n\t" + "ADCS r10, r6, #0\n\t" + "ADCS r9, r5, #0\n\t" + "ADC r8, r4, #0\n\t" "STM lr, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t" /* Round: 0 - XOR in key schedule */ @@ -3328,7 +3327,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x4\n\t" + "MOV r1, #4\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_encrypt_block\n\t" #else @@ -3438,7 +3437,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_CTR_encrypt_block_nr_128_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3567,9 +3566,9 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" "LDM r8, {r4, r5, r6, r7}\n\t" - "SUBS %[len], %[len], #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS %[len], %[len], #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BNE L_AES_CTR_encrypt_loop_block_128_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3731,7 +3730,7 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_block(const word32* td, int nr, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS %[nr], %[nr], #0x1\n\t" + "SUBS %[nr], %[nr], #1\n\t" #if defined(__GNUC__) "BNE L_AES_decrypt_block_nr_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3927,7 +3926,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, "MOV r0, %[L_AES_Thumb2_td_ecb]\n\t" "MOV r12, %[len]\n\t" "MOV r2, %[L_AES_Thumb2_td4]\n\t" - "CMP r8, #0xa\n\t" + "CMP r8, #10\n\t" #if defined(__GNUC__) "BEQ L_AES_ECB_decrypt_start_block_128_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3935,7 +3934,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, #else "BEQ.W L_AES_ECB_decrypt_start_block_128_%=\n\t" #endif - "CMP r8, #0xc\n\t" + "CMP r8, #12\n\t" #if defined(__GNUC__) "BEQ L_AES_ECB_decrypt_start_block_192_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3964,7 +3963,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x6\n\t" + "MOV r1, #6\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_decrypt_block\n\t" #else @@ -4074,7 +4073,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_ECB_decrypt_block_nr_256_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4192,9 +4191,9 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS r12, r12, #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS r12, r12, #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BNE L_AES_ECB_decrypt_loop_block_256_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4236,7 +4235,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x5\n\t" + "MOV r1, #5\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_decrypt_block\n\t" #else @@ -4346,7 +4345,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_ECB_decrypt_block_nr_192_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4464,9 +4463,9 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS r12, r12, #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS r12, r12, #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BNE L_AES_ECB_decrypt_loop_block_192_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4508,7 +4507,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x4\n\t" + "MOV r1, #4\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_decrypt_block\n\t" #else @@ -4618,7 +4617,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_ECB_decrypt_block_nr_128_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4736,9 +4735,9 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS r12, r12, #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS r12, r12, #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BNE L_AES_ECB_decrypt_loop_block_128_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4816,7 +4815,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "MOV r4, %[iv]\n\t" #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ "PUSH {%[ks], r4}\n\t" - "CMP r8, #0xa\n\t" + "CMP r8, #10\n\t" #if defined(__GNUC__) "BEQ L_AES_CBC_decrypt_loop_block_128_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4824,7 +4823,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, #else "BEQ.W L_AES_CBC_decrypt_loop_block_128_%=\n\t" #endif - "CMP r8, #0xc\n\t" + "CMP r8, #12\n\t" #if defined(__GNUC__) "BEQ L_AES_CBC_decrypt_loop_block_192_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4856,7 +4855,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x6\n\t" + "MOV r1, #6\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_decrypt_block\n\t" #else @@ -4966,7 +4965,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_CBC_decrypt_block_nr_256_odd_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -5091,9 +5090,9 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS r12, r12, #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS r12, r12, #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BEQ L_AES_CBC_decrypt_end_odd_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -5119,7 +5118,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x6\n\t" + "MOV r1, #6\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_decrypt_block\n\t" #else @@ -5229,7 +5228,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_CBC_decrypt_block_nr_256_even_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -5355,9 +5354,9 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS r12, r12, #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS r12, r12, #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BNE L_AES_CBC_decrypt_loop_block_256_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -5396,7 +5395,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x5\n\t" + "MOV r1, #5\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_decrypt_block\n\t" #else @@ -5506,7 +5505,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_CBC_decrypt_block_nr_192_odd_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -5631,9 +5630,9 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS r12, r12, #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS r12, r12, #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BEQ L_AES_CBC_decrypt_end_odd_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -5659,7 +5658,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x5\n\t" + "MOV r1, #5\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_decrypt_block\n\t" #else @@ -5769,7 +5768,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_CBC_decrypt_block_nr_192_even_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -5895,9 +5894,9 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS r12, r12, #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS r12, r12, #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BNE L_AES_CBC_decrypt_loop_block_192_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -5936,7 +5935,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x4\n\t" + "MOV r1, #4\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_decrypt_block\n\t" #else @@ -6046,7 +6045,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_CBC_decrypt_block_nr_128_odd_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -6171,9 +6170,9 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS r12, r12, #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS r12, r12, #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BEQ L_AES_CBC_decrypt_end_odd_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -6199,7 +6198,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x4\n\t" + "MOV r1, #4\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_decrypt_block\n\t" #else @@ -6309,7 +6308,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_CBC_decrypt_block_nr_128_even_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -6435,9 +6434,9 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS r12, r12, #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS r12, r12, #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BNE L_AES_CBC_decrypt_loop_block_128_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -6534,14 +6533,13 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "LDR r12, [r0, #12]\n\t" "LDR %[len], [r2, #12]\n\t" "EOR r12, r12, %[len]\n\t" - "LSR %[len], r12, #24\n\t" - "AND %[len], %[len], #0xf\n\t" + "UBFX %[len], r12, #24, #4\n\t" "ADD %[len], %[m], %[len], LSL #4\n\t" "LDM %[len], {r8, r9, r10, r11}\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #28\n\t" + "UBFX r4, r12, #28, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" @@ -6555,11 +6553,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #16\n\t" + "UBFX r4, r12, #16, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -6572,11 +6569,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #20\n\t" + "UBFX r4, r12, #20, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -6589,11 +6585,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #8\n\t" + "UBFX r4, r12, #8, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -6606,11 +6601,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #12\n\t" + "UBFX r4, r12, #12, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -6623,9 +6617,9 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "AND r4, r12, #0xf\n\t" + "AND r4, r12, #15\n\t" "EOR r11, r11, r10, LSL #28\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" @@ -6639,11 +6633,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #4\n\t" + "UBFX r4, r12, #4, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -6656,7 +6649,7 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" @@ -6667,8 +6660,7 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "LDR r12, [r0, #8]\n\t" "LDR %[len], [r2, #8]\n\t" "EOR r12, r12, %[len]\n\t" - "LSR %[len], r12, #24\n\t" - "AND %[len], %[len], #0xf\n\t" + "UBFX %[len], r12, #24, #4\n\t" "ADD %[len], %[m], %[len], LSL #4\n\t" "LDM %[len], {r4, r5, r6, r7}\n\t" "EOR r8, r8, r4\n\t" @@ -6676,9 +6668,9 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #28\n\t" + "UBFX r4, r12, #28, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" @@ -6692,11 +6684,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #16\n\t" + "UBFX r4, r12, #16, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -6709,11 +6700,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #20\n\t" + "UBFX r4, r12, #20, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -6726,11 +6716,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #8\n\t" + "UBFX r4, r12, #8, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -6743,11 +6732,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #12\n\t" + "UBFX r4, r12, #12, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -6760,9 +6748,9 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "AND r4, r12, #0xf\n\t" + "AND r4, r12, #15\n\t" "EOR r11, r11, r10, LSL #28\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" @@ -6776,11 +6764,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #4\n\t" + "UBFX r4, r12, #4, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -6793,7 +6780,7 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" @@ -6804,8 +6791,7 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "LDR r12, [r0, #4]\n\t" "LDR %[len], [r2, #4]\n\t" "EOR r12, r12, %[len]\n\t" - "LSR %[len], r12, #24\n\t" - "AND %[len], %[len], #0xf\n\t" + "UBFX %[len], r12, #24, #4\n\t" "ADD %[len], %[m], %[len], LSL #4\n\t" "LDM %[len], {r4, r5, r6, r7}\n\t" "EOR r8, r8, r4\n\t" @@ -6813,9 +6799,9 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #28\n\t" + "UBFX r4, r12, #28, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" @@ -6829,11 +6815,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #16\n\t" + "UBFX r4, r12, #16, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -6846,11 +6831,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #20\n\t" + "UBFX r4, r12, #20, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -6863,11 +6847,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #8\n\t" + "UBFX r4, r12, #8, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -6880,11 +6863,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #12\n\t" + "UBFX r4, r12, #12, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -6897,9 +6879,9 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "AND r4, r12, #0xf\n\t" + "AND r4, r12, #15\n\t" "EOR r11, r11, r10, LSL #28\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" @@ -6913,11 +6895,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #4\n\t" + "UBFX r4, r12, #4, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -6930,7 +6911,7 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" @@ -6941,8 +6922,7 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "LDR r12, [r0]\n\t" "LDR %[len], [r2]\n\t" "EOR r12, r12, %[len]\n\t" - "LSR %[len], r12, #24\n\t" - "AND %[len], %[len], #0xf\n\t" + "UBFX %[len], r12, #24, #4\n\t" "ADD %[len], %[m], %[len], LSL #4\n\t" "LDM %[len], {r4, r5, r6, r7}\n\t" "EOR r8, r8, r4\n\t" @@ -6950,9 +6930,9 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #28\n\t" + "UBFX r4, r12, #28, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" @@ -6966,11 +6946,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #16\n\t" + "UBFX r4, r12, #16, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -6983,11 +6962,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #20\n\t" + "UBFX r4, r12, #20, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -7000,11 +6978,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #8\n\t" + "UBFX r4, r12, #8, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -7017,11 +6994,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #12\n\t" + "UBFX r4, r12, #12, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -7034,9 +7010,9 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "AND r4, r12, #0xf\n\t" + "AND r4, r12, #15\n\t" "EOR r11, r11, r10, LSL #28\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" @@ -7050,11 +7026,10 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" "LSR r6, r10, #4\n\t" - "AND %[len], r11, #0xf\n\t" + "AND %[len], r11, #15\n\t" "LSR r11, r11, #4\n\t" - "LSR r4, r12, #4\n\t" + "UBFX r4, r12, #4, #4\n\t" "EOR r11, r11, r10, LSL #28\n\t" - "AND r4, r4, #0xf\n\t" "LDR %[len], [lr, r3, LSL #2]\n\t" "ADD r4, %[m], r4, LSL #4\n\t" "EOR r10, r6, r9, LSL #28\n\t" @@ -7072,8 +7047,8 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "REV r11, r11\n\t" "STM %[x], {r8, r9, r10, r11}\n\t" "POP {r3}\n\t" - "SUBS %[len], %[len], #0x10\n\t" - "ADD %[data], %[data], #0x10\n\t" + "SUBS %[len], %[len], #16\n\t" + "ADD %[data], %[data], #16\n\t" #if defined(__GNUC__) "BNE L_GCM_gmult_len_start_block_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -7144,7 +7119,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, "REV r7, r7\n\t" "STM r8, {r4, r5, r6, r7}\n\t" "PUSH {%[ks], r8}\n\t" - "CMP r12, #0xa\n\t" + "CMP r12, #10\n\t" #if defined(__GNUC__) "BEQ L_AES_GCM_encrypt_start_block_128_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -7152,7 +7127,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, #else "BEQ.W L_AES_GCM_encrypt_start_block_128_%=\n\t" #endif - "CMP r12, #0xc\n\t" + "CMP r12, #12\n\t" #if defined(__GNUC__) "BEQ L_AES_GCM_encrypt_start_block_192_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -7168,7 +7143,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, #endif "PUSH {r1, %[len], lr}\n\t" "LDR lr, [sp, #16]\n\t" - "ADD r7, r7, #0x1\n\t" + "ADD r7, r7, #1\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t" "STR r7, [lr, #12]\n\t" /* Round: 0 - XOR in key schedule */ @@ -7176,7 +7151,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x6\n\t" + "MOV r1, #6\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_encrypt_block\n\t" #else @@ -7286,7 +7261,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_GCM_encrypt_block_nr_256_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -7415,9 +7390,9 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" "LDM r8, {r4, r5, r6, r7}\n\t" - "SUBS %[len], %[len], #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS %[len], %[len], #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BNE L_AES_GCM_encrypt_loop_block_256_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -7446,7 +7421,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, #endif "PUSH {r1, %[len], lr}\n\t" "LDR lr, [sp, #16]\n\t" - "ADD r7, r7, #0x1\n\t" + "ADD r7, r7, #1\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t" "STR r7, [lr, #12]\n\t" /* Round: 0 - XOR in key schedule */ @@ -7454,7 +7429,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x5\n\t" + "MOV r1, #5\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_encrypt_block\n\t" #else @@ -7564,7 +7539,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_GCM_encrypt_block_nr_192_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -7693,9 +7668,9 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" "LDM r8, {r4, r5, r6, r7}\n\t" - "SUBS %[len], %[len], #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS %[len], %[len], #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BNE L_AES_GCM_encrypt_loop_block_192_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -7724,7 +7699,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, #endif "PUSH {r1, %[len], lr}\n\t" "LDR lr, [sp, #16]\n\t" - "ADD r7, r7, #0x1\n\t" + "ADD r7, r7, #1\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t" "STR r7, [lr, #12]\n\t" /* Round: 0 - XOR in key schedule */ @@ -7732,7 +7707,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "MOV r1, #0x4\n\t" + "MOV r1, #4\n\t" #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE "BL AES_encrypt_block\n\t" #else @@ -7842,7 +7817,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS r1, r1, #0x1\n\t" + "SUBS r1, r1, #1\n\t" #if defined(__GNUC__) "BNE L_AES_GCM_encrypt_block_nr_128_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -7971,9 +7946,9 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" "LDM r8, {r4, r5, r6, r7}\n\t" - "SUBS %[len], %[len], #0x10\n\t" - "ADD lr, lr, #0x10\n\t" - "ADD %[out], %[out], #0x10\n\t" + "SUBS %[len], %[len], #16\n\t" + "ADD lr, lr, #16\n\t" + "ADD %[out], %[out], #16\n\t" #if defined(__GNUC__) "BNE L_AES_GCM_encrypt_loop_block_128_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) diff --git a/wolfcrypt/src/port/arm/thumb2-chacha-asm.S b/wolfcrypt/src/port/arm/thumb2-chacha-asm.S index 775c3f51483..22e5a640e5b 100644 --- a/wolfcrypt/src/port/arm/thumb2-chacha-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-chacha-asm.S @@ -39,7 +39,7 @@ .type wc_chacha_setiv, %function wc_chacha_setiv: PUSH {r4, r5, r6, lr} - ADD r3, r0, #0x34 + ADD r3, r0, #52 LDR r4, [r1] LDR r5, [r1, #4] LDR r6, [r1, #8] @@ -76,7 +76,7 @@ L_chacha_thumb2_constants: wc_chacha_setkey: PUSH {r4, r5, r6, r7, lr} ADR r7, L_chacha_thumb2_constants - SUBS r2, r2, #0x10 + SUBS r2, r2, #16 ADD r7, r7, r2 /* Start state with constants */ LDM r7, {r3, r4, r5, r6} @@ -116,7 +116,7 @@ L_chacha_thumb2_setkey_same_key_bytes: .type wc_chacha_crypt_bytes, %function wc_chacha_crypt_bytes: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} - SUB sp, sp, #0x34 + SUB sp, sp, #52 MOV lr, r0 STRD r0, r1, [sp, #32] STRD r2, r3, [sp, #40] @@ -129,7 +129,7 @@ L_chacha_thumb2_crypt_block: /* Load x[0]..x[12] into registers. */ LDM lr, {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12} /* 10x 2 full rounds to perform. */ - MOV lr, #0xa + MOV lr, #10 STR lr, [sp, #48] L_chacha_thumb2_crypt_loop: /* 0, 4, 8, 12 */ @@ -248,7 +248,7 @@ L_chacha_thumb2_crypt_loop: STR lr, [sp, #20] /* Check if we have done enough rounds. */ LDR lr, [sp, #48] - SUBS lr, lr, #0x1 + SUBS lr, lr, #1 STR lr, [sp, #48] #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BGT L_chacha_thumb2_crypt_loop @@ -283,7 +283,7 @@ L_chacha_thumb2_crypt_loop: LDM lr!, {r10, r11} ADD r8, r8, r10 ADD r9, r9, r11 - ADD r10, r10, #0x1 + ADD r10, r10, #1 STM r12!, {r8, r9} STR r10, [lr, #-8] LDM r12, {r8, r9} @@ -388,7 +388,7 @@ L_chacha_thumb2_crypt_lt_block: STR r12, [lr, #64] ADD lr, lr, #0x44 L_chacha_thumb2_crypt_16byte_loop: - CMP r3, #0x10 + CMP r3, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BLT L_chacha_thumb2_crypt_word_loop #else @@ -404,7 +404,7 @@ L_chacha_thumb2_crypt_16byte_loop: EOR r9, r9, r5 EOR r10, r10, r6 EOR r11, r11, r7 - SUBS r3, r3, #0x10 + SUBS r3, r3, #16 STR r8, [r1] STR r9, [r1, #4] STR r10, [r1, #8] @@ -414,15 +414,15 @@ L_chacha_thumb2_crypt_16byte_loop: #else BEQ.N L_chacha_thumb2_crypt_done #endif - ADD r2, r2, #0x10 - ADD r1, r1, #0x10 + ADD r2, r2, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) B L_chacha_thumb2_crypt_16byte_loop #else B.N L_chacha_thumb2_crypt_16byte_loop #endif L_chacha_thumb2_crypt_word_loop: - CMP r3, #0x4 + CMP r3, #4 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BLT L_chacha_thumb2_crypt_byte_start #else @@ -432,16 +432,16 @@ L_chacha_thumb2_crypt_word_loop: LDR r4, [lr] LDR r8, [r2] EOR r8, r8, r4 - SUBS r3, r3, #0x4 + SUBS r3, r3, #4 STR r8, [r1] #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_chacha_thumb2_crypt_done #else BEQ.N L_chacha_thumb2_crypt_done #endif - ADD lr, lr, #0x4 - ADD r2, r2, #0x4 - ADD r1, r1, #0x4 + ADD lr, lr, #4 + ADD r2, r2, #4 + ADD r1, r1, #4 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) B L_chacha_thumb2_crypt_word_loop #else @@ -452,7 +452,7 @@ L_chacha_thumb2_crypt_byte_start: L_chacha_thumb2_crypt_byte_loop: LDRB r8, [r2] EOR r8, r8, r4 - SUBS r3, r3, #0x1 + SUBS r3, r3, #1 STRB r8, [r1] #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_chacha_thumb2_crypt_done @@ -460,15 +460,15 @@ L_chacha_thumb2_crypt_byte_loop: BEQ.N L_chacha_thumb2_crypt_done #endif LSR r4, r4, #8 - ADD r2, r2, #0x1 - ADD r1, r1, #0x1 + ADD r2, r2, #1 + ADD r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) B L_chacha_thumb2_crypt_byte_loop #else B.N L_chacha_thumb2_crypt_byte_loop #endif L_chacha_thumb2_crypt_done: - ADD sp, sp, #0x34 + ADD sp, sp, #52 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 508 */ .size wc_chacha_crypt_bytes,.-wc_chacha_crypt_bytes @@ -479,7 +479,7 @@ L_chacha_thumb2_crypt_done: wc_chacha_use_over: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} L_chacha_thumb2_over_16byte_loop: - CMP r3, #0x10 + CMP r3, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BLT L_chacha_thumb2_over_word_loop #else @@ -498,7 +498,7 @@ L_chacha_thumb2_over_16byte_loop: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 - SUBS r3, r3, #0x10 + SUBS r3, r3, #16 STR r4, [r1] STR r5, [r1, #4] STR r6, [r1, #8] @@ -508,16 +508,16 @@ L_chacha_thumb2_over_16byte_loop: #else BEQ.N L_chacha_thumb2_over_done #endif - ADD r0, r0, #0x10 - ADD r2, r2, #0x10 - ADD r1, r1, #0x10 + ADD r0, r0, #16 + ADD r2, r2, #16 + ADD r1, r1, #16 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) B L_chacha_thumb2_over_16byte_loop #else B.N L_chacha_thumb2_over_16byte_loop #endif L_chacha_thumb2_over_word_loop: - CMP r3, #0x4 + CMP r3, #4 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BLT L_chacha_thumb2_over_byte_loop #else @@ -527,16 +527,16 @@ L_chacha_thumb2_over_word_loop: LDR r4, [r0] LDR r8, [r2] EOR r4, r4, r8 - SUBS r3, r3, #0x4 + SUBS r3, r3, #4 STR r4, [r1] #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_chacha_thumb2_over_done #else BEQ.N L_chacha_thumb2_over_done #endif - ADD r0, r0, #0x4 - ADD r2, r2, #0x4 - ADD r1, r1, #0x4 + ADD r0, r0, #4 + ADD r2, r2, #4 + ADD r1, r1, #4 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) B L_chacha_thumb2_over_word_loop #else @@ -547,16 +547,16 @@ L_chacha_thumb2_over_byte_loop: LDRB r4, [r0] LDRB r8, [r2] EOR r4, r4, r8 - SUBS r3, r3, #0x1 + SUBS r3, r3, #1 STRB r4, [r1] #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_chacha_thumb2_over_done #else BEQ.N L_chacha_thumb2_over_done #endif - ADD r0, r0, #0x1 - ADD r2, r2, #0x1 - ADD r1, r1, #0x1 + ADD r0, r0, #1 + ADD r2, r2, #1 + ADD r1, r1, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) B L_chacha_thumb2_over_byte_loop #else diff --git a/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c b/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c index cfaf6fa2ddb..72668873894 100644 --- a/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c @@ -65,7 +65,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setiv(word32* x, const byte* iv, #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "ADD r3, %[x], #0x34\n\t" + "ADD r3, %[x], #52\n\t" "LDR r4, [%[iv]]\n\t" "LDR r5, [%[iv], #4]\n\t" "LDR r6, [%[iv], #8]\n\t" @@ -113,7 +113,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key, __asm__ __volatile__ ( "MOV r7, %[L_chacha_thumb2_constants]\n\t" - "SUBS %[keySz], %[keySz], #0x10\n\t" + "SUBS %[keySz], %[keySz], #16\n\t" "ADD r7, r7, %[keySz]\n\t" /* Start state with constants */ "LDM r7, {r3, r4, r5, r6}\n\t" @@ -180,7 +180,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "SUB sp, sp, #0x34\n\t" + "SUB sp, sp, #52\n\t" "MOV lr, %[ctx]\n\t" "STRD %[ctx], %[c], [sp, #32]\n\t" "STRD %[m], %[len], [sp, #40]\n\t" @@ -198,7 +198,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, /* Load x[0]..x[12] into registers. */ "LDM lr, {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12}\n\t" /* 10x 2 full rounds to perform. */ - "MOV lr, #0xa\n\t" + "MOV lr, #10\n\t" "STR lr, [sp, #48]\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -322,7 +322,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "STR lr, [sp, #20]\n\t" /* Check if we have done enough rounds. */ "LDR lr, [sp, #48]\n\t" - "SUBS lr, lr, #0x1\n\t" + "SUBS lr, lr, #1\n\t" "STR lr, [sp, #48]\n\t" #if defined(__GNUC__) "BGT L_chacha_thumb2_crypt_loop_%=\n\t" @@ -359,7 +359,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "LDM lr!, {r10, r11}\n\t" "ADD r8, r8, r10\n\t" "ADD r9, r9, r11\n\t" - "ADD r10, r10, #0x1\n\t" + "ADD r10, r10, #1\n\t" "STM r12!, {r8, r9}\n\t" "STR r10, [lr, #-8]\n\t" "LDM r12, {r8, r9}\n\t" @@ -480,7 +480,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, #else "L_chacha_thumb2_crypt_16byte_loop_%=:\n\t" #endif - "CMP %[len], #0x10\n\t" + "CMP %[len], #16\n\t" #if defined(__GNUC__) "BLT L_chacha_thumb2_crypt_word_loop_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -498,7 +498,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "EOR r9, r9, r5\n\t" "EOR r10, r10, r6\n\t" "EOR r11, r11, r7\n\t" - "SUBS %[len], %[len], #0x10\n\t" + "SUBS %[len], %[len], #16\n\t" "STR r8, [%[c]]\n\t" "STR r9, [%[c], #4]\n\t" "STR r10, [%[c], #8]\n\t" @@ -510,8 +510,8 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, #else "BEQ.N L_chacha_thumb2_crypt_done_%=\n\t" #endif - "ADD %[m], %[m], #0x10\n\t" - "ADD %[c], %[c], #0x10\n\t" + "ADD %[m], %[m], #16\n\t" + "ADD %[c], %[c], #16\n\t" #if defined(__GNUC__) "B L_chacha_thumb2_crypt_16byte_loop_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -525,7 +525,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, #else "L_chacha_thumb2_crypt_word_loop_%=:\n\t" #endif - "CMP %[len], #0x4\n\t" + "CMP %[len], #4\n\t" #if defined(__GNUC__) "BLT L_chacha_thumb2_crypt_byte_start_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -537,7 +537,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "LDR r4, [lr]\n\t" "LDR r8, [%[m]]\n\t" "EOR r8, r8, r4\n\t" - "SUBS %[len], %[len], #0x4\n\t" + "SUBS %[len], %[len], #4\n\t" "STR r8, [%[c]]\n\t" #if defined(__GNUC__) "BEQ L_chacha_thumb2_crypt_done_%=\n\t" @@ -546,9 +546,9 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, #else "BEQ.N L_chacha_thumb2_crypt_done_%=\n\t" #endif - "ADD lr, lr, #0x4\n\t" - "ADD %[m], %[m], #0x4\n\t" - "ADD %[c], %[c], #0x4\n\t" + "ADD lr, lr, #4\n\t" + "ADD %[m], %[m], #4\n\t" + "ADD %[c], %[c], #4\n\t" #if defined(__GNUC__) "B L_chacha_thumb2_crypt_word_loop_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -571,7 +571,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, #endif "LDRB r8, [%[m]]\n\t" "EOR r8, r8, r4\n\t" - "SUBS %[len], %[len], #0x1\n\t" + "SUBS %[len], %[len], #1\n\t" "STRB r8, [%[c]]\n\t" #if defined(__GNUC__) "BEQ L_chacha_thumb2_crypt_done_%=\n\t" @@ -581,8 +581,8 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "BEQ.N L_chacha_thumb2_crypt_done_%=\n\t" #endif "LSR r4, r4, #8\n\t" - "ADD %[m], %[m], #0x1\n\t" - "ADD %[c], %[c], #0x1\n\t" + "ADD %[m], %[m], #1\n\t" + "ADD %[c], %[c], #1\n\t" #if defined(__GNUC__) "B L_chacha_thumb2_crypt_byte_loop_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -596,7 +596,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, #else "L_chacha_thumb2_crypt_done_%=:\n\t" #endif - "ADD sp, sp, #0x34\n\t" + "ADD sp, sp, #52\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [ctx] "+r" (ctx), [c] "+r" (c), [m] "+r" (m), [len] "+r" (len) : @@ -631,7 +631,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output, #else "L_chacha_thumb2_over_16byte_loop_%=:\n\t" #endif - "CMP %[len], #0x10\n\t" + "CMP %[len], #16\n\t" #if defined(__GNUC__) "BLT L_chacha_thumb2_over_word_loop_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -652,7 +652,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - "SUBS %[len], %[len], #0x10\n\t" + "SUBS %[len], %[len], #16\n\t" "STR r4, [%[output]]\n\t" "STR r5, [%[output], #4]\n\t" "STR r6, [%[output], #8]\n\t" @@ -664,9 +664,9 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output, #else "BEQ.N L_chacha_thumb2_over_done_%=\n\t" #endif - "ADD %[over], %[over], #0x10\n\t" - "ADD %[input], %[input], #0x10\n\t" - "ADD %[output], %[output], #0x10\n\t" + "ADD %[over], %[over], #16\n\t" + "ADD %[input], %[input], #16\n\t" + "ADD %[output], %[output], #16\n\t" #if defined(__GNUC__) "B L_chacha_thumb2_over_16byte_loop_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -680,7 +680,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output, #else "L_chacha_thumb2_over_word_loop_%=:\n\t" #endif - "CMP %[len], #0x4\n\t" + "CMP %[len], #4\n\t" #if defined(__GNUC__) "BLT L_chacha_thumb2_over_byte_loop_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -692,7 +692,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output, "LDR r4, [%[over]]\n\t" "LDR r8, [%[input]]\n\t" "EOR r4, r4, r8\n\t" - "SUBS %[len], %[len], #0x4\n\t" + "SUBS %[len], %[len], #4\n\t" "STR r4, [%[output]]\n\t" #if defined(__GNUC__) "BEQ L_chacha_thumb2_over_done_%=\n\t" @@ -701,9 +701,9 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output, #else "BEQ.N L_chacha_thumb2_over_done_%=\n\t" #endif - "ADD %[over], %[over], #0x4\n\t" - "ADD %[input], %[input], #0x4\n\t" - "ADD %[output], %[output], #0x4\n\t" + "ADD %[over], %[over], #4\n\t" + "ADD %[input], %[input], #4\n\t" + "ADD %[output], %[output], #4\n\t" #if defined(__GNUC__) "B L_chacha_thumb2_over_word_loop_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -721,7 +721,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output, "LDRB r4, [%[over]]\n\t" "LDRB r8, [%[input]]\n\t" "EOR r4, r4, r8\n\t" - "SUBS %[len], %[len], #0x1\n\t" + "SUBS %[len], %[len], #1\n\t" "STRB r4, [%[output]]\n\t" #if defined(__GNUC__) "BEQ L_chacha_thumb2_over_done_%=\n\t" @@ -730,9 +730,9 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output, #else "BEQ.N L_chacha_thumb2_over_done_%=\n\t" #endif - "ADD %[over], %[over], #0x1\n\t" - "ADD %[input], %[input], #0x1\n\t" - "ADD %[output], %[output], #0x1\n\t" + "ADD %[over], %[over], #1\n\t" + "ADD %[input], %[input], #1\n\t" + "ADD %[output], %[output], #1\n\t" #if defined(__GNUC__) "B L_chacha_thumb2_over_byte_loop_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) diff --git a/wolfcrypt/src/port/arm/thumb2-curve25519.S b/wolfcrypt/src/port/arm/thumb2-curve25519.S index dbc3f327e91..29d3399a0cf 100644 --- a/wolfcrypt/src/port/arm/thumb2-curve25519.S +++ b/wolfcrypt/src/port/arm/thumb2-curve25519.S @@ -54,9 +54,9 @@ fe_add_sub_op: LDRD r6, r7, [r3] /* Add */ ADDS r8, r4, r6 - MOV r12, #0x0 + MOV r12, #0 ADCS r9, r5, r7 - ADC r12, r12, #0x0 + ADC r12, r12, #0 STRD r8, r9, [r0] /* Sub */ SUBS r10, r4, r6 @@ -66,12 +66,12 @@ fe_add_sub_op: LDRD r6, r7, [r3, #8] /* Sub */ SBCS r10, r4, r6 - MOV lr, #0x0 + MOV lr, #0 SBCS r11, r5, r7 - ADC lr, lr, #0x0 + ADC lr, lr, #0 STRD r10, r11, [r1, #8] /* Add */ - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 ADCS r8, r4, r6 ADCS r9, r5, r7 STRD r8, r9, [r0, #8] @@ -79,12 +79,12 @@ fe_add_sub_op: LDRD r6, r7, [r3, #16] /* Add */ ADCS r8, r4, r6 - MOV r12, #0x0 + MOV r12, #0 ADCS r9, r5, r7 - ADC r12, r12, #0x0 + ADC r12, r12, #0 STRD r8, r9, [r0, #16] /* Sub */ - SUBS lr, lr, #0x1 + SUBS lr, lr, #1 SBCS r10, r4, r6 SBCS r11, r5, r7 STRD r10, r11, [r1, #16] @@ -94,46 +94,46 @@ fe_add_sub_op: SBCS r10, r4, r6 SBC r11, r5, r7 /* Add */ - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 ADCS r8, r4, r6 - MOV r12, #0x0 + MOV r12, #0 ADCS r9, r5, r7 - ADC r12, r12, #0x0 + ADC r12, r12, #0 /* Multiply -modulus by overflow */ LSL r3, r12, #1 - MOV r12, #0x13 + MOV r12, #19 ORR r3, r3, r9, LSR #31 MUL r12, r3, r12 /* Add -x*modulus (if overflow) */ LDRD r4, r5, [r0] LDRD r6, r7, [r0, #8] ADDS r4, r4, r12 - ADCS r5, r5, #0x0 - ADCS r6, r6, #0x0 - ADCS r7, r7, #0x0 + ADCS r5, r5, #0 + ADCS r6, r6, #0 + ADCS r7, r7, #0 STRD r4, r5, [r0] STRD r6, r7, [r0, #8] LDRD r4, r5, [r0, #16] - ADCS r4, r4, #0x0 - ADCS r5, r5, #0x0 + ADCS r4, r4, #0 + ADCS r5, r5, #0 STRD r4, r5, [r0, #16] BFC r9, #31, #1 - ADCS r8, r8, #0x0 - ADC r9, r9, #0x0 + ADCS r8, r8, #0 + ADC r9, r9, #0 STRD r8, r9, [r0, #24] /* Add -modulus on underflow */ - MOV lr, #0x13 + MOV lr, #19 AND lr, lr, r11, ASR #31 LDM r1, {r4, r5, r6, r7, r8, r9} SUBS r4, r4, lr - SBCS r5, r5, #0x0 - SBCS r6, r6, #0x0 - SBCS r7, r7, #0x0 - SBCS r8, r8, #0x0 - SBCS r9, r9, #0x0 + SBCS r5, r5, #0 + SBCS r6, r6, #0 + SBCS r7, r7, #0 + SBCS r8, r8, #0 + SBCS r9, r9, #0 BFC r11, #31, #1 - SBCS r10, r10, #0x0 - SBC r11, r11, #0x0 + SBCS r10, r10, #0 + SBC r11, r11, #0 STM r1, {r4, r5, r6, r7, r8, r9, r10, r11} /* Done Add-Sub */ POP {pc} @@ -157,17 +157,17 @@ fe_sub_op: SBCS r11, r3, r11 SBCS r12, r4, r12 SBC lr, r5, lr - MOV r2, #0x13 + MOV r2, #19 AND r2, r2, lr, ASR #31 SUBS r6, r6, r2 - SBCS r7, r7, #0x0 - SBCS r8, r8, #0x0 - SBCS r9, r9, #0x0 - SBCS r10, r10, #0x0 - SBCS r11, r11, #0x0 + SBCS r7, r7, #0 + SBCS r8, r8, #0 + SBCS r9, r9, #0 + SBCS r10, r10, #0 + SBCS r11, r11, #0 BFC lr, #31, #1 - SBCS r12, r12, #0x0 - SBC lr, lr, #0x0 + SBCS r12, r12, #0 + SBC lr, lr, #0 STM r0, {r6, r7, r8, r9, r10, r11, r12, lr} /* Done Sub */ POP {pc} @@ -201,17 +201,17 @@ fe_add_op: ADCS r11, r3, r11 ADCS r12, r4, r12 ADC lr, r5, lr - MOV r2, #0x13 + MOV r2, #19 AND r2, r2, lr, ASR #31 ADDS r6, r6, r2 - ADCS r7, r7, #0x0 - ADCS r8, r8, #0x0 - ADCS r9, r9, #0x0 - ADCS r10, r10, #0x0 - ADCS r11, r11, #0x0 + ADCS r7, r7, #0 + ADCS r8, r8, #0 + ADCS r9, r9, #0 + ADCS r10, r10, #0 + ADCS r11, r11, #0 BFC lr, #31, #1 - ADCS r12, r12, #0x0 - ADC lr, lr, #0x0 + ADCS r12, r12, #0 + ADC lr, lr, #0 STM r0, {r6, r7, r8, r9, r10, r11, r12, lr} /* Done Add */ POP {pc} @@ -259,26 +259,26 @@ fe_frombytes: .globl fe_tobytes .type fe_tobytes, %function fe_tobytes: - PUSH {r4, r5, r6, r7, r8, r9, r10, lr} + PUSH {r4, r5, r6, r7, r8, r9, lr} LDM r1, {r2, r3, r4, r5, r6, r7, r8, r9} - ADDS r10, r2, #0x13 - ADCS r10, r3, #0x0 - ADCS r10, r4, #0x0 - ADCS r10, r5, #0x0 - ADCS r10, r6, #0x0 - ADCS r10, r7, #0x0 - ADCS r10, r8, #0x0 - ADC r10, r9, #0x0 - ASR r10, r10, #31 - AND r10, r10, #0x13 - ADDS r2, r2, r10 - ADCS r3, r3, #0x0 - ADCS r4, r4, #0x0 - ADCS r5, r5, #0x0 - ADCS r6, r6, #0x0 - ADCS r7, r7, #0x0 - ADCS r8, r8, #0x0 - ADC r9, r9, #0x0 + ADDS r12, r2, #19 + ADCS r12, r3, #0 + ADCS r12, r4, #0 + ADCS r12, r5, #0 + ADCS r12, r6, #0 + ADCS r12, r7, #0 + ADCS r12, r8, #0 + ADC r12, r9, #0 + ASR r12, r12, #31 + AND r12, r12, #19 + ADDS r2, r2, r12 + ADCS r3, r3, #0 + ADCS r4, r4, #0 + ADCS r5, r5, #0 + ADCS r6, r6, #0 + ADCS r7, r7, #0 + ADCS r8, r8, #0 + ADC r9, r9, #0 BFC r9, #31, #1 STR r2, [r0] STR r3, [r0, #4] @@ -288,8 +288,8 @@ fe_tobytes: STR r7, [r0, #20] STR r8, [r0, #24] STR r9, [r0, #28] - POP {r4, r5, r6, r7, r8, r9, r10, pc} - /* Cycle Count = 62 */ + POP {r4, r5, r6, r7, r8, r9, pc} + /* Cycle Count = 60 */ .size fe_tobytes,.-fe_tobytes .text .align 4 @@ -298,14 +298,14 @@ fe_tobytes: fe_1: PUSH {r4, r5, r6, r7, r8, r9, lr} /* Set one */ - MOV r2, #0x1 - MOV r3, #0x0 - MOV r4, #0x0 - MOV r5, #0x0 - MOV r6, #0x0 - MOV r7, #0x0 - MOV r8, #0x0 - MOV r9, #0x0 + MOV r2, #1 + MOV r3, #0 + MOV r4, #0 + MOV r5, #0 + MOV r6, #0 + MOV r7, #0 + MOV r8, #0 + MOV r9, #0 STM r0, {r2, r3, r4, r5, r6, r7, r8, r9} POP {r4, r5, r6, r7, r8, r9, pc} /* Cycle Count = 33 */ @@ -317,14 +317,14 @@ fe_1: fe_0: PUSH {r4, r5, r6, r7, r8, r9, lr} /* Set zero */ - MOV r2, #0x0 - MOV r3, #0x0 - MOV r4, #0x0 - MOV r5, #0x0 - MOV r6, #0x0 - MOV r7, #0x0 - MOV r8, #0x0 - MOV r9, #0x0 + MOV r2, #0 + MOV r3, #0 + MOV r4, #0 + MOV r5, #0 + MOV r6, #0 + MOV r7, #0 + MOV r8, #0 + MOV r9, #0 STM r0, {r2, r3, r4, r5, r6, r7, r8, r9} POP {r4, r5, r6, r7, r8, r9, pc} /* Cycle Count = 33 */ @@ -352,50 +352,50 @@ fe_copy: .globl fe_neg .type fe_neg, %function fe_neg: - PUSH {r4, r5, r6, r7, lr} - MVN r7, #0x0 - MVN r6, #0x12 + PUSH {r4, r5, lr} + MVN lr, #0 + MVN r12, #18 LDM r1!, {r2, r3, r4, r5} - SUBS r2, r6, r2 - SBCS r3, r7, r3 - SBCS r4, r7, r4 - SBCS r5, r7, r5 + SUBS r2, r12, r2 + SBCS r3, lr, r3 + SBCS r4, lr, r4 + SBCS r5, lr, r5 STM r0!, {r2, r3, r4, r5} - MVN r6, #0x80000000 + MVN r12, #0x80000000 LDM r1!, {r2, r3, r4, r5} - SBCS r2, r7, r2 - SBCS r3, r7, r3 - SBCS r4, r7, r4 - SBC r5, r6, r5 + SBCS r2, lr, r2 + SBCS r3, lr, r3 + SBCS r4, lr, r4 + SBC r5, r12, r5 STM r0!, {r2, r3, r4, r5} - POP {r4, r5, r6, r7, pc} - /* Cycle Count = 43 */ + POP {r4, r5, pc} + /* Cycle Count = 39 */ .size fe_neg,.-fe_neg .text .align 4 .globl fe_isnonzero .type fe_isnonzero, %function fe_isnonzero: - PUSH {r4, r5, r6, r7, r8, r9, r10, lr} + PUSH {r4, r5, r6, r7, r8, r9, lr} LDM r0, {r2, r3, r4, r5, r6, r7, r8, r9} - ADDS r1, r2, #0x13 - ADCS r1, r3, #0x0 - ADCS r1, r4, #0x0 - ADCS r1, r5, #0x0 - ADCS r1, r6, #0x0 - ADCS r1, r7, #0x0 - ADCS r1, r8, #0x0 - ADC r1, r9, #0x0 + ADDS r1, r2, #19 + ADCS r1, r3, #0 + ADCS r1, r4, #0 + ADCS r1, r5, #0 + ADCS r1, r6, #0 + ADCS r1, r7, #0 + ADCS r1, r8, #0 + ADC r1, r9, #0 ASR r1, r1, #31 - AND r1, r1, #0x13 + AND r1, r1, #19 ADDS r2, r2, r1 - ADCS r3, r3, #0x0 - ADCS r4, r4, #0x0 - ADCS r5, r5, #0x0 - ADCS r6, r6, #0x0 - ADCS r7, r7, #0x0 - ADCS r8, r8, #0x0 - ADC r9, r9, #0x0 + ADCS r3, r3, #0 + ADCS r4, r4, #0 + ADCS r5, r5, #0 + ADCS r6, r6, #0 + ADCS r7, r7, #0 + ADCS r8, r8, #0 + ADC r9, r9, #0 BFC r9, #31, #1 ORR r2, r2, r3 ORR r4, r4, r5 @@ -404,8 +404,8 @@ fe_isnonzero: ORR r4, r4, r6 ORR r2, r2, r8 ORR r0, r2, r4 - POP {r4, r5, r6, r7, r8, r9, r10, pc} - /* Cycle Count = 53 */ + POP {r4, r5, r6, r7, r8, r9, pc} + /* Cycle Count = 51 */ .size fe_isnonzero,.-fe_isnonzero .text .align 4 @@ -414,21 +414,20 @@ fe_isnonzero: fe_isnegative: PUSH {r4, r5, lr} LDM r0!, {r2, r3, r4, r5} - ADDS r1, r2, #0x13 - ADCS r1, r3, #0x0 - ADCS r1, r4, #0x0 - ADCS r1, r5, #0x0 + AND r12, r2, #1 + ADDS r1, r2, #19 + ADCS r1, r3, #0 + ADCS r1, r4, #0 + ADCS r1, r5, #0 LDM r0, {r2, r3, r4, r5} - ADCS r1, r2, #0x0 - ADCS r1, r3, #0x0 - ADCS r1, r4, #0x0 - LDR r2, [r0, #-16] - ADC r1, r5, #0x0 - AND r0, r2, #0x1 + ADCS r1, r2, #0 + ADCS r1, r3, #0 + ADCS r1, r4, #0 + ADC r1, r5, #0 LSR r1, r1, #31 - EOR r0, r0, r1 + EOR r0, r12, r1 POP {r4, r5, pc} - /* Cycle Count = 31 */ + /* Cycle Count = 29 */ .size fe_isnegative,.-fe_isnegative #if defined(HAVE_ED25519_MAKE_KEY) || defined(HAVE_ED25519_SIGN) || defined(WOLFSSL_CURVE25519_USE_ED25519) #ifndef WC_NO_CACHE_RESISTANT @@ -442,12 +441,12 @@ fe_cmov_table: SBFX r3, r2, #7, #1 EOR r12, r2, r3 SUB r12, r12, r3 - MOV r4, #0x1 - MOV r5, #0x0 - MOV r6, #0x1 - MOV r7, #0x0 - MOV r8, #0x0 - MOV r9, #0x0 + MOV r4, #1 + MOV r5, #0 + MOV r6, #1 + MOV r7, #0 + MOV r8, #0 + MOV r9, #0 MOV r3, #0x80000000 ROR r3, r3, #31 ROR r3, r3, r12 @@ -656,8 +655,8 @@ fe_cmov_table: EOR r8, r8, r10 EOR r9, r9, r11 SUB r1, r1, #0x2a0 - MVN r10, #0x12 - MVN r11, #0x0 + MVN r10, #18 + MVN r11, #0 SUBS r10, r10, r8 SBCS r11, r11, r9 SBC lr, lr, lr @@ -682,12 +681,12 @@ fe_cmov_table: SBFX r3, r2, #7, #1 EOR r12, r2, r3 SUB r12, r12, r3 - MOV r4, #0x0 - MOV r5, #0x0 - MOV r6, #0x0 - MOV r7, #0x0 - MOV r8, #0x0 - MOV r9, #0x0 + MOV r4, #0 + MOV r5, #0 + MOV r6, #0 + MOV r7, #0 + MOV r8, #0 + MOV r9, #0 MOV r3, #0x80000000 ROR r3, r3, #31 ROR r3, r3, r12 @@ -896,9 +895,9 @@ fe_cmov_table: EOR r8, r8, r10 EOR r9, r9, r11 SUB r1, r1, #0x2a0 - MVN r10, #0x0 - MVN r11, #0x0 - RSBS lr, lr, #0x0 + MVN r10, #0 + MVN r11, #0 + RSBS lr, lr, #0 SBCS r10, r10, r8 SBCS r11, r11, r9 SBC lr, lr, lr @@ -923,12 +922,12 @@ fe_cmov_table: SBFX r3, r2, #7, #1 EOR r12, r2, r3 SUB r12, r12, r3 - MOV r4, #0x0 - MOV r5, #0x0 - MOV r6, #0x0 - MOV r7, #0x0 - MOV r8, #0x0 - MOV r9, #0x0 + MOV r4, #0 + MOV r5, #0 + MOV r6, #0 + MOV r7, #0 + MOV r8, #0 + MOV r9, #0 MOV r3, #0x80000000 ROR r3, r3, #31 ROR r3, r3, r12 @@ -1137,9 +1136,9 @@ fe_cmov_table: EOR r8, r8, r10 EOR r9, r9, r11 SUB r1, r1, #0x2a0 - MVN r10, #0x0 - MVN r11, #0x0 - RSBS lr, lr, #0x0 + MVN r10, #0 + MVN r11, #0 + RSBS lr, lr, #0 SBCS r10, r10, r8 SBCS r11, r11, r9 SBC lr, lr, lr @@ -1164,12 +1163,12 @@ fe_cmov_table: SBFX r3, r2, #7, #1 EOR r12, r2, r3 SUB r12, r12, r3 - MOV r4, #0x0 - MOV r5, #0x0 - MOV r6, #0x0 - MOV r7, #0x0 - MOV r8, #0x0 - MOV r9, #0x0 + MOV r4, #0 + MOV r5, #0 + MOV r6, #0 + MOV r7, #0 + MOV r8, #0 + MOV r9, #0 MOV r3, #0x80000000 ROR r3, r3, #31 ROR r3, r3, r12 @@ -1378,9 +1377,9 @@ fe_cmov_table: EOR r8, r8, r10 EOR r9, r9, r11 SUB r1, r1, #0x2a0 - MVN r10, #0x0 + MVN r10, #0 MVN r11, #0x80000000 - RSBS lr, lr, #0x0 + RSBS lr, lr, #0 SBCS r10, r10, r8 SBC r11, r11, r9 ASR r12, r2, #31 @@ -1434,7 +1433,7 @@ fe_cmov_table: AND r11, r11, lr MVN r12, lr SUB r4, r4, r12 - MOV r12, #0x20 + MOV r12, #32 AND r12, r12, r3 ADD r0, r0, r12 STM r0, {r4, r5, r6, r7, r8, r9, r10, r11} @@ -1450,14 +1449,14 @@ fe_cmov_table: AND r11, r11, lr MVN r12, lr SUB r4, r4, r12 - MOV r12, #0x20 + MOV r12, #32 BIC r12, r12, r3 ADD r0, r0, r12 STM r0, {r4, r5, r6, r7, r8, r9, r10, r11} SUB r0, r0, r12 ADD r0, r0, #0x40 LDM r1!, {r4, r5, r6, r7} - MVN r12, #0x12 + MVN r12, #18 SUBS r8, r12, r4 SBCS r9, r3, r5 SBCS r10, r3, r6 @@ -1516,9 +1515,9 @@ fe_cmov_table: .type fe_mul_op, %function fe_mul_op: PUSH {lr} - SUB sp, sp, #0x28 + SUB sp, sp, #40 STR r0, [sp, #36] - MOV r0, #0x0 + MOV r0, #0 LDR r12, [r1] /* A[0] * B[0] */ LDR lr, [r2] @@ -1540,351 +1539,351 @@ fe_mul_op: ADDS r5, r5, r11 /* A[0] * B[3] */ LDR lr, [r2, #12] - ADCS r6, r6, #0x0 - ADC r11, r0, #0x0 + ADCS r6, r6, #0 + ADC r11, r0, #0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 /* A[0] * B[5] */ LDR lr, [r2, #20] - ADCS r8, r8, #0x0 - ADC r11, r0, #0x0 + ADCS r8, r8, #0 + ADC r11, r0, #0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 /* A[0] * B[7] */ LDR lr, [r2, #28] - ADCS r10, r10, #0x0 - ADC r3, r0, #0x0 + ADCS r10, r10, #0 + ADC r3, r0, #0 UMLAL r10, r3, r12, lr /* A[1] * B[0] */ LDR r12, [r1, #4] LDR lr, [r2] - MOV r11, #0x0 + MOV r11, #0 UMLAL r4, r11, r12, lr STR r4, [sp, #4] ADDS r5, r5, r11 /* A[1] * B[1] */ LDR lr, [r2, #4] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 /* A[1] * B[2] */ LDR lr, [r2, #8] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 /* A[1] * B[3] */ LDR lr, [r2, #12] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 /* A[1] * B[4] */ LDR lr, [r2, #16] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 /* A[1] * B[5] */ LDR lr, [r2, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 /* A[1] * B[6] */ LDR lr, [r2, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 /* A[1] * B[7] */ LDR lr, [r2, #28] - ADC r4, r0, #0x0 + ADC r4, r0, #0 UMLAL r3, r4, r12, lr /* A[2] * B[0] */ LDR r12, [r1, #8] LDR lr, [r2] - MOV r11, #0x0 + MOV r11, #0 UMLAL r5, r11, r12, lr STR r5, [sp, #8] ADDS r6, r6, r11 /* A[2] * B[1] */ LDR lr, [r2, #4] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 /* A[2] * B[2] */ LDR lr, [r2, #8] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 /* A[2] * B[3] */ LDR lr, [r2, #12] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 /* A[2] * B[4] */ LDR lr, [r2, #16] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 /* A[2] * B[5] */ LDR lr, [r2, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 /* A[2] * B[6] */ LDR lr, [r2, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 /* A[2] * B[7] */ LDR lr, [r2, #28] - ADC r5, r0, #0x0 + ADC r5, r0, #0 UMLAL r4, r5, r12, lr /* A[3] * B[0] */ LDR r12, [r1, #12] LDR lr, [r2] - MOV r11, #0x0 + MOV r11, #0 UMLAL r6, r11, r12, lr STR r6, [sp, #12] ADDS r7, r7, r11 /* A[3] * B[1] */ LDR lr, [r2, #4] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 /* A[3] * B[2] */ LDR lr, [r2, #8] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 /* A[3] * B[3] */ LDR lr, [r2, #12] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 /* A[3] * B[4] */ LDR lr, [r2, #16] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 /* A[3] * B[5] */ LDR lr, [r2, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 /* A[3] * B[6] */ LDR lr, [r2, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 /* A[3] * B[7] */ LDR lr, [r2, #28] - ADC r6, r0, #0x0 + ADC r6, r0, #0 UMLAL r5, r6, r12, lr /* A[4] * B[0] */ LDR r12, [r1, #16] LDR lr, [r2] - MOV r11, #0x0 + MOV r11, #0 UMLAL r7, r11, r12, lr STR r7, [sp, #16] ADDS r8, r8, r11 /* A[4] * B[1] */ LDR lr, [r2, #4] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 /* A[4] * B[2] */ LDR lr, [r2, #8] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 /* A[4] * B[3] */ LDR lr, [r2, #12] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 /* A[4] * B[4] */ LDR lr, [r2, #16] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 /* A[4] * B[5] */ LDR lr, [r2, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 /* A[4] * B[6] */ LDR lr, [r2, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 /* A[4] * B[7] */ LDR lr, [r2, #28] - ADC r7, r0, #0x0 + ADC r7, r0, #0 UMLAL r6, r7, r12, lr /* A[5] * B[0] */ LDR r12, [r1, #20] LDR lr, [r2] - MOV r11, #0x0 + MOV r11, #0 UMLAL r8, r11, r12, lr STR r8, [sp, #20] ADDS r9, r9, r11 /* A[5] * B[1] */ LDR lr, [r2, #4] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 /* A[5] * B[2] */ LDR lr, [r2, #8] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 /* A[5] * B[3] */ LDR lr, [r2, #12] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 /* A[5] * B[4] */ LDR lr, [r2, #16] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 /* A[5] * B[5] */ LDR lr, [r2, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 /* A[5] * B[6] */ LDR lr, [r2, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 /* A[5] * B[7] */ LDR lr, [r2, #28] - ADC r8, r0, #0x0 + ADC r8, r0, #0 UMLAL r7, r8, r12, lr /* A[6] * B[0] */ LDR r12, [r1, #24] LDR lr, [r2] - MOV r11, #0x0 + MOV r11, #0 UMLAL r9, r11, r12, lr STR r9, [sp, #24] ADDS r10, r10, r11 /* A[6] * B[1] */ LDR lr, [r2, #4] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 /* A[6] * B[2] */ LDR lr, [r2, #8] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 /* A[6] * B[3] */ LDR lr, [r2, #12] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 /* A[6] * B[4] */ LDR lr, [r2, #16] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 /* A[6] * B[5] */ LDR lr, [r2, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 /* A[6] * B[6] */ LDR lr, [r2, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 /* A[6] * B[7] */ LDR lr, [r2, #28] - ADC r9, r0, #0x0 + ADC r9, r0, #0 UMLAL r8, r9, r12, lr /* A[7] * B[0] */ LDR r12, [r1, #28] LDR lr, [r2] - MOV r11, #0x0 + MOV r11, #0 UMLAL r10, r11, r12, lr STR r10, [sp, #28] ADDS r3, r3, r11 /* A[7] * B[1] */ LDR lr, [r2, #4] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 /* A[7] * B[2] */ LDR lr, [r2, #8] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 /* A[7] * B[3] */ LDR lr, [r2, #12] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 /* A[7] * B[4] */ LDR lr, [r2, #16] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 /* A[7] * B[5] */ LDR lr, [r2, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 /* A[7] * B[6] */ LDR lr, [r2, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 /* A[7] * B[7] */ LDR lr, [r2, #28] - ADC r10, r0, #0x0 + ADC r10, r0, #0 UMLAL r9, r10, r12, lr /* Reduce */ LDR r2, [sp, #28] MOV lr, sp - MOV r12, #0x26 + MOV r12, #38 UMULL r10, r11, r10, r12 ADDS r10, r10, r2 - ADC r11, r11, #0x0 - MOV r12, #0x13 + ADC r11, r11, #0 + MOV r12, #19 LSL r11, r11, #1 ORR r11, r11, r10, LSR #31 MUL r11, r11, r12 LDM lr!, {r1, r2} - MOV r12, #0x26 + MOV r12, #38 ADDS r1, r1, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r1, r11, r3, r12 ADDS r2, r2, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r2, r11, r4, r12 LDM lr!, {r3, r4} ADDS r3, r3, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r5, r12 ADDS r4, r4, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r4, r11, r6, r12 LDM lr!, {r5, r6} ADDS r5, r5, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r5, r11, r7, r12 ADDS r6, r6, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r6, r11, r8, r12 LDM lr!, {r7, r8} ADDS r7, r7, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r7, r11, r9, r12 BFC r10, #31, #1 ADDS r8, r10, r11 /* Store */ LDR r0, [sp, #36] STM r0, {r1, r2, r3, r4, r5, r6, r7, r8} - ADD sp, sp, #0x28 + ADD sp, sp, #40 POP {pc} /* Cycle Count = 406 */ .size fe_mul_op,.-fe_mul_op @@ -1895,7 +1894,7 @@ fe_mul_op: .type fe_mul_op, %function fe_mul_op: PUSH {lr} - SUB sp, sp, #0x2c + SUB sp, sp, #44 STRD r0, r1, [sp, #36] MOV lr, r2 LDM r1, {r0, r1, r2, r3} @@ -1920,54 +1919,54 @@ fe_mul_op: UMAAL r9, r10, r2, r4 UMAAL r10, r11, r3, r4 LDM lr, {r4, r5, r6, r7} - MOV r12, #0x0 + MOV r12, #0 UMLAL r8, r12, r0, r4 UMAAL r9, r12, r1, r4 UMAAL r10, r12, r2, r4 UMAAL r11, r12, r3, r4 - MOV r4, #0x0 + MOV r4, #0 UMLAL r9, r4, r0, r5 UMAAL r10, r4, r1, r5 UMAAL r11, r4, r2, r5 UMAAL r12, r4, r3, r5 - MOV r5, #0x0 + MOV r5, #0 UMLAL r10, r5, r0, r6 UMAAL r11, r5, r1, r6 UMAAL r12, r5, r2, r6 UMAAL r4, r5, r3, r6 - MOV r6, #0x0 + MOV r6, #0 UMLAL r11, r6, r0, r7 LDR r0, [sp, #40] UMAAL r12, r6, r1, r7 - ADD r0, r0, #0x10 + ADD r0, r0, #16 UMAAL r4, r6, r2, r7 - SUB lr, lr, #0x10 + SUB lr, lr, #16 UMAAL r5, r6, r3, r7 LDM r0, {r0, r1, r2, r3} STR r6, [sp, #32] LDM lr!, {r6} - MOV r7, #0x0 + MOV r7, #0 UMLAL r8, r7, r0, r6 UMAAL r9, r7, r1, r6 STR r8, [sp, #16] UMAAL r10, r7, r2, r6 UMAAL r11, r7, r3, r6 LDM lr!, {r6} - MOV r8, #0x0 + MOV r8, #0 UMLAL r9, r8, r0, r6 UMAAL r10, r8, r1, r6 STR r9, [sp, #20] UMAAL r11, r8, r2, r6 UMAAL r12, r8, r3, r6 LDM lr!, {r6} - MOV r9, #0x0 + MOV r9, #0 UMLAL r10, r9, r0, r6 UMAAL r11, r9, r1, r6 STR r10, [sp, #24] UMAAL r12, r9, r2, r6 UMAAL r4, r9, r3, r6 LDM lr!, {r6} - MOV r10, #0x0 + MOV r10, #0 UMLAL r11, r10, r0, r6 UMAAL r12, r10, r1, r6 STR r11, [sp, #28] @@ -1995,14 +1994,14 @@ fe_mul_op: UMAAL r9, r10, r3, lr /* Reduce */ LDR r0, [sp, #28] - MOV lr, #0x25 + MOV lr, #37 UMAAL r10, r0, r10, lr - MOV lr, #0x13 + MOV lr, #19 LSL r0, r0, #1 ORR r0, r0, r10, LSR #31 MUL r11, r0, lr POP {r0, r1, r2} - MOV lr, #0x26 + MOV lr, #38 UMAAL r0, r11, r12, lr UMAAL r1, r11, r4, lr UMAAL r2, r11, r5, lr @@ -2017,7 +2016,7 @@ fe_mul_op: LDR lr, [sp, #8] /* Store */ STM lr, {r0, r1, r2, r3, r4, r5, r6, r7} - ADD sp, sp, #0x10 + ADD sp, sp, #16 POP {pc} /* Cycle Count = 239 */ .size fe_mul_op,.-fe_mul_op @@ -2042,7 +2041,7 @@ fe_sq_op: SUB sp, sp, #0x44 STR r0, [sp, #64] /* Square */ - MOV r0, #0x0 + MOV r0, #0 LDR r12, [r1] /* A[0] * A[1] */ LDR lr, [r1, #4] @@ -2058,137 +2057,137 @@ fe_sq_op: UMULL r10, r3, r12, lr /* A[0] * A[2] */ LDR lr, [r1, #8] - MOV r11, #0x0 + MOV r11, #0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 /* A[0] * A[4] */ LDR lr, [r1, #16] - ADCS r7, r7, #0x0 - ADC r11, r0, #0x0 + ADCS r7, r7, #0 + ADC r11, r0, #0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 /* A[0] * A[6] */ LDR lr, [r1, #24] - ADCS r9, r9, #0x0 - ADC r11, r0, #0x0 + ADCS r9, r9, #0 + ADC r11, r0, #0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 - ADCS r3, r3, #0x0 + ADCS r3, r3, #0 STR r4, [sp, #4] STR r5, [sp, #8] /* A[1] * A[2] */ LDR r12, [r1, #4] LDR lr, [r1, #8] - MOV r11, #0x0 + MOV r11, #0 UMLAL r6, r11, r12, lr STR r6, [sp, #12] ADDS r7, r7, r11 /* A[1] * A[3] */ LDR lr, [r1, #12] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r7, r11, r12, lr STR r7, [sp, #16] ADDS r8, r8, r11 /* A[1] * A[4] */ LDR lr, [r1, #16] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 /* A[1] * A[5] */ LDR lr, [r1, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 /* A[1] * A[6] */ LDR lr, [r1, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 /* A[1] * A[7] */ LDR lr, [r1, #28] - ADC r4, r0, #0x0 + ADC r4, r0, #0 UMLAL r3, r4, r12, lr /* A[2] * A[3] */ LDR r12, [r1, #8] LDR lr, [r1, #12] - MOV r11, #0x0 + MOV r11, #0 UMLAL r8, r11, r12, lr STR r8, [sp, #20] ADDS r9, r9, r11 /* A[2] * A[4] */ LDR lr, [r1, #16] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r9, r11, r12, lr STR r9, [sp, #24] ADDS r10, r10, r11 /* A[2] * A[5] */ LDR lr, [r1, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 /* A[2] * A[6] */ LDR lr, [r1, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 /* A[2] * A[7] */ LDR lr, [r1, #28] - ADC r5, r0, #0x0 + ADC r5, r0, #0 UMLAL r4, r5, r12, lr /* A[3] * A[4] */ LDR r12, [r1, #12] LDR lr, [r1, #16] - MOV r11, #0x0 + MOV r11, #0 UMLAL r10, r11, r12, lr STR r10, [sp, #28] ADDS r3, r3, r11 /* A[3] * A[5] */ LDR lr, [r1, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 /* A[3] * A[6] */ LDR lr, [r1, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 /* A[3] * A[7] */ LDR lr, [r1, #28] - ADC r6, r0, #0x0 + ADC r6, r0, #0 UMLAL r5, r6, r12, lr /* A[4] * A[5] */ LDR r12, [r1, #16] LDR lr, [r1, #20] - MOV r11, #0x0 + MOV r11, #0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 /* A[4] * A[6] */ LDR lr, [r1, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 /* A[4] * A[7] */ LDR lr, [r1, #28] - ADC r7, r0, #0x0 + ADC r7, r0, #0 UMLAL r6, r7, r12, lr /* A[5] * A[6] */ LDR r12, [r1, #20] LDR lr, [r1, #24] - MOV r11, #0x0 + MOV r11, #0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 /* A[5] * A[7] */ LDR lr, [r1, #28] - ADC r8, r0, #0x0 + ADC r8, r0, #0 UMLAL r7, r8, r12, lr /* A[6] * A[7] */ LDR r12, [r1, #24] LDR lr, [r1, #28] - MOV r9, #0x0 + MOV r9, #0 UMLAL r8, r9, r12, lr - ADD lr, sp, #0x20 + ADD lr, sp, #32 STM lr, {r3, r4, r5, r6, r7, r8, r9} - ADD lr, sp, #0x4 + ADD lr, sp, #4 LDM lr, {r4, r5, r6, r7, r8, r9, r10} ADDS r4, r4, r4 ADCS r5, r5, r5 @@ -2206,9 +2205,9 @@ fe_sq_op: ADCS r7, r7, r7 ADCS r8, r8, r8 ADCS r9, r9, r9 - ADC r10, r0, #0x0 + ADC r10, r0, #0 STM lr, {r3, r4, r5, r6, r7, r8, r9, r10} - ADD lr, sp, #0x4 + ADD lr, sp, #4 LDM lr, {r4, r5, r6, r7, r8, r9, r10} MOV lr, sp /* A[0] * A[0] */ @@ -2217,83 +2216,83 @@ fe_sq_op: ADDS r4, r4, r11 /* A[1] * A[1] */ LDR r12, [r1, #4] - ADCS r5, r5, #0x0 - ADC r11, r0, #0x0 + ADCS r5, r5, #0 + ADC r11, r0, #0 UMLAL r5, r11, r12, r12 ADDS r6, r6, r11 /* A[2] * A[2] */ LDR r12, [r1, #8] - ADCS r7, r7, #0x0 - ADC r11, r0, #0x0 + ADCS r7, r7, #0 + ADC r11, r0, #0 UMLAL r7, r11, r12, r12 ADDS r8, r8, r11 /* A[3] * A[3] */ LDR r12, [r1, #12] - ADCS r9, r9, #0x0 - ADC r11, r0, #0x0 + ADCS r9, r9, #0 + ADC r11, r0, #0 UMLAL r9, r11, r12, r12 ADDS r10, r10, r11 STM lr!, {r3, r4, r5, r6, r7, r8, r9, r10} LDM lr, {r3, r4, r5, r6, r7, r8, r9, r10} /* A[4] * A[4] */ LDR r12, [r1, #16] - ADCS r3, r3, #0x0 - ADC r11, r0, #0x0 + ADCS r3, r3, #0 + ADC r11, r0, #0 UMLAL r3, r11, r12, r12 ADDS r4, r4, r11 /* A[5] * A[5] */ LDR r12, [r1, #20] - ADCS r5, r5, #0x0 - ADC r11, r0, #0x0 + ADCS r5, r5, #0 + ADC r11, r0, #0 UMLAL r5, r11, r12, r12 ADDS r6, r6, r11 /* A[6] * A[6] */ LDR r12, [r1, #24] - ADCS r7, r7, #0x0 - ADC r11, r0, #0x0 + ADCS r7, r7, #0 + ADC r11, r0, #0 UMLAL r7, r11, r12, r12 ADDS r8, r8, r11 /* A[7] * A[7] */ LDR r12, [r1, #28] - ADCS r9, r9, #0x0 - ADC r10, r10, #0x0 + ADCS r9, r9, #0 + ADC r10, r10, #0 UMLAL r9, r10, r12, r12 /* Reduce */ LDR r2, [sp, #28] MOV lr, sp - MOV r12, #0x26 + MOV r12, #38 UMULL r10, r11, r10, r12 ADDS r10, r10, r2 - ADC r11, r11, #0x0 - MOV r12, #0x13 + ADC r11, r11, #0 + MOV r12, #19 LSL r11, r11, #1 ORR r11, r11, r10, LSR #31 MUL r11, r11, r12 LDM lr!, {r1, r2} - MOV r12, #0x26 + MOV r12, #38 ADDS r1, r1, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r1, r11, r3, r12 ADDS r2, r2, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r2, r11, r4, r12 LDM lr!, {r3, r4} ADDS r3, r3, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r5, r12 ADDS r4, r4, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r4, r11, r6, r12 LDM lr!, {r5, r6} ADDS r5, r5, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r5, r11, r7, r12 ADDS r6, r6, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r6, r11, r8, r12 LDM lr!, {r7, r8} ADDS r7, r7, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r7, r11, r9, r12 BFC r10, #31, #1 ADDS r8, r10, r11 @@ -2311,14 +2310,14 @@ fe_sq_op: .type fe_sq_op, %function fe_sq_op: PUSH {lr} - SUB sp, sp, #0x20 + SUB sp, sp, #32 STR r0, [sp, #28] LDM r1, {r0, r1, r2, r3, r4, r5, r6, r7} /* Square */ UMULL r9, r10, r0, r0 UMULL r11, r12, r0, r1 ADDS r11, r11, r11 - MOV lr, #0x0 + MOV lr, #0 UMAAL r10, r11, lr, lr STM sp, {r9, r10} MOV r8, lr @@ -2395,14 +2394,14 @@ fe_sq_op: /* R[14] = r9 */ /* R[15] = r7 */ /* Reduce */ - MOV r6, #0x25 + MOV r6, #37 UMAAL r7, r0, r7, r6 - MOV r6, #0x13 + MOV r6, #19 LSL r0, r0, #1 ORR r0, r0, r7, LSR #31 MUL lr, r0, r6 POP {r0, r1} - MOV r6, #0x26 + MOV r6, #38 UMAAL r0, lr, r12, r6 UMAAL r1, lr, r11, r6 MOV r12, r3 @@ -2441,49 +2440,49 @@ fe_sq: .globl fe_mul121666 .type fe_mul121666, %function fe_mul121666: - PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} + PUSH {r4, r5, r6, r7, r8, r9, r10, lr} /* Multiply by 121666 */ LDM r1, {r2, r3, r4, r5, r6, r7, r8, r9} - MOV r12, #0xdb42 - MOVT r12, #0x1 - UMULL r2, r10, r2, r12 - UMULL r3, r11, r3, r12 - ADDS r3, r3, r10 - ADC r11, r11, #0x0 - UMULL r4, r10, r4, r12 - ADDS r4, r4, r11 - ADC r10, r10, #0x0 - UMULL r5, r11, r5, r12 - ADDS r5, r5, r10 - ADC r11, r11, #0x0 - UMULL r6, r10, r6, r12 - ADDS r6, r6, r11 - ADC r10, r10, #0x0 - UMULL r7, r11, r7, r12 - ADDS r7, r7, r10 - ADC r11, r11, #0x0 - UMULL r8, r10, r8, r12 - ADDS r8, r8, r11 - ADC r10, r10, #0x0 - UMULL r9, r11, r9, r12 - ADDS r9, r9, r10 - MOV r12, #0x13 - ADC r11, r11, #0x0 - LSL r11, r11, #1 - ORR r11, r11, r9, LSR #31 - MUL r11, r11, r12 - ADDS r2, r2, r11 - ADCS r3, r3, #0x0 - ADCS r4, r4, #0x0 - ADCS r5, r5, #0x0 - ADCS r6, r6, #0x0 - ADCS r7, r7, #0x0 + MOV r10, #0xdb42 + MOVT r10, #0x1 + UMULL r2, r12, r2, r10 + UMULL r3, lr, r3, r10 + ADDS r3, r3, r12 + ADC lr, lr, #0 + UMULL r4, r12, r4, r10 + ADDS r4, r4, lr + ADC r12, r12, #0 + UMULL r5, lr, r5, r10 + ADDS r5, r5, r12 + ADC lr, lr, #0 + UMULL r6, r12, r6, r10 + ADDS r6, r6, lr + ADC r12, r12, #0 + UMULL r7, lr, r7, r10 + ADDS r7, r7, r12 + ADC lr, lr, #0 + UMULL r8, r12, r8, r10 + ADDS r8, r8, lr + ADC r12, r12, #0 + UMULL r9, lr, r9, r10 + ADDS r9, r9, r12 + MOV r10, #19 + ADC lr, lr, #0 + LSL lr, lr, #1 + ORR lr, lr, r9, LSR #31 + MUL lr, lr, r10 + ADDS r2, r2, lr + ADCS r3, r3, #0 + ADCS r4, r4, #0 + ADCS r5, r5, #0 + ADCS r6, r6, #0 + ADCS r7, r7, #0 BFC r9, #31, #1 - ADCS r8, r8, #0x0 - ADC r9, r9, #0x0 + ADCS r8, r8, #0 + ADC r9, r9, #0 STM r0, {r2, r3, r4, r5, r6, r7, r8, r9} - POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - /* Cycle Count = 75 */ + POP {r4, r5, r6, r7, r8, r9, r10, pc} + /* Cycle Count = 73 */ .size fe_mul121666,.-fe_mul121666 #else .text @@ -2491,36 +2490,36 @@ fe_mul121666: .globl fe_mul121666 .type fe_mul121666, %function fe_mul121666: - PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} + PUSH {r4, r5, r6, r7, r8, r9, r10, lr} /* Multiply by 121666 */ LDM r1, {r2, r3, r4, r5, r6, r7, r8, r9} - MOV r11, #0xdb42 - MOVT r11, #0x1 - UMULL r2, r12, r2, r11 - SUB r10, r11, #0x1 - UMAAL r3, r12, r3, r10 - UMAAL r4, r12, r4, r10 - UMAAL r5, r12, r5, r10 - UMAAL r6, r12, r6, r10 - UMAAL r7, r12, r7, r10 - UMAAL r8, r12, r8, r10 - MOV r11, #0x13 - UMAAL r9, r12, r9, r10 - LSL r12, r12, #1 - ORR r12, r12, r9, LSR #31 - MUL r12, r12, r11 - ADDS r2, r2, r12 - ADCS r3, r3, #0x0 - ADCS r4, r4, #0x0 - ADCS r5, r5, #0x0 - ADCS r6, r6, #0x0 - ADCS r7, r7, #0x0 + MOV lr, #0xdb42 + MOVT lr, #0x1 + UMULL r2, r10, r2, lr + SUB r12, lr, #1 + UMAAL r3, r10, r3, r12 + UMAAL r4, r10, r4, r12 + UMAAL r5, r10, r5, r12 + UMAAL r6, r10, r6, r12 + UMAAL r7, r10, r7, r12 + UMAAL r8, r10, r8, r12 + MOV lr, #19 + UMAAL r9, r10, r9, r12 + LSL r10, r10, #1 + ORR r10, r10, r9, LSR #31 + MUL r10, r10, lr + ADDS r2, r2, r10 + ADCS r3, r3, #0 + ADCS r4, r4, #0 + ADCS r5, r5, #0 + ADCS r6, r6, #0 + ADCS r7, r7, #0 BFC r9, #31, #1 - ADCS r8, r8, #0x0 - ADC r9, r9, #0x0 + ADCS r8, r8, #0 + ADC r9, r9, #0 STM r0, {r2, r3, r4, r5, r6, r7, r8, r9} - POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - /* Cycle Count = 69 */ + POP {r4, r5, r6, r7, r8, r9, r10, pc} + /* Cycle Count = 67 */ .size fe_mul121666,.-fe_mul121666 #endif /* WOLFSSL_ARM_ARCH_7M */ #ifndef WC_NO_CACHE_RESISTANT @@ -2534,29 +2533,29 @@ curve25519: STR r0, [sp, #160] STR r1, [sp, #164] STR r2, [sp, #168] - MOV r1, #0x0 + MOV r1, #0 STR r1, [sp, #172] - MOV r4, #0x1 - MOV r5, #0x0 - MOV r6, #0x0 - MOV r7, #0x0 - MOV r8, #0x0 - MOV r9, #0x0 - MOV r10, #0x0 - MOV r11, #0x0 + MOV r4, #1 + MOV r5, #0 + MOV r6, #0 + MOV r7, #0 + MOV r8, #0 + MOV r9, #0 + MOV r10, #0 + MOV r11, #0 STM r0, {r4, r5, r6, r7, r8, r9, r10, r11} - ADD r3, sp, #0x20 + ADD r3, sp, #32 STM r3, {r4, r5, r6, r7, r8, r9, r10, r11} - MOV r4, #0x0 + MOV r4, #0 MOV r3, sp STM r3, {r4, r5, r6, r7, r8, r9, r10, r11} ADD r3, sp, #0x40 /* Copy */ LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11} STM r3, {r4, r5, r6, r7, r8, r9, r10, r11} - MOV r1, #0x1e + MOV r1, #30 STR r1, [sp, #180] - MOV r2, #0x1c + MOV r2, #28 STR r2, [sp, #176] L_curve25519_words: L_curve25519_bits: @@ -2564,14 +2563,14 @@ L_curve25519_bits: LDR r2, [r1, r2] LDR r1, [sp, #180] LSR r2, r2, r1 - AND r2, r2, #0x1 + AND r2, r2, #1 STR r2, [sp, #184] LDR r1, [sp, #172] EOR r1, r1, r2 STR r1, [sp, #172] LDR r0, [sp, #160] /* Conditional Swap */ - RSB r1, r1, #0x0 + RSB r1, r1, #0 MOV r3, r0 ADD r12, sp, #0x40 LDM r3, {r4, r5} @@ -2624,9 +2623,9 @@ L_curve25519_bits: STM r12!, {r6, r7} LDR r1, [sp, #172] /* Conditional Swap */ - RSB r1, r1, #0x0 + RSB r1, r1, #0 MOV r3, sp - ADD r12, sp, #0x20 + ADD r12, sp, #32 LDM r3, {r4, r5} LDM r12, {r6, r7} EOR r8, r4, r6 @@ -2682,14 +2681,14 @@ L_curve25519_bits: ADD r1, sp, #0x80 LDR r0, [sp, #160] BL fe_add_sub_op - ADD r3, sp, #0x20 + ADD r3, sp, #32 ADD r2, sp, #0x40 ADD r1, sp, #0x60 MOV r0, sp BL fe_add_sub_op LDR r2, [sp, #160] ADD r1, sp, #0x60 - ADD r0, sp, #0x20 + ADD r0, sp, #32 BL fe_mul_op ADD r2, sp, #0x80 MOV r1, sp @@ -2702,7 +2701,7 @@ L_curve25519_bits: ADD r0, sp, #0x60 BL fe_sq_op MOV r3, sp - ADD r2, sp, #0x20 + ADD r2, sp, #32 MOV r1, sp ADD r0, sp, #0x40 BL fe_add_sub_op @@ -2718,18 +2717,18 @@ L_curve25519_bits: MOV r0, sp BL fe_sq_op ADD r1, sp, #0x60 - ADD r0, sp, #0x20 + ADD r0, sp, #32 BL fe_mul121666 ADD r1, sp, #0x40 ADD r0, sp, #0x40 BL fe_sq_op - ADD r2, sp, #0x20 + ADD r2, sp, #32 ADD r1, sp, #0x80 ADD r0, sp, #0x80 BL fe_add_op MOV r2, sp LDR r1, [sp, #168] - ADD r0, sp, #0x20 + ADD r0, sp, #32 BL fe_mul_op ADD r2, sp, #0x80 ADD r1, sp, #0x60 @@ -2737,16 +2736,16 @@ L_curve25519_bits: BL fe_mul_op LDR r2, [sp, #176] LDR r1, [sp, #180] - SUBS r1, r1, #0x1 + SUBS r1, r1, #1 STR r1, [sp, #180] #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BGE L_curve25519_bits #else BGE.W L_curve25519_bits #endif - MOV r1, #0x1f + MOV r1, #31 STR r1, [sp, #180] - SUBS r2, r2, #0x4 + SUBS r2, r2, #4 STR r2, [sp, #176] #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BGE L_curve25519_words @@ -2754,24 +2753,24 @@ L_curve25519_bits: BGE.W L_curve25519_words #endif /* Invert */ - ADD r1, sp, #0x0 - ADD r0, sp, #0x20 + ADD r1, sp, #0 + ADD r0, sp, #32 BL fe_sq_op - ADD r1, sp, #0x20 + ADD r1, sp, #32 ADD r0, sp, #0x40 BL fe_sq_op ADD r1, sp, #0x40 ADD r0, sp, #0x40 BL fe_sq_op ADD r2, sp, #0x40 - ADD r1, sp, #0x0 + ADD r1, sp, #0 ADD r0, sp, #0x40 BL fe_mul_op ADD r2, sp, #0x40 - ADD r1, sp, #0x20 - ADD r0, sp, #0x20 + ADD r1, sp, #32 + ADD r0, sp, #32 BL fe_mul_op - ADD r1, sp, #0x20 + ADD r1, sp, #32 ADD r0, sp, #0x60 BL fe_sq_op ADD r2, sp, #0x60 @@ -2781,14 +2780,14 @@ L_curve25519_bits: ADD r1, sp, #0x40 ADD r0, sp, #0x60 BL fe_sq_op - MOV r12, #0x4 + MOV r12, #4 L_curve25519_inv_1: ADD r1, sp, #0x60 ADD r0, sp, #0x60 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_curve25519_inv_1 #else @@ -2801,14 +2800,14 @@ L_curve25519_inv_1: ADD r1, sp, #0x40 ADD r0, sp, #0x60 BL fe_sq_op - MOV r12, #0x9 + MOV r12, #9 L_curve25519_inv_2: ADD r1, sp, #0x60 ADD r0, sp, #0x60 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_curve25519_inv_2 #else @@ -2821,14 +2820,14 @@ L_curve25519_inv_2: ADD r1, sp, #0x60 ADD r0, sp, #0x80 BL fe_sq_op - MOV r12, #0x13 + MOV r12, #19 L_curve25519_inv_3: ADD r1, sp, #0x80 ADD r0, sp, #0x80 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_curve25519_inv_3 #else @@ -2838,14 +2837,14 @@ L_curve25519_inv_3: ADD r1, sp, #0x80 ADD r0, sp, #0x60 BL fe_mul_op - MOV r12, #0xa + MOV r12, #10 L_curve25519_inv_4: ADD r1, sp, #0x60 ADD r0, sp, #0x60 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_curve25519_inv_4 #else @@ -2858,14 +2857,14 @@ L_curve25519_inv_4: ADD r1, sp, #0x40 ADD r0, sp, #0x60 BL fe_sq_op - MOV r12, #0x31 + MOV r12, #49 L_curve25519_inv_5: ADD r1, sp, #0x60 ADD r0, sp, #0x60 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_curve25519_inv_5 #else @@ -2885,7 +2884,7 @@ L_curve25519_inv_6: PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_curve25519_inv_6 #else @@ -2895,14 +2894,14 @@ L_curve25519_inv_6: ADD r1, sp, #0x80 ADD r0, sp, #0x60 BL fe_mul_op - MOV r12, #0x32 + MOV r12, #50 L_curve25519_inv_7: ADD r1, sp, #0x60 ADD r0, sp, #0x60 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_curve25519_inv_7 #else @@ -2912,22 +2911,22 @@ L_curve25519_inv_7: ADD r1, sp, #0x60 ADD r0, sp, #0x40 BL fe_mul_op - MOV r12, #0x5 + MOV r12, #5 L_curve25519_inv_8: ADD r1, sp, #0x40 ADD r0, sp, #0x40 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_curve25519_inv_8 #else BNE.N L_curve25519_inv_8 #endif - ADD r2, sp, #0x20 + ADD r2, sp, #32 ADD r1, sp, #0x40 - ADD r0, sp, #0x0 + ADD r0, sp, #0 BL fe_mul_op MOV r2, sp LDR r1, [sp, #160] @@ -2936,27 +2935,27 @@ L_curve25519_inv_8: /* Ensure result is less than modulus */ LDR r0, [sp, #160] LDM r0, {r4, r5, r6, r7, r8, r9, r10, r11} - ADDS r2, r4, #0x13 - ADCS r2, r5, #0x0 - ADCS r2, r6, #0x0 - ADCS r2, r7, #0x0 - ADCS r2, r8, #0x0 - ADCS r2, r9, #0x0 - ADCS r2, r10, #0x0 - ADC r2, r11, #0x0 + ADDS r2, r4, #19 + ADCS r2, r5, #0 + ADCS r2, r6, #0 + ADCS r2, r7, #0 + ADCS r2, r8, #0 + ADCS r2, r9, #0 + ADCS r2, r10, #0 + ADC r2, r11, #0 ASR r2, r2, #31 - AND r2, r2, #0x13 + AND r2, r2, #19 ADDS r4, r4, r2 - ADCS r5, r5, #0x0 - ADCS r6, r6, #0x0 - ADCS r7, r7, #0x0 - ADCS r8, r8, #0x0 - ADCS r9, r9, #0x0 - ADCS r10, r10, #0x0 - ADC r11, r11, #0x0 + ADCS r5, r5, #0 + ADCS r6, r6, #0 + ADCS r7, r7, #0 + ADCS r8, r8, #0 + ADCS r9, r9, #0 + ADCS r10, r10, #0 + ADC r11, r11, #0 BFC r11, #31, #1 STM r0, {r4, r5, r6, r7, r8, r9, r10, r11} - MOV r0, #0x0 + MOV r0, #0 ADD sp, sp, #0xbc POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 721 */ @@ -2973,24 +2972,24 @@ curve25519: STR r1, [sp, #160] STR r2, [sp, #172] ADD r5, sp, #0x40 - ADD r4, sp, #0x20 + ADD r4, sp, #32 STR sp, [sp, #184] STR r5, [sp, #180] STR r4, [sp, #188] - MOV r1, #0x0 + MOV r1, #0 STR r1, [sp, #164] - MOV r4, #0x1 - MOV r5, #0x0 - MOV r6, #0x0 - MOV r7, #0x0 - MOV r8, #0x0 - MOV r9, #0x0 - MOV r10, #0x0 - MOV r11, #0x0 + MOV r4, #1 + MOV r5, #0 + MOV r6, #0 + MOV r7, #0 + MOV r8, #0 + MOV r9, #0 + MOV r10, #0 + MOV r11, #0 STM r0, {r4, r5, r6, r7, r8, r9, r10, r11} - ADD r3, sp, #0x20 + ADD r3, sp, #32 STM r3, {r4, r5, r6, r7, r8, r9, r10, r11} - MOV r4, #0x0 + MOV r4, #0 MOV r3, sp STM r3, {r4, r5, r6, r7, r8, r9, r10, r11} ADD r3, sp, #0x40 @@ -3001,10 +3000,10 @@ curve25519: L_curve25519_bits: STR r2, [sp, #168] LDR r1, [sp, #160] - AND r4, r2, #0x1f + AND r4, r2, #31 LSR r2, r2, #5 LDR r2, [r1, r2, LSL #2] - RSB r4, r4, #0x1f + RSB r4, r4, #31 LSL r2, r2, r4 LDR r1, [sp, #164] EOR r1, r1, r2 @@ -3082,7 +3081,7 @@ L_curve25519_bits: LDR r0, [sp, #184] BL fe_mul_op LDR r2, [sp, #168] - SUBS r2, r2, #0x1 + SUBS r2, r2, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BGE L_curve25519_bits #else @@ -3094,24 +3093,24 @@ L_curve25519_bits: LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11} STM sp, {r4, r5, r6, r7, r8, r9, r10, r11} /* Invert */ - ADD r1, sp, #0x0 - ADD r0, sp, #0x20 + ADD r1, sp, #0 + ADD r0, sp, #32 BL fe_sq_op - ADD r1, sp, #0x20 + ADD r1, sp, #32 ADD r0, sp, #0x40 BL fe_sq_op ADD r1, sp, #0x40 ADD r0, sp, #0x40 BL fe_sq_op ADD r2, sp, #0x40 - ADD r1, sp, #0x0 + ADD r1, sp, #0 ADD r0, sp, #0x40 BL fe_mul_op ADD r2, sp, #0x40 - ADD r1, sp, #0x20 - ADD r0, sp, #0x20 + ADD r1, sp, #32 + ADD r0, sp, #32 BL fe_mul_op - ADD r1, sp, #0x20 + ADD r1, sp, #32 ADD r0, sp, #0x60 BL fe_sq_op ADD r2, sp, #0x60 @@ -3121,14 +3120,14 @@ L_curve25519_bits: ADD r1, sp, #0x40 ADD r0, sp, #0x60 BL fe_sq_op - MOV r12, #0x4 + MOV r12, #4 L_curve25519_inv_1: ADD r1, sp, #0x60 ADD r0, sp, #0x60 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_curve25519_inv_1 #else @@ -3141,14 +3140,14 @@ L_curve25519_inv_1: ADD r1, sp, #0x40 ADD r0, sp, #0x60 BL fe_sq_op - MOV r12, #0x9 + MOV r12, #9 L_curve25519_inv_2: ADD r1, sp, #0x60 ADD r0, sp, #0x60 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_curve25519_inv_2 #else @@ -3161,14 +3160,14 @@ L_curve25519_inv_2: ADD r1, sp, #0x60 ADD r0, sp, #0x80 BL fe_sq_op - MOV r12, #0x13 + MOV r12, #19 L_curve25519_inv_3: ADD r1, sp, #0x80 ADD r0, sp, #0x80 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_curve25519_inv_3 #else @@ -3178,14 +3177,14 @@ L_curve25519_inv_3: ADD r1, sp, #0x80 ADD r0, sp, #0x60 BL fe_mul_op - MOV r12, #0xa + MOV r12, #10 L_curve25519_inv_4: ADD r1, sp, #0x60 ADD r0, sp, #0x60 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_curve25519_inv_4 #else @@ -3198,14 +3197,14 @@ L_curve25519_inv_4: ADD r1, sp, #0x40 ADD r0, sp, #0x60 BL fe_sq_op - MOV r12, #0x31 + MOV r12, #49 L_curve25519_inv_5: ADD r1, sp, #0x60 ADD r0, sp, #0x60 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_curve25519_inv_5 #else @@ -3225,7 +3224,7 @@ L_curve25519_inv_6: PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_curve25519_inv_6 #else @@ -3235,14 +3234,14 @@ L_curve25519_inv_6: ADD r1, sp, #0x80 ADD r0, sp, #0x60 BL fe_mul_op - MOV r12, #0x32 + MOV r12, #50 L_curve25519_inv_7: ADD r1, sp, #0x60 ADD r0, sp, #0x60 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_curve25519_inv_7 #else @@ -3252,22 +3251,22 @@ L_curve25519_inv_7: ADD r1, sp, #0x60 ADD r0, sp, #0x40 BL fe_mul_op - MOV r12, #0x5 + MOV r12, #5 L_curve25519_inv_8: ADD r1, sp, #0x40 ADD r0, sp, #0x40 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_curve25519_inv_8 #else BNE.N L_curve25519_inv_8 #endif - ADD r2, sp, #0x20 + ADD r2, sp, #32 ADD r1, sp, #0x40 - ADD r0, sp, #0x0 + ADD r0, sp, #0 BL fe_mul_op LDR r2, [sp, #184] LDR r1, [sp, #176] @@ -3276,27 +3275,27 @@ L_curve25519_inv_8: /* Ensure result is less than modulus */ LDR r0, [sp, #176] LDM r0, {r4, r5, r6, r7, r8, r9, r10, r11} - ADDS r2, r4, #0x13 - ADCS r2, r5, #0x0 - ADCS r2, r6, #0x0 - ADCS r2, r7, #0x0 - ADCS r2, r8, #0x0 - ADCS r2, r9, #0x0 - ADCS r2, r10, #0x0 - ADC r2, r11, #0x0 + ADDS r2, r4, #19 + ADCS r2, r5, #0 + ADCS r2, r6, #0 + ADCS r2, r7, #0 + ADCS r2, r8, #0 + ADCS r2, r9, #0 + ADCS r2, r10, #0 + ADC r2, r11, #0 ASR r2, r2, #31 - AND r2, r2, #0x13 + AND r2, r2, #19 ADDS r4, r4, r2 - ADCS r5, r5, #0x0 - ADCS r6, r6, #0x0 - ADCS r7, r7, #0x0 - ADCS r8, r8, #0x0 - ADCS r9, r9, #0x0 - ADCS r10, r10, #0x0 - ADC r11, r11, #0x0 + ADCS r5, r5, #0 + ADCS r6, r6, #0 + ADCS r7, r7, #0 + ADCS r8, r8, #0 + ADCS r9, r9, #0 + ADCS r10, r10, #0 + ADC r11, r11, #0 BFC r11, #31, #1 STM r0, {r4, r5, r6, r7, r8, r9, r10, r11} - MOV r0, #0x0 + MOV r0, #0 ADD sp, sp, #0xc0 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 597 */ @@ -3318,16 +3317,16 @@ fe_invert: MOV r0, sp BL fe_sq_op MOV r1, sp - ADD r0, sp, #0x20 + ADD r0, sp, #32 BL fe_sq_op - ADD r1, sp, #0x20 - ADD r0, sp, #0x20 + ADD r1, sp, #32 + ADD r0, sp, #32 BL fe_sq_op - ADD r2, sp, #0x20 + ADD r2, sp, #32 LDR r1, [sp, #132] - ADD r0, sp, #0x20 + ADD r0, sp, #32 BL fe_mul_op - ADD r2, sp, #0x20 + ADD r2, sp, #32 MOV r1, sp MOV r0, sp BL fe_mul_op @@ -3335,60 +3334,60 @@ fe_invert: ADD r0, sp, #0x40 BL fe_sq_op ADD r2, sp, #0x40 - ADD r1, sp, #0x20 - ADD r0, sp, #0x20 + ADD r1, sp, #32 + ADD r0, sp, #32 BL fe_mul_op - ADD r1, sp, #0x20 + ADD r1, sp, #32 ADD r0, sp, #0x40 BL fe_sq_op - MOV r12, #0x4 + MOV r12, #4 L_fe_invert1: ADD r1, sp, #0x40 ADD r0, sp, #0x40 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_fe_invert1 #else BNE.N L_fe_invert1 #endif - ADD r2, sp, #0x20 + ADD r2, sp, #32 ADD r1, sp, #0x40 - ADD r0, sp, #0x20 + ADD r0, sp, #32 BL fe_mul_op - ADD r1, sp, #0x20 + ADD r1, sp, #32 ADD r0, sp, #0x40 BL fe_sq_op - MOV r12, #0x9 + MOV r12, #9 L_fe_invert2: ADD r1, sp, #0x40 ADD r0, sp, #0x40 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_fe_invert2 #else BNE.N L_fe_invert2 #endif - ADD r2, sp, #0x20 + ADD r2, sp, #32 ADD r1, sp, #0x40 ADD r0, sp, #0x40 BL fe_mul_op ADD r1, sp, #0x40 ADD r0, sp, #0x60 BL fe_sq_op - MOV r12, #0x13 + MOV r12, #19 L_fe_invert3: ADD r1, sp, #0x60 ADD r0, sp, #0x60 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_fe_invert3 #else @@ -3398,40 +3397,40 @@ L_fe_invert3: ADD r1, sp, #0x60 ADD r0, sp, #0x40 BL fe_mul_op - MOV r12, #0xa + MOV r12, #10 L_fe_invert4: ADD r1, sp, #0x40 ADD r0, sp, #0x40 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_fe_invert4 #else BNE.N L_fe_invert4 #endif - ADD r2, sp, #0x20 + ADD r2, sp, #32 ADD r1, sp, #0x40 - ADD r0, sp, #0x20 + ADD r0, sp, #32 BL fe_mul_op - ADD r1, sp, #0x20 + ADD r1, sp, #32 ADD r0, sp, #0x40 BL fe_sq_op - MOV r12, #0x31 + MOV r12, #49 L_fe_invert5: ADD r1, sp, #0x40 ADD r0, sp, #0x40 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_fe_invert5 #else BNE.N L_fe_invert5 #endif - ADD r2, sp, #0x20 + ADD r2, sp, #32 ADD r1, sp, #0x40 ADD r0, sp, #0x40 BL fe_mul_op @@ -3445,7 +3444,7 @@ L_fe_invert6: PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_fe_invert6 #else @@ -3455,38 +3454,38 @@ L_fe_invert6: ADD r1, sp, #0x60 ADD r0, sp, #0x40 BL fe_mul_op - MOV r12, #0x32 + MOV r12, #50 L_fe_invert7: ADD r1, sp, #0x40 ADD r0, sp, #0x40 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_fe_invert7 #else BNE.N L_fe_invert7 #endif - ADD r2, sp, #0x20 + ADD r2, sp, #32 ADD r1, sp, #0x40 - ADD r0, sp, #0x20 + ADD r0, sp, #32 BL fe_mul_op - MOV r12, #0x5 + MOV r12, #5 L_fe_invert8: - ADD r1, sp, #0x20 - ADD r0, sp, #0x20 + ADD r1, sp, #32 + ADD r0, sp, #32 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_fe_invert8 #else BNE.N L_fe_invert8 #endif MOV r2, sp - ADD r1, sp, #0x20 + ADD r1, sp, #32 LDR r0, [sp, #128] BL fe_mul_op LDR r1, [sp, #132] @@ -3505,7 +3504,7 @@ fe_sq2: SUB sp, sp, #0x44 STR r0, [sp, #64] /* Square * 2 */ - MOV r0, #0x0 + MOV r0, #0 LDR r12, [r1] /* A[0] * A[1] */ LDR lr, [r1, #4] @@ -3521,137 +3520,137 @@ fe_sq2: UMULL r10, r3, r12, lr /* A[0] * A[2] */ LDR lr, [r1, #8] - MOV r11, #0x0 + MOV r11, #0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 /* A[0] * A[4] */ LDR lr, [r1, #16] - ADCS r7, r7, #0x0 - ADC r11, r0, #0x0 + ADCS r7, r7, #0 + ADC r11, r0, #0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 /* A[0] * A[6] */ LDR lr, [r1, #24] - ADCS r9, r9, #0x0 - ADC r11, r0, #0x0 + ADCS r9, r9, #0 + ADC r11, r0, #0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 - ADCS r3, r3, #0x0 + ADCS r3, r3, #0 STR r4, [sp, #4] STR r5, [sp, #8] /* A[1] * A[2] */ LDR r12, [r1, #4] LDR lr, [r1, #8] - MOV r11, #0x0 + MOV r11, #0 UMLAL r6, r11, r12, lr STR r6, [sp, #12] ADDS r7, r7, r11 /* A[1] * A[3] */ LDR lr, [r1, #12] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r7, r11, r12, lr STR r7, [sp, #16] ADDS r8, r8, r11 /* A[1] * A[4] */ LDR lr, [r1, #16] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 /* A[1] * A[5] */ LDR lr, [r1, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 /* A[1] * A[6] */ LDR lr, [r1, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 /* A[1] * A[7] */ LDR lr, [r1, #28] - ADC r4, r0, #0x0 + ADC r4, r0, #0 UMLAL r3, r4, r12, lr /* A[2] * A[3] */ LDR r12, [r1, #8] LDR lr, [r1, #12] - MOV r11, #0x0 + MOV r11, #0 UMLAL r8, r11, r12, lr STR r8, [sp, #20] ADDS r9, r9, r11 /* A[2] * A[4] */ LDR lr, [r1, #16] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r9, r11, r12, lr STR r9, [sp, #24] ADDS r10, r10, r11 /* A[2] * A[5] */ LDR lr, [r1, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 /* A[2] * A[6] */ LDR lr, [r1, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 /* A[2] * A[7] */ LDR lr, [r1, #28] - ADC r5, r0, #0x0 + ADC r5, r0, #0 UMLAL r4, r5, r12, lr /* A[3] * A[4] */ LDR r12, [r1, #12] LDR lr, [r1, #16] - MOV r11, #0x0 + MOV r11, #0 UMLAL r10, r11, r12, lr STR r10, [sp, #28] ADDS r3, r3, r11 /* A[3] * A[5] */ LDR lr, [r1, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 /* A[3] * A[6] */ LDR lr, [r1, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 /* A[3] * A[7] */ LDR lr, [r1, #28] - ADC r6, r0, #0x0 + ADC r6, r0, #0 UMLAL r5, r6, r12, lr /* A[4] * A[5] */ LDR r12, [r1, #16] LDR lr, [r1, #20] - MOV r11, #0x0 + MOV r11, #0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 /* A[4] * A[6] */ LDR lr, [r1, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 /* A[4] * A[7] */ LDR lr, [r1, #28] - ADC r7, r0, #0x0 + ADC r7, r0, #0 UMLAL r6, r7, r12, lr /* A[5] * A[6] */ LDR r12, [r1, #20] LDR lr, [r1, #24] - MOV r11, #0x0 + MOV r11, #0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 /* A[5] * A[7] */ LDR lr, [r1, #28] - ADC r8, r0, #0x0 + ADC r8, r0, #0 UMLAL r7, r8, r12, lr /* A[6] * A[7] */ LDR r12, [r1, #24] LDR lr, [r1, #28] - MOV r9, #0x0 + MOV r9, #0 UMLAL r8, r9, r12, lr - ADD lr, sp, #0x20 + ADD lr, sp, #32 STM lr, {r3, r4, r5, r6, r7, r8, r9} - ADD lr, sp, #0x4 + ADD lr, sp, #4 LDM lr, {r4, r5, r6, r7, r8, r9, r10} ADDS r4, r4, r4 ADCS r5, r5, r5 @@ -3669,9 +3668,9 @@ fe_sq2: ADCS r7, r7, r7 ADCS r8, r8, r8 ADCS r9, r9, r9 - ADC r10, r0, #0x0 + ADC r10, r0, #0 STM lr, {r3, r4, r5, r6, r7, r8, r9, r10} - ADD lr, sp, #0x4 + ADD lr, sp, #4 LDM lr, {r4, r5, r6, r7, r8, r9, r10} MOV lr, sp /* A[0] * A[0] */ @@ -3680,98 +3679,98 @@ fe_sq2: ADDS r4, r4, r11 /* A[1] * A[1] */ LDR r12, [r1, #4] - ADCS r5, r5, #0x0 - ADC r11, r0, #0x0 + ADCS r5, r5, #0 + ADC r11, r0, #0 UMLAL r5, r11, r12, r12 ADDS r6, r6, r11 /* A[2] * A[2] */ LDR r12, [r1, #8] - ADCS r7, r7, #0x0 - ADC r11, r0, #0x0 + ADCS r7, r7, #0 + ADC r11, r0, #0 UMLAL r7, r11, r12, r12 ADDS r8, r8, r11 /* A[3] * A[3] */ LDR r12, [r1, #12] - ADCS r9, r9, #0x0 - ADC r11, r0, #0x0 + ADCS r9, r9, #0 + ADC r11, r0, #0 UMLAL r9, r11, r12, r12 ADDS r10, r10, r11 STM lr!, {r3, r4, r5, r6, r7, r8, r9, r10} LDM lr, {r3, r4, r5, r6, r7, r8, r9, r10} /* A[4] * A[4] */ LDR r12, [r1, #16] - ADCS r3, r3, #0x0 - ADC r11, r0, #0x0 + ADCS r3, r3, #0 + ADC r11, r0, #0 UMLAL r3, r11, r12, r12 ADDS r4, r4, r11 /* A[5] * A[5] */ LDR r12, [r1, #20] - ADCS r5, r5, #0x0 - ADC r11, r0, #0x0 + ADCS r5, r5, #0 + ADC r11, r0, #0 UMLAL r5, r11, r12, r12 ADDS r6, r6, r11 /* A[6] * A[6] */ LDR r12, [r1, #24] - ADCS r7, r7, #0x0 - ADC r11, r0, #0x0 + ADCS r7, r7, #0 + ADC r11, r0, #0 UMLAL r7, r11, r12, r12 ADDS r8, r8, r11 /* A[7] * A[7] */ LDR r12, [r1, #28] - ADCS r9, r9, #0x0 - ADC r10, r10, #0x0 + ADCS r9, r9, #0 + ADC r10, r10, #0 UMLAL r9, r10, r12, r12 /* Reduce */ LDR r2, [sp, #28] MOV lr, sp - MOV r12, #0x26 + MOV r12, #38 UMULL r10, r11, r10, r12 ADDS r10, r10, r2 - ADC r11, r11, #0x0 - MOV r12, #0x13 + ADC r11, r11, #0 + MOV r12, #19 LSL r11, r11, #1 ORR r11, r11, r10, LSR #31 MUL r11, r11, r12 LDM lr!, {r1, r2} - MOV r12, #0x26 + MOV r12, #38 ADDS r1, r1, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r1, r11, r3, r12 ADDS r2, r2, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r2, r11, r4, r12 LDM lr!, {r3, r4} ADDS r3, r3, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r5, r12 ADDS r4, r4, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r4, r11, r6, r12 LDM lr!, {r5, r6} ADDS r5, r5, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r5, r11, r7, r12 ADDS r6, r6, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r6, r11, r8, r12 LDM lr!, {r7, r8} ADDS r7, r7, r11 - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r7, r11, r9, r12 BFC r10, #31, #1 ADDS r8, r10, r11 /* Reduce if top bit set */ - MOV r12, #0x13 + MOV r12, #19 AND r11, r12, r8, ASR #31 ADDS r1, r1, r11 - ADCS r2, r2, #0x0 - ADCS r3, r3, #0x0 - ADCS r4, r4, #0x0 - ADCS r5, r5, #0x0 - ADCS r6, r6, #0x0 + ADCS r2, r2, #0 + ADCS r3, r3, #0 + ADCS r4, r4, #0 + ADCS r5, r5, #0 + ADCS r6, r6, #0 BFC r8, #31, #1 - ADCS r7, r7, #0x0 - ADC r8, r8, #0x0 + ADCS r7, r7, #0 + ADC r8, r8, #0 /* Double */ ADDS r1, r1, r1 ADCS r2, r2, r2 @@ -3782,17 +3781,17 @@ fe_sq2: ADCS r7, r7, r7 ADC r8, r8, r8 /* Reduce if top bit set */ - MOV r12, #0x13 + MOV r12, #19 AND r11, r12, r8, ASR #31 ADDS r1, r1, r11 - ADCS r2, r2, #0x0 - ADCS r3, r3, #0x0 - ADCS r4, r4, #0x0 - ADCS r5, r5, #0x0 - ADCS r6, r6, #0x0 + ADCS r2, r2, #0 + ADCS r3, r3, #0 + ADCS r4, r4, #0 + ADCS r5, r5, #0 + ADCS r6, r6, #0 BFC r8, #31, #1 - ADCS r7, r7, #0x0 - ADC r8, r8, #0x0 + ADCS r7, r7, #0 + ADC r8, r8, #0 /* Store */ LDR r0, [sp, #64] STM r0, {r1, r2, r3, r4, r5, r6, r7, r8} @@ -3807,14 +3806,14 @@ fe_sq2: .type fe_sq2, %function fe_sq2: PUSH {lr} - SUB sp, sp, #0x24 + SUB sp, sp, #36 STRD r0, r1, [sp, #28] LDM r1, {r0, r1, r2, r3, r4, r5, r6, r7} /* Square * 2 */ UMULL r9, r10, r0, r0 UMULL r11, r12, r0, r1 ADDS r11, r11, r11 - MOV lr, #0x0 + MOV lr, #0 UMAAL r10, r11, lr, lr STM sp, {r9, r10} MOV r8, lr @@ -3891,14 +3890,14 @@ fe_sq2: /* R[14] = r9 */ /* R[15] = r7 */ /* Reduce */ - MOV r6, #0x25 + MOV r6, #37 UMAAL r7, r0, r7, r6 - MOV r6, #0x13 + MOV r6, #19 LSL r0, r0, #1 ORR r0, r0, r7, LSR #31 MUL lr, r0, r6 POP {r0, r1} - MOV r6, #0x26 + MOV r6, #38 UMAAL r0, lr, r12, r6 UMAAL r1, lr, r11, r6 MOV r12, r3 @@ -3914,17 +3913,17 @@ fe_sq2: UMAAL r6, lr, r9, r12 ADD r7, r7, lr /* Reduce if top bit set */ - MOV r11, #0x13 + MOV r11, #19 AND r12, r11, r7, ASR #31 ADDS r0, r0, r12 - ADCS r1, r1, #0x0 - ADCS r2, r2, #0x0 - ADCS r3, r3, #0x0 - ADCS r4, r4, #0x0 - ADCS r5, r5, #0x0 + ADCS r1, r1, #0 + ADCS r2, r2, #0 + ADCS r3, r3, #0 + ADCS r4, r4, #0 + ADCS r5, r5, #0 BFC r7, #31, #1 - ADCS r6, r6, #0x0 - ADC r7, r7, #0x0 + ADCS r6, r6, #0 + ADC r7, r7, #0 /* Double */ ADDS r0, r0, r0 ADCS r1, r1, r1 @@ -3935,17 +3934,17 @@ fe_sq2: ADCS r6, r6, r6 ADC r7, r7, r7 /* Reduce if top bit set */ - MOV r11, #0x13 + MOV r11, #19 AND r12, r11, r7, ASR #31 ADDS r0, r0, r12 - ADCS r1, r1, #0x0 - ADCS r2, r2, #0x0 - ADCS r3, r3, #0x0 - ADCS r4, r4, #0x0 - ADCS r5, r5, #0x0 + ADCS r1, r1, #0 + ADCS r2, r2, #0 + ADCS r3, r3, #0 + ADCS r4, r4, #0 + ADCS r5, r5, #0 BFC r7, #31, #1 - ADCS r6, r6, #0x0 - ADC r7, r7, #0x0 + ADCS r6, r6, #0 + ADC r7, r7, #0 POP {r12, lr} /* Store */ STM r12, {r0, r1, r2, r3, r4, r5, r6, r7} @@ -3969,16 +3968,16 @@ fe_pow22523: MOV r0, sp BL fe_sq_op MOV r1, sp - ADD r0, sp, #0x20 + ADD r0, sp, #32 BL fe_sq_op - ADD r1, sp, #0x20 - ADD r0, sp, #0x20 + ADD r1, sp, #32 + ADD r0, sp, #32 BL fe_sq_op - ADD r2, sp, #0x20 + ADD r2, sp, #32 LDR r1, [sp, #100] - ADD r0, sp, #0x20 + ADD r0, sp, #32 BL fe_mul_op - ADD r2, sp, #0x20 + ADD r2, sp, #32 MOV r1, sp MOV r0, sp BL fe_mul_op @@ -3986,107 +3985,107 @@ fe_pow22523: MOV r0, sp BL fe_sq_op MOV r2, sp - ADD r1, sp, #0x20 + ADD r1, sp, #32 MOV r0, sp BL fe_mul_op MOV r1, sp - ADD r0, sp, #0x20 + ADD r0, sp, #32 BL fe_sq_op - MOV r12, #0x4 + MOV r12, #4 L_fe_pow22523_1: - ADD r1, sp, #0x20 - ADD r0, sp, #0x20 + ADD r1, sp, #32 + ADD r0, sp, #32 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_fe_pow22523_1 #else BNE.N L_fe_pow22523_1 #endif MOV r2, sp - ADD r1, sp, #0x20 + ADD r1, sp, #32 MOV r0, sp BL fe_mul_op MOV r1, sp - ADD r0, sp, #0x20 + ADD r0, sp, #32 BL fe_sq_op - MOV r12, #0x9 + MOV r12, #9 L_fe_pow22523_2: - ADD r1, sp, #0x20 - ADD r0, sp, #0x20 + ADD r1, sp, #32 + ADD r0, sp, #32 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_fe_pow22523_2 #else BNE.N L_fe_pow22523_2 #endif MOV r2, sp - ADD r1, sp, #0x20 - ADD r0, sp, #0x20 + ADD r1, sp, #32 + ADD r0, sp, #32 BL fe_mul_op - ADD r1, sp, #0x20 + ADD r1, sp, #32 ADD r0, sp, #0x40 BL fe_sq_op - MOV r12, #0x13 + MOV r12, #19 L_fe_pow22523_3: ADD r1, sp, #0x40 ADD r0, sp, #0x40 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_fe_pow22523_3 #else BNE.N L_fe_pow22523_3 #endif - ADD r2, sp, #0x20 + ADD r2, sp, #32 ADD r1, sp, #0x40 - ADD r0, sp, #0x20 + ADD r0, sp, #32 BL fe_mul_op - MOV r12, #0xa + MOV r12, #10 L_fe_pow22523_4: - ADD r1, sp, #0x20 - ADD r0, sp, #0x20 + ADD r1, sp, #32 + ADD r0, sp, #32 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_fe_pow22523_4 #else BNE.N L_fe_pow22523_4 #endif MOV r2, sp - ADD r1, sp, #0x20 + ADD r1, sp, #32 MOV r0, sp BL fe_mul_op MOV r1, sp - ADD r0, sp, #0x20 + ADD r0, sp, #32 BL fe_sq_op - MOV r12, #0x31 + MOV r12, #49 L_fe_pow22523_5: - ADD r1, sp, #0x20 - ADD r0, sp, #0x20 + ADD r1, sp, #32 + ADD r0, sp, #32 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_fe_pow22523_5 #else BNE.N L_fe_pow22523_5 #endif MOV r2, sp - ADD r1, sp, #0x20 - ADD r0, sp, #0x20 + ADD r1, sp, #32 + ADD r0, sp, #32 BL fe_mul_op - ADD r1, sp, #0x20 + ADD r1, sp, #32 ADD r0, sp, #0x40 BL fe_sq_op MOV r12, #0x63 @@ -4096,41 +4095,41 @@ L_fe_pow22523_6: PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_fe_pow22523_6 #else BNE.N L_fe_pow22523_6 #endif - ADD r2, sp, #0x20 + ADD r2, sp, #32 ADD r1, sp, #0x40 - ADD r0, sp, #0x20 + ADD r0, sp, #32 BL fe_mul_op - MOV r12, #0x32 + MOV r12, #50 L_fe_pow22523_7: - ADD r1, sp, #0x20 - ADD r0, sp, #0x20 + ADD r1, sp, #32 + ADD r0, sp, #32 PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_fe_pow22523_7 #else BNE.N L_fe_pow22523_7 #endif MOV r2, sp - ADD r1, sp, #0x20 + ADD r1, sp, #32 MOV r0, sp BL fe_mul_op - MOV r12, #0x2 + MOV r12, #2 L_fe_pow22523_8: MOV r1, sp MOV r0, sp PUSH {r12} BL fe_sq_op POP {r12} - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_fe_pow22523_8 #else @@ -4152,7 +4151,7 @@ L_fe_pow22523_8: .type ge_p1p1_to_p2, %function ge_p1p1_to_p2: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} - SUB sp, sp, #0x8 + SUB sp, sp, #8 STR r0, [sp] STR r1, [sp, #4] ADD r2, r1, #0x60 @@ -4160,8 +4159,8 @@ ge_p1p1_to_p2: LDR r0, [sp] LDR r1, [sp, #4] ADD r2, r1, #0x40 - ADD r1, r1, #0x20 - ADD r0, r0, #0x20 + ADD r1, r1, #32 + ADD r0, r0, #32 BL fe_mul_op LDR r0, [sp] LDR r1, [sp, #4] @@ -4169,7 +4168,7 @@ ge_p1p1_to_p2: ADD r1, r1, #0x40 ADD r0, r0, #0x40 BL fe_mul_op - ADD sp, sp, #0x8 + ADD sp, sp, #8 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 53 */ .size ge_p1p1_to_p2,.-ge_p1p1_to_p2 @@ -4179,7 +4178,7 @@ ge_p1p1_to_p2: .type ge_p1p1_to_p3, %function ge_p1p1_to_p3: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} - SUB sp, sp, #0x8 + SUB sp, sp, #8 STR r0, [sp] STR r1, [sp, #4] ADD r2, r1, #0x60 @@ -4187,8 +4186,8 @@ ge_p1p1_to_p3: LDR r0, [sp] LDR r1, [sp, #4] ADD r2, r1, #0x40 - ADD r1, r1, #0x20 - ADD r0, r0, #0x20 + ADD r1, r1, #32 + ADD r0, r0, #32 BL fe_mul_op LDR r0, [sp] LDR r1, [sp, #4] @@ -4198,10 +4197,10 @@ ge_p1p1_to_p3: BL fe_mul_op LDR r0, [sp] LDR r1, [sp, #4] - ADD r2, r1, #0x20 + ADD r2, r1, #32 ADD r0, r0, #0x60 BL fe_mul_op - ADD sp, sp, #0x8 + ADD sp, sp, #8 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 63 */ .size ge_p1p1_to_p3,.-ge_p1p1_to_p3 @@ -4211,19 +4210,19 @@ ge_p1p1_to_p3: .type ge_p2_dbl, %function ge_p2_dbl: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} - SUB sp, sp, #0x8 + SUB sp, sp, #8 STR r0, [sp] STR r1, [sp, #4] BL fe_sq_op LDR r0, [sp] LDR r1, [sp, #4] - ADD r1, r1, #0x20 + ADD r1, r1, #32 ADD r0, r0, #0x40 BL fe_sq_op LDR r0, [sp] LDR r1, [sp, #4] - ADD r2, r1, #0x20 - ADD r0, r0, #0x20 + ADD r2, r1, #32 + ADD r0, r0, #32 BL fe_add_op MOV r1, r0 ADD r0, r0, #0x40 @@ -4232,20 +4231,20 @@ ge_p2_dbl: MOV r3, r0 ADD r2, r0, #0x40 ADD r1, r0, #0x40 - ADD r0, r0, #0x20 + ADD r0, r0, #32 BL fe_add_sub_op MOV r2, r0 ADD r1, r0, #0x40 - SUB r0, r0, #0x20 + SUB r0, r0, #32 BL fe_sub_op LDR r1, [sp, #4] ADD r1, r1, #0x40 ADD r0, r0, #0x60 BL fe_sq2 - SUB r2, r0, #0x20 + SUB r2, r0, #32 MOV r1, r0 BL fe_sub_op - ADD sp, sp, #0x8 + ADD sp, sp, #8 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 87 */ .size ge_p2_dbl,.-ge_p2_dbl @@ -4255,27 +4254,27 @@ ge_p2_dbl: .type ge_madd, %function ge_madd: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} - SUB sp, sp, #0xc + SUB sp, sp, #12 STR r0, [sp] STR r1, [sp, #4] STR r2, [sp, #8] MOV r2, r1 - ADD r1, r1, #0x20 + ADD r1, r1, #32 BL fe_add_op LDR r1, [sp, #4] MOV r2, r1 - ADD r1, r1, #0x20 - ADD r0, r0, #0x20 + ADD r1, r1, #32 + ADD r0, r0, #32 BL fe_sub_op LDR r2, [sp, #8] - SUB r1, r0, #0x20 - ADD r0, r0, #0x20 + SUB r1, r0, #32 + ADD r0, r0, #32 BL fe_mul_op LDR r0, [sp] LDR r2, [sp, #8] - ADD r2, r2, #0x20 - ADD r1, r0, #0x20 - ADD r0, r0, #0x20 + ADD r2, r2, #32 + ADD r1, r0, #32 + ADD r0, r0, #32 BL fe_mul_op LDR r0, [sp] LDR r1, [sp, #8] @@ -4285,14 +4284,14 @@ ge_madd: ADD r0, r0, #0x60 BL fe_mul_op LDR r0, [sp] - ADD r3, r0, #0x20 + ADD r3, r0, #32 ADD r2, r0, #0x40 MOV r1, r0 - ADD r0, r0, #0x20 + ADD r0, r0, #32 BL fe_add_sub_op LDR r1, [sp, #4] ADD r1, r1, #0x40 - ADD r0, r0, #0x20 + ADD r0, r0, #32 /* Double */ LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11} ADDS r4, r4, r4 @@ -4302,28 +4301,28 @@ ge_madd: ADCS r8, r8, r8 ADCS r9, r9, r9 ADCS r10, r10, r10 - MOV lr, #0x0 + MOV lr, #0 ADCS r11, r11, r11 - ADC lr, lr, #0x0 - MOV r12, #0x13 + ADC lr, lr, #0 + MOV r12, #19 LSL lr, lr, #1 ORR lr, lr, r11, LSR #31 MUL r12, lr, r12 ADDS r4, r4, r12 - ADCS r5, r5, #0x0 - ADCS r6, r6, #0x0 - ADCS r7, r7, #0x0 - ADCS r8, r8, #0x0 - ADCS r9, r9, #0x0 + ADCS r5, r5, #0 + ADCS r6, r6, #0 + ADCS r7, r7, #0 + ADCS r8, r8, #0 + ADCS r9, r9, #0 BFC r11, #31, #1 - ADCS r10, r10, #0x0 - ADC r11, r11, #0x0 + ADCS r10, r10, #0 + ADC r11, r11, #0 STM r0, {r4, r5, r6, r7, r8, r9, r10, r11} /* Done Double */ - ADD r3, r0, #0x20 - ADD r1, r0, #0x20 + ADD r3, r0, #32 + ADD r1, r0, #32 BL fe_add_sub_op - ADD sp, sp, #0xc + ADD sp, sp, #12 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 136 */ .size ge_madd,.-ge_madd @@ -4333,27 +4332,27 @@ ge_madd: .type ge_msub, %function ge_msub: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} - SUB sp, sp, #0xc + SUB sp, sp, #12 STR r0, [sp] STR r1, [sp, #4] STR r2, [sp, #8] MOV r2, r1 - ADD r1, r1, #0x20 + ADD r1, r1, #32 BL fe_add_op LDR r1, [sp, #4] MOV r2, r1 - ADD r1, r1, #0x20 - ADD r0, r0, #0x20 + ADD r1, r1, #32 + ADD r0, r0, #32 BL fe_sub_op LDR r2, [sp, #8] - ADD r2, r2, #0x20 - SUB r1, r0, #0x20 - ADD r0, r0, #0x20 + ADD r2, r2, #32 + SUB r1, r0, #32 + ADD r0, r0, #32 BL fe_mul_op LDR r0, [sp] LDR r2, [sp, #8] - ADD r1, r0, #0x20 - ADD r0, r0, #0x20 + ADD r1, r0, #32 + ADD r0, r0, #32 BL fe_mul_op LDR r0, [sp] LDR r1, [sp, #8] @@ -4363,14 +4362,14 @@ ge_msub: ADD r0, r0, #0x60 BL fe_mul_op LDR r0, [sp] - ADD r3, r0, #0x20 + ADD r3, r0, #32 ADD r2, r0, #0x40 MOV r1, r0 - ADD r0, r0, #0x20 + ADD r0, r0, #32 BL fe_add_sub_op LDR r1, [sp, #4] ADD r1, r1, #0x40 - ADD r0, r0, #0x20 + ADD r0, r0, #32 /* Double */ LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11} ADDS r4, r4, r4 @@ -4380,29 +4379,29 @@ ge_msub: ADCS r8, r8, r8 ADCS r9, r9, r9 ADCS r10, r10, r10 - MOV lr, #0x0 + MOV lr, #0 ADCS r11, r11, r11 - ADC lr, lr, #0x0 - MOV r12, #0x13 + ADC lr, lr, #0 + MOV r12, #19 LSL lr, lr, #1 ORR lr, lr, r11, LSR #31 MUL r12, lr, r12 ADDS r4, r4, r12 - ADCS r5, r5, #0x0 - ADCS r6, r6, #0x0 - ADCS r7, r7, #0x0 - ADCS r8, r8, #0x0 - ADCS r9, r9, #0x0 + ADCS r5, r5, #0 + ADCS r6, r6, #0 + ADCS r7, r7, #0 + ADCS r8, r8, #0 + ADCS r9, r9, #0 BFC r11, #31, #1 - ADCS r10, r10, #0x0 - ADC r11, r11, #0x0 + ADCS r10, r10, #0 + ADC r11, r11, #0 STM r0, {r4, r5, r6, r7, r8, r9, r10, r11} /* Done Double */ - ADD r3, r0, #0x20 + ADD r3, r0, #32 MOV r1, r0 - ADD r0, r0, #0x20 + ADD r0, r0, #32 BL fe_add_sub_op - ADD sp, sp, #0xc + ADD sp, sp, #12 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 137 */ .size ge_msub,.-ge_msub @@ -4412,13 +4411,13 @@ ge_msub: .type ge_add, %function ge_add: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} - SUB sp, sp, #0x2c + SUB sp, sp, #44 STR r0, [sp] STR r1, [sp, #4] STR r2, [sp, #8] MOV r3, r1 - ADD r2, r1, #0x20 - ADD r1, r0, #0x20 + ADD r2, r1, #32 + ADD r1, r0, #32 BL fe_add_sub_op LDR r2, [sp, #8] MOV r1, r0 @@ -4426,9 +4425,9 @@ ge_add: BL fe_mul_op LDR r0, [sp] LDR r2, [sp, #8] - ADD r2, r2, #0x20 - ADD r1, r0, #0x20 - ADD r0, r0, #0x20 + ADD r2, r2, #32 + ADD r1, r0, #32 + ADD r0, r0, #32 BL fe_mul_op LDR r0, [sp] LDR r1, [sp, #8] @@ -4444,7 +4443,7 @@ ge_add: ADD r1, r1, #0x40 BL fe_mul_op LDR r1, [sp] - ADD r0, sp, #0xc + ADD r0, sp, #12 /* Double */ LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11} ADDS r4, r4, r4 @@ -4454,34 +4453,34 @@ ge_add: ADCS r8, r8, r8 ADCS r9, r9, r9 ADCS r10, r10, r10 - MOV lr, #0x0 + MOV lr, #0 ADCS r11, r11, r11 - ADC lr, lr, #0x0 - MOV r12, #0x13 + ADC lr, lr, #0 + MOV r12, #19 LSL lr, lr, #1 ORR lr, lr, r11, LSR #31 MUL r12, lr, r12 ADDS r4, r4, r12 - ADCS r5, r5, #0x0 - ADCS r6, r6, #0x0 - ADCS r7, r7, #0x0 - ADCS r8, r8, #0x0 - ADCS r9, r9, #0x0 + ADCS r5, r5, #0 + ADCS r6, r6, #0 + ADCS r7, r7, #0 + ADCS r8, r8, #0 + ADCS r9, r9, #0 BFC r11, #31, #1 - ADCS r10, r10, #0x0 - ADC r11, r11, #0x0 + ADCS r10, r10, #0 + ADC r11, r11, #0 STM r0, {r4, r5, r6, r7, r8, r9, r10, r11} /* Done Double */ - ADD r3, r1, #0x20 + ADD r3, r1, #32 ADD r2, r1, #0x40 - ADD r0, r1, #0x20 + ADD r0, r1, #32 BL fe_add_sub_op ADD r3, r0, #0x40 - ADD r2, sp, #0xc + ADD r2, sp, #12 ADD r1, r0, #0x40 - ADD r0, r0, #0x20 + ADD r0, r0, #32 BL fe_add_sub_op - ADD sp, sp, #0x2c + ADD sp, sp, #44 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 138 */ .size ge_add,.-ge_add @@ -4491,23 +4490,23 @@ ge_add: .type ge_sub, %function ge_sub: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} - SUB sp, sp, #0x2c + SUB sp, sp, #44 STR r0, [sp] STR r1, [sp, #4] STR r2, [sp, #8] MOV r3, r1 - ADD r2, r1, #0x20 - ADD r1, r0, #0x20 + ADD r2, r1, #32 + ADD r1, r0, #32 BL fe_add_sub_op LDR r2, [sp, #8] - ADD r2, r2, #0x20 + ADD r2, r2, #32 MOV r1, r0 ADD r0, r0, #0x40 BL fe_mul_op LDR r0, [sp] LDR r2, [sp, #8] - ADD r1, r0, #0x20 - ADD r0, r0, #0x20 + ADD r1, r0, #32 + ADD r0, r0, #32 BL fe_mul_op LDR r0, [sp] LDR r1, [sp, #8] @@ -4523,7 +4522,7 @@ ge_sub: ADD r1, r1, #0x40 BL fe_mul_op LDR r1, [sp] - ADD r0, sp, #0xc + ADD r0, sp, #12 /* Double */ LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11} ADDS r4, r4, r4 @@ -4533,34 +4532,34 @@ ge_sub: ADCS r8, r8, r8 ADCS r9, r9, r9 ADCS r10, r10, r10 - MOV lr, #0x0 + MOV lr, #0 ADCS r11, r11, r11 - ADC lr, lr, #0x0 - MOV r12, #0x13 + ADC lr, lr, #0 + MOV r12, #19 LSL lr, lr, #1 ORR lr, lr, r11, LSR #31 MUL r12, lr, r12 ADDS r4, r4, r12 - ADCS r5, r5, #0x0 - ADCS r6, r6, #0x0 - ADCS r7, r7, #0x0 - ADCS r8, r8, #0x0 - ADCS r9, r9, #0x0 + ADCS r5, r5, #0 + ADCS r6, r6, #0 + ADCS r7, r7, #0 + ADCS r8, r8, #0 + ADCS r9, r9, #0 BFC r11, #31, #1 - ADCS r10, r10, #0x0 - ADC r11, r11, #0x0 + ADCS r10, r10, #0 + ADC r11, r11, #0 STM r0, {r4, r5, r6, r7, r8, r9, r10, r11} /* Done Double */ - ADD r3, r1, #0x20 + ADD r3, r1, #32 ADD r2, r1, #0x40 - ADD r0, r1, #0x20 + ADD r0, r1, #32 BL fe_add_sub_op ADD r3, r0, #0x40 - ADD r2, sp, #0xc - ADD r1, r0, #0x20 + ADD r2, sp, #12 + ADD r1, r0, #32 ADD r0, r0, #0x40 BL fe_add_sub_op - ADD sp, sp, #0x2c + ADD sp, sp, #44 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 138 */ .size ge_sub,.-ge_sub @@ -4573,10 +4572,10 @@ ge_sub: .type sc_reduce, %function sc_reduce: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} - SUB sp, sp, #0x38 + SUB sp, sp, #56 STR r0, [sp, #52] /* Load bits 252-511 */ - ADD r0, r0, #0x1c + ADD r0, r0, #28 LDM r0, {r1, r2, r3, r4, r5, r6, r7, r8, r9} LSR lr, r9, #24 LSL r9, r9, #4 @@ -4596,206 +4595,206 @@ sc_reduce: LSL r2, r2, #4 ORR r2, r2, r1, LSR #28 BFC r9, #28, #4 - SUB r0, r0, #0x1c + SUB r0, r0, #28 /* Add order times bits 504..511 */ MOV r10, #0x2c13 MOVT r10, #0xa30a MOV r11, #0x9ce5 MOVT r11, #0xa7ed - MOV r1, #0x0 + MOV r1, #0 UMLAL r2, r1, r10, lr ADDS r3, r3, r1 - MOV r1, #0x0 - ADC r1, r1, #0x0 + MOV r1, #0 + ADC r1, r1, #0 UMLAL r3, r1, r11, lr MOV r10, #0x6329 MOVT r10, #0x5d08 MOV r11, #0x621 MOVT r11, #0xeb21 ADDS r4, r4, r1 - MOV r1, #0x0 - ADC r1, r1, #0x0 + MOV r1, #0 + ADC r1, r1, #0 UMLAL r4, r1, r10, lr ADDS r5, r5, r1 - MOV r1, #0x0 - ADC r1, r1, #0x0 + MOV r1, #0 + ADC r1, r1, #0 UMLAL r5, r1, r11, lr ADDS r6, r6, r1 - ADCS r7, r7, #0x0 - ADCS r8, r8, #0x0 - ADC r9, r9, #0x0 + ADCS r7, r7, #0 + ADCS r8, r8, #0 + ADC r9, r9, #0 SUBS r6, r6, lr - SBCS r7, r7, #0x0 - SBCS r8, r8, #0x0 - SBC r9, r9, #0x0 + SBCS r7, r7, #0 + SBCS r8, r8, #0 + SBC r9, r9, #0 /* Sub product of top 8 words and order */ MOV r12, sp MOV r1, #0x2c13 MOVT r1, #0xa30a - MOV lr, #0x0 + MOV lr, #0 LDM r0!, {r10, r11} UMLAL r10, lr, r2, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r3, r1 STM r12!, {r10, r11} LDM r0!, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r4, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r5, r1 STM r12!, {r10, r11} LDM r0!, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r6, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r7, r1 STM r12!, {r10, r11} LDM r0!, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r8, r1 BFC r11, #28, #4 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} - SUB r0, r0, #0x10 - SUB r12, r12, #0x20 + SUB r0, r0, #16 + SUB r12, r12, #32 MOV r1, #0x9ce5 MOVT r1, #0xa7ed - MOV lr, #0x0 + MOV lr, #0 LDM r12, {r10, r11} UMLAL r10, lr, r2, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r3, r1 STM r12!, {r10, r11} LDM r12, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r4, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r5, r1 STM r12!, {r10, r11} LDM r12, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r6, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r7, r1 STM r12!, {r10, r11} LDM r12, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r8, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} - SUB r12, r12, #0x20 + SUB r12, r12, #32 MOV r1, #0x6329 MOVT r1, #0x5d08 - MOV lr, #0x0 + MOV lr, #0 LDM r12, {r10, r11} UMLAL r10, lr, r2, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r3, r1 STM r12!, {r10, r11} LDM r12, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r4, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r5, r1 STM r12!, {r10, r11} LDM r12, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r6, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r7, r1 STM r12!, {r10, r11} LDM r12, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r8, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} - SUB r12, r12, #0x20 + SUB r12, r12, #32 MOV r1, #0x621 MOVT r1, #0xeb21 - MOV lr, #0x0 + MOV lr, #0 LDM r12, {r10, r11} UMLAL r10, lr, r2, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r3, r1 STM r12!, {r10, r11} LDM r12, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r4, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r5, r1 STM r12!, {r10, r11} LDM r12, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r6, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r7, r1 STM r12!, {r10, r11} LDM r12, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r8, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} - SUB r12, r12, #0x20 + SUB r12, r12, #32 /* Subtract at 4 * 32 */ LDM r12, {r10, r11} SUBS r10, r10, r2 @@ -4813,7 +4812,7 @@ sc_reduce: SBCS r10, r10, r8 SBC r11, r11, r9 STM r12!, {r10, r11} - SUB r12, r12, #0x24 + SUB r12, r12, #36 ASR lr, r11, #25 /* Conditionally subtract order starting at bit 125 */ MOV r1, #0xa0000000 @@ -4842,19 +4841,19 @@ sc_reduce: STM r12!, {r10, r11} LDM r12, {r10, r11} ADCS r10, r10, r5 - ADCS r11, r11, #0x0 + ADCS r11, r11, #0 STM r12!, {r10, r11} LDM r12, {r10, r11} - ADCS r10, r10, #0x0 - ADCS r11, r11, #0x0 + ADCS r10, r10, #0 + ADCS r11, r11, #0 STM r12!, {r10, r11} LDM r12, {r10} - ADCS r10, r10, #0x0 + ADCS r10, r10, #0 STM r12!, {r10} - SUB r0, r0, #0x10 + SUB r0, r0, #16 MOV r12, sp /* Load bits 252-376 */ - ADD r12, r12, #0x1c + ADD r12, r12, #28 LDM r12, {r1, r2, r3, r4, r5} LSL r5, r5, #4 ORR r5, r5, r4, LSR #28 @@ -4865,89 +4864,89 @@ sc_reduce: LSL r2, r2, #4 ORR r2, r2, r1, LSR #28 BFC r5, #29, #3 - SUB r12, r12, #0x1c + SUB r12, r12, #28 /* Sub product of top 4 words and order */ MOV r0, sp /* * -5cf5d3ed */ MOV r1, #0x2c13 MOVT r1, #0xa30a - MOV lr, #0x0 + MOV lr, #0 LDM r0, {r6, r7, r8, r9} UMLAL r6, lr, r2, r1 ADDS r7, r7, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r7, lr, r3, r1 ADDS r8, r8, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r8, lr, r4, r1 ADDS r9, r9, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r9, lr, r5, r1 STM r0, {r6, r7, r8, r9} - ADD r0, r0, #0x4 + ADD r0, r0, #4 /* * -5812631b */ MOV r1, #0x9ce5 MOVT r1, #0xa7ed - MOV r10, #0x0 + MOV r10, #0 LDM r0, {r6, r7, r8, r9} UMLAL r6, r10, r2, r1 ADDS r7, r7, r10 - MOV r10, #0x0 - ADC r10, r10, #0x0 + MOV r10, #0 + ADC r10, r10, #0 UMLAL r7, r10, r3, r1 ADDS r8, r8, r10 - MOV r10, #0x0 - ADC r10, r10, #0x0 + MOV r10, #0 + ADC r10, r10, #0 UMLAL r8, r10, r4, r1 ADDS r9, r9, r10 - MOV r10, #0x0 - ADC r10, r10, #0x0 + MOV r10, #0 + ADC r10, r10, #0 UMLAL r9, r10, r5, r1 STM r0, {r6, r7, r8, r9} - ADD r0, r0, #0x4 + ADD r0, r0, #4 /* * -a2f79cd7 */ MOV r1, #0x6329 MOVT r1, #0x5d08 - MOV r11, #0x0 + MOV r11, #0 LDM r0, {r6, r7, r8, r9} UMLAL r6, r11, r2, r1 ADDS r7, r7, r11 - MOV r11, #0x0 - ADC r11, r11, #0x0 + MOV r11, #0 + ADC r11, r11, #0 UMLAL r7, r11, r3, r1 ADDS r8, r8, r11 - MOV r11, #0x0 - ADC r11, r11, #0x0 + MOV r11, #0 + ADC r11, r11, #0 UMLAL r8, r11, r4, r1 ADDS r9, r9, r11 - MOV r11, #0x0 - ADC r11, r11, #0x0 + MOV r11, #0 + ADC r11, r11, #0 UMLAL r9, r11, r5, r1 STM r0, {r6, r7, r8, r9} - ADD r0, r0, #0x4 + ADD r0, r0, #4 /* * -14def9df */ MOV r1, #0x621 MOVT r1, #0xeb21 - MOV r12, #0x0 + MOV r12, #0 LDM r0, {r6, r7, r8, r9} UMLAL r6, r12, r2, r1 ADDS r7, r7, r12 - MOV r12, #0x0 - ADC r12, r12, #0x0 + MOV r12, #0 + ADC r12, r12, #0 UMLAL r7, r12, r3, r1 ADDS r8, r8, r12 - MOV r12, #0x0 - ADC r12, r12, #0x0 + MOV r12, #0 + ADC r12, r12, #0 UMLAL r8, r12, r4, r1 ADDS r9, r9, r12 - MOV r12, #0x0 - ADC r12, r12, #0x0 + MOV r12, #0 + ADC r12, r12, #0 UMLAL r9, r12, r5, r1 STM r0, {r6, r7, r8, r9} - ADD r0, r0, #0x4 + ADD r0, r0, #4 /* Add overflows at 4 * 32 */ LDM r0, {r6, r7, r8, r9} BFC r9, #28, #4 @@ -4961,7 +4960,7 @@ sc_reduce: SBCS r8, r8, r4 SBCS r9, r9, r5 SBC r1, r1, r1 - SUB r0, r0, #0x10 + SUB r0, r0, #16 LDM r0, {r2, r3, r4, r5} MOV r10, #0xd3ed MOVT r10, #0x5cf5 @@ -4979,16 +4978,16 @@ sc_reduce: ADCS r3, r3, r11 ADCS r4, r4, r12 ADCS r5, r5, lr - ADCS r6, r6, #0x0 - ADCS r7, r7, #0x0 + ADCS r6, r6, #0 + ADCS r7, r7, #0 AND r1, r1, #0x10000000 - ADCS r8, r8, #0x0 + ADCS r8, r8, #0 ADC r9, r9, r1 BFC r9, #28, #4 /* Store result */ LDR r0, [sp, #52] STM r0, {r2, r3, r4, r5, r6, r7, r8, r9} - ADD sp, sp, #0x38 + ADD sp, sp, #56 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 588 */ .size sc_reduce,.-sc_reduce @@ -4999,10 +4998,10 @@ sc_reduce: .type sc_reduce, %function sc_reduce: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} - SUB sp, sp, #0x38 + SUB sp, sp, #56 STR r0, [sp, #52] /* Load bits 252-511 */ - ADD r0, r0, #0x1c + ADD r0, r0, #28 LDM r0, {r1, r2, r3, r4, r5, r6, r7, r8, r9} LSR lr, r9, #24 LSL r9, r9, #4 @@ -5022,13 +5021,13 @@ sc_reduce: LSL r2, r2, #4 ORR r2, r2, r1, LSR #28 BFC r9, #28, #4 - SUB r0, r0, #0x1c + SUB r0, r0, #28 /* Add order times bits 504..511 */ MOV r10, #0x2c13 MOVT r10, #0xa30a MOV r11, #0x9ce5 MOVT r11, #0xa7ed - MOV r1, #0x0 + MOV r1, #0 UMLAL r2, r1, r10, lr UMAAL r3, r1, r11, lr MOV r10, #0x6329 @@ -5038,18 +5037,18 @@ sc_reduce: UMAAL r4, r1, r10, lr UMAAL r5, r1, r11, lr ADDS r6, r6, r1 - ADCS r7, r7, #0x0 - ADCS r8, r8, #0x0 - ADC r9, r9, #0x0 + ADCS r7, r7, #0 + ADCS r8, r8, #0 + ADC r9, r9, #0 SUBS r6, r6, lr - SBCS r7, r7, #0x0 - SBCS r8, r8, #0x0 - SBC r9, r9, #0x0 + SBCS r7, r7, #0 + SBCS r8, r8, #0 + SBC r9, r9, #0 /* Sub product of top 8 words and order */ MOV r12, sp MOV r1, #0x2c13 MOVT r1, #0xa30a - MOV lr, #0x0 + MOV lr, #0 LDM r0!, {r10, r11} UMLAL r10, lr, r2, r1 UMAAL r11, lr, r3, r1 @@ -5067,11 +5066,11 @@ sc_reduce: BFC r11, #28, #4 UMAAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} - SUB r0, r0, #0x10 - SUB r12, r12, #0x20 + SUB r0, r0, #16 + SUB r12, r12, #32 MOV r1, #0x9ce5 MOVT r1, #0xa7ed - MOV lr, #0x0 + MOV lr, #0 LDM r12, {r10, r11} UMLAL r10, lr, r2, r1 UMAAL r11, lr, r3, r1 @@ -5088,10 +5087,10 @@ sc_reduce: UMAAL r10, lr, r8, r1 UMAAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} - SUB r12, r12, #0x20 + SUB r12, r12, #32 MOV r1, #0x6329 MOVT r1, #0x5d08 - MOV lr, #0x0 + MOV lr, #0 LDM r12, {r10, r11} UMLAL r10, lr, r2, r1 UMAAL r11, lr, r3, r1 @@ -5108,10 +5107,10 @@ sc_reduce: UMAAL r10, lr, r8, r1 UMAAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} - SUB r12, r12, #0x20 + SUB r12, r12, #32 MOV r1, #0x621 MOVT r1, #0xeb21 - MOV lr, #0x0 + MOV lr, #0 LDM r12, {r10, r11} UMLAL r10, lr, r2, r1 UMAAL r11, lr, r3, r1 @@ -5128,7 +5127,7 @@ sc_reduce: UMAAL r10, lr, r8, r1 UMAAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} - SUB r12, r12, #0x20 + SUB r12, r12, #32 /* Subtract at 4 * 32 */ LDM r12, {r10, r11} SUBS r10, r10, r2 @@ -5146,7 +5145,7 @@ sc_reduce: SBCS r10, r10, r8 SBC r11, r11, r9 STM r12!, {r10, r11} - SUB r12, r12, #0x24 + SUB r12, r12, #36 ASR lr, r11, #25 /* Conditionally subtract order starting at bit 125 */ MOV r1, #0xa0000000 @@ -5175,19 +5174,19 @@ sc_reduce: STM r12!, {r10, r11} LDM r12, {r10, r11} ADCS r10, r10, r5 - ADCS r11, r11, #0x0 + ADCS r11, r11, #0 STM r12!, {r10, r11} LDM r12, {r10, r11} - ADCS r10, r10, #0x0 - ADCS r11, r11, #0x0 + ADCS r10, r10, #0 + ADCS r11, r11, #0 STM r12!, {r10, r11} LDM r12, {r10} - ADCS r10, r10, #0x0 + ADCS r10, r10, #0 STM r12!, {r10} - SUB r0, r0, #0x10 + SUB r0, r0, #16 MOV r12, sp /* Load bits 252-376 */ - ADD r12, r12, #0x1c + ADD r12, r12, #28 LDM r12, {r1, r2, r3, r4, r5} LSL r5, r5, #4 ORR r5, r5, r4, LSR #28 @@ -5198,53 +5197,53 @@ sc_reduce: LSL r2, r2, #4 ORR r2, r2, r1, LSR #28 BFC r5, #29, #3 - SUB r12, r12, #0x1c + SUB r12, r12, #28 /* Sub product of top 4 words and order */ MOV r0, sp /* * -5cf5d3ed */ MOV r1, #0x2c13 MOVT r1, #0xa30a - MOV lr, #0x0 + MOV lr, #0 LDM r0, {r6, r7, r8, r9} UMLAL r6, lr, r2, r1 UMAAL r7, lr, r3, r1 UMAAL r8, lr, r4, r1 UMAAL r9, lr, r5, r1 STM r0, {r6, r7, r8, r9} - ADD r0, r0, #0x4 + ADD r0, r0, #4 /* * -5812631b */ MOV r1, #0x9ce5 MOVT r1, #0xa7ed - MOV r10, #0x0 + MOV r10, #0 LDM r0, {r6, r7, r8, r9} UMLAL r6, r10, r2, r1 UMAAL r7, r10, r3, r1 UMAAL r8, r10, r4, r1 UMAAL r9, r10, r5, r1 STM r0, {r6, r7, r8, r9} - ADD r0, r0, #0x4 + ADD r0, r0, #4 /* * -a2f79cd7 */ MOV r1, #0x6329 MOVT r1, #0x5d08 - MOV r11, #0x0 + MOV r11, #0 LDM r0, {r6, r7, r8, r9} UMLAL r6, r11, r2, r1 UMAAL r7, r11, r3, r1 UMAAL r8, r11, r4, r1 UMAAL r9, r11, r5, r1 STM r0, {r6, r7, r8, r9} - ADD r0, r0, #0x4 + ADD r0, r0, #4 /* * -14def9df */ MOV r1, #0x621 MOVT r1, #0xeb21 - MOV r12, #0x0 + MOV r12, #0 LDM r0, {r6, r7, r8, r9} UMLAL r6, r12, r2, r1 UMAAL r7, r12, r3, r1 UMAAL r8, r12, r4, r1 UMAAL r9, r12, r5, r1 STM r0, {r6, r7, r8, r9} - ADD r0, r0, #0x4 + ADD r0, r0, #4 /* Add overflows at 4 * 32 */ LDM r0, {r6, r7, r8, r9} BFC r9, #28, #4 @@ -5258,7 +5257,7 @@ sc_reduce: SBCS r8, r8, r4 SBCS r9, r9, r5 SBC r1, r1, r1 - SUB r0, r0, #0x10 + SUB r0, r0, #16 LDM r0, {r2, r3, r4, r5} MOV r10, #0xd3ed MOVT r10, #0x5cf5 @@ -5276,16 +5275,16 @@ sc_reduce: ADCS r3, r3, r11 ADCS r4, r4, r12 ADCS r5, r5, lr - ADCS r6, r6, #0x0 - ADCS r7, r7, #0x0 + ADCS r6, r6, #0 + ADCS r7, r7, #0 AND r1, r1, #0x10000000 - ADCS r8, r8, #0x0 + ADCS r8, r8, #0 ADC r9, r9, r1 BFC r9, #28, #4 /* Store result */ LDR r0, [sp, #52] STM r0, {r2, r3, r4, r5, r6, r7, r8, r9} - ADD sp, sp, #0x38 + ADD sp, sp, #56 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 502 */ .size sc_reduce,.-sc_reduce @@ -5301,7 +5300,7 @@ sc_muladd: SUB sp, sp, #0x50 ADD lr, sp, #0x44 STM lr, {r0, r1, r3} - MOV r0, #0x0 + MOV r0, #0 LDR r12, [r1] /* A[0] * B[0] */ LDR lr, [r2] @@ -5323,309 +5322,309 @@ sc_muladd: ADDS r5, r5, r11 /* A[0] * B[3] */ LDR lr, [r2, #12] - ADCS r6, r6, #0x0 - ADC r11, r0, #0x0 + ADCS r6, r6, #0 + ADC r11, r0, #0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 /* A[0] * B[5] */ LDR lr, [r2, #20] - ADCS r8, r8, #0x0 - ADC r11, r0, #0x0 + ADCS r8, r8, #0 + ADC r11, r0, #0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 /* A[0] * B[7] */ LDR lr, [r2, #28] - ADCS r10, r10, #0x0 - ADC r3, r0, #0x0 + ADCS r10, r10, #0 + ADC r3, r0, #0 UMLAL r10, r3, r12, lr /* A[1] * B[0] */ LDR r12, [r1, #4] LDR lr, [r2] - MOV r11, #0x0 + MOV r11, #0 UMLAL r4, r11, r12, lr STR r4, [sp, #4] ADDS r5, r5, r11 /* A[1] * B[1] */ LDR lr, [r2, #4] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 /* A[1] * B[2] */ LDR lr, [r2, #8] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 /* A[1] * B[3] */ LDR lr, [r2, #12] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 /* A[1] * B[4] */ LDR lr, [r2, #16] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 /* A[1] * B[5] */ LDR lr, [r2, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 /* A[1] * B[6] */ LDR lr, [r2, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 /* A[1] * B[7] */ LDR lr, [r2, #28] - ADC r4, r0, #0x0 + ADC r4, r0, #0 UMLAL r3, r4, r12, lr /* A[2] * B[0] */ LDR r12, [r1, #8] LDR lr, [r2] - MOV r11, #0x0 + MOV r11, #0 UMLAL r5, r11, r12, lr STR r5, [sp, #8] ADDS r6, r6, r11 /* A[2] * B[1] */ LDR lr, [r2, #4] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 /* A[2] * B[2] */ LDR lr, [r2, #8] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 /* A[2] * B[3] */ LDR lr, [r2, #12] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 /* A[2] * B[4] */ LDR lr, [r2, #16] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 /* A[2] * B[5] */ LDR lr, [r2, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 /* A[2] * B[6] */ LDR lr, [r2, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 /* A[2] * B[7] */ LDR lr, [r2, #28] - ADC r5, r0, #0x0 + ADC r5, r0, #0 UMLAL r4, r5, r12, lr /* A[3] * B[0] */ LDR r12, [r1, #12] LDR lr, [r2] - MOV r11, #0x0 + MOV r11, #0 UMLAL r6, r11, r12, lr STR r6, [sp, #12] ADDS r7, r7, r11 /* A[3] * B[1] */ LDR lr, [r2, #4] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 /* A[3] * B[2] */ LDR lr, [r2, #8] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 /* A[3] * B[3] */ LDR lr, [r2, #12] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 /* A[3] * B[4] */ LDR lr, [r2, #16] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 /* A[3] * B[5] */ LDR lr, [r2, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 /* A[3] * B[6] */ LDR lr, [r2, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 /* A[3] * B[7] */ LDR lr, [r2, #28] - ADC r6, r0, #0x0 + ADC r6, r0, #0 UMLAL r5, r6, r12, lr /* A[4] * B[0] */ LDR r12, [r1, #16] LDR lr, [r2] - MOV r11, #0x0 + MOV r11, #0 UMLAL r7, r11, r12, lr STR r7, [sp, #16] ADDS r8, r8, r11 /* A[4] * B[1] */ LDR lr, [r2, #4] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 /* A[4] * B[2] */ LDR lr, [r2, #8] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 /* A[4] * B[3] */ LDR lr, [r2, #12] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 /* A[4] * B[4] */ LDR lr, [r2, #16] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 /* A[4] * B[5] */ LDR lr, [r2, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 /* A[4] * B[6] */ LDR lr, [r2, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 /* A[4] * B[7] */ LDR lr, [r2, #28] - ADC r7, r0, #0x0 + ADC r7, r0, #0 UMLAL r6, r7, r12, lr /* A[5] * B[0] */ LDR r12, [r1, #20] LDR lr, [r2] - MOV r11, #0x0 + MOV r11, #0 UMLAL r8, r11, r12, lr STR r8, [sp, #20] ADDS r9, r9, r11 /* A[5] * B[1] */ LDR lr, [r2, #4] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r9, r11, r12, lr ADDS r10, r10, r11 /* A[5] * B[2] */ LDR lr, [r2, #8] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 /* A[5] * B[3] */ LDR lr, [r2, #12] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 /* A[5] * B[4] */ LDR lr, [r2, #16] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 /* A[5] * B[5] */ LDR lr, [r2, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 /* A[5] * B[6] */ LDR lr, [r2, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 /* A[5] * B[7] */ LDR lr, [r2, #28] - ADC r8, r0, #0x0 + ADC r8, r0, #0 UMLAL r7, r8, r12, lr /* A[6] * B[0] */ LDR r12, [r1, #24] LDR lr, [r2] - MOV r11, #0x0 + MOV r11, #0 UMLAL r9, r11, r12, lr STR r9, [sp, #24] ADDS r10, r10, r11 /* A[6] * B[1] */ LDR lr, [r2, #4] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r10, r11, r12, lr ADDS r3, r3, r11 /* A[6] * B[2] */ LDR lr, [r2, #8] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 /* A[6] * B[3] */ LDR lr, [r2, #12] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 /* A[6] * B[4] */ LDR lr, [r2, #16] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 /* A[6] * B[5] */ LDR lr, [r2, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 /* A[6] * B[6] */ LDR lr, [r2, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 /* A[6] * B[7] */ LDR lr, [r2, #28] - ADC r9, r0, #0x0 + ADC r9, r0, #0 UMLAL r8, r9, r12, lr /* A[7] * B[0] */ LDR r12, [r1, #28] LDR lr, [r2] - MOV r11, #0x0 + MOV r11, #0 UMLAL r10, r11, r12, lr STR r10, [sp, #28] ADDS r3, r3, r11 /* A[7] * B[1] */ LDR lr, [r2, #4] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r3, r11, r12, lr ADDS r4, r4, r11 /* A[7] * B[2] */ LDR lr, [r2, #8] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r4, r11, r12, lr ADDS r5, r5, r11 /* A[7] * B[3] */ LDR lr, [r2, #12] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r5, r11, r12, lr ADDS r6, r6, r11 /* A[7] * B[4] */ LDR lr, [r2, #16] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r6, r11, r12, lr ADDS r7, r7, r11 /* A[7] * B[5] */ LDR lr, [r2, #20] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r7, r11, r12, lr ADDS r8, r8, r11 /* A[7] * B[6] */ LDR lr, [r2, #24] - ADC r11, r0, #0x0 + ADC r11, r0, #0 UMLAL r8, r11, r12, lr ADDS r9, r9, r11 /* A[7] * B[7] */ LDR lr, [r2, #28] - ADC r10, r0, #0x0 + ADC r10, r0, #0 UMLAL r9, r10, r12, lr - ADD lr, sp, #0x20 + ADD lr, sp, #32 STM lr, {r3, r4, r5, r6, r7, r8, r9, r10} MOV r0, sp /* Add c to a * b */ @@ -5644,15 +5643,15 @@ sc_muladd: MOV r1, r9 STM r0!, {r2, r3, r4, r5, r6, r7, r8, r9} LDM r0, {r2, r3, r4, r5, r6, r7, r8, r9} - ADCS r2, r2, #0x0 - ADCS r3, r3, #0x0 - ADCS r4, r4, #0x0 - ADCS r5, r5, #0x0 - ADCS r6, r6, #0x0 - ADCS r7, r7, #0x0 - ADCS r8, r8, #0x0 - ADC r9, r9, #0x0 - SUB r0, r0, #0x20 + ADCS r2, r2, #0 + ADCS r3, r3, #0 + ADCS r4, r4, #0 + ADCS r5, r5, #0 + ADCS r6, r6, #0 + ADCS r7, r7, #0 + ADCS r8, r8, #0 + ADC r9, r9, #0 + SUB r0, r0, #32 /* Get 252..503 and 504..507 */ LSR lr, r9, #24 LSL r9, r9, #4 @@ -5677,200 +5676,200 @@ sc_muladd: MOVT r10, #0xa30a MOV r11, #0x9ce5 MOVT r11, #0xa7ed - MOV r1, #0x0 + MOV r1, #0 UMLAL r2, r1, r10, lr ADDS r3, r3, r1 - MOV r1, #0x0 - ADC r1, r1, #0x0 + MOV r1, #0 + ADC r1, r1, #0 UMLAL r3, r1, r11, lr MOV r10, #0x6329 MOVT r10, #0x5d08 MOV r11, #0x621 MOVT r11, #0xeb21 ADDS r4, r4, r1 - MOV r1, #0x0 - ADC r1, r1, #0x0 + MOV r1, #0 + ADC r1, r1, #0 UMLAL r4, r1, r10, lr ADDS r5, r5, r1 - MOV r1, #0x0 - ADC r1, r1, #0x0 + MOV r1, #0 + ADC r1, r1, #0 UMLAL r5, r1, r11, lr ADDS r6, r6, r1 - ADCS r7, r7, #0x0 - ADCS r8, r8, #0x0 - ADC r9, r9, #0x0 + ADCS r7, r7, #0 + ADCS r8, r8, #0 + ADC r9, r9, #0 SUBS r6, r6, lr - SBCS r7, r7, #0x0 - SBCS r8, r8, #0x0 - SBC r9, r9, #0x0 + SBCS r7, r7, #0 + SBCS r8, r8, #0 + SBC r9, r9, #0 /* Sub product of top 8 words and order */ MOV r12, sp MOV r1, #0x2c13 MOVT r1, #0xa30a - MOV lr, #0x0 + MOV lr, #0 LDM r0!, {r10, r11} UMLAL r10, lr, r2, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r3, r1 STM r12!, {r10, r11} LDM r0!, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r4, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r5, r1 STM r12!, {r10, r11} LDM r0!, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r6, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r7, r1 STM r12!, {r10, r11} LDM r0!, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r8, r1 BFC r11, #28, #4 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} - SUB r0, r0, #0x10 - SUB r12, r12, #0x20 + SUB r0, r0, #16 + SUB r12, r12, #32 MOV r1, #0x9ce5 MOVT r1, #0xa7ed - MOV lr, #0x0 + MOV lr, #0 LDM r12, {r10, r11} UMLAL r10, lr, r2, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r3, r1 STM r12!, {r10, r11} LDM r12, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r4, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r5, r1 STM r12!, {r10, r11} LDM r12, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r6, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r7, r1 STM r12!, {r10, r11} LDM r12, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r8, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} - SUB r12, r12, #0x20 + SUB r12, r12, #32 MOV r1, #0x6329 MOVT r1, #0x5d08 - MOV lr, #0x0 + MOV lr, #0 LDM r12, {r10, r11} UMLAL r10, lr, r2, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r3, r1 STM r12!, {r10, r11} LDM r12, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r4, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r5, r1 STM r12!, {r10, r11} LDM r12, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r6, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r7, r1 STM r12!, {r10, r11} LDM r12, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r8, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} - SUB r12, r12, #0x20 + SUB r12, r12, #32 MOV r1, #0x621 MOVT r1, #0xeb21 - MOV lr, #0x0 + MOV lr, #0 LDM r12, {r10, r11} UMLAL r10, lr, r2, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r3, r1 STM r12!, {r10, r11} LDM r12, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r4, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r5, r1 STM r12!, {r10, r11} LDM r12, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r6, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r7, r1 STM r12!, {r10, r11} LDM r12, {r10, r11} ADDS r10, r10, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r10, lr, r8, r1 ADDS r11, r11, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} - SUB r12, r12, #0x20 + SUB r12, r12, #32 /* Subtract at 4 * 32 */ LDM r12, {r10, r11} SUBS r10, r10, r2 @@ -5888,7 +5887,7 @@ sc_muladd: SBCS r10, r10, r8 SBC r11, r11, r9 STM r12!, {r10, r11} - SUB r12, r12, #0x24 + SUB r12, r12, #36 ASR lr, r11, #25 /* Conditionally subtract order starting at bit 125 */ MOV r1, #0xa0000000 @@ -5917,19 +5916,19 @@ sc_muladd: STM r12!, {r10, r11} LDM r12, {r10, r11} ADCS r10, r10, r5 - ADCS r11, r11, #0x0 + ADCS r11, r11, #0 STM r12!, {r10, r11} LDM r12, {r10, r11} - ADCS r10, r10, #0x0 - ADCS r11, r11, #0x0 + ADCS r10, r10, #0 + ADCS r11, r11, #0 STM r12!, {r10, r11} LDM r12, {r10} - ADCS r10, r10, #0x0 + ADCS r10, r10, #0 STM r12!, {r10} - SUB r0, r0, #0x10 + SUB r0, r0, #16 MOV r12, sp /* Load bits 252-376 */ - ADD r12, r12, #0x1c + ADD r12, r12, #28 LDM r12, {r1, r2, r3, r4, r5} LSL r5, r5, #4 ORR r5, r5, r4, LSR #28 @@ -5940,89 +5939,89 @@ sc_muladd: LSL r2, r2, #4 ORR r2, r2, r1, LSR #28 BFC r5, #29, #3 - SUB r12, r12, #0x1c + SUB r12, r12, #28 /* Sub product of top 4 words and order */ MOV r0, sp /* * -5cf5d3ed */ MOV r1, #0x2c13 MOVT r1, #0xa30a - MOV lr, #0x0 + MOV lr, #0 LDM r0, {r6, r7, r8, r9} UMLAL r6, lr, r2, r1 ADDS r7, r7, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r7, lr, r3, r1 ADDS r8, r8, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r8, lr, r4, r1 ADDS r9, r9, lr - MOV lr, #0x0 - ADC lr, lr, #0x0 + MOV lr, #0 + ADC lr, lr, #0 UMLAL r9, lr, r5, r1 STM r0, {r6, r7, r8, r9} - ADD r0, r0, #0x4 + ADD r0, r0, #4 /* * -5812631b */ MOV r1, #0x9ce5 MOVT r1, #0xa7ed - MOV r10, #0x0 + MOV r10, #0 LDM r0, {r6, r7, r8, r9} UMLAL r6, r10, r2, r1 ADDS r7, r7, r10 - MOV r10, #0x0 - ADC r10, r10, #0x0 + MOV r10, #0 + ADC r10, r10, #0 UMLAL r7, r10, r3, r1 ADDS r8, r8, r10 - MOV r10, #0x0 - ADC r10, r10, #0x0 + MOV r10, #0 + ADC r10, r10, #0 UMLAL r8, r10, r4, r1 ADDS r9, r9, r10 - MOV r10, #0x0 - ADC r10, r10, #0x0 + MOV r10, #0 + ADC r10, r10, #0 UMLAL r9, r10, r5, r1 STM r0, {r6, r7, r8, r9} - ADD r0, r0, #0x4 + ADD r0, r0, #4 /* * -a2f79cd7 */ MOV r1, #0x6329 MOVT r1, #0x5d08 - MOV r11, #0x0 + MOV r11, #0 LDM r0, {r6, r7, r8, r9} UMLAL r6, r11, r2, r1 ADDS r7, r7, r11 - MOV r11, #0x0 - ADC r11, r11, #0x0 + MOV r11, #0 + ADC r11, r11, #0 UMLAL r7, r11, r3, r1 ADDS r8, r8, r11 - MOV r11, #0x0 - ADC r11, r11, #0x0 + MOV r11, #0 + ADC r11, r11, #0 UMLAL r8, r11, r4, r1 ADDS r9, r9, r11 - MOV r11, #0x0 - ADC r11, r11, #0x0 + MOV r11, #0 + ADC r11, r11, #0 UMLAL r9, r11, r5, r1 STM r0, {r6, r7, r8, r9} - ADD r0, r0, #0x4 + ADD r0, r0, #4 /* * -14def9df */ MOV r1, #0x621 MOVT r1, #0xeb21 - MOV r12, #0x0 + MOV r12, #0 LDM r0, {r6, r7, r8, r9} UMLAL r6, r12, r2, r1 ADDS r7, r7, r12 - MOV r12, #0x0 - ADC r12, r12, #0x0 + MOV r12, #0 + ADC r12, r12, #0 UMLAL r7, r12, r3, r1 ADDS r8, r8, r12 - MOV r12, #0x0 - ADC r12, r12, #0x0 + MOV r12, #0 + ADC r12, r12, #0 UMLAL r8, r12, r4, r1 ADDS r9, r9, r12 - MOV r12, #0x0 - ADC r12, r12, #0x0 + MOV r12, #0 + ADC r12, r12, #0 UMLAL r9, r12, r5, r1 STM r0, {r6, r7, r8, r9} - ADD r0, r0, #0x4 + ADD r0, r0, #4 /* Add overflows at 4 * 32 */ LDM r0, {r6, r7, r8, r9} BFC r9, #28, #4 @@ -6036,7 +6035,7 @@ sc_muladd: SBCS r8, r8, r4 SBCS r9, r9, r5 SBC r1, r1, r1 - SUB r0, r0, #0x10 + SUB r0, r0, #16 LDM r0, {r2, r3, r4, r5} MOV r10, #0xd3ed MOVT r10, #0x5cf5 @@ -6054,10 +6053,10 @@ sc_muladd: ADCS r3, r3, r11 ADCS r4, r4, r12 ADCS r5, r5, lr - ADCS r6, r6, #0x0 - ADCS r7, r7, #0x0 + ADCS r6, r6, #0 + ADCS r7, r7, #0 AND r1, r1, #0x10000000 - ADCS r8, r8, #0x0 + ADCS r8, r8, #0 ADC r9, r9, r1 BFC r9, #28, #4 LDR r0, [sp, #68] @@ -6107,54 +6106,54 @@ sc_muladd: UMAAL r9, r10, r2, r4 UMAAL r10, r11, r3, r4 LDM lr, {r4, r5, r6, r7} - MOV r12, #0x0 + MOV r12, #0 UMLAL r8, r12, r0, r4 UMAAL r9, r12, r1, r4 UMAAL r10, r12, r2, r4 UMAAL r11, r12, r3, r4 - MOV r4, #0x0 + MOV r4, #0 UMLAL r9, r4, r0, r5 UMAAL r10, r4, r1, r5 UMAAL r11, r4, r2, r5 UMAAL r12, r4, r3, r5 - MOV r5, #0x0 + MOV r5, #0 UMLAL r10, r5, r0, r6 UMAAL r11, r5, r1, r6 UMAAL r12, r5, r2, r6 UMAAL r4, r5, r3, r6 - MOV r6, #0x0 + MOV r6, #0 UMLAL r11, r6, r0, r7 LDR r0, [sp, #72] UMAAL r12, r6, r1, r7 - ADD r0, r0, #0x10 + ADD r0, r0, #16 UMAAL r4, r6, r2, r7 - SUB lr, lr, #0x10 + SUB lr, lr, #16 UMAAL r5, r6, r3, r7 LDM r0, {r0, r1, r2, r3} STR r6, [sp, #64] LDM lr!, {r6} - MOV r7, #0x0 + MOV r7, #0 UMLAL r8, r7, r0, r6 UMAAL r9, r7, r1, r6 STR r8, [sp, #16] UMAAL r10, r7, r2, r6 UMAAL r11, r7, r3, r6 LDM lr!, {r6} - MOV r8, #0x0 + MOV r8, #0 UMLAL r9, r8, r0, r6 UMAAL r10, r8, r1, r6 STR r9, [sp, #20] UMAAL r11, r8, r2, r6 UMAAL r12, r8, r3, r6 LDM lr!, {r6} - MOV r9, #0x0 + MOV r9, #0 UMLAL r10, r9, r0, r6 UMAAL r11, r9, r1, r6 STR r10, [sp, #24] UMAAL r12, r9, r2, r6 UMAAL r4, r9, r3, r6 LDM lr!, {r6} - MOV r10, #0x0 + MOV r10, #0 UMLAL r11, r10, r0, r6 UMAAL r12, r10, r1, r6 STR r11, [sp, #28] @@ -6181,7 +6180,7 @@ sc_muladd: UMAAL r8, r9, r3, r11 UMAAL r9, r10, r3, lr MOV r3, r12 - ADD lr, sp, #0x20 + ADD lr, sp, #32 STM lr, {r3, r4, r5, r6, r7, r8, r9, r10} MOV r0, sp /* Add c to a * b */ @@ -6200,15 +6199,15 @@ sc_muladd: MOV r1, r9 STM r0!, {r2, r3, r4, r5, r6, r7, r8, r9} LDM r0, {r2, r3, r4, r5, r6, r7, r8, r9} - ADCS r2, r2, #0x0 - ADCS r3, r3, #0x0 - ADCS r4, r4, #0x0 - ADCS r5, r5, #0x0 - ADCS r6, r6, #0x0 - ADCS r7, r7, #0x0 - ADCS r8, r8, #0x0 - ADC r9, r9, #0x0 - SUB r0, r0, #0x20 + ADCS r2, r2, #0 + ADCS r3, r3, #0 + ADCS r4, r4, #0 + ADCS r5, r5, #0 + ADCS r6, r6, #0 + ADCS r7, r7, #0 + ADCS r8, r8, #0 + ADC r9, r9, #0 + SUB r0, r0, #32 /* Get 252..503 and 504..507 */ LSR lr, r9, #24 LSL r9, r9, #4 @@ -6233,7 +6232,7 @@ sc_muladd: MOVT r10, #0xa30a MOV r11, #0x9ce5 MOVT r11, #0xa7ed - MOV r1, #0x0 + MOV r1, #0 UMLAL r2, r1, r10, lr UMAAL r3, r1, r11, lr MOV r10, #0x6329 @@ -6243,18 +6242,18 @@ sc_muladd: UMAAL r4, r1, r10, lr UMAAL r5, r1, r11, lr ADDS r6, r6, r1 - ADCS r7, r7, #0x0 - ADCS r8, r8, #0x0 - ADC r9, r9, #0x0 + ADCS r7, r7, #0 + ADCS r8, r8, #0 + ADC r9, r9, #0 SUBS r6, r6, lr - SBCS r7, r7, #0x0 - SBCS r8, r8, #0x0 - SBC r9, r9, #0x0 + SBCS r7, r7, #0 + SBCS r8, r8, #0 + SBC r9, r9, #0 /* Sub product of top 8 words and order */ MOV r12, sp MOV r1, #0x2c13 MOVT r1, #0xa30a - MOV lr, #0x0 + MOV lr, #0 LDM r0!, {r10, r11} UMLAL r10, lr, r2, r1 UMAAL r11, lr, r3, r1 @@ -6272,11 +6271,11 @@ sc_muladd: BFC r11, #28, #4 UMAAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} - SUB r0, r0, #0x10 - SUB r12, r12, #0x20 + SUB r0, r0, #16 + SUB r12, r12, #32 MOV r1, #0x9ce5 MOVT r1, #0xa7ed - MOV lr, #0x0 + MOV lr, #0 LDM r12, {r10, r11} UMLAL r10, lr, r2, r1 UMAAL r11, lr, r3, r1 @@ -6293,10 +6292,10 @@ sc_muladd: UMAAL r10, lr, r8, r1 UMAAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} - SUB r12, r12, #0x20 + SUB r12, r12, #32 MOV r1, #0x6329 MOVT r1, #0x5d08 - MOV lr, #0x0 + MOV lr, #0 LDM r12, {r10, r11} UMLAL r10, lr, r2, r1 UMAAL r11, lr, r3, r1 @@ -6313,10 +6312,10 @@ sc_muladd: UMAAL r10, lr, r8, r1 UMAAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} - SUB r12, r12, #0x20 + SUB r12, r12, #32 MOV r1, #0x621 MOVT r1, #0xeb21 - MOV lr, #0x0 + MOV lr, #0 LDM r12, {r10, r11} UMLAL r10, lr, r2, r1 UMAAL r11, lr, r3, r1 @@ -6333,7 +6332,7 @@ sc_muladd: UMAAL r10, lr, r8, r1 UMAAL r11, lr, r9, r1 STM r12!, {r10, r11, lr} - SUB r12, r12, #0x20 + SUB r12, r12, #32 /* Subtract at 4 * 32 */ LDM r12, {r10, r11} SUBS r10, r10, r2 @@ -6351,7 +6350,7 @@ sc_muladd: SBCS r10, r10, r8 SBC r11, r11, r9 STM r12!, {r10, r11} - SUB r12, r12, #0x24 + SUB r12, r12, #36 ASR lr, r11, #25 /* Conditionally subtract order starting at bit 125 */ MOV r1, #0xa0000000 @@ -6380,19 +6379,19 @@ sc_muladd: STM r12!, {r10, r11} LDM r12, {r10, r11} ADCS r10, r10, r5 - ADCS r11, r11, #0x0 + ADCS r11, r11, #0 STM r12!, {r10, r11} LDM r12, {r10, r11} - ADCS r10, r10, #0x0 - ADCS r11, r11, #0x0 + ADCS r10, r10, #0 + ADCS r11, r11, #0 STM r12!, {r10, r11} LDM r12, {r10} - ADCS r10, r10, #0x0 + ADCS r10, r10, #0 STM r12!, {r10} - SUB r0, r0, #0x10 + SUB r0, r0, #16 MOV r12, sp /* Load bits 252-376 */ - ADD r12, r12, #0x1c + ADD r12, r12, #28 LDM r12, {r1, r2, r3, r4, r5} LSL r5, r5, #4 ORR r5, r5, r4, LSR #28 @@ -6403,53 +6402,53 @@ sc_muladd: LSL r2, r2, #4 ORR r2, r2, r1, LSR #28 BFC r5, #29, #3 - SUB r12, r12, #0x1c + SUB r12, r12, #28 /* Sub product of top 4 words and order */ MOV r0, sp /* * -5cf5d3ed */ MOV r1, #0x2c13 MOVT r1, #0xa30a - MOV lr, #0x0 + MOV lr, #0 LDM r0, {r6, r7, r8, r9} UMLAL r6, lr, r2, r1 UMAAL r7, lr, r3, r1 UMAAL r8, lr, r4, r1 UMAAL r9, lr, r5, r1 STM r0, {r6, r7, r8, r9} - ADD r0, r0, #0x4 + ADD r0, r0, #4 /* * -5812631b */ MOV r1, #0x9ce5 MOVT r1, #0xa7ed - MOV r10, #0x0 + MOV r10, #0 LDM r0, {r6, r7, r8, r9} UMLAL r6, r10, r2, r1 UMAAL r7, r10, r3, r1 UMAAL r8, r10, r4, r1 UMAAL r9, r10, r5, r1 STM r0, {r6, r7, r8, r9} - ADD r0, r0, #0x4 + ADD r0, r0, #4 /* * -a2f79cd7 */ MOV r1, #0x6329 MOVT r1, #0x5d08 - MOV r11, #0x0 + MOV r11, #0 LDM r0, {r6, r7, r8, r9} UMLAL r6, r11, r2, r1 UMAAL r7, r11, r3, r1 UMAAL r8, r11, r4, r1 UMAAL r9, r11, r5, r1 STM r0, {r6, r7, r8, r9} - ADD r0, r0, #0x4 + ADD r0, r0, #4 /* * -14def9df */ MOV r1, #0x621 MOVT r1, #0xeb21 - MOV r12, #0x0 + MOV r12, #0 LDM r0, {r6, r7, r8, r9} UMLAL r6, r12, r2, r1 UMAAL r7, r12, r3, r1 UMAAL r8, r12, r4, r1 UMAAL r9, r12, r5, r1 STM r0, {r6, r7, r8, r9} - ADD r0, r0, #0x4 + ADD r0, r0, #4 /* Add overflows at 4 * 32 */ LDM r0, {r6, r7, r8, r9} BFC r9, #28, #4 @@ -6463,7 +6462,7 @@ sc_muladd: SBCS r8, r8, r4 SBCS r9, r9, r5 SBC r1, r1, r1 - SUB r0, r0, #0x10 + SUB r0, r0, #16 LDM r0, {r2, r3, r4, r5} MOV r10, #0xd3ed MOVT r10, #0x5cf5 @@ -6481,10 +6480,10 @@ sc_muladd: ADCS r3, r3, r11 ADCS r4, r4, r12 ADCS r5, r5, lr - ADCS r6, r6, #0x0 - ADCS r7, r7, #0x0 + ADCS r6, r6, #0 + ADCS r7, r7, #0 AND r1, r1, #0x10000000 - ADCS r8, r8, #0x0 + ADCS r8, r8, #0 ADC r9, r9, r1 BFC r9, #28, #4 LDR r0, [sp, #68] diff --git a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c index 2e4358f1878..e023636c441 100644 --- a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c +++ b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c @@ -59,9 +59,9 @@ #if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL) #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_init(void) +WC_OMIT_FRAME_POINTER void fe_init() #else -WC_OMIT_FRAME_POINTER void fe_init(void) +WC_OMIT_FRAME_POINTER void fe_init() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -81,9 +81,9 @@ WC_OMIT_FRAME_POINTER void fe_init(void) void fe_add_sub_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_add_sub_op(void) +WC_OMIT_FRAME_POINTER void fe_add_sub_op() #else -WC_OMIT_FRAME_POINTER void fe_add_sub_op(void) +WC_OMIT_FRAME_POINTER void fe_add_sub_op() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -94,9 +94,9 @@ WC_OMIT_FRAME_POINTER void fe_add_sub_op(void) "LDRD r6, r7, [r3]\n\t" /* Add */ "ADDS r8, r4, r6\n\t" - "MOV r12, #0x0\n\t" + "MOV r12, #0\n\t" "ADCS r9, r5, r7\n\t" - "ADC r12, r12, #0x0\n\t" + "ADC r12, r12, #0\n\t" "STRD r8, r9, [r0]\n\t" /* Sub */ "SUBS r10, r4, r6\n\t" @@ -106,12 +106,12 @@ WC_OMIT_FRAME_POINTER void fe_add_sub_op(void) "LDRD r6, r7, [r3, #8]\n\t" /* Sub */ "SBCS r10, r4, r6\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "SBCS r11, r5, r7\n\t" - "ADC lr, lr, #0x0\n\t" + "ADC lr, lr, #0\n\t" "STRD r10, r11, [r1, #8]\n\t" /* Add */ - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" "ADCS r8, r4, r6\n\t" "ADCS r9, r5, r7\n\t" "STRD r8, r9, [r0, #8]\n\t" @@ -119,12 +119,12 @@ WC_OMIT_FRAME_POINTER void fe_add_sub_op(void) "LDRD r6, r7, [r3, #16]\n\t" /* Add */ "ADCS r8, r4, r6\n\t" - "MOV r12, #0x0\n\t" + "MOV r12, #0\n\t" "ADCS r9, r5, r7\n\t" - "ADC r12, r12, #0x0\n\t" + "ADC r12, r12, #0\n\t" "STRD r8, r9, [r0, #16]\n\t" /* Sub */ - "SUBS lr, lr, #0x1\n\t" + "SUBS lr, lr, #1\n\t" "SBCS r10, r4, r6\n\t" "SBCS r11, r5, r7\n\t" "STRD r10, r11, [r1, #16]\n\t" @@ -134,46 +134,46 @@ WC_OMIT_FRAME_POINTER void fe_add_sub_op(void) "SBCS r10, r4, r6\n\t" "SBC r11, r5, r7\n\t" /* Add */ - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" "ADCS r8, r4, r6\n\t" - "MOV r12, #0x0\n\t" + "MOV r12, #0\n\t" "ADCS r9, r5, r7\n\t" - "ADC r12, r12, #0x0\n\t" + "ADC r12, r12, #0\n\t" /* Multiply -modulus by overflow */ "LSL r3, r12, #1\n\t" - "MOV r12, #0x13\n\t" + "MOV r12, #19\n\t" "ORR r3, r3, r9, LSR #31\n\t" "MUL r12, r3, r12\n\t" /* Add -x*modulus (if overflow) */ "LDRD r4, r5, [r0]\n\t" "LDRD r6, r7, [r0, #8]\n\t" "ADDS r4, r4, r12\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADCS r7, r7, #0x0\n\t" + "ADCS r5, r5, #0\n\t" + "ADCS r6, r6, #0\n\t" + "ADCS r7, r7, #0\n\t" "STRD r4, r5, [r0]\n\t" "STRD r6, r7, [r0, #8]\n\t" "LDRD r4, r5, [r0, #16]\n\t" - "ADCS r4, r4, #0x0\n\t" - "ADCS r5, r5, #0x0\n\t" + "ADCS r4, r4, #0\n\t" + "ADCS r5, r5, #0\n\t" "STRD r4, r5, [r0, #16]\n\t" "BFC r9, #31, #1\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADC r9, r9, #0x0\n\t" + "ADCS r8, r8, #0\n\t" + "ADC r9, r9, #0\n\t" "STRD r8, r9, [r0, #24]\n\t" /* Add -modulus on underflow */ - "MOV lr, #0x13\n\t" + "MOV lr, #19\n\t" "AND lr, lr, r11, ASR #31\n\t" "LDM r1, {r4, r5, r6, r7, r8, r9}\n\t" "SUBS r4, r4, lr\n\t" - "SBCS r5, r5, #0x0\n\t" - "SBCS r6, r6, #0x0\n\t" - "SBCS r7, r7, #0x0\n\t" - "SBCS r8, r8, #0x0\n\t" - "SBCS r9, r9, #0x0\n\t" + "SBCS r5, r5, #0\n\t" + "SBCS r6, r6, #0\n\t" + "SBCS r7, r7, #0\n\t" + "SBCS r8, r8, #0\n\t" + "SBCS r9, r9, #0\n\t" "BFC r11, #31, #1\n\t" - "SBCS r10, r10, #0x0\n\t" - "SBC r11, r11, #0x0\n\t" + "SBCS r10, r10, #0\n\t" + "SBC r11, r11, #0\n\t" "STM r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" /* Done Add-Sub */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -189,9 +189,9 @@ WC_OMIT_FRAME_POINTER void fe_add_sub_op(void) void fe_sub_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_sub_op(void) +WC_OMIT_FRAME_POINTER void fe_sub_op() #else -WC_OMIT_FRAME_POINTER void fe_sub_op(void) +WC_OMIT_FRAME_POINTER void fe_sub_op() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -209,17 +209,17 @@ WC_OMIT_FRAME_POINTER void fe_sub_op(void) "SBCS r11, r3, r11\n\t" "SBCS r12, r4, r12\n\t" "SBC lr, r5, lr\n\t" - "MOV r2, #0x13\n\t" + "MOV r2, #19\n\t" "AND r2, r2, lr, ASR #31\n\t" "SUBS r6, r6, r2\n\t" - "SBCS r7, r7, #0x0\n\t" - "SBCS r8, r8, #0x0\n\t" - "SBCS r9, r9, #0x0\n\t" - "SBCS r10, r10, #0x0\n\t" - "SBCS r11, r11, #0x0\n\t" + "SBCS r7, r7, #0\n\t" + "SBCS r8, r8, #0\n\t" + "SBCS r9, r9, #0\n\t" + "SBCS r10, r10, #0\n\t" + "SBCS r11, r11, #0\n\t" "BFC lr, #31, #1\n\t" - "SBCS r12, r12, #0x0\n\t" - "SBC lr, lr, #0x0\n\t" + "SBCS r12, r12, #0\n\t" + "SBC lr, lr, #0\n\t" "STM r0, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t" /* Done Sub */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -261,9 +261,9 @@ WC_OMIT_FRAME_POINTER void fe_sub(fe r, const fe a, const fe b) void fe_add_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_add_op(void) +WC_OMIT_FRAME_POINTER void fe_add_op() #else -WC_OMIT_FRAME_POINTER void fe_add_op(void) +WC_OMIT_FRAME_POINTER void fe_add_op() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -281,17 +281,17 @@ WC_OMIT_FRAME_POINTER void fe_add_op(void) "ADCS r11, r3, r11\n\t" "ADCS r12, r4, r12\n\t" "ADC lr, r5, lr\n\t" - "MOV r2, #0x13\n\t" + "MOV r2, #19\n\t" "AND r2, r2, lr, ASR #31\n\t" "ADDS r6, r6, r2\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADCS r9, r9, #0x0\n\t" - "ADCS r10, r10, #0x0\n\t" - "ADCS r11, r11, #0x0\n\t" + "ADCS r7, r7, #0\n\t" + "ADCS r8, r8, #0\n\t" + "ADCS r9, r9, #0\n\t" + "ADCS r10, r10, #0\n\t" + "ADCS r11, r11, #0\n\t" "BFC lr, #31, #1\n\t" - "ADCS r12, r12, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "ADCS r12, r12, #0\n\t" + "ADC lr, lr, #0\n\t" "STM r0, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t" /* Done Add */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -386,24 +386,24 @@ WC_OMIT_FRAME_POINTER void fe_tobytes(unsigned char* out, const fe n) __asm__ __volatile__ ( "LDM %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" - "ADDS r10, r2, #0x13\n\t" - "ADCS r10, r3, #0x0\n\t" - "ADCS r10, r4, #0x0\n\t" - "ADCS r10, r5, #0x0\n\t" - "ADCS r10, r6, #0x0\n\t" - "ADCS r10, r7, #0x0\n\t" - "ADCS r10, r8, #0x0\n\t" - "ADC r10, r9, #0x0\n\t" - "ASR r10, r10, #31\n\t" - "AND r10, r10, #0x13\n\t" - "ADDS r2, r2, r10\n\t" - "ADCS r3, r3, #0x0\n\t" - "ADCS r4, r4, #0x0\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADC r9, r9, #0x0\n\t" + "ADDS r12, r2, #19\n\t" + "ADCS r12, r3, #0\n\t" + "ADCS r12, r4, #0\n\t" + "ADCS r12, r5, #0\n\t" + "ADCS r12, r6, #0\n\t" + "ADCS r12, r7, #0\n\t" + "ADCS r12, r8, #0\n\t" + "ADC r12, r9, #0\n\t" + "ASR r12, r12, #31\n\t" + "AND r12, r12, #19\n\t" + "ADDS r2, r2, r12\n\t" + "ADCS r3, r3, #0\n\t" + "ADCS r4, r4, #0\n\t" + "ADCS r5, r5, #0\n\t" + "ADCS r6, r6, #0\n\t" + "ADCS r7, r7, #0\n\t" + "ADCS r8, r8, #0\n\t" + "ADC r9, r9, #0\n\t" "BFC r9, #31, #1\n\t" "STR r2, [%[out]]\n\t" "STR r3, [%[out], #4]\n\t" @@ -420,7 +420,7 @@ WC_OMIT_FRAME_POINTER void fe_tobytes(unsigned char* out, const fe n) : : [out] "r" (out), [n] "r" (n) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ - : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12" ); } @@ -436,14 +436,14 @@ WC_OMIT_FRAME_POINTER void fe_1(fe n) __asm__ __volatile__ ( /* Set one */ - "MOV r2, #0x1\n\t" - "MOV r3, #0x0\n\t" - "MOV r4, #0x0\n\t" - "MOV r5, #0x0\n\t" - "MOV r6, #0x0\n\t" - "MOV r7, #0x0\n\t" - "MOV r8, #0x0\n\t" - "MOV r9, #0x0\n\t" + "MOV r2, #1\n\t" + "MOV r3, #0\n\t" + "MOV r4, #0\n\t" + "MOV r5, #0\n\t" + "MOV r6, #0\n\t" + "MOV r7, #0\n\t" + "MOV r8, #0\n\t" + "MOV r9, #0\n\t" "STM %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [n] "+r" (n) @@ -468,14 +468,14 @@ WC_OMIT_FRAME_POINTER void fe_0(fe n) __asm__ __volatile__ ( /* Set zero */ - "MOV r2, #0x0\n\t" - "MOV r3, #0x0\n\t" - "MOV r4, #0x0\n\t" - "MOV r5, #0x0\n\t" - "MOV r6, #0x0\n\t" - "MOV r7, #0x0\n\t" - "MOV r8, #0x0\n\t" - "MOV r9, #0x0\n\t" + "MOV r2, #0\n\t" + "MOV r3, #0\n\t" + "MOV r4, #0\n\t" + "MOV r5, #0\n\t" + "MOV r6, #0\n\t" + "MOV r7, #0\n\t" + "MOV r8, #0\n\t" + "MOV r9, #0\n\t" "STM %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [n] "+r" (n) @@ -532,20 +532,20 @@ WC_OMIT_FRAME_POINTER void fe_neg(fe r, const fe a) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "MVN r7, #0x0\n\t" - "MVN r6, #0x12\n\t" + "MVN lr, #0\n\t" + "MVN r12, #18\n\t" "LDM %[a]!, {r2, r3, r4, r5}\n\t" - "SUBS r2, r6, r2\n\t" - "SBCS r3, r7, r3\n\t" - "SBCS r4, r7, r4\n\t" - "SBCS r5, r7, r5\n\t" + "SUBS r2, r12, r2\n\t" + "SBCS r3, lr, r3\n\t" + "SBCS r4, lr, r4\n\t" + "SBCS r5, lr, r5\n\t" "STM %[r]!, {r2, r3, r4, r5}\n\t" - "MVN r6, #0x80000000\n\t" + "MVN r12, #0x80000000\n\t" "LDM %[a]!, {r2, r3, r4, r5}\n\t" - "SBCS r2, r7, r2\n\t" - "SBCS r3, r7, r3\n\t" - "SBCS r4, r7, r4\n\t" - "SBC r5, r6, r5\n\t" + "SBCS r2, lr, r2\n\t" + "SBCS r3, lr, r3\n\t" + "SBCS r4, lr, r4\n\t" + "SBC r5, r12, r5\n\t" "STM %[r]!, {r2, r3, r4, r5}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) @@ -554,7 +554,7 @@ WC_OMIT_FRAME_POINTER void fe_neg(fe r, const fe a) : : [r] "r" (r), [a] "r" (a) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ - : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" + : "memory", "cc", "r2", "r3", "r4", "r5", "r12", "lr" ); } @@ -570,24 +570,24 @@ WC_OMIT_FRAME_POINTER int fe_isnonzero(const fe a) __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" - "ADDS r1, r2, #0x13\n\t" - "ADCS r1, r3, #0x0\n\t" - "ADCS r1, r4, #0x0\n\t" - "ADCS r1, r5, #0x0\n\t" - "ADCS r1, r6, #0x0\n\t" - "ADCS r1, r7, #0x0\n\t" - "ADCS r1, r8, #0x0\n\t" - "ADC r1, r9, #0x0\n\t" + "ADDS r1, r2, #19\n\t" + "ADCS r1, r3, #0\n\t" + "ADCS r1, r4, #0\n\t" + "ADCS r1, r5, #0\n\t" + "ADCS r1, r6, #0\n\t" + "ADCS r1, r7, #0\n\t" + "ADCS r1, r8, #0\n\t" + "ADC r1, r9, #0\n\t" "ASR r1, r1, #31\n\t" - "AND r1, r1, #0x13\n\t" + "AND r1, r1, #19\n\t" "ADDS r2, r2, r1\n\t" - "ADCS r3, r3, #0x0\n\t" - "ADCS r4, r4, #0x0\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADC r9, r9, #0x0\n\t" + "ADCS r3, r3, #0\n\t" + "ADCS r4, r4, #0\n\t" + "ADCS r5, r5, #0\n\t" + "ADCS r6, r6, #0\n\t" + "ADCS r7, r7, #0\n\t" + "ADCS r8, r8, #0\n\t" + "ADC r9, r9, #0\n\t" "BFC r9, #31, #1\n\t" "ORR r2, r2, r3\n\t" "ORR r4, r4, r5\n\t" @@ -604,7 +604,7 @@ WC_OMIT_FRAME_POINTER int fe_isnonzero(const fe a) : [a] "r" (a) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", - "r10" + "r12" ); return (word32)(size_t)a; } @@ -621,19 +621,18 @@ WC_OMIT_FRAME_POINTER int fe_isnegative(const fe a) __asm__ __volatile__ ( "LDM %[a]!, {r2, r3, r4, r5}\n\t" - "ADDS r1, r2, #0x13\n\t" - "ADCS r1, r3, #0x0\n\t" - "ADCS r1, r4, #0x0\n\t" - "ADCS r1, r5, #0x0\n\t" + "AND r12, r2, #1\n\t" + "ADDS r1, r2, #19\n\t" + "ADCS r1, r3, #0\n\t" + "ADCS r1, r4, #0\n\t" + "ADCS r1, r5, #0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" - "ADCS r1, r2, #0x0\n\t" - "ADCS r1, r3, #0x0\n\t" - "ADCS r1, r4, #0x0\n\t" - "LDR r2, [%[a], #-16]\n\t" - "ADC r1, r5, #0x0\n\t" - "AND %[a], r2, #0x1\n\t" + "ADCS r1, r2, #0\n\t" + "ADCS r1, r3, #0\n\t" + "ADCS r1, r4, #0\n\t" + "ADC r1, r5, #0\n\t" "LSR r1, r1, #31\n\t" - "EOR %[a], %[a], r1\n\t" + "EOR %[a], r12, r1\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a) : @@ -641,7 +640,7 @@ WC_OMIT_FRAME_POINTER int fe_isnegative(const fe a) : : [a] "r" (a) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ - : "memory", "cc", "r1", "r2", "r3", "r4", "r5" + : "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r12" ); return (word32)(size_t)a; } @@ -667,12 +666,12 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b) "SBFX r3, %[b], #7, #1\n\t" "EOR r12, %[b], r3\n\t" "SUB r12, r12, r3\n\t" - "MOV r4, #0x1\n\t" - "MOV r5, #0x0\n\t" - "MOV r6, #0x1\n\t" - "MOV r7, #0x0\n\t" - "MOV r8, #0x0\n\t" - "MOV r9, #0x0\n\t" + "MOV r4, #1\n\t" + "MOV r5, #0\n\t" + "MOV r6, #1\n\t" + "MOV r7, #0\n\t" + "MOV r8, #0\n\t" + "MOV r9, #0\n\t" "MOV r3, #0x80000000\n\t" "ROR r3, r3, #31\n\t" "ROR r3, r3, r12\n\t" @@ -881,8 +880,8 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b) "EOR r8, r8, r10\n\t" "EOR r9, r9, r11\n\t" "SUB %[base], %[base], #0x2a0\n\t" - "MVN r10, #0x12\n\t" - "MVN r11, #0x0\n\t" + "MVN r10, #18\n\t" + "MVN r11, #0\n\t" "SUBS r10, r10, r8\n\t" "SBCS r11, r11, r9\n\t" "SBC lr, lr, lr\n\t" @@ -907,12 +906,12 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b) "SBFX r3, %[b], #7, #1\n\t" "EOR r12, %[b], r3\n\t" "SUB r12, r12, r3\n\t" - "MOV r4, #0x0\n\t" - "MOV r5, #0x0\n\t" - "MOV r6, #0x0\n\t" - "MOV r7, #0x0\n\t" - "MOV r8, #0x0\n\t" - "MOV r9, #0x0\n\t" + "MOV r4, #0\n\t" + "MOV r5, #0\n\t" + "MOV r6, #0\n\t" + "MOV r7, #0\n\t" + "MOV r8, #0\n\t" + "MOV r9, #0\n\t" "MOV r3, #0x80000000\n\t" "ROR r3, r3, #31\n\t" "ROR r3, r3, r12\n\t" @@ -1121,9 +1120,9 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b) "EOR r8, r8, r10\n\t" "EOR r9, r9, r11\n\t" "SUB %[base], %[base], #0x2a0\n\t" - "MVN r10, #0x0\n\t" - "MVN r11, #0x0\n\t" - "RSBS lr, lr, #0x0\n\t" + "MVN r10, #0\n\t" + "MVN r11, #0\n\t" + "RSBS lr, lr, #0\n\t" "SBCS r10, r10, r8\n\t" "SBCS r11, r11, r9\n\t" "SBC lr, lr, lr\n\t" @@ -1148,12 +1147,12 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b) "SBFX r3, %[b], #7, #1\n\t" "EOR r12, %[b], r3\n\t" "SUB r12, r12, r3\n\t" - "MOV r4, #0x0\n\t" - "MOV r5, #0x0\n\t" - "MOV r6, #0x0\n\t" - "MOV r7, #0x0\n\t" - "MOV r8, #0x0\n\t" - "MOV r9, #0x0\n\t" + "MOV r4, #0\n\t" + "MOV r5, #0\n\t" + "MOV r6, #0\n\t" + "MOV r7, #0\n\t" + "MOV r8, #0\n\t" + "MOV r9, #0\n\t" "MOV r3, #0x80000000\n\t" "ROR r3, r3, #31\n\t" "ROR r3, r3, r12\n\t" @@ -1362,9 +1361,9 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b) "EOR r8, r8, r10\n\t" "EOR r9, r9, r11\n\t" "SUB %[base], %[base], #0x2a0\n\t" - "MVN r10, #0x0\n\t" - "MVN r11, #0x0\n\t" - "RSBS lr, lr, #0x0\n\t" + "MVN r10, #0\n\t" + "MVN r11, #0\n\t" + "RSBS lr, lr, #0\n\t" "SBCS r10, r10, r8\n\t" "SBCS r11, r11, r9\n\t" "SBC lr, lr, lr\n\t" @@ -1389,12 +1388,12 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b) "SBFX r3, %[b], #7, #1\n\t" "EOR r12, %[b], r3\n\t" "SUB r12, r12, r3\n\t" - "MOV r4, #0x0\n\t" - "MOV r5, #0x0\n\t" - "MOV r6, #0x0\n\t" - "MOV r7, #0x0\n\t" - "MOV r8, #0x0\n\t" - "MOV r9, #0x0\n\t" + "MOV r4, #0\n\t" + "MOV r5, #0\n\t" + "MOV r6, #0\n\t" + "MOV r7, #0\n\t" + "MOV r8, #0\n\t" + "MOV r9, #0\n\t" "MOV r3, #0x80000000\n\t" "ROR r3, r3, #31\n\t" "ROR r3, r3, r12\n\t" @@ -1603,9 +1602,9 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b) "EOR r8, r8, r10\n\t" "EOR r9, r9, r11\n\t" "SUB %[base], %[base], #0x2a0\n\t" - "MVN r10, #0x0\n\t" + "MVN r10, #0\n\t" "MVN r11, #0x80000000\n\t" - "RSBS lr, lr, #0x0\n\t" + "RSBS lr, lr, #0\n\t" "SBCS r10, r10, r8\n\t" "SBC r11, r11, r9\n\t" "ASR r12, %[b], #31\n\t" @@ -1676,7 +1675,7 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b) "AND r11, r11, lr\n\t" "MVN r12, lr\n\t" "SUB r4, r4, r12\n\t" - "MOV r12, #0x20\n\t" + "MOV r12, #32\n\t" "AND r12, r12, r3\n\t" "ADD %[r], %[r], r12\n\t" "STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" @@ -1692,14 +1691,14 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b) "AND r11, r11, lr\n\t" "MVN r12, lr\n\t" "SUB r4, r4, r12\n\t" - "MOV r12, #0x20\n\t" + "MOV r12, #32\n\t" "BIC r12, r12, r3\n\t" "ADD %[r], %[r], r12\n\t" "STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "SUB %[r], %[r], r12\n\t" "ADD %[r], %[r], #0x40\n\t" "LDM %[base]!, {r4, r5, r6, r7}\n\t" - "MVN r12, #0x12\n\t" + "MVN r12, #18\n\t" "SUBS r8, r12, r4\n\t" "SBCS r9, r3, r5\n\t" "SBCS r10, r3, r6\n\t" @@ -1764,17 +1763,17 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b) #ifdef WOLFSSL_ARM_ARCH_7M void fe_mul_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_mul_op(void) +WC_OMIT_FRAME_POINTER void fe_mul_op() #else -WC_OMIT_FRAME_POINTER void fe_mul_op(void) +WC_OMIT_FRAME_POINTER void fe_mul_op() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "SUB sp, sp, #0x28\n\t" + "SUB sp, sp, #40\n\t" "STR r0, [sp, #36]\n\t" - "MOV r0, #0x0\n\t" + "MOV r0, #0\n\t" "LDR r12, [r1]\n\t" /* A[0] * B[0] */ "LDR lr, [r2]\n\t" @@ -1796,351 +1795,351 @@ WC_OMIT_FRAME_POINTER void fe_mul_op(void) "ADDS r5, r5, r11\n\t" /* A[0] * B[3] */ "LDR lr, [r2, #12]\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADC r11, r0, #0x0\n\t" + "ADCS r6, r6, #0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r6, r11, r12, lr\n\t" "ADDS r7, r7, r11\n\t" /* A[0] * B[5] */ "LDR lr, [r2, #20]\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADC r11, r0, #0x0\n\t" + "ADCS r8, r8, #0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r8, r11, r12, lr\n\t" "ADDS r9, r9, r11\n\t" /* A[0] * B[7] */ "LDR lr, [r2, #28]\n\t" - "ADCS r10, r10, #0x0\n\t" - "ADC r3, r0, #0x0\n\t" + "ADCS r10, r10, #0\n\t" + "ADC r3, r0, #0\n\t" "UMLAL r10, r3, r12, lr\n\t" /* A[1] * B[0] */ "LDR r12, [r1, #4]\n\t" "LDR lr, [r2]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r4, r11, r12, lr\n\t" "STR r4, [sp, #4]\n\t" "ADDS r5, r5, r11\n\t" /* A[1] * B[1] */ "LDR lr, [r2, #4]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r5, r11, r12, lr\n\t" "ADDS r6, r6, r11\n\t" /* A[1] * B[2] */ "LDR lr, [r2, #8]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r6, r11, r12, lr\n\t" "ADDS r7, r7, r11\n\t" /* A[1] * B[3] */ "LDR lr, [r2, #12]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r7, r11, r12, lr\n\t" "ADDS r8, r8, r11\n\t" /* A[1] * B[4] */ "LDR lr, [r2, #16]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r8, r11, r12, lr\n\t" "ADDS r9, r9, r11\n\t" /* A[1] * B[5] */ "LDR lr, [r2, #20]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r9, r11, r12, lr\n\t" "ADDS r10, r10, r11\n\t" /* A[1] * B[6] */ "LDR lr, [r2, #24]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "ADDS r3, r3, r11\n\t" /* A[1] * B[7] */ "LDR lr, [r2, #28]\n\t" - "ADC r4, r0, #0x0\n\t" + "ADC r4, r0, #0\n\t" "UMLAL r3, r4, r12, lr\n\t" /* A[2] * B[0] */ "LDR r12, [r1, #8]\n\t" "LDR lr, [r2]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r5, r11, r12, lr\n\t" "STR r5, [sp, #8]\n\t" "ADDS r6, r6, r11\n\t" /* A[2] * B[1] */ "LDR lr, [r2, #4]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r6, r11, r12, lr\n\t" "ADDS r7, r7, r11\n\t" /* A[2] * B[2] */ "LDR lr, [r2, #8]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r7, r11, r12, lr\n\t" "ADDS r8, r8, r11\n\t" /* A[2] * B[3] */ "LDR lr, [r2, #12]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r8, r11, r12, lr\n\t" "ADDS r9, r9, r11\n\t" /* A[2] * B[4] */ "LDR lr, [r2, #16]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r9, r11, r12, lr\n\t" "ADDS r10, r10, r11\n\t" /* A[2] * B[5] */ "LDR lr, [r2, #20]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "ADDS r3, r3, r11\n\t" /* A[2] * B[6] */ "LDR lr, [r2, #24]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r3, r11, r12, lr\n\t" "ADDS r4, r4, r11\n\t" /* A[2] * B[7] */ "LDR lr, [r2, #28]\n\t" - "ADC r5, r0, #0x0\n\t" + "ADC r5, r0, #0\n\t" "UMLAL r4, r5, r12, lr\n\t" /* A[3] * B[0] */ "LDR r12, [r1, #12]\n\t" "LDR lr, [r2]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r6, r11, r12, lr\n\t" "STR r6, [sp, #12]\n\t" "ADDS r7, r7, r11\n\t" /* A[3] * B[1] */ "LDR lr, [r2, #4]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r7, r11, r12, lr\n\t" "ADDS r8, r8, r11\n\t" /* A[3] * B[2] */ "LDR lr, [r2, #8]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r8, r11, r12, lr\n\t" "ADDS r9, r9, r11\n\t" /* A[3] * B[3] */ "LDR lr, [r2, #12]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r9, r11, r12, lr\n\t" "ADDS r10, r10, r11\n\t" /* A[3] * B[4] */ "LDR lr, [r2, #16]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "ADDS r3, r3, r11\n\t" /* A[3] * B[5] */ "LDR lr, [r2, #20]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r3, r11, r12, lr\n\t" "ADDS r4, r4, r11\n\t" /* A[3] * B[6] */ "LDR lr, [r2, #24]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r4, r11, r12, lr\n\t" "ADDS r5, r5, r11\n\t" /* A[3] * B[7] */ "LDR lr, [r2, #28]\n\t" - "ADC r6, r0, #0x0\n\t" + "ADC r6, r0, #0\n\t" "UMLAL r5, r6, r12, lr\n\t" /* A[4] * B[0] */ "LDR r12, [r1, #16]\n\t" "LDR lr, [r2]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r7, r11, r12, lr\n\t" "STR r7, [sp, #16]\n\t" "ADDS r8, r8, r11\n\t" /* A[4] * B[1] */ "LDR lr, [r2, #4]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r8, r11, r12, lr\n\t" "ADDS r9, r9, r11\n\t" /* A[4] * B[2] */ "LDR lr, [r2, #8]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r9, r11, r12, lr\n\t" "ADDS r10, r10, r11\n\t" /* A[4] * B[3] */ "LDR lr, [r2, #12]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "ADDS r3, r3, r11\n\t" /* A[4] * B[4] */ "LDR lr, [r2, #16]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r3, r11, r12, lr\n\t" "ADDS r4, r4, r11\n\t" /* A[4] * B[5] */ "LDR lr, [r2, #20]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r4, r11, r12, lr\n\t" "ADDS r5, r5, r11\n\t" /* A[4] * B[6] */ "LDR lr, [r2, #24]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r5, r11, r12, lr\n\t" "ADDS r6, r6, r11\n\t" /* A[4] * B[7] */ "LDR lr, [r2, #28]\n\t" - "ADC r7, r0, #0x0\n\t" + "ADC r7, r0, #0\n\t" "UMLAL r6, r7, r12, lr\n\t" /* A[5] * B[0] */ "LDR r12, [r1, #20]\n\t" "LDR lr, [r2]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r8, r11, r12, lr\n\t" "STR r8, [sp, #20]\n\t" "ADDS r9, r9, r11\n\t" /* A[5] * B[1] */ "LDR lr, [r2, #4]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r9, r11, r12, lr\n\t" "ADDS r10, r10, r11\n\t" /* A[5] * B[2] */ "LDR lr, [r2, #8]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "ADDS r3, r3, r11\n\t" /* A[5] * B[3] */ "LDR lr, [r2, #12]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r3, r11, r12, lr\n\t" "ADDS r4, r4, r11\n\t" /* A[5] * B[4] */ "LDR lr, [r2, #16]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r4, r11, r12, lr\n\t" "ADDS r5, r5, r11\n\t" /* A[5] * B[5] */ "LDR lr, [r2, #20]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r5, r11, r12, lr\n\t" "ADDS r6, r6, r11\n\t" /* A[5] * B[6] */ "LDR lr, [r2, #24]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r6, r11, r12, lr\n\t" "ADDS r7, r7, r11\n\t" /* A[5] * B[7] */ "LDR lr, [r2, #28]\n\t" - "ADC r8, r0, #0x0\n\t" + "ADC r8, r0, #0\n\t" "UMLAL r7, r8, r12, lr\n\t" /* A[6] * B[0] */ "LDR r12, [r1, #24]\n\t" "LDR lr, [r2]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r9, r11, r12, lr\n\t" "STR r9, [sp, #24]\n\t" "ADDS r10, r10, r11\n\t" /* A[6] * B[1] */ "LDR lr, [r2, #4]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "ADDS r3, r3, r11\n\t" /* A[6] * B[2] */ "LDR lr, [r2, #8]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r3, r11, r12, lr\n\t" "ADDS r4, r4, r11\n\t" /* A[6] * B[3] */ "LDR lr, [r2, #12]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r4, r11, r12, lr\n\t" "ADDS r5, r5, r11\n\t" /* A[6] * B[4] */ "LDR lr, [r2, #16]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r5, r11, r12, lr\n\t" "ADDS r6, r6, r11\n\t" /* A[6] * B[5] */ "LDR lr, [r2, #20]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r6, r11, r12, lr\n\t" "ADDS r7, r7, r11\n\t" /* A[6] * B[6] */ "LDR lr, [r2, #24]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r7, r11, r12, lr\n\t" "ADDS r8, r8, r11\n\t" /* A[6] * B[7] */ "LDR lr, [r2, #28]\n\t" - "ADC r9, r0, #0x0\n\t" + "ADC r9, r0, #0\n\t" "UMLAL r8, r9, r12, lr\n\t" /* A[7] * B[0] */ "LDR r12, [r1, #28]\n\t" "LDR lr, [r2]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "STR r10, [sp, #28]\n\t" "ADDS r3, r3, r11\n\t" /* A[7] * B[1] */ "LDR lr, [r2, #4]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r3, r11, r12, lr\n\t" "ADDS r4, r4, r11\n\t" /* A[7] * B[2] */ "LDR lr, [r2, #8]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r4, r11, r12, lr\n\t" "ADDS r5, r5, r11\n\t" /* A[7] * B[3] */ "LDR lr, [r2, #12]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r5, r11, r12, lr\n\t" "ADDS r6, r6, r11\n\t" /* A[7] * B[4] */ "LDR lr, [r2, #16]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r6, r11, r12, lr\n\t" "ADDS r7, r7, r11\n\t" /* A[7] * B[5] */ "LDR lr, [r2, #20]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r7, r11, r12, lr\n\t" "ADDS r8, r8, r11\n\t" /* A[7] * B[6] */ "LDR lr, [r2, #24]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r8, r11, r12, lr\n\t" "ADDS r9, r9, r11\n\t" /* A[7] * B[7] */ "LDR lr, [r2, #28]\n\t" - "ADC r10, r0, #0x0\n\t" + "ADC r10, r0, #0\n\t" "UMLAL r9, r10, r12, lr\n\t" /* Reduce */ "LDR r2, [sp, #28]\n\t" "MOV lr, sp\n\t" - "MOV r12, #0x26\n\t" + "MOV r12, #38\n\t" "UMULL r10, r11, r10, r12\n\t" "ADDS r10, r10, r2\n\t" - "ADC r11, r11, #0x0\n\t" - "MOV r12, #0x13\n\t" + "ADC r11, r11, #0\n\t" + "MOV r12, #19\n\t" "LSL r11, r11, #1\n\t" "ORR r11, r11, r10, LSR #31\n\t" "MUL r11, r11, r12\n\t" "LDM lr!, {r1, r2}\n\t" - "MOV r12, #0x26\n\t" + "MOV r12, #38\n\t" "ADDS r1, r1, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r1, r11, r3, r12\n\t" "ADDS r2, r2, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r2, r11, r4, r12\n\t" "LDM lr!, {r3, r4}\n\t" "ADDS r3, r3, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r3, r11, r5, r12\n\t" "ADDS r4, r4, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r4, r11, r6, r12\n\t" "LDM lr!, {r5, r6}\n\t" "ADDS r5, r5, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r5, r11, r7, r12\n\t" "ADDS r6, r6, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r6, r11, r8, r12\n\t" "LDM lr!, {r7, r8}\n\t" "ADDS r7, r7, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r7, r11, r9, r12\n\t" "BFC r10, #31, #1\n\t" "ADDS r8, r10, r11\n\t" /* Store */ "LDR r0, [sp, #36]\n\t" "STM r0, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" - "ADD sp, sp, #0x28\n\t" + "ADD sp, sp, #40\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : : @@ -2155,15 +2154,15 @@ WC_OMIT_FRAME_POINTER void fe_mul_op(void) #else void fe_mul_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_mul_op(void) +WC_OMIT_FRAME_POINTER void fe_mul_op() #else -WC_OMIT_FRAME_POINTER void fe_mul_op(void) +WC_OMIT_FRAME_POINTER void fe_mul_op() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "SUB sp, sp, #0x2c\n\t" + "SUB sp, sp, #44\n\t" "STRD r0, r1, [sp, #36]\n\t" "MOV lr, r2\n\t" "LDM r1, {r0, r1, r2, r3}\n\t" @@ -2188,54 +2187,54 @@ WC_OMIT_FRAME_POINTER void fe_mul_op(void) "UMAAL r9, r10, r2, r4\n\t" "UMAAL r10, r11, r3, r4\n\t" "LDM lr, {r4, r5, r6, r7}\n\t" - "MOV r12, #0x0\n\t" + "MOV r12, #0\n\t" "UMLAL r8, r12, r0, r4\n\t" "UMAAL r9, r12, r1, r4\n\t" "UMAAL r10, r12, r2, r4\n\t" "UMAAL r11, r12, r3, r4\n\t" - "MOV r4, #0x0\n\t" + "MOV r4, #0\n\t" "UMLAL r9, r4, r0, r5\n\t" "UMAAL r10, r4, r1, r5\n\t" "UMAAL r11, r4, r2, r5\n\t" "UMAAL r12, r4, r3, r5\n\t" - "MOV r5, #0x0\n\t" + "MOV r5, #0\n\t" "UMLAL r10, r5, r0, r6\n\t" "UMAAL r11, r5, r1, r6\n\t" "UMAAL r12, r5, r2, r6\n\t" "UMAAL r4, r5, r3, r6\n\t" - "MOV r6, #0x0\n\t" + "MOV r6, #0\n\t" "UMLAL r11, r6, r0, r7\n\t" "LDR r0, [sp, #40]\n\t" "UMAAL r12, r6, r1, r7\n\t" - "ADD r0, r0, #0x10\n\t" + "ADD r0, r0, #16\n\t" "UMAAL r4, r6, r2, r7\n\t" - "SUB lr, lr, #0x10\n\t" + "SUB lr, lr, #16\n\t" "UMAAL r5, r6, r3, r7\n\t" "LDM r0, {r0, r1, r2, r3}\n\t" "STR r6, [sp, #32]\n\t" "LDM lr!, {r6}\n\t" - "MOV r7, #0x0\n\t" + "MOV r7, #0\n\t" "UMLAL r8, r7, r0, r6\n\t" "UMAAL r9, r7, r1, r6\n\t" "STR r8, [sp, #16]\n\t" "UMAAL r10, r7, r2, r6\n\t" "UMAAL r11, r7, r3, r6\n\t" "LDM lr!, {r6}\n\t" - "MOV r8, #0x0\n\t" + "MOV r8, #0\n\t" "UMLAL r9, r8, r0, r6\n\t" "UMAAL r10, r8, r1, r6\n\t" "STR r9, [sp, #20]\n\t" "UMAAL r11, r8, r2, r6\n\t" "UMAAL r12, r8, r3, r6\n\t" "LDM lr!, {r6}\n\t" - "MOV r9, #0x0\n\t" + "MOV r9, #0\n\t" "UMLAL r10, r9, r0, r6\n\t" "UMAAL r11, r9, r1, r6\n\t" "STR r10, [sp, #24]\n\t" "UMAAL r12, r9, r2, r6\n\t" "UMAAL r4, r9, r3, r6\n\t" "LDM lr!, {r6}\n\t" - "MOV r10, #0x0\n\t" + "MOV r10, #0\n\t" "UMLAL r11, r10, r0, r6\n\t" "UMAAL r12, r10, r1, r6\n\t" "STR r11, [sp, #28]\n\t" @@ -2263,14 +2262,14 @@ WC_OMIT_FRAME_POINTER void fe_mul_op(void) "UMAAL r9, r10, r3, lr\n\t" /* Reduce */ "LDR r0, [sp, #28]\n\t" - "MOV lr, #0x25\n\t" + "MOV lr, #37\n\t" "UMAAL r10, r0, r10, lr\n\t" - "MOV lr, #0x13\n\t" + "MOV lr, #19\n\t" "LSL r0, r0, #1\n\t" "ORR r0, r0, r10, LSR #31\n\t" "MUL r11, r0, lr\n\t" "POP {r0, r1, r2}\n\t" - "MOV lr, #0x26\n\t" + "MOV lr, #38\n\t" "UMAAL r0, r11, r12, lr\n\t" "UMAAL r1, r11, r4, lr\n\t" "UMAAL r2, r11, r5, lr\n\t" @@ -2285,7 +2284,7 @@ WC_OMIT_FRAME_POINTER void fe_mul_op(void) "LDR lr, [sp, #8]\n\t" /* Store */ "STM lr, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" - "ADD sp, sp, #0x10\n\t" + "ADD sp, sp, #16\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : : @@ -2327,9 +2326,9 @@ WC_OMIT_FRAME_POINTER void fe_mul(fe r, const fe a, const fe b) #ifdef WOLFSSL_ARM_ARCH_7M void fe_sq_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_sq_op(void) +WC_OMIT_FRAME_POINTER void fe_sq_op() #else -WC_OMIT_FRAME_POINTER void fe_sq_op(void) +WC_OMIT_FRAME_POINTER void fe_sq_op() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -2338,7 +2337,7 @@ WC_OMIT_FRAME_POINTER void fe_sq_op(void) "SUB sp, sp, #0x44\n\t" "STR r0, [sp, #64]\n\t" /* Square */ - "MOV r0, #0x0\n\t" + "MOV r0, #0\n\t" "LDR r12, [r1]\n\t" /* A[0] * A[1] */ "LDR lr, [r1, #4]\n\t" @@ -2354,137 +2353,137 @@ WC_OMIT_FRAME_POINTER void fe_sq_op(void) "UMULL r10, r3, r12, lr\n\t" /* A[0] * A[2] */ "LDR lr, [r1, #8]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r5, r11, r12, lr\n\t" "ADDS r6, r6, r11\n\t" /* A[0] * A[4] */ "LDR lr, [r1, #16]\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADC r11, r0, #0x0\n\t" + "ADCS r7, r7, #0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r7, r11, r12, lr\n\t" "ADDS r8, r8, r11\n\t" /* A[0] * A[6] */ "LDR lr, [r1, #24]\n\t" - "ADCS r9, r9, #0x0\n\t" - "ADC r11, r0, #0x0\n\t" + "ADCS r9, r9, #0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r9, r11, r12, lr\n\t" "ADDS r10, r10, r11\n\t" - "ADCS r3, r3, #0x0\n\t" + "ADCS r3, r3, #0\n\t" "STR r4, [sp, #4]\n\t" "STR r5, [sp, #8]\n\t" /* A[1] * A[2] */ "LDR r12, [r1, #4]\n\t" "LDR lr, [r1, #8]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r6, r11, r12, lr\n\t" "STR r6, [sp, #12]\n\t" "ADDS r7, r7, r11\n\t" /* A[1] * A[3] */ "LDR lr, [r1, #12]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r7, r11, r12, lr\n\t" "STR r7, [sp, #16]\n\t" "ADDS r8, r8, r11\n\t" /* A[1] * A[4] */ "LDR lr, [r1, #16]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r8, r11, r12, lr\n\t" "ADDS r9, r9, r11\n\t" /* A[1] * A[5] */ "LDR lr, [r1, #20]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r9, r11, r12, lr\n\t" "ADDS r10, r10, r11\n\t" /* A[1] * A[6] */ "LDR lr, [r1, #24]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "ADDS r3, r3, r11\n\t" /* A[1] * A[7] */ "LDR lr, [r1, #28]\n\t" - "ADC r4, r0, #0x0\n\t" + "ADC r4, r0, #0\n\t" "UMLAL r3, r4, r12, lr\n\t" /* A[2] * A[3] */ "LDR r12, [r1, #8]\n\t" "LDR lr, [r1, #12]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r8, r11, r12, lr\n\t" "STR r8, [sp, #20]\n\t" "ADDS r9, r9, r11\n\t" /* A[2] * A[4] */ "LDR lr, [r1, #16]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r9, r11, r12, lr\n\t" "STR r9, [sp, #24]\n\t" "ADDS r10, r10, r11\n\t" /* A[2] * A[5] */ "LDR lr, [r1, #20]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "ADDS r3, r3, r11\n\t" /* A[2] * A[6] */ "LDR lr, [r1, #24]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r3, r11, r12, lr\n\t" "ADDS r4, r4, r11\n\t" /* A[2] * A[7] */ "LDR lr, [r1, #28]\n\t" - "ADC r5, r0, #0x0\n\t" + "ADC r5, r0, #0\n\t" "UMLAL r4, r5, r12, lr\n\t" /* A[3] * A[4] */ "LDR r12, [r1, #12]\n\t" "LDR lr, [r1, #16]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "STR r10, [sp, #28]\n\t" "ADDS r3, r3, r11\n\t" /* A[3] * A[5] */ "LDR lr, [r1, #20]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r3, r11, r12, lr\n\t" "ADDS r4, r4, r11\n\t" /* A[3] * A[6] */ "LDR lr, [r1, #24]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r4, r11, r12, lr\n\t" "ADDS r5, r5, r11\n\t" /* A[3] * A[7] */ "LDR lr, [r1, #28]\n\t" - "ADC r6, r0, #0x0\n\t" + "ADC r6, r0, #0\n\t" "UMLAL r5, r6, r12, lr\n\t" /* A[4] * A[5] */ "LDR r12, [r1, #16]\n\t" "LDR lr, [r1, #20]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r4, r11, r12, lr\n\t" "ADDS r5, r5, r11\n\t" /* A[4] * A[6] */ "LDR lr, [r1, #24]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r5, r11, r12, lr\n\t" "ADDS r6, r6, r11\n\t" /* A[4] * A[7] */ "LDR lr, [r1, #28]\n\t" - "ADC r7, r0, #0x0\n\t" + "ADC r7, r0, #0\n\t" "UMLAL r6, r7, r12, lr\n\t" /* A[5] * A[6] */ "LDR r12, [r1, #20]\n\t" "LDR lr, [r1, #24]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r6, r11, r12, lr\n\t" "ADDS r7, r7, r11\n\t" /* A[5] * A[7] */ "LDR lr, [r1, #28]\n\t" - "ADC r8, r0, #0x0\n\t" + "ADC r8, r0, #0\n\t" "UMLAL r7, r8, r12, lr\n\t" /* A[6] * A[7] */ "LDR r12, [r1, #24]\n\t" "LDR lr, [r1, #28]\n\t" - "MOV r9, #0x0\n\t" + "MOV r9, #0\n\t" "UMLAL r8, r9, r12, lr\n\t" - "ADD lr, sp, #0x20\n\t" + "ADD lr, sp, #32\n\t" "STM lr, {r3, r4, r5, r6, r7, r8, r9}\n\t" - "ADD lr, sp, #0x4\n\t" + "ADD lr, sp, #4\n\t" "LDM lr, {r4, r5, r6, r7, r8, r9, r10}\n\t" "ADDS r4, r4, r4\n\t" "ADCS r5, r5, r5\n\t" @@ -2502,9 +2501,9 @@ WC_OMIT_FRAME_POINTER void fe_sq_op(void) "ADCS r7, r7, r7\n\t" "ADCS r8, r8, r8\n\t" "ADCS r9, r9, r9\n\t" - "ADC r10, r0, #0x0\n\t" + "ADC r10, r0, #0\n\t" "STM lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" - "ADD lr, sp, #0x4\n\t" + "ADD lr, sp, #4\n\t" "LDM lr, {r4, r5, r6, r7, r8, r9, r10}\n\t" "MOV lr, sp\n\t" /* A[0] * A[0] */ @@ -2513,83 +2512,83 @@ WC_OMIT_FRAME_POINTER void fe_sq_op(void) "ADDS r4, r4, r11\n\t" /* A[1] * A[1] */ "LDR r12, [r1, #4]\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADC r11, r0, #0x0\n\t" + "ADCS r5, r5, #0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r5, r11, r12, r12\n\t" "ADDS r6, r6, r11\n\t" /* A[2] * A[2] */ "LDR r12, [r1, #8]\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADC r11, r0, #0x0\n\t" + "ADCS r7, r7, #0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r7, r11, r12, r12\n\t" "ADDS r8, r8, r11\n\t" /* A[3] * A[3] */ "LDR r12, [r1, #12]\n\t" - "ADCS r9, r9, #0x0\n\t" - "ADC r11, r0, #0x0\n\t" + "ADCS r9, r9, #0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r9, r11, r12, r12\n\t" "ADDS r10, r10, r11\n\t" "STM lr!, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" "LDM lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" /* A[4] * A[4] */ "LDR r12, [r1, #16]\n\t" - "ADCS r3, r3, #0x0\n\t" - "ADC r11, r0, #0x0\n\t" + "ADCS r3, r3, #0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r3, r11, r12, r12\n\t" "ADDS r4, r4, r11\n\t" /* A[5] * A[5] */ "LDR r12, [r1, #20]\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADC r11, r0, #0x0\n\t" + "ADCS r5, r5, #0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r5, r11, r12, r12\n\t" "ADDS r6, r6, r11\n\t" /* A[6] * A[6] */ "LDR r12, [r1, #24]\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADC r11, r0, #0x0\n\t" + "ADCS r7, r7, #0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r7, r11, r12, r12\n\t" "ADDS r8, r8, r11\n\t" /* A[7] * A[7] */ "LDR r12, [r1, #28]\n\t" - "ADCS r9, r9, #0x0\n\t" - "ADC r10, r10, #0x0\n\t" + "ADCS r9, r9, #0\n\t" + "ADC r10, r10, #0\n\t" "UMLAL r9, r10, r12, r12\n\t" /* Reduce */ "LDR r2, [sp, #28]\n\t" "MOV lr, sp\n\t" - "MOV r12, #0x26\n\t" + "MOV r12, #38\n\t" "UMULL r10, r11, r10, r12\n\t" "ADDS r10, r10, r2\n\t" - "ADC r11, r11, #0x0\n\t" - "MOV r12, #0x13\n\t" + "ADC r11, r11, #0\n\t" + "MOV r12, #19\n\t" "LSL r11, r11, #1\n\t" "ORR r11, r11, r10, LSR #31\n\t" "MUL r11, r11, r12\n\t" "LDM lr!, {r1, r2}\n\t" - "MOV r12, #0x26\n\t" + "MOV r12, #38\n\t" "ADDS r1, r1, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r1, r11, r3, r12\n\t" "ADDS r2, r2, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r2, r11, r4, r12\n\t" "LDM lr!, {r3, r4}\n\t" "ADDS r3, r3, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r3, r11, r5, r12\n\t" "ADDS r4, r4, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r4, r11, r6, r12\n\t" "LDM lr!, {r5, r6}\n\t" "ADDS r5, r5, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r5, r11, r7, r12\n\t" "ADDS r6, r6, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r6, r11, r8, r12\n\t" "LDM lr!, {r7, r8}\n\t" "ADDS r7, r7, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r7, r11, r9, r12\n\t" "BFC r10, #31, #1\n\t" "ADDS r8, r10, r11\n\t" @@ -2611,22 +2610,22 @@ WC_OMIT_FRAME_POINTER void fe_sq_op(void) #else void fe_sq_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_sq_op(void) +WC_OMIT_FRAME_POINTER void fe_sq_op() #else -WC_OMIT_FRAME_POINTER void fe_sq_op(void) +WC_OMIT_FRAME_POINTER void fe_sq_op() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "SUB sp, sp, #0x20\n\t" + "SUB sp, sp, #32\n\t" "STR r0, [sp, #28]\n\t" "LDM r1, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" /* Square */ "UMULL r9, r10, r0, r0\n\t" "UMULL r11, r12, r0, r1\n\t" "ADDS r11, r11, r11\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "UMAAL r10, r11, lr, lr\n\t" "STM sp, {r9, r10}\n\t" "MOV r8, lr\n\t" @@ -2703,14 +2702,14 @@ WC_OMIT_FRAME_POINTER void fe_sq_op(void) /* R[14] = r9 */ /* R[15] = r7 */ /* Reduce */ - "MOV r6, #0x25\n\t" + "MOV r6, #37\n\t" "UMAAL r7, r0, r7, r6\n\t" - "MOV r6, #0x13\n\t" + "MOV r6, #19\n\t" "LSL r0, r0, #1\n\t" "ORR r0, r0, r7, LSR #31\n\t" "MUL lr, r0, r6\n\t" "POP {r0, r1}\n\t" - "MOV r6, #0x26\n\t" + "MOV r6, #38\n\t" "UMAAL r0, lr, r12, r6\n\t" "UMAAL r1, lr, r11, r6\n\t" "MOV r12, r3\n\t" @@ -2781,43 +2780,43 @@ WC_OMIT_FRAME_POINTER void fe_mul121666(fe r, fe a) __asm__ __volatile__ ( /* Multiply by 121666 */ "LDM %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" - "MOV r12, #0xdb42\n\t" - "MOVT r12, #0x1\n\t" - "UMULL r2, r10, r2, r12\n\t" - "UMULL r3, r11, r3, r12\n\t" - "ADDS r3, r3, r10\n\t" - "ADC r11, r11, #0x0\n\t" - "UMULL r4, r10, r4, r12\n\t" - "ADDS r4, r4, r11\n\t" - "ADC r10, r10, #0x0\n\t" - "UMULL r5, r11, r5, r12\n\t" - "ADDS r5, r5, r10\n\t" - "ADC r11, r11, #0x0\n\t" - "UMULL r6, r10, r6, r12\n\t" - "ADDS r6, r6, r11\n\t" - "ADC r10, r10, #0x0\n\t" - "UMULL r7, r11, r7, r12\n\t" - "ADDS r7, r7, r10\n\t" - "ADC r11, r11, #0x0\n\t" - "UMULL r8, r10, r8, r12\n\t" - "ADDS r8, r8, r11\n\t" - "ADC r10, r10, #0x0\n\t" - "UMULL r9, r11, r9, r12\n\t" - "ADDS r9, r9, r10\n\t" - "MOV r12, #0x13\n\t" - "ADC r11, r11, #0x0\n\t" - "LSL r11, r11, #1\n\t" - "ORR r11, r11, r9, LSR #31\n\t" - "MUL r11, r11, r12\n\t" - "ADDS r2, r2, r11\n\t" - "ADCS r3, r3, #0x0\n\t" - "ADCS r4, r4, #0x0\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADCS r7, r7, #0x0\n\t" + "MOV r10, #0xdb42\n\t" + "MOVT r10, #0x1\n\t" + "UMULL r2, r12, r2, r10\n\t" + "UMULL r3, lr, r3, r10\n\t" + "ADDS r3, r3, r12\n\t" + "ADC lr, lr, #0\n\t" + "UMULL r4, r12, r4, r10\n\t" + "ADDS r4, r4, lr\n\t" + "ADC r12, r12, #0\n\t" + "UMULL r5, lr, r5, r10\n\t" + "ADDS r5, r5, r12\n\t" + "ADC lr, lr, #0\n\t" + "UMULL r6, r12, r6, r10\n\t" + "ADDS r6, r6, lr\n\t" + "ADC r12, r12, #0\n\t" + "UMULL r7, lr, r7, r10\n\t" + "ADDS r7, r7, r12\n\t" + "ADC lr, lr, #0\n\t" + "UMULL r8, r12, r8, r10\n\t" + "ADDS r8, r8, lr\n\t" + "ADC r12, r12, #0\n\t" + "UMULL r9, lr, r9, r10\n\t" + "ADDS r9, r9, r12\n\t" + "MOV r10, #19\n\t" + "ADC lr, lr, #0\n\t" + "LSL lr, lr, #1\n\t" + "ORR lr, lr, r9, LSR #31\n\t" + "MUL lr, lr, r10\n\t" + "ADDS r2, r2, lr\n\t" + "ADCS r3, r3, #0\n\t" + "ADCS r4, r4, #0\n\t" + "ADCS r5, r5, #0\n\t" + "ADCS r6, r6, #0\n\t" + "ADCS r7, r7, #0\n\t" "BFC r9, #31, #1\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADC r9, r9, #0x0\n\t" + "ADCS r8, r8, #0\n\t" + "ADC r9, r9, #0\n\t" "STM %[r], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) @@ -2826,8 +2825,8 @@ WC_OMIT_FRAME_POINTER void fe_mul121666(fe r, fe a) : : [r] "r" (r), [a] "r" (a) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ - : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", - "r11", "r12" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", + "lr", "r10" ); } @@ -2846,30 +2845,30 @@ WC_OMIT_FRAME_POINTER void fe_mul121666(fe r, fe a) __asm__ __volatile__ ( /* Multiply by 121666 */ "LDM %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" - "MOV r11, #0xdb42\n\t" - "MOVT r11, #0x1\n\t" - "UMULL r2, r12, r2, r11\n\t" - "SUB r10, r11, #0x1\n\t" - "UMAAL r3, r12, r3, r10\n\t" - "UMAAL r4, r12, r4, r10\n\t" - "UMAAL r5, r12, r5, r10\n\t" - "UMAAL r6, r12, r6, r10\n\t" - "UMAAL r7, r12, r7, r10\n\t" - "UMAAL r8, r12, r8, r10\n\t" - "MOV r11, #0x13\n\t" - "UMAAL r9, r12, r9, r10\n\t" - "LSL r12, r12, #1\n\t" - "ORR r12, r12, r9, LSR #31\n\t" - "MUL r12, r12, r11\n\t" - "ADDS r2, r2, r12\n\t" - "ADCS r3, r3, #0x0\n\t" - "ADCS r4, r4, #0x0\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADCS r7, r7, #0x0\n\t" + "MOV lr, #0xdb42\n\t" + "MOVT lr, #0x1\n\t" + "UMULL r2, r10, r2, lr\n\t" + "SUB r12, lr, #1\n\t" + "UMAAL r3, r10, r3, r12\n\t" + "UMAAL r4, r10, r4, r12\n\t" + "UMAAL r5, r10, r5, r12\n\t" + "UMAAL r6, r10, r6, r12\n\t" + "UMAAL r7, r10, r7, r12\n\t" + "UMAAL r8, r10, r8, r12\n\t" + "MOV lr, #19\n\t" + "UMAAL r9, r10, r9, r12\n\t" + "LSL r10, r10, #1\n\t" + "ORR r10, r10, r9, LSR #31\n\t" + "MUL r10, r10, lr\n\t" + "ADDS r2, r2, r10\n\t" + "ADCS r3, r3, #0\n\t" + "ADCS r4, r4, #0\n\t" + "ADCS r5, r5, #0\n\t" + "ADCS r6, r6, #0\n\t" + "ADCS r7, r7, #0\n\t" "BFC r9, #31, #1\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADC r9, r9, #0x0\n\t" + "ADCS r8, r8, #0\n\t" + "ADC r9, r9, #0\n\t" "STM %[r], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) @@ -2878,8 +2877,8 @@ WC_OMIT_FRAME_POINTER void fe_mul121666(fe r, fe a) : : [r] "r" (r), [a] "r" (a) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ - : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", - "r11", "r12" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", + "lr", "r10" ); } @@ -2903,29 +2902,29 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "STR %[r], [sp, #160]\n\t" "STR %[n], [sp, #164]\n\t" "STR %[a], [sp, #168]\n\t" - "MOV %[n], #0x0\n\t" + "MOV %[n], #0\n\t" "STR %[n], [sp, #172]\n\t" - "MOV r4, #0x1\n\t" - "MOV r5, #0x0\n\t" - "MOV r6, #0x0\n\t" - "MOV r7, #0x0\n\t" - "MOV r8, #0x0\n\t" - "MOV r9, #0x0\n\t" - "MOV r10, #0x0\n\t" - "MOV r11, #0x0\n\t" + "MOV r4, #1\n\t" + "MOV r5, #0\n\t" + "MOV r6, #0\n\t" + "MOV r7, #0\n\t" + "MOV r8, #0\n\t" + "MOV r9, #0\n\t" + "MOV r10, #0\n\t" + "MOV r11, #0\n\t" "STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" - "ADD r3, sp, #0x20\n\t" + "ADD r3, sp, #32\n\t" "STM r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" - "MOV r4, #0x0\n\t" + "MOV r4, #0\n\t" "MOV r3, sp\n\t" "STM r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ADD r3, sp, #0x40\n\t" /* Copy */ "LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "STM r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" - "MOV %[n], #0x1e\n\t" + "MOV %[n], #30\n\t" "STR %[n], [sp, #180]\n\t" - "MOV %[a], #0x1c\n\t" + "MOV %[a], #28\n\t" "STR %[a], [sp, #176]\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2943,14 +2942,14 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "LDR %[a], [%[n], r2]\n\t" "LDR %[n], [sp, #180]\n\t" "LSR %[a], %[a], %[n]\n\t" - "AND %[a], %[a], #0x1\n\t" + "AND %[a], %[a], #1\n\t" "STR %[a], [sp, #184]\n\t" "LDR %[n], [sp, #172]\n\t" "EOR %[n], %[n], %[a]\n\t" "STR %[n], [sp, #172]\n\t" "LDR %[r], [sp, #160]\n\t" /* Conditional Swap */ - "RSB %[n], %[n], #0x0\n\t" + "RSB %[n], %[n], #0\n\t" "MOV r3, r0\n\t" "ADD r12, sp, #0x40\n\t" "LDM r3, {r4, r5}\n\t" @@ -3003,9 +3002,9 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "STM r12!, {r6, r7}\n\t" "LDR %[n], [sp, #172]\n\t" /* Conditional Swap */ - "RSB %[n], %[n], #0x0\n\t" + "RSB %[n], %[n], #0\n\t" "MOV r3, sp\n\t" - "ADD r12, sp, #0x20\n\t" + "ADD r12, sp, #32\n\t" "LDM r3, {r4, r5}\n\t" "LDM r12, {r6, r7}\n\t" "EOR r8, r4, r6\n\t" @@ -3061,14 +3060,14 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "ADD r1, sp, #0x80\n\t" "LDR r0, [sp, #160]\n\t" "BL fe_add_sub_op\n\t" - "ADD r3, sp, #0x20\n\t" + "ADD r3, sp, #32\n\t" "ADD r2, sp, #0x40\n\t" "ADD r1, sp, #0x60\n\t" "MOV r0, sp\n\t" "BL fe_add_sub_op\n\t" "LDR r2, [sp, #160]\n\t" "ADD r1, sp, #0x60\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r0, sp, #32\n\t" "BL fe_mul_op\n\t" "ADD r2, sp, #0x80\n\t" "MOV r1, sp\n\t" @@ -3081,7 +3080,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "ADD r0, sp, #0x60\n\t" "BL fe_sq_op\n\t" "MOV r3, sp\n\t" - "ADD r2, sp, #0x20\n\t" + "ADD r2, sp, #32\n\t" "MOV r1, sp\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_add_sub_op\n\t" @@ -3097,18 +3096,18 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "MOV r0, sp\n\t" "BL fe_sq_op\n\t" "ADD r1, sp, #0x60\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r0, sp, #32\n\t" "BL fe_mul121666\n\t" "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_sq_op\n\t" - "ADD r2, sp, #0x20\n\t" + "ADD r2, sp, #32\n\t" "ADD r1, sp, #0x80\n\t" "ADD r0, sp, #0x80\n\t" "BL fe_add_op\n\t" "MOV r2, sp\n\t" "LDR r1, [sp, #168]\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r0, sp, #32\n\t" "BL fe_mul_op\n\t" "ADD r2, sp, #0x80\n\t" "ADD r1, sp, #0x60\n\t" @@ -3116,7 +3115,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "BL fe_mul_op\n\t" "LDR %[a], [sp, #176]\n\t" "LDR %[n], [sp, #180]\n\t" - "SUBS %[n], %[n], #0x1\n\t" + "SUBS %[n], %[n], #1\n\t" "STR %[n], [sp, #180]\n\t" #if defined(__GNUC__) "BGE L_curve25519_bits_%=\n\t" @@ -3125,9 +3124,9 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) #else "BGE.W L_curve25519_bits_%=\n\t" #endif - "MOV %[n], #0x1f\n\t" + "MOV %[n], #31\n\t" "STR %[n], [sp, #180]\n\t" - "SUBS %[a], %[a], #0x4\n\t" + "SUBS %[a], %[a], #4\n\t" "STR %[a], [sp, #176]\n\t" #if defined(__GNUC__) "BGE L_curve25519_words_%=\n\t" @@ -3137,24 +3136,24 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "BGE.W L_curve25519_words_%=\n\t" #endif /* Invert */ - "ADD r1, sp, #0x0\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r1, sp, #0\n\t" + "ADD r0, sp, #32\n\t" "BL fe_sq_op\n\t" - "ADD r1, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_sq_op\n\t" "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_sq_op\n\t" "ADD r2, sp, #0x40\n\t" - "ADD r1, sp, #0x0\n\t" + "ADD r1, sp, #0\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_mul_op\n\t" "ADD r2, sp, #0x40\n\t" - "ADD r1, sp, #0x20\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" + "ADD r0, sp, #32\n\t" "BL fe_mul_op\n\t" - "ADD r1, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" "ADD r0, sp, #0x60\n\t" "BL fe_sq_op\n\t" "ADD r2, sp, #0x60\n\t" @@ -3164,7 +3163,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x60\n\t" "BL fe_sq_op\n\t" - "MOV r12, #0x4\n\t" + "MOV r12, #4\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_curve25519_inv_1:\n\t" @@ -3176,7 +3175,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_curve25519_inv_1_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3191,7 +3190,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x60\n\t" "BL fe_sq_op\n\t" - "MOV r12, #0x9\n\t" + "MOV r12, #9\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_curve25519_inv_2:\n\t" @@ -3203,7 +3202,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_curve25519_inv_2_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3218,7 +3217,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "ADD r1, sp, #0x60\n\t" "ADD r0, sp, #0x80\n\t" "BL fe_sq_op\n\t" - "MOV r12, #0x13\n\t" + "MOV r12, #19\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_curve25519_inv_3:\n\t" @@ -3230,7 +3229,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_curve25519_inv_3_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3242,7 +3241,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "ADD r1, sp, #0x80\n\t" "ADD r0, sp, #0x60\n\t" "BL fe_mul_op\n\t" - "MOV r12, #0xa\n\t" + "MOV r12, #10\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_curve25519_inv_4:\n\t" @@ -3254,7 +3253,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_curve25519_inv_4_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3269,7 +3268,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x60\n\t" "BL fe_sq_op\n\t" - "MOV r12, #0x31\n\t" + "MOV r12, #49\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_curve25519_inv_5:\n\t" @@ -3281,7 +3280,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_curve25519_inv_5_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3308,7 +3307,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_curve25519_inv_6_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3320,7 +3319,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "ADD r1, sp, #0x80\n\t" "ADD r0, sp, #0x60\n\t" "BL fe_mul_op\n\t" - "MOV r12, #0x32\n\t" + "MOV r12, #50\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_curve25519_inv_7:\n\t" @@ -3332,7 +3331,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_curve25519_inv_7_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3344,7 +3343,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "ADD r1, sp, #0x60\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_mul_op\n\t" - "MOV r12, #0x5\n\t" + "MOV r12, #5\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_curve25519_inv_8:\n\t" @@ -3356,7 +3355,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_curve25519_inv_8_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3364,9 +3363,9 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) #else "BNE.N L_curve25519_inv_8_%=\n\t" #endif - "ADD r2, sp, #0x20\n\t" + "ADD r2, sp, #32\n\t" "ADD r1, sp, #0x40\n\t" - "ADD r0, sp, #0x0\n\t" + "ADD r0, sp, #0\n\t" "BL fe_mul_op\n\t" "MOV r2, sp\n\t" "LDR r1, [sp, #160]\n\t" @@ -3375,27 +3374,27 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) /* Ensure result is less than modulus */ "LDR %[r], [sp, #160]\n\t" "LDM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" - "ADDS %[a], r4, #0x13\n\t" - "ADCS %[a], r5, #0x0\n\t" - "ADCS %[a], r6, #0x0\n\t" - "ADCS %[a], r7, #0x0\n\t" - "ADCS %[a], r8, #0x0\n\t" - "ADCS %[a], r9, #0x0\n\t" - "ADCS %[a], r10, #0x0\n\t" - "ADC %[a], r11, #0x0\n\t" + "ADDS %[a], r4, #19\n\t" + "ADCS %[a], r5, #0\n\t" + "ADCS %[a], r6, #0\n\t" + "ADCS %[a], r7, #0\n\t" + "ADCS %[a], r8, #0\n\t" + "ADCS %[a], r9, #0\n\t" + "ADCS %[a], r10, #0\n\t" + "ADC %[a], r11, #0\n\t" "ASR %[a], %[a], #31\n\t" - "AND %[a], %[a], #0x13\n\t" + "AND %[a], %[a], #19\n\t" "ADDS r4, r4, %[a]\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADCS r9, r9, #0x0\n\t" - "ADCS r10, r10, #0x0\n\t" - "ADC r11, r11, #0x0\n\t" + "ADCS r5, r5, #0\n\t" + "ADCS r6, r6, #0\n\t" + "ADCS r7, r7, #0\n\t" + "ADCS r8, r8, #0\n\t" + "ADCS r9, r9, #0\n\t" + "ADCS r10, r10, #0\n\t" + "ADC r11, r11, #0\n\t" "BFC r11, #31, #1\n\t" "STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" - "MOV r0, #0x0\n\t" + "MOV r0, #0\n\t" "ADD sp, sp, #0xbc\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a) @@ -3430,24 +3429,24 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "STR %[n], [sp, #160]\n\t" "STR %[a], [sp, #172]\n\t" "ADD r5, sp, #0x40\n\t" - "ADD r4, sp, #0x20\n\t" + "ADD r4, sp, #32\n\t" "STR sp, [sp, #184]\n\t" "STR r5, [sp, #180]\n\t" "STR r4, [sp, #188]\n\t" - "MOV %[n], #0x0\n\t" + "MOV %[n], #0\n\t" "STR %[n], [sp, #164]\n\t" - "MOV r4, #0x1\n\t" - "MOV r5, #0x0\n\t" - "MOV r6, #0x0\n\t" - "MOV r7, #0x0\n\t" - "MOV r8, #0x0\n\t" - "MOV r9, #0x0\n\t" - "MOV r10, #0x0\n\t" - "MOV r11, #0x0\n\t" + "MOV r4, #1\n\t" + "MOV r5, #0\n\t" + "MOV r6, #0\n\t" + "MOV r7, #0\n\t" + "MOV r8, #0\n\t" + "MOV r9, #0\n\t" + "MOV r10, #0\n\t" + "MOV r11, #0\n\t" "STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" - "ADD r3, sp, #0x20\n\t" + "ADD r3, sp, #32\n\t" "STM r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" - "MOV r4, #0x0\n\t" + "MOV r4, #0\n\t" "MOV r3, sp\n\t" "STM r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ADD r3, sp, #0x40\n\t" @@ -3463,10 +3462,10 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) #endif "STR %[a], [sp, #168]\n\t" "LDR %[n], [sp, #160]\n\t" - "AND r4, %[a], #0x1f\n\t" + "AND r4, %[a], #31\n\t" "LSR %[a], %[a], #5\n\t" "LDR %[a], [%[n], r2, LSL #2]\n\t" - "RSB r4, r4, #0x1f\n\t" + "RSB r4, r4, #31\n\t" "LSL %[a], %[a], r4\n\t" "LDR %[n], [sp, #164]\n\t" "EOR %[n], %[n], %[a]\n\t" @@ -3544,7 +3543,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "LDR r0, [sp, #184]\n\t" "BL fe_mul_op\n\t" "LDR %[a], [sp, #168]\n\t" - "SUBS %[a], %[a], #0x1\n\t" + "SUBS %[a], %[a], #1\n\t" #if defined(__GNUC__) "BGE L_curve25519_bits_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3558,24 +3557,24 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "STM sp, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" /* Invert */ - "ADD r1, sp, #0x0\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r1, sp, #0\n\t" + "ADD r0, sp, #32\n\t" "BL fe_sq_op\n\t" - "ADD r1, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_sq_op\n\t" "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_sq_op\n\t" "ADD r2, sp, #0x40\n\t" - "ADD r1, sp, #0x0\n\t" + "ADD r1, sp, #0\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_mul_op\n\t" "ADD r2, sp, #0x40\n\t" - "ADD r1, sp, #0x20\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" + "ADD r0, sp, #32\n\t" "BL fe_mul_op\n\t" - "ADD r1, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" "ADD r0, sp, #0x60\n\t" "BL fe_sq_op\n\t" "ADD r2, sp, #0x60\n\t" @@ -3585,7 +3584,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x60\n\t" "BL fe_sq_op\n\t" - "MOV r12, #0x4\n\t" + "MOV r12, #4\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_curve25519_inv_1:\n\t" @@ -3597,7 +3596,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_curve25519_inv_1_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3612,7 +3611,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x60\n\t" "BL fe_sq_op\n\t" - "MOV r12, #0x9\n\t" + "MOV r12, #9\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_curve25519_inv_2:\n\t" @@ -3624,7 +3623,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_curve25519_inv_2_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3639,7 +3638,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "ADD r1, sp, #0x60\n\t" "ADD r0, sp, #0x80\n\t" "BL fe_sq_op\n\t" - "MOV r12, #0x13\n\t" + "MOV r12, #19\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_curve25519_inv_3:\n\t" @@ -3651,7 +3650,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_curve25519_inv_3_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3663,7 +3662,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "ADD r1, sp, #0x80\n\t" "ADD r0, sp, #0x60\n\t" "BL fe_mul_op\n\t" - "MOV r12, #0xa\n\t" + "MOV r12, #10\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_curve25519_inv_4:\n\t" @@ -3675,7 +3674,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_curve25519_inv_4_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3690,7 +3689,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x60\n\t" "BL fe_sq_op\n\t" - "MOV r12, #0x31\n\t" + "MOV r12, #49\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_curve25519_inv_5:\n\t" @@ -3702,7 +3701,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_curve25519_inv_5_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3729,7 +3728,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_curve25519_inv_6_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3741,7 +3740,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "ADD r1, sp, #0x80\n\t" "ADD r0, sp, #0x60\n\t" "BL fe_mul_op\n\t" - "MOV r12, #0x32\n\t" + "MOV r12, #50\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_curve25519_inv_7:\n\t" @@ -3753,7 +3752,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_curve25519_inv_7_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3765,7 +3764,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "ADD r1, sp, #0x60\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_mul_op\n\t" - "MOV r12, #0x5\n\t" + "MOV r12, #5\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_curve25519_inv_8:\n\t" @@ -3777,7 +3776,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_curve25519_inv_8_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3785,9 +3784,9 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) #else "BNE.N L_curve25519_inv_8_%=\n\t" #endif - "ADD r2, sp, #0x20\n\t" + "ADD r2, sp, #32\n\t" "ADD r1, sp, #0x40\n\t" - "ADD r0, sp, #0x0\n\t" + "ADD r0, sp, #0\n\t" "BL fe_mul_op\n\t" "LDR r2, [sp, #184]\n\t" "LDR r1, [sp, #176]\n\t" @@ -3796,27 +3795,27 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) /* Ensure result is less than modulus */ "LDR %[r], [sp, #176]\n\t" "LDM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" - "ADDS %[a], r4, #0x13\n\t" - "ADCS %[a], r5, #0x0\n\t" - "ADCS %[a], r6, #0x0\n\t" - "ADCS %[a], r7, #0x0\n\t" - "ADCS %[a], r8, #0x0\n\t" - "ADCS %[a], r9, #0x0\n\t" - "ADCS %[a], r10, #0x0\n\t" - "ADC %[a], r11, #0x0\n\t" + "ADDS %[a], r4, #19\n\t" + "ADCS %[a], r5, #0\n\t" + "ADCS %[a], r6, #0\n\t" + "ADCS %[a], r7, #0\n\t" + "ADCS %[a], r8, #0\n\t" + "ADCS %[a], r9, #0\n\t" + "ADCS %[a], r10, #0\n\t" + "ADC %[a], r11, #0\n\t" "ASR %[a], %[a], #31\n\t" - "AND %[a], %[a], #0x13\n\t" + "AND %[a], %[a], #19\n\t" "ADDS r4, r4, %[a]\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADCS r9, r9, #0x0\n\t" - "ADCS r10, r10, #0x0\n\t" - "ADC r11, r11, #0x0\n\t" + "ADCS r5, r5, #0\n\t" + "ADCS r6, r6, #0\n\t" + "ADCS r7, r7, #0\n\t" + "ADCS r8, r8, #0\n\t" + "ADCS r9, r9, #0\n\t" + "ADCS r10, r10, #0\n\t" + "ADC r11, r11, #0\n\t" "BFC r11, #31, #1\n\t" "STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" - "MOV r0, #0x0\n\t" + "MOV r0, #0\n\t" "ADD sp, sp, #0xc0\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a) @@ -3854,16 +3853,16 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "MOV r0, sp\n\t" "BL fe_sq_op\n\t" "MOV r1, sp\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r0, sp, #32\n\t" "BL fe_sq_op\n\t" - "ADD r1, sp, #0x20\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" + "ADD r0, sp, #32\n\t" "BL fe_sq_op\n\t" - "ADD r2, sp, #0x20\n\t" + "ADD r2, sp, #32\n\t" "LDR r1, [sp, #132]\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r0, sp, #32\n\t" "BL fe_mul_op\n\t" - "ADD r2, sp, #0x20\n\t" + "ADD r2, sp, #32\n\t" "MOV r1, sp\n\t" "MOV r0, sp\n\t" "BL fe_mul_op\n\t" @@ -3871,13 +3870,13 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "ADD r0, sp, #0x40\n\t" "BL fe_sq_op\n\t" "ADD r2, sp, #0x40\n\t" - "ADD r1, sp, #0x20\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" + "ADD r0, sp, #32\n\t" "BL fe_mul_op\n\t" - "ADD r1, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_sq_op\n\t" - "MOV r12, #0x4\n\t" + "MOV r12, #4\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_fe_invert1:\n\t" @@ -3889,7 +3888,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_fe_invert1_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3897,14 +3896,14 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) #else "BNE.N L_fe_invert1_%=\n\t" #endif - "ADD r2, sp, #0x20\n\t" + "ADD r2, sp, #32\n\t" "ADD r1, sp, #0x40\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r0, sp, #32\n\t" "BL fe_mul_op\n\t" - "ADD r1, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_sq_op\n\t" - "MOV r12, #0x9\n\t" + "MOV r12, #9\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_fe_invert2:\n\t" @@ -3916,7 +3915,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_fe_invert2_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3924,14 +3923,14 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) #else "BNE.N L_fe_invert2_%=\n\t" #endif - "ADD r2, sp, #0x20\n\t" + "ADD r2, sp, #32\n\t" "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_mul_op\n\t" "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x60\n\t" "BL fe_sq_op\n\t" - "MOV r12, #0x13\n\t" + "MOV r12, #19\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_fe_invert3:\n\t" @@ -3943,7 +3942,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_fe_invert3_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3955,7 +3954,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "ADD r1, sp, #0x60\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_mul_op\n\t" - "MOV r12, #0xa\n\t" + "MOV r12, #10\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_fe_invert4:\n\t" @@ -3967,7 +3966,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_fe_invert4_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3975,14 +3974,14 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) #else "BNE.N L_fe_invert4_%=\n\t" #endif - "ADD r2, sp, #0x20\n\t" + "ADD r2, sp, #32\n\t" "ADD r1, sp, #0x40\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r0, sp, #32\n\t" "BL fe_mul_op\n\t" - "ADD r1, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_sq_op\n\t" - "MOV r12, #0x31\n\t" + "MOV r12, #49\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_fe_invert5:\n\t" @@ -3994,7 +3993,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_fe_invert5_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4002,7 +4001,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) #else "BNE.N L_fe_invert5_%=\n\t" #endif - "ADD r2, sp, #0x20\n\t" + "ADD r2, sp, #32\n\t" "ADD r1, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_mul_op\n\t" @@ -4021,7 +4020,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_fe_invert6_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4033,7 +4032,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "ADD r1, sp, #0x60\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_mul_op\n\t" - "MOV r12, #0x32\n\t" + "MOV r12, #50\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_fe_invert7:\n\t" @@ -4045,7 +4044,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_fe_invert7_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4053,23 +4052,23 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) #else "BNE.N L_fe_invert7_%=\n\t" #endif - "ADD r2, sp, #0x20\n\t" + "ADD r2, sp, #32\n\t" "ADD r1, sp, #0x40\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r0, sp, #32\n\t" "BL fe_mul_op\n\t" - "MOV r12, #0x5\n\t" + "MOV r12, #5\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_fe_invert8:\n\t" #else "L_fe_invert8_%=:\n\t" #endif - "ADD r1, sp, #0x20\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" + "ADD r0, sp, #32\n\t" "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_fe_invert8_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4078,7 +4077,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "BNE.N L_fe_invert8_%=\n\t" #endif "MOV r2, sp\n\t" - "ADD r1, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" "LDR r0, [sp, #128]\n\t" "BL fe_mul_op\n\t" "LDR %[a], [sp, #132]\n\t" @@ -4112,7 +4111,7 @@ WC_OMIT_FRAME_POINTER void fe_sq2(fe r, const fe a) "SUB sp, sp, #0x44\n\t" "STR r0, [sp, #64]\n\t" /* Square * 2 */ - "MOV r0, #0x0\n\t" + "MOV r0, #0\n\t" "LDR r12, [r1]\n\t" /* A[0] * A[1] */ "LDR lr, [r1, #4]\n\t" @@ -4128,137 +4127,137 @@ WC_OMIT_FRAME_POINTER void fe_sq2(fe r, const fe a) "UMULL r10, r3, r12, lr\n\t" /* A[0] * A[2] */ "LDR lr, [r1, #8]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r5, r11, r12, lr\n\t" "ADDS r6, r6, r11\n\t" /* A[0] * A[4] */ "LDR lr, [r1, #16]\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADC r11, r0, #0x0\n\t" + "ADCS r7, r7, #0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r7, r11, r12, lr\n\t" "ADDS r8, r8, r11\n\t" /* A[0] * A[6] */ "LDR lr, [r1, #24]\n\t" - "ADCS r9, r9, #0x0\n\t" - "ADC r11, r0, #0x0\n\t" + "ADCS r9, r9, #0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r9, r11, r12, lr\n\t" "ADDS r10, r10, r11\n\t" - "ADCS r3, r3, #0x0\n\t" + "ADCS r3, r3, #0\n\t" "STR r4, [sp, #4]\n\t" "STR r5, [sp, #8]\n\t" /* A[1] * A[2] */ "LDR r12, [r1, #4]\n\t" "LDR lr, [r1, #8]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r6, r11, r12, lr\n\t" "STR r6, [sp, #12]\n\t" "ADDS r7, r7, r11\n\t" /* A[1] * A[3] */ "LDR lr, [r1, #12]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r7, r11, r12, lr\n\t" "STR r7, [sp, #16]\n\t" "ADDS r8, r8, r11\n\t" /* A[1] * A[4] */ "LDR lr, [r1, #16]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r8, r11, r12, lr\n\t" "ADDS r9, r9, r11\n\t" /* A[1] * A[5] */ "LDR lr, [r1, #20]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r9, r11, r12, lr\n\t" "ADDS r10, r10, r11\n\t" /* A[1] * A[6] */ "LDR lr, [r1, #24]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "ADDS r3, r3, r11\n\t" /* A[1] * A[7] */ "LDR lr, [r1, #28]\n\t" - "ADC r4, r0, #0x0\n\t" + "ADC r4, r0, #0\n\t" "UMLAL r3, r4, r12, lr\n\t" /* A[2] * A[3] */ "LDR r12, [r1, #8]\n\t" "LDR lr, [r1, #12]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r8, r11, r12, lr\n\t" "STR r8, [sp, #20]\n\t" "ADDS r9, r9, r11\n\t" /* A[2] * A[4] */ "LDR lr, [r1, #16]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r9, r11, r12, lr\n\t" "STR r9, [sp, #24]\n\t" "ADDS r10, r10, r11\n\t" /* A[2] * A[5] */ "LDR lr, [r1, #20]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "ADDS r3, r3, r11\n\t" /* A[2] * A[6] */ "LDR lr, [r1, #24]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r3, r11, r12, lr\n\t" "ADDS r4, r4, r11\n\t" /* A[2] * A[7] */ "LDR lr, [r1, #28]\n\t" - "ADC r5, r0, #0x0\n\t" + "ADC r5, r0, #0\n\t" "UMLAL r4, r5, r12, lr\n\t" /* A[3] * A[4] */ "LDR r12, [r1, #12]\n\t" "LDR lr, [r1, #16]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "STR r10, [sp, #28]\n\t" "ADDS r3, r3, r11\n\t" /* A[3] * A[5] */ "LDR lr, [r1, #20]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r3, r11, r12, lr\n\t" "ADDS r4, r4, r11\n\t" /* A[3] * A[6] */ "LDR lr, [r1, #24]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r4, r11, r12, lr\n\t" "ADDS r5, r5, r11\n\t" /* A[3] * A[7] */ "LDR lr, [r1, #28]\n\t" - "ADC r6, r0, #0x0\n\t" + "ADC r6, r0, #0\n\t" "UMLAL r5, r6, r12, lr\n\t" /* A[4] * A[5] */ "LDR r12, [r1, #16]\n\t" "LDR lr, [r1, #20]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r4, r11, r12, lr\n\t" "ADDS r5, r5, r11\n\t" /* A[4] * A[6] */ "LDR lr, [r1, #24]\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r5, r11, r12, lr\n\t" "ADDS r6, r6, r11\n\t" /* A[4] * A[7] */ "LDR lr, [r1, #28]\n\t" - "ADC r7, r0, #0x0\n\t" + "ADC r7, r0, #0\n\t" "UMLAL r6, r7, r12, lr\n\t" /* A[5] * A[6] */ "LDR r12, [r1, #20]\n\t" "LDR lr, [r1, #24]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r6, r11, r12, lr\n\t" "ADDS r7, r7, r11\n\t" /* A[5] * A[7] */ "LDR lr, [r1, #28]\n\t" - "ADC r8, r0, #0x0\n\t" + "ADC r8, r0, #0\n\t" "UMLAL r7, r8, r12, lr\n\t" /* A[6] * A[7] */ "LDR r12, [r1, #24]\n\t" "LDR lr, [r1, #28]\n\t" - "MOV r9, #0x0\n\t" + "MOV r9, #0\n\t" "UMLAL r8, r9, r12, lr\n\t" - "ADD lr, sp, #0x20\n\t" + "ADD lr, sp, #32\n\t" "STM lr, {r3, r4, r5, r6, r7, r8, r9}\n\t" - "ADD lr, sp, #0x4\n\t" + "ADD lr, sp, #4\n\t" "LDM lr, {r4, r5, r6, r7, r8, r9, r10}\n\t" "ADDS r4, r4, r4\n\t" "ADCS r5, r5, r5\n\t" @@ -4276,9 +4275,9 @@ WC_OMIT_FRAME_POINTER void fe_sq2(fe r, const fe a) "ADCS r7, r7, r7\n\t" "ADCS r8, r8, r8\n\t" "ADCS r9, r9, r9\n\t" - "ADC r10, r0, #0x0\n\t" + "ADC r10, r0, #0\n\t" "STM lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" - "ADD lr, sp, #0x4\n\t" + "ADD lr, sp, #4\n\t" "LDM lr, {r4, r5, r6, r7, r8, r9, r10}\n\t" "MOV lr, sp\n\t" /* A[0] * A[0] */ @@ -4287,98 +4286,98 @@ WC_OMIT_FRAME_POINTER void fe_sq2(fe r, const fe a) "ADDS r4, r4, r11\n\t" /* A[1] * A[1] */ "LDR r12, [r1, #4]\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADC r11, r0, #0x0\n\t" + "ADCS r5, r5, #0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r5, r11, r12, r12\n\t" "ADDS r6, r6, r11\n\t" /* A[2] * A[2] */ "LDR r12, [r1, #8]\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADC r11, r0, #0x0\n\t" + "ADCS r7, r7, #0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r7, r11, r12, r12\n\t" "ADDS r8, r8, r11\n\t" /* A[3] * A[3] */ "LDR r12, [r1, #12]\n\t" - "ADCS r9, r9, #0x0\n\t" - "ADC r11, r0, #0x0\n\t" + "ADCS r9, r9, #0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r9, r11, r12, r12\n\t" "ADDS r10, r10, r11\n\t" "STM lr!, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" "LDM lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" /* A[4] * A[4] */ "LDR r12, [r1, #16]\n\t" - "ADCS r3, r3, #0x0\n\t" - "ADC r11, r0, #0x0\n\t" + "ADCS r3, r3, #0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r3, r11, r12, r12\n\t" "ADDS r4, r4, r11\n\t" /* A[5] * A[5] */ "LDR r12, [r1, #20]\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADC r11, r0, #0x0\n\t" + "ADCS r5, r5, #0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r5, r11, r12, r12\n\t" "ADDS r6, r6, r11\n\t" /* A[6] * A[6] */ "LDR r12, [r1, #24]\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADC r11, r0, #0x0\n\t" + "ADCS r7, r7, #0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r7, r11, r12, r12\n\t" "ADDS r8, r8, r11\n\t" /* A[7] * A[7] */ "LDR r12, [r1, #28]\n\t" - "ADCS r9, r9, #0x0\n\t" - "ADC r10, r10, #0x0\n\t" + "ADCS r9, r9, #0\n\t" + "ADC r10, r10, #0\n\t" "UMLAL r9, r10, r12, r12\n\t" /* Reduce */ "LDR r2, [sp, #28]\n\t" "MOV lr, sp\n\t" - "MOV r12, #0x26\n\t" + "MOV r12, #38\n\t" "UMULL r10, r11, r10, r12\n\t" "ADDS r10, r10, r2\n\t" - "ADC r11, r11, #0x0\n\t" - "MOV r12, #0x13\n\t" + "ADC r11, r11, #0\n\t" + "MOV r12, #19\n\t" "LSL r11, r11, #1\n\t" "ORR r11, r11, r10, LSR #31\n\t" "MUL r11, r11, r12\n\t" "LDM lr!, {r1, r2}\n\t" - "MOV r12, #0x26\n\t" + "MOV r12, #38\n\t" "ADDS r1, r1, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r1, r11, r3, r12\n\t" "ADDS r2, r2, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r2, r11, r4, r12\n\t" "LDM lr!, {r3, r4}\n\t" "ADDS r3, r3, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r3, r11, r5, r12\n\t" "ADDS r4, r4, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r4, r11, r6, r12\n\t" "LDM lr!, {r5, r6}\n\t" "ADDS r5, r5, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r5, r11, r7, r12\n\t" "ADDS r6, r6, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r6, r11, r8, r12\n\t" "LDM lr!, {r7, r8}\n\t" "ADDS r7, r7, r11\n\t" - "ADC r11, r0, #0x0\n\t" + "ADC r11, r0, #0\n\t" "UMLAL r7, r11, r9, r12\n\t" "BFC r10, #31, #1\n\t" "ADDS r8, r10, r11\n\t" /* Reduce if top bit set */ - "MOV r12, #0x13\n\t" + "MOV r12, #19\n\t" "AND r11, r12, r8, ASR #31\n\t" "ADDS r1, r1, r11\n\t" - "ADCS r2, r2, #0x0\n\t" - "ADCS r3, r3, #0x0\n\t" - "ADCS r4, r4, #0x0\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADCS r6, r6, #0x0\n\t" + "ADCS r2, r2, #0\n\t" + "ADCS r3, r3, #0\n\t" + "ADCS r4, r4, #0\n\t" + "ADCS r5, r5, #0\n\t" + "ADCS r6, r6, #0\n\t" "BFC r8, #31, #1\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADC r8, r8, #0x0\n\t" + "ADCS r7, r7, #0\n\t" + "ADC r8, r8, #0\n\t" /* Double */ "ADDS r1, r1, r1\n\t" "ADCS r2, r2, r2\n\t" @@ -4389,17 +4388,17 @@ WC_OMIT_FRAME_POINTER void fe_sq2(fe r, const fe a) "ADCS r7, r7, r7\n\t" "ADC r8, r8, r8\n\t" /* Reduce if top bit set */ - "MOV r12, #0x13\n\t" + "MOV r12, #19\n\t" "AND r11, r12, r8, ASR #31\n\t" "ADDS r1, r1, r11\n\t" - "ADCS r2, r2, #0x0\n\t" - "ADCS r3, r3, #0x0\n\t" - "ADCS r4, r4, #0x0\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADCS r6, r6, #0x0\n\t" + "ADCS r2, r2, #0\n\t" + "ADCS r3, r3, #0\n\t" + "ADCS r4, r4, #0\n\t" + "ADCS r5, r5, #0\n\t" + "ADCS r6, r6, #0\n\t" "BFC r8, #31, #1\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADC r8, r8, #0x0\n\t" + "ADCS r7, r7, #0\n\t" + "ADC r8, r8, #0\n\t" /* Store */ "LDR r0, [sp, #64]\n\t" "STM r0, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" @@ -4428,14 +4427,14 @@ WC_OMIT_FRAME_POINTER void fe_sq2(fe r, const fe a) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "SUB sp, sp, #0x24\n\t" + "SUB sp, sp, #36\n\t" "STRD r0, r1, [sp, #28]\n\t" "LDM r1, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" /* Square * 2 */ "UMULL r9, r10, r0, r0\n\t" "UMULL r11, r12, r0, r1\n\t" "ADDS r11, r11, r11\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "UMAAL r10, r11, lr, lr\n\t" "STM sp, {r9, r10}\n\t" "MOV r8, lr\n\t" @@ -4512,14 +4511,14 @@ WC_OMIT_FRAME_POINTER void fe_sq2(fe r, const fe a) /* R[14] = r9 */ /* R[15] = r7 */ /* Reduce */ - "MOV r6, #0x25\n\t" + "MOV r6, #37\n\t" "UMAAL r7, r0, r7, r6\n\t" - "MOV r6, #0x13\n\t" + "MOV r6, #19\n\t" "LSL r0, r0, #1\n\t" "ORR r0, r0, r7, LSR #31\n\t" "MUL lr, r0, r6\n\t" "POP {r0, r1}\n\t" - "MOV r6, #0x26\n\t" + "MOV r6, #38\n\t" "UMAAL r0, lr, r12, r6\n\t" "UMAAL r1, lr, r11, r6\n\t" "MOV r12, r3\n\t" @@ -4535,17 +4534,17 @@ WC_OMIT_FRAME_POINTER void fe_sq2(fe r, const fe a) "UMAAL r6, lr, r9, r12\n\t" "ADD r7, r7, lr\n\t" /* Reduce if top bit set */ - "MOV r11, #0x13\n\t" + "MOV r11, #19\n\t" "AND r12, r11, r7, ASR #31\n\t" "ADDS r0, r0, r12\n\t" - "ADCS r1, r1, #0x0\n\t" - "ADCS r2, r2, #0x0\n\t" - "ADCS r3, r3, #0x0\n\t" - "ADCS r4, r4, #0x0\n\t" - "ADCS r5, r5, #0x0\n\t" + "ADCS r1, r1, #0\n\t" + "ADCS r2, r2, #0\n\t" + "ADCS r3, r3, #0\n\t" + "ADCS r4, r4, #0\n\t" + "ADCS r5, r5, #0\n\t" "BFC r7, #31, #1\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADC r7, r7, #0x0\n\t" + "ADCS r6, r6, #0\n\t" + "ADC r7, r7, #0\n\t" /* Double */ "ADDS r0, r0, r0\n\t" "ADCS r1, r1, r1\n\t" @@ -4556,17 +4555,17 @@ WC_OMIT_FRAME_POINTER void fe_sq2(fe r, const fe a) "ADCS r6, r6, r6\n\t" "ADC r7, r7, r7\n\t" /* Reduce if top bit set */ - "MOV r11, #0x13\n\t" + "MOV r11, #19\n\t" "AND r12, r11, r7, ASR #31\n\t" "ADDS r0, r0, r12\n\t" - "ADCS r1, r1, #0x0\n\t" - "ADCS r2, r2, #0x0\n\t" - "ADCS r3, r3, #0x0\n\t" - "ADCS r4, r4, #0x0\n\t" - "ADCS r5, r5, #0x0\n\t" + "ADCS r1, r1, #0\n\t" + "ADCS r2, r2, #0\n\t" + "ADCS r3, r3, #0\n\t" + "ADCS r4, r4, #0\n\t" + "ADCS r5, r5, #0\n\t" "BFC r7, #31, #1\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADC r7, r7, #0x0\n\t" + "ADCS r6, r6, #0\n\t" + "ADC r7, r7, #0\n\t" "POP {r12, lr}\n\t" /* Store */ "STM r12, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" @@ -4604,16 +4603,16 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "MOV r0, sp\n\t" "BL fe_sq_op\n\t" "MOV r1, sp\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r0, sp, #32\n\t" "BL fe_sq_op\n\t" - "ADD r1, sp, #0x20\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" + "ADD r0, sp, #32\n\t" "BL fe_sq_op\n\t" - "ADD r2, sp, #0x20\n\t" + "ADD r2, sp, #32\n\t" "LDR r1, [sp, #100]\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r0, sp, #32\n\t" "BL fe_mul_op\n\t" - "ADD r2, sp, #0x20\n\t" + "ADD r2, sp, #32\n\t" "MOV r1, sp\n\t" "MOV r0, sp\n\t" "BL fe_mul_op\n\t" @@ -4621,25 +4620,25 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "MOV r0, sp\n\t" "BL fe_sq_op\n\t" "MOV r2, sp\n\t" - "ADD r1, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" "MOV r0, sp\n\t" "BL fe_mul_op\n\t" "MOV r1, sp\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r0, sp, #32\n\t" "BL fe_sq_op\n\t" - "MOV r12, #0x4\n\t" + "MOV r12, #4\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_fe_pow22523_1:\n\t" #else "L_fe_pow22523_1_%=:\n\t" #endif - "ADD r1, sp, #0x20\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" + "ADD r0, sp, #32\n\t" "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_fe_pow22523_1_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4648,25 +4647,25 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "BNE.N L_fe_pow22523_1_%=\n\t" #endif "MOV r2, sp\n\t" - "ADD r1, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" "MOV r0, sp\n\t" "BL fe_mul_op\n\t" "MOV r1, sp\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r0, sp, #32\n\t" "BL fe_sq_op\n\t" - "MOV r12, #0x9\n\t" + "MOV r12, #9\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_fe_pow22523_2:\n\t" #else "L_fe_pow22523_2_%=:\n\t" #endif - "ADD r1, sp, #0x20\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" + "ADD r0, sp, #32\n\t" "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_fe_pow22523_2_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4675,13 +4674,13 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "BNE.N L_fe_pow22523_2_%=\n\t" #endif "MOV r2, sp\n\t" - "ADD r1, sp, #0x20\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" + "ADD r0, sp, #32\n\t" "BL fe_mul_op\n\t" - "ADD r1, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_sq_op\n\t" - "MOV r12, #0x13\n\t" + "MOV r12, #19\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_fe_pow22523_3:\n\t" @@ -4693,7 +4692,7 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_fe_pow22523_3_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4701,23 +4700,23 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) #else "BNE.N L_fe_pow22523_3_%=\n\t" #endif - "ADD r2, sp, #0x20\n\t" + "ADD r2, sp, #32\n\t" "ADD r1, sp, #0x40\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r0, sp, #32\n\t" "BL fe_mul_op\n\t" - "MOV r12, #0xa\n\t" + "MOV r12, #10\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_fe_pow22523_4:\n\t" #else "L_fe_pow22523_4_%=:\n\t" #endif - "ADD r1, sp, #0x20\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" + "ADD r0, sp, #32\n\t" "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_fe_pow22523_4_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4726,25 +4725,25 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "BNE.N L_fe_pow22523_4_%=\n\t" #endif "MOV r2, sp\n\t" - "ADD r1, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" "MOV r0, sp\n\t" "BL fe_mul_op\n\t" "MOV r1, sp\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r0, sp, #32\n\t" "BL fe_sq_op\n\t" - "MOV r12, #0x31\n\t" + "MOV r12, #49\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_fe_pow22523_5:\n\t" #else "L_fe_pow22523_5_%=:\n\t" #endif - "ADD r1, sp, #0x20\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" + "ADD r0, sp, #32\n\t" "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_fe_pow22523_5_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4753,10 +4752,10 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "BNE.N L_fe_pow22523_5_%=\n\t" #endif "MOV r2, sp\n\t" - "ADD r1, sp, #0x20\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" + "ADD r0, sp, #32\n\t" "BL fe_mul_op\n\t" - "ADD r1, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" "ADD r0, sp, #0x40\n\t" "BL fe_sq_op\n\t" "MOV r12, #0x63\n\t" @@ -4771,7 +4770,7 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_fe_pow22523_6_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4779,23 +4778,23 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) #else "BNE.N L_fe_pow22523_6_%=\n\t" #endif - "ADD r2, sp, #0x20\n\t" + "ADD r2, sp, #32\n\t" "ADD r1, sp, #0x40\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r0, sp, #32\n\t" "BL fe_mul_op\n\t" - "MOV r12, #0x32\n\t" + "MOV r12, #50\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_fe_pow22523_7:\n\t" #else "L_fe_pow22523_7_%=:\n\t" #endif - "ADD r1, sp, #0x20\n\t" - "ADD r0, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" + "ADD r0, sp, #32\n\t" "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_fe_pow22523_7_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4804,10 +4803,10 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "BNE.N L_fe_pow22523_7_%=\n\t" #endif "MOV r2, sp\n\t" - "ADD r1, sp, #0x20\n\t" + "ADD r1, sp, #32\n\t" "MOV r0, sp\n\t" "BL fe_mul_op\n\t" - "MOV r12, #0x2\n\t" + "MOV r12, #2\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_fe_pow22523_8:\n\t" @@ -4819,7 +4818,7 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "PUSH {r12}\n\t" "BL fe_sq_op\n\t" "POP {r12}\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_fe_pow22523_8_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -4858,7 +4857,7 @@ WC_OMIT_FRAME_POINTER void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "SUB sp, sp, #0x8\n\t" + "SUB sp, sp, #8\n\t" "STR %[r], [sp]\n\t" "STR %[p], [sp, #4]\n\t" "ADD r2, r1, #0x60\n\t" @@ -4866,8 +4865,8 @@ WC_OMIT_FRAME_POINTER void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p) "LDR r0, [sp]\n\t" "LDR r1, [sp, #4]\n\t" "ADD r2, r1, #0x40\n\t" - "ADD r1, r1, #0x20\n\t" - "ADD r0, r0, #0x20\n\t" + "ADD r1, r1, #32\n\t" + "ADD r0, r0, #32\n\t" "BL fe_mul_op\n\t" "LDR r0, [sp]\n\t" "LDR r1, [sp, #4]\n\t" @@ -4875,7 +4874,7 @@ WC_OMIT_FRAME_POINTER void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p) "ADD r1, r1, #0x40\n\t" "ADD r0, r0, #0x40\n\t" "BL fe_mul_op\n\t" - "ADD sp, sp, #0x8\n\t" + "ADD sp, sp, #8\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [p] "+r" (p) : @@ -4900,7 +4899,7 @@ WC_OMIT_FRAME_POINTER void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "SUB sp, sp, #0x8\n\t" + "SUB sp, sp, #8\n\t" "STR %[r], [sp]\n\t" "STR %[p], [sp, #4]\n\t" "ADD r2, r1, #0x60\n\t" @@ -4908,8 +4907,8 @@ WC_OMIT_FRAME_POINTER void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p) "LDR r0, [sp]\n\t" "LDR r1, [sp, #4]\n\t" "ADD r2, r1, #0x40\n\t" - "ADD r1, r1, #0x20\n\t" - "ADD r0, r0, #0x20\n\t" + "ADD r1, r1, #32\n\t" + "ADD r0, r0, #32\n\t" "BL fe_mul_op\n\t" "LDR r0, [sp]\n\t" "LDR r1, [sp, #4]\n\t" @@ -4919,10 +4918,10 @@ WC_OMIT_FRAME_POINTER void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p) "BL fe_mul_op\n\t" "LDR r0, [sp]\n\t" "LDR r1, [sp, #4]\n\t" - "ADD r2, r1, #0x20\n\t" + "ADD r2, r1, #32\n\t" "ADD r0, r0, #0x60\n\t" "BL fe_mul_op\n\t" - "ADD sp, sp, #0x8\n\t" + "ADD sp, sp, #8\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [p] "+r" (p) : @@ -4947,19 +4946,19 @@ WC_OMIT_FRAME_POINTER void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "SUB sp, sp, #0x8\n\t" + "SUB sp, sp, #8\n\t" "STR %[r], [sp]\n\t" "STR %[p], [sp, #4]\n\t" "BL fe_sq_op\n\t" "LDR r0, [sp]\n\t" "LDR r1, [sp, #4]\n\t" - "ADD r1, r1, #0x20\n\t" + "ADD r1, r1, #32\n\t" "ADD r0, r0, #0x40\n\t" "BL fe_sq_op\n\t" "LDR r0, [sp]\n\t" "LDR r1, [sp, #4]\n\t" - "ADD r2, r1, #0x20\n\t" - "ADD r0, r0, #0x20\n\t" + "ADD r2, r1, #32\n\t" + "ADD r0, r0, #32\n\t" "BL fe_add_op\n\t" "MOV r1, r0\n\t" "ADD r0, r0, #0x40\n\t" @@ -4968,20 +4967,20 @@ WC_OMIT_FRAME_POINTER void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p) "MOV r3, r0\n\t" "ADD r2, r0, #0x40\n\t" "ADD r1, r0, #0x40\n\t" - "ADD r0, r0, #0x20\n\t" + "ADD r0, r0, #32\n\t" "BL fe_add_sub_op\n\t" "MOV r2, r0\n\t" "ADD r1, r0, #0x40\n\t" - "SUB r0, r0, #0x20\n\t" + "SUB r0, r0, #32\n\t" "BL fe_sub_op\n\t" "LDR r1, [sp, #4]\n\t" "ADD r1, r1, #0x40\n\t" "ADD r0, r0, #0x60\n\t" "BL fe_sq2\n\t" - "SUB r2, r0, #0x20\n\t" + "SUB r2, r0, #32\n\t" "MOV r1, r0\n\t" "BL fe_sub_op\n\t" - "ADD sp, sp, #0x8\n\t" + "ADD sp, sp, #8\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [p] "+r" (p) : @@ -5009,27 +5008,27 @@ WC_OMIT_FRAME_POINTER void ge_madd(ge_p1p1 * r, const ge_p3 * p, #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "SUB sp, sp, #0xc\n\t" + "SUB sp, sp, #12\n\t" "STR %[r], [sp]\n\t" "STR %[p], [sp, #4]\n\t" "STR %[q], [sp, #8]\n\t" "MOV r2, r1\n\t" - "ADD r1, r1, #0x20\n\t" + "ADD r1, r1, #32\n\t" "BL fe_add_op\n\t" "LDR r1, [sp, #4]\n\t" "MOV r2, r1\n\t" - "ADD r1, r1, #0x20\n\t" - "ADD r0, r0, #0x20\n\t" + "ADD r1, r1, #32\n\t" + "ADD r0, r0, #32\n\t" "BL fe_sub_op\n\t" "LDR r2, [sp, #8]\n\t" - "SUB r1, r0, #0x20\n\t" - "ADD r0, r0, #0x20\n\t" + "SUB r1, r0, #32\n\t" + "ADD r0, r0, #32\n\t" "BL fe_mul_op\n\t" "LDR r0, [sp]\n\t" "LDR r2, [sp, #8]\n\t" - "ADD r2, r2, #0x20\n\t" - "ADD r1, r0, #0x20\n\t" - "ADD r0, r0, #0x20\n\t" + "ADD r2, r2, #32\n\t" + "ADD r1, r0, #32\n\t" + "ADD r0, r0, #32\n\t" "BL fe_mul_op\n\t" "LDR r0, [sp]\n\t" "LDR r1, [sp, #8]\n\t" @@ -5039,14 +5038,14 @@ WC_OMIT_FRAME_POINTER void ge_madd(ge_p1p1 * r, const ge_p3 * p, "ADD r0, r0, #0x60\n\t" "BL fe_mul_op\n\t" "LDR r0, [sp]\n\t" - "ADD r3, r0, #0x20\n\t" + "ADD r3, r0, #32\n\t" "ADD r2, r0, #0x40\n\t" "MOV r1, r0\n\t" - "ADD r0, r0, #0x20\n\t" + "ADD r0, r0, #32\n\t" "BL fe_add_sub_op\n\t" "LDR r1, [sp, #4]\n\t" "ADD r1, r1, #0x40\n\t" - "ADD r0, r0, #0x20\n\t" + "ADD r0, r0, #32\n\t" /* Double */ "LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ADDS r4, r4, r4\n\t" @@ -5056,28 +5055,28 @@ WC_OMIT_FRAME_POINTER void ge_madd(ge_p1p1 * r, const ge_p3 * p, "ADCS r8, r8, r8\n\t" "ADCS r9, r9, r9\n\t" "ADCS r10, r10, r10\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "ADCS r11, r11, r11\n\t" - "ADC lr, lr, #0x0\n\t" - "MOV r12, #0x13\n\t" + "ADC lr, lr, #0\n\t" + "MOV r12, #19\n\t" "LSL lr, lr, #1\n\t" "ORR lr, lr, r11, LSR #31\n\t" "MUL r12, lr, r12\n\t" "ADDS r4, r4, r12\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADCS r9, r9, #0x0\n\t" + "ADCS r5, r5, #0\n\t" + "ADCS r6, r6, #0\n\t" + "ADCS r7, r7, #0\n\t" + "ADCS r8, r8, #0\n\t" + "ADCS r9, r9, #0\n\t" "BFC r11, #31, #1\n\t" - "ADCS r10, r10, #0x0\n\t" - "ADC r11, r11, #0x0\n\t" + "ADCS r10, r10, #0\n\t" + "ADC r11, r11, #0\n\t" "STM r0, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" /* Done Double */ - "ADD r3, r0, #0x20\n\t" - "ADD r1, r0, #0x20\n\t" + "ADD r3, r0, #32\n\t" + "ADD r1, r0, #32\n\t" "BL fe_add_sub_op\n\t" - "ADD sp, sp, #0xc\n\t" + "ADD sp, sp, #12\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : @@ -5105,27 +5104,27 @@ WC_OMIT_FRAME_POINTER void ge_msub(ge_p1p1 * r, const ge_p3 * p, #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "SUB sp, sp, #0xc\n\t" + "SUB sp, sp, #12\n\t" "STR %[r], [sp]\n\t" "STR %[p], [sp, #4]\n\t" "STR %[q], [sp, #8]\n\t" "MOV r2, r1\n\t" - "ADD r1, r1, #0x20\n\t" + "ADD r1, r1, #32\n\t" "BL fe_add_op\n\t" "LDR r1, [sp, #4]\n\t" "MOV r2, r1\n\t" - "ADD r1, r1, #0x20\n\t" - "ADD r0, r0, #0x20\n\t" + "ADD r1, r1, #32\n\t" + "ADD r0, r0, #32\n\t" "BL fe_sub_op\n\t" "LDR r2, [sp, #8]\n\t" - "ADD r2, r2, #0x20\n\t" - "SUB r1, r0, #0x20\n\t" - "ADD r0, r0, #0x20\n\t" + "ADD r2, r2, #32\n\t" + "SUB r1, r0, #32\n\t" + "ADD r0, r0, #32\n\t" "BL fe_mul_op\n\t" "LDR r0, [sp]\n\t" "LDR r2, [sp, #8]\n\t" - "ADD r1, r0, #0x20\n\t" - "ADD r0, r0, #0x20\n\t" + "ADD r1, r0, #32\n\t" + "ADD r0, r0, #32\n\t" "BL fe_mul_op\n\t" "LDR r0, [sp]\n\t" "LDR r1, [sp, #8]\n\t" @@ -5135,14 +5134,14 @@ WC_OMIT_FRAME_POINTER void ge_msub(ge_p1p1 * r, const ge_p3 * p, "ADD r0, r0, #0x60\n\t" "BL fe_mul_op\n\t" "LDR r0, [sp]\n\t" - "ADD r3, r0, #0x20\n\t" + "ADD r3, r0, #32\n\t" "ADD r2, r0, #0x40\n\t" "MOV r1, r0\n\t" - "ADD r0, r0, #0x20\n\t" + "ADD r0, r0, #32\n\t" "BL fe_add_sub_op\n\t" "LDR r1, [sp, #4]\n\t" "ADD r1, r1, #0x40\n\t" - "ADD r0, r0, #0x20\n\t" + "ADD r0, r0, #32\n\t" /* Double */ "LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ADDS r4, r4, r4\n\t" @@ -5152,29 +5151,29 @@ WC_OMIT_FRAME_POINTER void ge_msub(ge_p1p1 * r, const ge_p3 * p, "ADCS r8, r8, r8\n\t" "ADCS r9, r9, r9\n\t" "ADCS r10, r10, r10\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "ADCS r11, r11, r11\n\t" - "ADC lr, lr, #0x0\n\t" - "MOV r12, #0x13\n\t" + "ADC lr, lr, #0\n\t" + "MOV r12, #19\n\t" "LSL lr, lr, #1\n\t" "ORR lr, lr, r11, LSR #31\n\t" "MUL r12, lr, r12\n\t" "ADDS r4, r4, r12\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADCS r9, r9, #0x0\n\t" + "ADCS r5, r5, #0\n\t" + "ADCS r6, r6, #0\n\t" + "ADCS r7, r7, #0\n\t" + "ADCS r8, r8, #0\n\t" + "ADCS r9, r9, #0\n\t" "BFC r11, #31, #1\n\t" - "ADCS r10, r10, #0x0\n\t" - "ADC r11, r11, #0x0\n\t" + "ADCS r10, r10, #0\n\t" + "ADC r11, r11, #0\n\t" "STM r0, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" /* Done Double */ - "ADD r3, r0, #0x20\n\t" + "ADD r3, r0, #32\n\t" "MOV r1, r0\n\t" - "ADD r0, r0, #0x20\n\t" + "ADD r0, r0, #32\n\t" "BL fe_add_sub_op\n\t" - "ADD sp, sp, #0xc\n\t" + "ADD sp, sp, #12\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : @@ -5202,13 +5201,13 @@ WC_OMIT_FRAME_POINTER void ge_add(ge_p1p1 * r, const ge_p3 * p, #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "SUB sp, sp, #0x2c\n\t" + "SUB sp, sp, #44\n\t" "STR %[r], [sp]\n\t" "STR %[p], [sp, #4]\n\t" "STR %[q], [sp, #8]\n\t" "MOV r3, r1\n\t" - "ADD r2, r1, #0x20\n\t" - "ADD r1, r0, #0x20\n\t" + "ADD r2, r1, #32\n\t" + "ADD r1, r0, #32\n\t" "BL fe_add_sub_op\n\t" "LDR r2, [sp, #8]\n\t" "MOV r1, r0\n\t" @@ -5216,9 +5215,9 @@ WC_OMIT_FRAME_POINTER void ge_add(ge_p1p1 * r, const ge_p3 * p, "BL fe_mul_op\n\t" "LDR r0, [sp]\n\t" "LDR r2, [sp, #8]\n\t" - "ADD r2, r2, #0x20\n\t" - "ADD r1, r0, #0x20\n\t" - "ADD r0, r0, #0x20\n\t" + "ADD r2, r2, #32\n\t" + "ADD r1, r0, #32\n\t" + "ADD r0, r0, #32\n\t" "BL fe_mul_op\n\t" "LDR r0, [sp]\n\t" "LDR r1, [sp, #8]\n\t" @@ -5234,7 +5233,7 @@ WC_OMIT_FRAME_POINTER void ge_add(ge_p1p1 * r, const ge_p3 * p, "ADD r1, r1, #0x40\n\t" "BL fe_mul_op\n\t" "LDR r1, [sp]\n\t" - "ADD r0, sp, #0xc\n\t" + "ADD r0, sp, #12\n\t" /* Double */ "LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ADDS r4, r4, r4\n\t" @@ -5244,34 +5243,34 @@ WC_OMIT_FRAME_POINTER void ge_add(ge_p1p1 * r, const ge_p3 * p, "ADCS r8, r8, r8\n\t" "ADCS r9, r9, r9\n\t" "ADCS r10, r10, r10\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "ADCS r11, r11, r11\n\t" - "ADC lr, lr, #0x0\n\t" - "MOV r12, #0x13\n\t" + "ADC lr, lr, #0\n\t" + "MOV r12, #19\n\t" "LSL lr, lr, #1\n\t" "ORR lr, lr, r11, LSR #31\n\t" "MUL r12, lr, r12\n\t" "ADDS r4, r4, r12\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADCS r9, r9, #0x0\n\t" + "ADCS r5, r5, #0\n\t" + "ADCS r6, r6, #0\n\t" + "ADCS r7, r7, #0\n\t" + "ADCS r8, r8, #0\n\t" + "ADCS r9, r9, #0\n\t" "BFC r11, #31, #1\n\t" - "ADCS r10, r10, #0x0\n\t" - "ADC r11, r11, #0x0\n\t" + "ADCS r10, r10, #0\n\t" + "ADC r11, r11, #0\n\t" "STM r0, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" /* Done Double */ - "ADD r3, r1, #0x20\n\t" + "ADD r3, r1, #32\n\t" "ADD r2, r1, #0x40\n\t" - "ADD r0, r1, #0x20\n\t" + "ADD r0, r1, #32\n\t" "BL fe_add_sub_op\n\t" "ADD r3, r0, #0x40\n\t" - "ADD r2, sp, #0xc\n\t" + "ADD r2, sp, #12\n\t" "ADD r1, r0, #0x40\n\t" - "ADD r0, r0, #0x20\n\t" + "ADD r0, r0, #32\n\t" "BL fe_add_sub_op\n\t" - "ADD sp, sp, #0x2c\n\t" + "ADD sp, sp, #44\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : @@ -5299,23 +5298,23 @@ WC_OMIT_FRAME_POINTER void ge_sub(ge_p1p1 * r, const ge_p3 * p, #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "SUB sp, sp, #0x2c\n\t" + "SUB sp, sp, #44\n\t" "STR %[r], [sp]\n\t" "STR %[p], [sp, #4]\n\t" "STR %[q], [sp, #8]\n\t" "MOV r3, r1\n\t" - "ADD r2, r1, #0x20\n\t" - "ADD r1, r0, #0x20\n\t" + "ADD r2, r1, #32\n\t" + "ADD r1, r0, #32\n\t" "BL fe_add_sub_op\n\t" "LDR r2, [sp, #8]\n\t" - "ADD r2, r2, #0x20\n\t" + "ADD r2, r2, #32\n\t" "MOV r1, r0\n\t" "ADD r0, r0, #0x40\n\t" "BL fe_mul_op\n\t" "LDR r0, [sp]\n\t" "LDR r2, [sp, #8]\n\t" - "ADD r1, r0, #0x20\n\t" - "ADD r0, r0, #0x20\n\t" + "ADD r1, r0, #32\n\t" + "ADD r0, r0, #32\n\t" "BL fe_mul_op\n\t" "LDR r0, [sp]\n\t" "LDR r1, [sp, #8]\n\t" @@ -5331,7 +5330,7 @@ WC_OMIT_FRAME_POINTER void ge_sub(ge_p1p1 * r, const ge_p3 * p, "ADD r1, r1, #0x40\n\t" "BL fe_mul_op\n\t" "LDR r1, [sp]\n\t" - "ADD r0, sp, #0xc\n\t" + "ADD r0, sp, #12\n\t" /* Double */ "LDM r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ADDS r4, r4, r4\n\t" @@ -5341,34 +5340,34 @@ WC_OMIT_FRAME_POINTER void ge_sub(ge_p1p1 * r, const ge_p3 * p, "ADCS r8, r8, r8\n\t" "ADCS r9, r9, r9\n\t" "ADCS r10, r10, r10\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "ADCS r11, r11, r11\n\t" - "ADC lr, lr, #0x0\n\t" - "MOV r12, #0x13\n\t" + "ADC lr, lr, #0\n\t" + "MOV r12, #19\n\t" "LSL lr, lr, #1\n\t" "ORR lr, lr, r11, LSR #31\n\t" "MUL r12, lr, r12\n\t" "ADDS r4, r4, r12\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADCS r9, r9, #0x0\n\t" + "ADCS r5, r5, #0\n\t" + "ADCS r6, r6, #0\n\t" + "ADCS r7, r7, #0\n\t" + "ADCS r8, r8, #0\n\t" + "ADCS r9, r9, #0\n\t" "BFC r11, #31, #1\n\t" - "ADCS r10, r10, #0x0\n\t" - "ADC r11, r11, #0x0\n\t" + "ADCS r10, r10, #0\n\t" + "ADC r11, r11, #0\n\t" "STM r0, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" /* Done Double */ - "ADD r3, r1, #0x20\n\t" + "ADD r3, r1, #32\n\t" "ADD r2, r1, #0x40\n\t" - "ADD r0, r1, #0x20\n\t" + "ADD r0, r1, #32\n\t" "BL fe_add_sub_op\n\t" "ADD r3, r0, #0x40\n\t" - "ADD r2, sp, #0xc\n\t" - "ADD r1, r0, #0x20\n\t" + "ADD r2, sp, #12\n\t" + "ADD r1, r0, #32\n\t" "ADD r0, r0, #0x40\n\t" "BL fe_add_sub_op\n\t" - "ADD sp, sp, #0x2c\n\t" + "ADD sp, sp, #44\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : @@ -5395,10 +5394,10 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "SUB sp, sp, #0x38\n\t" + "SUB sp, sp, #56\n\t" "STR %[s], [sp, #52]\n\t" /* Load bits 252-511 */ - "ADD %[s], %[s], #0x1c\n\t" + "ADD %[s], %[s], #28\n\t" "LDM %[s], {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "LSR lr, r9, #24\n\t" "LSL r9, r9, #4\n\t" @@ -5418,206 +5417,206 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "LSL r2, r2, #4\n\t" "ORR r2, r2, r1, LSR #28\n\t" "BFC r9, #28, #4\n\t" - "SUB %[s], %[s], #0x1c\n\t" + "SUB %[s], %[s], #28\n\t" /* Add order times bits 504..511 */ "MOV r10, #0x2c13\n\t" "MOVT r10, #0xa30a\n\t" "MOV r11, #0x9ce5\n\t" "MOVT r11, #0xa7ed\n\t" - "MOV r1, #0x0\n\t" + "MOV r1, #0\n\t" "UMLAL r2, r1, r10, lr\n\t" "ADDS r3, r3, r1\n\t" - "MOV r1, #0x0\n\t" - "ADC r1, r1, #0x0\n\t" + "MOV r1, #0\n\t" + "ADC r1, r1, #0\n\t" "UMLAL r3, r1, r11, lr\n\t" "MOV r10, #0x6329\n\t" "MOVT r10, #0x5d08\n\t" "MOV r11, #0x621\n\t" "MOVT r11, #0xeb21\n\t" "ADDS r4, r4, r1\n\t" - "MOV r1, #0x0\n\t" - "ADC r1, r1, #0x0\n\t" + "MOV r1, #0\n\t" + "ADC r1, r1, #0\n\t" "UMLAL r4, r1, r10, lr\n\t" "ADDS r5, r5, r1\n\t" - "MOV r1, #0x0\n\t" - "ADC r1, r1, #0x0\n\t" + "MOV r1, #0\n\t" + "ADC r1, r1, #0\n\t" "UMLAL r5, r1, r11, lr\n\t" "ADDS r6, r6, r1\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADC r9, r9, #0x0\n\t" + "ADCS r7, r7, #0\n\t" + "ADCS r8, r8, #0\n\t" + "ADC r9, r9, #0\n\t" "SUBS r6, r6, lr\n\t" - "SBCS r7, r7, #0x0\n\t" - "SBCS r8, r8, #0x0\n\t" - "SBC r9, r9, #0x0\n\t" + "SBCS r7, r7, #0\n\t" + "SBCS r8, r8, #0\n\t" + "SBC r9, r9, #0\n\t" /* Sub product of top 8 words and order */ "MOV r12, sp\n\t" "MOV r1, #0x2c13\n\t" "MOVT r1, #0xa30a\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM %[s]!, {r10, r11}\n\t" "UMLAL r10, lr, r2, r1\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r3, r1\n\t" "STM r12!, {r10, r11}\n\t" "LDM %[s]!, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r4, r1\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r5, r1\n\t" "STM r12!, {r10, r11}\n\t" "LDM %[s]!, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r6, r1\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r7, r1\n\t" "STM r12!, {r10, r11}\n\t" "LDM %[s]!, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r8, r1\n\t" "BFC r11, #28, #4\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r9, r1\n\t" "STM r12!, {r10, r11, lr}\n\t" - "SUB %[s], %[s], #0x10\n\t" - "SUB r12, r12, #0x20\n\t" + "SUB %[s], %[s], #16\n\t" + "SUB r12, r12, #32\n\t" "MOV r1, #0x9ce5\n\t" "MOVT r1, #0xa7ed\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM r12, {r10, r11}\n\t" "UMLAL r10, lr, r2, r1\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r3, r1\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r4, r1\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r5, r1\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r6, r1\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r7, r1\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r8, r1\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r9, r1\n\t" "STM r12!, {r10, r11, lr}\n\t" - "SUB r12, r12, #0x20\n\t" + "SUB r12, r12, #32\n\t" "MOV r1, #0x6329\n\t" "MOVT r1, #0x5d08\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM r12, {r10, r11}\n\t" "UMLAL r10, lr, r2, r1\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r3, r1\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r4, r1\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r5, r1\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r6, r1\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r7, r1\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r8, r1\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r9, r1\n\t" "STM r12!, {r10, r11, lr}\n\t" - "SUB r12, r12, #0x20\n\t" + "SUB r12, r12, #32\n\t" "MOV r1, #0x621\n\t" "MOVT r1, #0xeb21\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM r12, {r10, r11}\n\t" "UMLAL r10, lr, r2, r1\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r3, r1\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r4, r1\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r5, r1\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r6, r1\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r7, r1\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r8, r1\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r9, r1\n\t" "STM r12!, {r10, r11, lr}\n\t" - "SUB r12, r12, #0x20\n\t" + "SUB r12, r12, #32\n\t" /* Subtract at 4 * 32 */ "LDM r12, {r10, r11}\n\t" "SUBS r10, r10, r2\n\t" @@ -5635,7 +5634,7 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "SBCS r10, r10, r8\n\t" "SBC r11, r11, r9\n\t" "STM r12!, {r10, r11}\n\t" - "SUB r12, r12, #0x24\n\t" + "SUB r12, r12, #36\n\t" "ASR lr, r11, #25\n\t" /* Conditionally subtract order starting at bit 125 */ "MOV r1, #0xa0000000\n\t" @@ -5664,19 +5663,19 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADCS r10, r10, r5\n\t" - "ADCS r11, r11, #0x0\n\t" + "ADCS r11, r11, #0\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" - "ADCS r10, r10, #0x0\n\t" - "ADCS r11, r11, #0x0\n\t" + "ADCS r10, r10, #0\n\t" + "ADCS r11, r11, #0\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10}\n\t" - "ADCS r10, r10, #0x0\n\t" + "ADCS r10, r10, #0\n\t" "STM r12!, {r10}\n\t" - "SUB %[s], %[s], #0x10\n\t" + "SUB %[s], %[s], #16\n\t" "MOV r12, sp\n\t" /* Load bits 252-376 */ - "ADD r12, r12, #0x1c\n\t" + "ADD r12, r12, #28\n\t" "LDM r12, {r1, r2, r3, r4, r5}\n\t" "LSL r5, r5, #4\n\t" "ORR r5, r5, r4, LSR #28\n\t" @@ -5687,89 +5686,89 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "LSL r2, r2, #4\n\t" "ORR r2, r2, r1, LSR #28\n\t" "BFC r5, #29, #3\n\t" - "SUB r12, r12, #0x1c\n\t" + "SUB r12, r12, #28\n\t" /* Sub product of top 4 words and order */ "MOV %[s], sp\n\t" /* * -5cf5d3ed */ "MOV r1, #0x2c13\n\t" "MOVT r1, #0xa30a\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, lr, r2, r1\n\t" "ADDS r7, r7, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r7, lr, r3, r1\n\t" "ADDS r8, r8, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r8, lr, r4, r1\n\t" "ADDS r9, r9, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r9, lr, r5, r1\n\t" "STM %[s], {r6, r7, r8, r9}\n\t" - "ADD %[s], %[s], #0x4\n\t" + "ADD %[s], %[s], #4\n\t" /* * -5812631b */ "MOV r1, #0x9ce5\n\t" "MOVT r1, #0xa7ed\n\t" - "MOV r10, #0x0\n\t" + "MOV r10, #0\n\t" "LDM %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r10, r2, r1\n\t" "ADDS r7, r7, r10\n\t" - "MOV r10, #0x0\n\t" - "ADC r10, r10, #0x0\n\t" + "MOV r10, #0\n\t" + "ADC r10, r10, #0\n\t" "UMLAL r7, r10, r3, r1\n\t" "ADDS r8, r8, r10\n\t" - "MOV r10, #0x0\n\t" - "ADC r10, r10, #0x0\n\t" + "MOV r10, #0\n\t" + "ADC r10, r10, #0\n\t" "UMLAL r8, r10, r4, r1\n\t" "ADDS r9, r9, r10\n\t" - "MOV r10, #0x0\n\t" - "ADC r10, r10, #0x0\n\t" + "MOV r10, #0\n\t" + "ADC r10, r10, #0\n\t" "UMLAL r9, r10, r5, r1\n\t" "STM %[s], {r6, r7, r8, r9}\n\t" - "ADD %[s], %[s], #0x4\n\t" + "ADD %[s], %[s], #4\n\t" /* * -a2f79cd7 */ "MOV r1, #0x6329\n\t" "MOVT r1, #0x5d08\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "LDM %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r11, r2, r1\n\t" "ADDS r7, r7, r11\n\t" - "MOV r11, #0x0\n\t" - "ADC r11, r11, #0x0\n\t" + "MOV r11, #0\n\t" + "ADC r11, r11, #0\n\t" "UMLAL r7, r11, r3, r1\n\t" "ADDS r8, r8, r11\n\t" - "MOV r11, #0x0\n\t" - "ADC r11, r11, #0x0\n\t" + "MOV r11, #0\n\t" + "ADC r11, r11, #0\n\t" "UMLAL r8, r11, r4, r1\n\t" "ADDS r9, r9, r11\n\t" - "MOV r11, #0x0\n\t" - "ADC r11, r11, #0x0\n\t" + "MOV r11, #0\n\t" + "ADC r11, r11, #0\n\t" "UMLAL r9, r11, r5, r1\n\t" "STM %[s], {r6, r7, r8, r9}\n\t" - "ADD %[s], %[s], #0x4\n\t" + "ADD %[s], %[s], #4\n\t" /* * -14def9df */ "MOV r1, #0x621\n\t" "MOVT r1, #0xeb21\n\t" - "MOV r12, #0x0\n\t" + "MOV r12, #0\n\t" "LDM %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r12, r2, r1\n\t" "ADDS r7, r7, r12\n\t" - "MOV r12, #0x0\n\t" - "ADC r12, r12, #0x0\n\t" + "MOV r12, #0\n\t" + "ADC r12, r12, #0\n\t" "UMLAL r7, r12, r3, r1\n\t" "ADDS r8, r8, r12\n\t" - "MOV r12, #0x0\n\t" - "ADC r12, r12, #0x0\n\t" + "MOV r12, #0\n\t" + "ADC r12, r12, #0\n\t" "UMLAL r8, r12, r4, r1\n\t" "ADDS r9, r9, r12\n\t" - "MOV r12, #0x0\n\t" - "ADC r12, r12, #0x0\n\t" + "MOV r12, #0\n\t" + "ADC r12, r12, #0\n\t" "UMLAL r9, r12, r5, r1\n\t" "STM %[s], {r6, r7, r8, r9}\n\t" - "ADD %[s], %[s], #0x4\n\t" + "ADD %[s], %[s], #4\n\t" /* Add overflows at 4 * 32 */ "LDM %[s], {r6, r7, r8, r9}\n\t" "BFC r9, #28, #4\n\t" @@ -5783,7 +5782,7 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "SBCS r8, r8, r4\n\t" "SBCS r9, r9, r5\n\t" "SBC r1, r1, r1\n\t" - "SUB %[s], %[s], #0x10\n\t" + "SUB %[s], %[s], #16\n\t" "LDM %[s], {r2, r3, r4, r5}\n\t" "MOV r10, #0xd3ed\n\t" "MOVT r10, #0x5cf5\n\t" @@ -5801,16 +5800,16 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "ADCS r3, r3, r11\n\t" "ADCS r4, r4, r12\n\t" "ADCS r5, r5, lr\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADCS r7, r7, #0x0\n\t" + "ADCS r6, r6, #0\n\t" + "ADCS r7, r7, #0\n\t" "AND r1, r1, #0x10000000\n\t" - "ADCS r8, r8, #0x0\n\t" + "ADCS r8, r8, #0\n\t" "ADC r9, r9, r1\n\t" "BFC r9, #28, #4\n\t" /* Store result */ "LDR %[s], [sp, #52]\n\t" "STM %[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" - "ADD sp, sp, #0x38\n\t" + "ADD sp, sp, #56\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [s] "+r" (s) : @@ -5835,10 +5834,10 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "SUB sp, sp, #0x38\n\t" + "SUB sp, sp, #56\n\t" "STR %[s], [sp, #52]\n\t" /* Load bits 252-511 */ - "ADD %[s], %[s], #0x1c\n\t" + "ADD %[s], %[s], #28\n\t" "LDM %[s], {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "LSR lr, r9, #24\n\t" "LSL r9, r9, #4\n\t" @@ -5858,13 +5857,13 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "LSL r2, r2, #4\n\t" "ORR r2, r2, r1, LSR #28\n\t" "BFC r9, #28, #4\n\t" - "SUB %[s], %[s], #0x1c\n\t" + "SUB %[s], %[s], #28\n\t" /* Add order times bits 504..511 */ "MOV r10, #0x2c13\n\t" "MOVT r10, #0xa30a\n\t" "MOV r11, #0x9ce5\n\t" "MOVT r11, #0xa7ed\n\t" - "MOV r1, #0x0\n\t" + "MOV r1, #0\n\t" "UMLAL r2, r1, r10, lr\n\t" "UMAAL r3, r1, r11, lr\n\t" "MOV r10, #0x6329\n\t" @@ -5874,18 +5873,18 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "UMAAL r4, r1, r10, lr\n\t" "UMAAL r5, r1, r11, lr\n\t" "ADDS r6, r6, r1\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADC r9, r9, #0x0\n\t" + "ADCS r7, r7, #0\n\t" + "ADCS r8, r8, #0\n\t" + "ADC r9, r9, #0\n\t" "SUBS r6, r6, lr\n\t" - "SBCS r7, r7, #0x0\n\t" - "SBCS r8, r8, #0x0\n\t" - "SBC r9, r9, #0x0\n\t" + "SBCS r7, r7, #0\n\t" + "SBCS r8, r8, #0\n\t" + "SBC r9, r9, #0\n\t" /* Sub product of top 8 words and order */ "MOV r12, sp\n\t" "MOV r1, #0x2c13\n\t" "MOVT r1, #0xa30a\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM %[s]!, {r10, r11}\n\t" "UMLAL r10, lr, r2, r1\n\t" "UMAAL r11, lr, r3, r1\n\t" @@ -5903,11 +5902,11 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "BFC r11, #28, #4\n\t" "UMAAL r11, lr, r9, r1\n\t" "STM r12!, {r10, r11, lr}\n\t" - "SUB %[s], %[s], #0x10\n\t" - "SUB r12, r12, #0x20\n\t" + "SUB %[s], %[s], #16\n\t" + "SUB r12, r12, #32\n\t" "MOV r1, #0x9ce5\n\t" "MOVT r1, #0xa7ed\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM r12, {r10, r11}\n\t" "UMLAL r10, lr, r2, r1\n\t" "UMAAL r11, lr, r3, r1\n\t" @@ -5924,10 +5923,10 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "UMAAL r10, lr, r8, r1\n\t" "UMAAL r11, lr, r9, r1\n\t" "STM r12!, {r10, r11, lr}\n\t" - "SUB r12, r12, #0x20\n\t" + "SUB r12, r12, #32\n\t" "MOV r1, #0x6329\n\t" "MOVT r1, #0x5d08\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM r12, {r10, r11}\n\t" "UMLAL r10, lr, r2, r1\n\t" "UMAAL r11, lr, r3, r1\n\t" @@ -5944,10 +5943,10 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "UMAAL r10, lr, r8, r1\n\t" "UMAAL r11, lr, r9, r1\n\t" "STM r12!, {r10, r11, lr}\n\t" - "SUB r12, r12, #0x20\n\t" + "SUB r12, r12, #32\n\t" "MOV r1, #0x621\n\t" "MOVT r1, #0xeb21\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM r12, {r10, r11}\n\t" "UMLAL r10, lr, r2, r1\n\t" "UMAAL r11, lr, r3, r1\n\t" @@ -5964,7 +5963,7 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "UMAAL r10, lr, r8, r1\n\t" "UMAAL r11, lr, r9, r1\n\t" "STM r12!, {r10, r11, lr}\n\t" - "SUB r12, r12, #0x20\n\t" + "SUB r12, r12, #32\n\t" /* Subtract at 4 * 32 */ "LDM r12, {r10, r11}\n\t" "SUBS r10, r10, r2\n\t" @@ -5982,7 +5981,7 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "SBCS r10, r10, r8\n\t" "SBC r11, r11, r9\n\t" "STM r12!, {r10, r11}\n\t" - "SUB r12, r12, #0x24\n\t" + "SUB r12, r12, #36\n\t" "ASR lr, r11, #25\n\t" /* Conditionally subtract order starting at bit 125 */ "MOV r1, #0xa0000000\n\t" @@ -6011,19 +6010,19 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADCS r10, r10, r5\n\t" - "ADCS r11, r11, #0x0\n\t" + "ADCS r11, r11, #0\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" - "ADCS r10, r10, #0x0\n\t" - "ADCS r11, r11, #0x0\n\t" + "ADCS r10, r10, #0\n\t" + "ADCS r11, r11, #0\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10}\n\t" - "ADCS r10, r10, #0x0\n\t" + "ADCS r10, r10, #0\n\t" "STM r12!, {r10}\n\t" - "SUB %[s], %[s], #0x10\n\t" + "SUB %[s], %[s], #16\n\t" "MOV r12, sp\n\t" /* Load bits 252-376 */ - "ADD r12, r12, #0x1c\n\t" + "ADD r12, r12, #28\n\t" "LDM r12, {r1, r2, r3, r4, r5}\n\t" "LSL r5, r5, #4\n\t" "ORR r5, r5, r4, LSR #28\n\t" @@ -6034,53 +6033,53 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "LSL r2, r2, #4\n\t" "ORR r2, r2, r1, LSR #28\n\t" "BFC r5, #29, #3\n\t" - "SUB r12, r12, #0x1c\n\t" + "SUB r12, r12, #28\n\t" /* Sub product of top 4 words and order */ "MOV %[s], sp\n\t" /* * -5cf5d3ed */ "MOV r1, #0x2c13\n\t" "MOVT r1, #0xa30a\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, lr, r2, r1\n\t" "UMAAL r7, lr, r3, r1\n\t" "UMAAL r8, lr, r4, r1\n\t" "UMAAL r9, lr, r5, r1\n\t" "STM %[s], {r6, r7, r8, r9}\n\t" - "ADD %[s], %[s], #0x4\n\t" + "ADD %[s], %[s], #4\n\t" /* * -5812631b */ "MOV r1, #0x9ce5\n\t" "MOVT r1, #0xa7ed\n\t" - "MOV r10, #0x0\n\t" + "MOV r10, #0\n\t" "LDM %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r10, r2, r1\n\t" "UMAAL r7, r10, r3, r1\n\t" "UMAAL r8, r10, r4, r1\n\t" "UMAAL r9, r10, r5, r1\n\t" "STM %[s], {r6, r7, r8, r9}\n\t" - "ADD %[s], %[s], #0x4\n\t" + "ADD %[s], %[s], #4\n\t" /* * -a2f79cd7 */ "MOV r1, #0x6329\n\t" "MOVT r1, #0x5d08\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "LDM %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r11, r2, r1\n\t" "UMAAL r7, r11, r3, r1\n\t" "UMAAL r8, r11, r4, r1\n\t" "UMAAL r9, r11, r5, r1\n\t" "STM %[s], {r6, r7, r8, r9}\n\t" - "ADD %[s], %[s], #0x4\n\t" + "ADD %[s], %[s], #4\n\t" /* * -14def9df */ "MOV r1, #0x621\n\t" "MOVT r1, #0xeb21\n\t" - "MOV r12, #0x0\n\t" + "MOV r12, #0\n\t" "LDM %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r12, r2, r1\n\t" "UMAAL r7, r12, r3, r1\n\t" "UMAAL r8, r12, r4, r1\n\t" "UMAAL r9, r12, r5, r1\n\t" "STM %[s], {r6, r7, r8, r9}\n\t" - "ADD %[s], %[s], #0x4\n\t" + "ADD %[s], %[s], #4\n\t" /* Add overflows at 4 * 32 */ "LDM %[s], {r6, r7, r8, r9}\n\t" "BFC r9, #28, #4\n\t" @@ -6094,7 +6093,7 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "SBCS r8, r8, r4\n\t" "SBCS r9, r9, r5\n\t" "SBC r1, r1, r1\n\t" - "SUB %[s], %[s], #0x10\n\t" + "SUB %[s], %[s], #16\n\t" "LDM %[s], {r2, r3, r4, r5}\n\t" "MOV r10, #0xd3ed\n\t" "MOVT r10, #0x5cf5\n\t" @@ -6112,16 +6111,16 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "ADCS r3, r3, r11\n\t" "ADCS r4, r4, r12\n\t" "ADCS r5, r5, lr\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADCS r7, r7, #0x0\n\t" + "ADCS r6, r6, #0\n\t" + "ADCS r7, r7, #0\n\t" "AND r1, r1, #0x10000000\n\t" - "ADCS r8, r8, #0x0\n\t" + "ADCS r8, r8, #0\n\t" "ADC r9, r9, r1\n\t" "BFC r9, #28, #4\n\t" /* Store result */ "LDR %[s], [sp, #52]\n\t" "STM %[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" - "ADD sp, sp, #0x38\n\t" + "ADD sp, sp, #56\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [s] "+r" (s) : @@ -6156,7 +6155,7 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "SUB sp, sp, #0x50\n\t" "ADD lr, sp, #0x44\n\t" "STM lr, {%[s], %[a], %[c]}\n\t" - "MOV %[s], #0x0\n\t" + "MOV %[s], #0\n\t" "LDR r12, [%[a]]\n\t" /* A[0] * B[0] */ "LDR lr, [%[b]]\n\t" @@ -6178,309 +6177,309 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "ADDS r5, r5, r11\n\t" /* A[0] * B[3] */ "LDR lr, [%[b], #12]\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADCS r6, r6, #0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r6, r11, r12, lr\n\t" "ADDS r7, r7, r11\n\t" /* A[0] * B[5] */ "LDR lr, [%[b], #20]\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADCS r8, r8, #0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r8, r11, r12, lr\n\t" "ADDS r9, r9, r11\n\t" /* A[0] * B[7] */ "LDR lr, [%[b], #28]\n\t" - "ADCS r10, r10, #0x0\n\t" - "ADC %[c], %[s], #0x0\n\t" + "ADCS r10, r10, #0\n\t" + "ADC %[c], %[s], #0\n\t" "UMLAL r10, %[c], r12, lr\n\t" /* A[1] * B[0] */ "LDR r12, [%[a], #4]\n\t" "LDR lr, [%[b]]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r4, r11, r12, lr\n\t" "STR r4, [sp, #4]\n\t" "ADDS r5, r5, r11\n\t" /* A[1] * B[1] */ "LDR lr, [%[b], #4]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r5, r11, r12, lr\n\t" "ADDS r6, r6, r11\n\t" /* A[1] * B[2] */ "LDR lr, [%[b], #8]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r6, r11, r12, lr\n\t" "ADDS r7, r7, r11\n\t" /* A[1] * B[3] */ "LDR lr, [%[b], #12]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r7, r11, r12, lr\n\t" "ADDS r8, r8, r11\n\t" /* A[1] * B[4] */ "LDR lr, [%[b], #16]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r8, r11, r12, lr\n\t" "ADDS r9, r9, r11\n\t" /* A[1] * B[5] */ "LDR lr, [%[b], #20]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r9, r11, r12, lr\n\t" "ADDS r10, r10, r11\n\t" /* A[1] * B[6] */ "LDR lr, [%[b], #24]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "ADDS %[c], %[c], r11\n\t" /* A[1] * B[7] */ "LDR lr, [%[b], #28]\n\t" - "ADC r4, %[s], #0x0\n\t" + "ADC r4, %[s], #0\n\t" "UMLAL %[c], r4, r12, lr\n\t" /* A[2] * B[0] */ "LDR r12, [%[a], #8]\n\t" "LDR lr, [%[b]]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r5, r11, r12, lr\n\t" "STR r5, [sp, #8]\n\t" "ADDS r6, r6, r11\n\t" /* A[2] * B[1] */ "LDR lr, [%[b], #4]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r6, r11, r12, lr\n\t" "ADDS r7, r7, r11\n\t" /* A[2] * B[2] */ "LDR lr, [%[b], #8]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r7, r11, r12, lr\n\t" "ADDS r8, r8, r11\n\t" /* A[2] * B[3] */ "LDR lr, [%[b], #12]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r8, r11, r12, lr\n\t" "ADDS r9, r9, r11\n\t" /* A[2] * B[4] */ "LDR lr, [%[b], #16]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r9, r11, r12, lr\n\t" "ADDS r10, r10, r11\n\t" /* A[2] * B[5] */ "LDR lr, [%[b], #20]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "ADDS %[c], %[c], r11\n\t" /* A[2] * B[6] */ "LDR lr, [%[b], #24]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL %[c], r11, r12, lr\n\t" "ADDS r4, r4, r11\n\t" /* A[2] * B[7] */ "LDR lr, [%[b], #28]\n\t" - "ADC r5, %[s], #0x0\n\t" + "ADC r5, %[s], #0\n\t" "UMLAL r4, r5, r12, lr\n\t" /* A[3] * B[0] */ "LDR r12, [%[a], #12]\n\t" "LDR lr, [%[b]]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r6, r11, r12, lr\n\t" "STR r6, [sp, #12]\n\t" "ADDS r7, r7, r11\n\t" /* A[3] * B[1] */ "LDR lr, [%[b], #4]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r7, r11, r12, lr\n\t" "ADDS r8, r8, r11\n\t" /* A[3] * B[2] */ "LDR lr, [%[b], #8]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r8, r11, r12, lr\n\t" "ADDS r9, r9, r11\n\t" /* A[3] * B[3] */ "LDR lr, [%[b], #12]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r9, r11, r12, lr\n\t" "ADDS r10, r10, r11\n\t" /* A[3] * B[4] */ "LDR lr, [%[b], #16]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "ADDS %[c], %[c], r11\n\t" /* A[3] * B[5] */ "LDR lr, [%[b], #20]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL %[c], r11, r12, lr\n\t" "ADDS r4, r4, r11\n\t" /* A[3] * B[6] */ "LDR lr, [%[b], #24]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r4, r11, r12, lr\n\t" "ADDS r5, r5, r11\n\t" /* A[3] * B[7] */ "LDR lr, [%[b], #28]\n\t" - "ADC r6, %[s], #0x0\n\t" + "ADC r6, %[s], #0\n\t" "UMLAL r5, r6, r12, lr\n\t" /* A[4] * B[0] */ "LDR r12, [%[a], #16]\n\t" "LDR lr, [%[b]]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r7, r11, r12, lr\n\t" "STR r7, [sp, #16]\n\t" "ADDS r8, r8, r11\n\t" /* A[4] * B[1] */ "LDR lr, [%[b], #4]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r8, r11, r12, lr\n\t" "ADDS r9, r9, r11\n\t" /* A[4] * B[2] */ "LDR lr, [%[b], #8]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r9, r11, r12, lr\n\t" "ADDS r10, r10, r11\n\t" /* A[4] * B[3] */ "LDR lr, [%[b], #12]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "ADDS %[c], %[c], r11\n\t" /* A[4] * B[4] */ "LDR lr, [%[b], #16]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL %[c], r11, r12, lr\n\t" "ADDS r4, r4, r11\n\t" /* A[4] * B[5] */ "LDR lr, [%[b], #20]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r4, r11, r12, lr\n\t" "ADDS r5, r5, r11\n\t" /* A[4] * B[6] */ "LDR lr, [%[b], #24]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r5, r11, r12, lr\n\t" "ADDS r6, r6, r11\n\t" /* A[4] * B[7] */ "LDR lr, [%[b], #28]\n\t" - "ADC r7, %[s], #0x0\n\t" + "ADC r7, %[s], #0\n\t" "UMLAL r6, r7, r12, lr\n\t" /* A[5] * B[0] */ "LDR r12, [%[a], #20]\n\t" "LDR lr, [%[b]]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r8, r11, r12, lr\n\t" "STR r8, [sp, #20]\n\t" "ADDS r9, r9, r11\n\t" /* A[5] * B[1] */ "LDR lr, [%[b], #4]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r9, r11, r12, lr\n\t" "ADDS r10, r10, r11\n\t" /* A[5] * B[2] */ "LDR lr, [%[b], #8]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "ADDS %[c], %[c], r11\n\t" /* A[5] * B[3] */ "LDR lr, [%[b], #12]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL %[c], r11, r12, lr\n\t" "ADDS r4, r4, r11\n\t" /* A[5] * B[4] */ "LDR lr, [%[b], #16]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r4, r11, r12, lr\n\t" "ADDS r5, r5, r11\n\t" /* A[5] * B[5] */ "LDR lr, [%[b], #20]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r5, r11, r12, lr\n\t" "ADDS r6, r6, r11\n\t" /* A[5] * B[6] */ "LDR lr, [%[b], #24]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r6, r11, r12, lr\n\t" "ADDS r7, r7, r11\n\t" /* A[5] * B[7] */ "LDR lr, [%[b], #28]\n\t" - "ADC r8, %[s], #0x0\n\t" + "ADC r8, %[s], #0\n\t" "UMLAL r7, r8, r12, lr\n\t" /* A[6] * B[0] */ "LDR r12, [%[a], #24]\n\t" "LDR lr, [%[b]]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r9, r11, r12, lr\n\t" "STR r9, [sp, #24]\n\t" "ADDS r10, r10, r11\n\t" /* A[6] * B[1] */ "LDR lr, [%[b], #4]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "ADDS %[c], %[c], r11\n\t" /* A[6] * B[2] */ "LDR lr, [%[b], #8]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL %[c], r11, r12, lr\n\t" "ADDS r4, r4, r11\n\t" /* A[6] * B[3] */ "LDR lr, [%[b], #12]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r4, r11, r12, lr\n\t" "ADDS r5, r5, r11\n\t" /* A[6] * B[4] */ "LDR lr, [%[b], #16]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r5, r11, r12, lr\n\t" "ADDS r6, r6, r11\n\t" /* A[6] * B[5] */ "LDR lr, [%[b], #20]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r6, r11, r12, lr\n\t" "ADDS r7, r7, r11\n\t" /* A[6] * B[6] */ "LDR lr, [%[b], #24]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r7, r11, r12, lr\n\t" "ADDS r8, r8, r11\n\t" /* A[6] * B[7] */ "LDR lr, [%[b], #28]\n\t" - "ADC r9, %[s], #0x0\n\t" + "ADC r9, %[s], #0\n\t" "UMLAL r8, r9, r12, lr\n\t" /* A[7] * B[0] */ "LDR r12, [%[a], #28]\n\t" "LDR lr, [%[b]]\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "UMLAL r10, r11, r12, lr\n\t" "STR r10, [sp, #28]\n\t" "ADDS %[c], %[c], r11\n\t" /* A[7] * B[1] */ "LDR lr, [%[b], #4]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL %[c], r11, r12, lr\n\t" "ADDS r4, r4, r11\n\t" /* A[7] * B[2] */ "LDR lr, [%[b], #8]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r4, r11, r12, lr\n\t" "ADDS r5, r5, r11\n\t" /* A[7] * B[3] */ "LDR lr, [%[b], #12]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r5, r11, r12, lr\n\t" "ADDS r6, r6, r11\n\t" /* A[7] * B[4] */ "LDR lr, [%[b], #16]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r6, r11, r12, lr\n\t" "ADDS r7, r7, r11\n\t" /* A[7] * B[5] */ "LDR lr, [%[b], #20]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r7, r11, r12, lr\n\t" "ADDS r8, r8, r11\n\t" /* A[7] * B[6] */ "LDR lr, [%[b], #24]\n\t" - "ADC r11, %[s], #0x0\n\t" + "ADC r11, %[s], #0\n\t" "UMLAL r8, r11, r12, lr\n\t" "ADDS r9, r9, r11\n\t" /* A[7] * B[7] */ "LDR lr, [%[b], #28]\n\t" - "ADC r10, %[s], #0x0\n\t" + "ADC r10, %[s], #0\n\t" "UMLAL r9, r10, r12, lr\n\t" - "ADD lr, sp, #0x20\n\t" + "ADD lr, sp, #32\n\t" "STM lr, {%[c], r4, r5, r6, r7, r8, r9, r10}\n\t" "MOV %[s], sp\n\t" /* Add c to a * b */ @@ -6499,15 +6498,15 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "MOV %[a], r9\n\t" "STM %[s]!, {%[b], %[c], r4, r5, r6, r7, r8, r9}\n\t" "LDM %[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" - "ADCS %[b], %[b], #0x0\n\t" - "ADCS %[c], %[c], #0x0\n\t" - "ADCS r4, r4, #0x0\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADC r9, r9, #0x0\n\t" - "SUB %[s], %[s], #0x20\n\t" + "ADCS %[b], %[b], #0\n\t" + "ADCS %[c], %[c], #0\n\t" + "ADCS r4, r4, #0\n\t" + "ADCS r5, r5, #0\n\t" + "ADCS r6, r6, #0\n\t" + "ADCS r7, r7, #0\n\t" + "ADCS r8, r8, #0\n\t" + "ADC r9, r9, #0\n\t" + "SUB %[s], %[s], #32\n\t" /* Get 252..503 and 504..507 */ "LSR lr, r9, #24\n\t" "LSL r9, r9, #4\n\t" @@ -6532,200 +6531,200 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "MOVT r10, #0xa30a\n\t" "MOV r11, #0x9ce5\n\t" "MOVT r11, #0xa7ed\n\t" - "MOV %[a], #0x0\n\t" + "MOV %[a], #0\n\t" "UMLAL %[b], %[a], r10, lr\n\t" "ADDS %[c], %[c], %[a]\n\t" - "MOV %[a], #0x0\n\t" - "ADC %[a], %[a], #0x0\n\t" + "MOV %[a], #0\n\t" + "ADC %[a], %[a], #0\n\t" "UMLAL %[c], %[a], r11, lr\n\t" "MOV r10, #0x6329\n\t" "MOVT r10, #0x5d08\n\t" "MOV r11, #0x621\n\t" "MOVT r11, #0xeb21\n\t" "ADDS r4, r4, %[a]\n\t" - "MOV %[a], #0x0\n\t" - "ADC %[a], %[a], #0x0\n\t" + "MOV %[a], #0\n\t" + "ADC %[a], %[a], #0\n\t" "UMLAL r4, %[a], r10, lr\n\t" "ADDS r5, r5, %[a]\n\t" - "MOV %[a], #0x0\n\t" - "ADC %[a], %[a], #0x0\n\t" + "MOV %[a], #0\n\t" + "ADC %[a], %[a], #0\n\t" "UMLAL r5, %[a], r11, lr\n\t" "ADDS r6, r6, %[a]\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADC r9, r9, #0x0\n\t" + "ADCS r7, r7, #0\n\t" + "ADCS r8, r8, #0\n\t" + "ADC r9, r9, #0\n\t" "SUBS r6, r6, lr\n\t" - "SBCS r7, r7, #0x0\n\t" - "SBCS r8, r8, #0x0\n\t" - "SBC r9, r9, #0x0\n\t" + "SBCS r7, r7, #0\n\t" + "SBCS r8, r8, #0\n\t" + "SBC r9, r9, #0\n\t" /* Sub product of top 8 words and order */ "MOV r12, sp\n\t" "MOV %[a], #0x2c13\n\t" "MOVT %[a], #0xa30a\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM %[s]!, {r10, r11}\n\t" "UMLAL r10, lr, %[b], %[a]\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, %[c], %[a]\n\t" "STM r12!, {r10, r11}\n\t" "LDM %[s]!, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r4, %[a]\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r5, %[a]\n\t" "STM r12!, {r10, r11}\n\t" "LDM %[s]!, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r6, %[a]\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r7, %[a]\n\t" "STM r12!, {r10, r11}\n\t" "LDM %[s]!, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r8, %[a]\n\t" "BFC r11, #28, #4\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r9, %[a]\n\t" "STM r12!, {r10, r11, lr}\n\t" - "SUB %[s], %[s], #0x10\n\t" - "SUB r12, r12, #0x20\n\t" + "SUB %[s], %[s], #16\n\t" + "SUB r12, r12, #32\n\t" "MOV %[a], #0x9ce5\n\t" "MOVT %[a], #0xa7ed\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM r12, {r10, r11}\n\t" "UMLAL r10, lr, %[b], %[a]\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, %[c], %[a]\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r4, %[a]\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r5, %[a]\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r6, %[a]\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r7, %[a]\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r8, %[a]\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r9, %[a]\n\t" "STM r12!, {r10, r11, lr}\n\t" - "SUB r12, r12, #0x20\n\t" + "SUB r12, r12, #32\n\t" "MOV %[a], #0x6329\n\t" "MOVT %[a], #0x5d08\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM r12, {r10, r11}\n\t" "UMLAL r10, lr, %[b], %[a]\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, %[c], %[a]\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r4, %[a]\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r5, %[a]\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r6, %[a]\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r7, %[a]\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r8, %[a]\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r9, %[a]\n\t" "STM r12!, {r10, r11, lr}\n\t" - "SUB r12, r12, #0x20\n\t" + "SUB r12, r12, #32\n\t" "MOV %[a], #0x621\n\t" "MOVT %[a], #0xeb21\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM r12, {r10, r11}\n\t" "UMLAL r10, lr, %[b], %[a]\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, %[c], %[a]\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r4, %[a]\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r5, %[a]\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r6, %[a]\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r7, %[a]\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADDS r10, r10, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r10, lr, r8, %[a]\n\t" "ADDS r11, r11, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r11, lr, r9, %[a]\n\t" "STM r12!, {r10, r11, lr}\n\t" - "SUB r12, r12, #0x20\n\t" + "SUB r12, r12, #32\n\t" /* Subtract at 4 * 32 */ "LDM r12, {r10, r11}\n\t" "SUBS r10, r10, %[b]\n\t" @@ -6743,7 +6742,7 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "SBCS r10, r10, r8\n\t" "SBC r11, r11, r9\n\t" "STM r12!, {r10, r11}\n\t" - "SUB r12, r12, #0x24\n\t" + "SUB r12, r12, #36\n\t" "ASR lr, r11, #25\n\t" /* Conditionally subtract order starting at bit 125 */ "MOV %[a], #0xa0000000\n\t" @@ -6772,19 +6771,19 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADCS r10, r10, r5\n\t" - "ADCS r11, r11, #0x0\n\t" + "ADCS r11, r11, #0\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" - "ADCS r10, r10, #0x0\n\t" - "ADCS r11, r11, #0x0\n\t" + "ADCS r10, r10, #0\n\t" + "ADCS r11, r11, #0\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10}\n\t" - "ADCS r10, r10, #0x0\n\t" + "ADCS r10, r10, #0\n\t" "STM r12!, {r10}\n\t" - "SUB %[s], %[s], #0x10\n\t" + "SUB %[s], %[s], #16\n\t" "MOV r12, sp\n\t" /* Load bits 252-376 */ - "ADD r12, r12, #0x1c\n\t" + "ADD r12, r12, #28\n\t" "LDM r12, {r1, r2, r3, r4, r5}\n\t" "LSL r5, r5, #4\n\t" "ORR r5, r5, r4, LSR #28\n\t" @@ -6795,89 +6794,89 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "LSL %[b], %[b], #4\n\t" "ORR %[b], %[b], %[a], LSR #28\n\t" "BFC r5, #29, #3\n\t" - "SUB r12, r12, #0x1c\n\t" + "SUB r12, r12, #28\n\t" /* Sub product of top 4 words and order */ "MOV %[s], sp\n\t" /* * -5cf5d3ed */ "MOV %[a], #0x2c13\n\t" "MOVT %[a], #0xa30a\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, lr, %[b], %[a]\n\t" "ADDS r7, r7, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r7, lr, %[c], %[a]\n\t" "ADDS r8, r8, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r8, lr, r4, %[a]\n\t" "ADDS r9, r9, lr\n\t" - "MOV lr, #0x0\n\t" - "ADC lr, lr, #0x0\n\t" + "MOV lr, #0\n\t" + "ADC lr, lr, #0\n\t" "UMLAL r9, lr, r5, %[a]\n\t" "STM %[s], {r6, r7, r8, r9}\n\t" - "ADD %[s], %[s], #0x4\n\t" + "ADD %[s], %[s], #4\n\t" /* * -5812631b */ "MOV %[a], #0x9ce5\n\t" "MOVT %[a], #0xa7ed\n\t" - "MOV r10, #0x0\n\t" + "MOV r10, #0\n\t" "LDM %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r10, %[b], %[a]\n\t" "ADDS r7, r7, r10\n\t" - "MOV r10, #0x0\n\t" - "ADC r10, r10, #0x0\n\t" + "MOV r10, #0\n\t" + "ADC r10, r10, #0\n\t" "UMLAL r7, r10, %[c], %[a]\n\t" "ADDS r8, r8, r10\n\t" - "MOV r10, #0x0\n\t" - "ADC r10, r10, #0x0\n\t" + "MOV r10, #0\n\t" + "ADC r10, r10, #0\n\t" "UMLAL r8, r10, r4, %[a]\n\t" "ADDS r9, r9, r10\n\t" - "MOV r10, #0x0\n\t" - "ADC r10, r10, #0x0\n\t" + "MOV r10, #0\n\t" + "ADC r10, r10, #0\n\t" "UMLAL r9, r10, r5, %[a]\n\t" "STM %[s], {r6, r7, r8, r9}\n\t" - "ADD %[s], %[s], #0x4\n\t" + "ADD %[s], %[s], #4\n\t" /* * -a2f79cd7 */ "MOV %[a], #0x6329\n\t" "MOVT %[a], #0x5d08\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "LDM %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r11, %[b], %[a]\n\t" "ADDS r7, r7, r11\n\t" - "MOV r11, #0x0\n\t" - "ADC r11, r11, #0x0\n\t" + "MOV r11, #0\n\t" + "ADC r11, r11, #0\n\t" "UMLAL r7, r11, %[c], %[a]\n\t" "ADDS r8, r8, r11\n\t" - "MOV r11, #0x0\n\t" - "ADC r11, r11, #0x0\n\t" + "MOV r11, #0\n\t" + "ADC r11, r11, #0\n\t" "UMLAL r8, r11, r4, %[a]\n\t" "ADDS r9, r9, r11\n\t" - "MOV r11, #0x0\n\t" - "ADC r11, r11, #0x0\n\t" + "MOV r11, #0\n\t" + "ADC r11, r11, #0\n\t" "UMLAL r9, r11, r5, %[a]\n\t" "STM %[s], {r6, r7, r8, r9}\n\t" - "ADD %[s], %[s], #0x4\n\t" + "ADD %[s], %[s], #4\n\t" /* * -14def9df */ "MOV %[a], #0x621\n\t" "MOVT %[a], #0xeb21\n\t" - "MOV r12, #0x0\n\t" + "MOV r12, #0\n\t" "LDM %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r12, %[b], %[a]\n\t" "ADDS r7, r7, r12\n\t" - "MOV r12, #0x0\n\t" - "ADC r12, r12, #0x0\n\t" + "MOV r12, #0\n\t" + "ADC r12, r12, #0\n\t" "UMLAL r7, r12, %[c], %[a]\n\t" "ADDS r8, r8, r12\n\t" - "MOV r12, #0x0\n\t" - "ADC r12, r12, #0x0\n\t" + "MOV r12, #0\n\t" + "ADC r12, r12, #0\n\t" "UMLAL r8, r12, r4, %[a]\n\t" "ADDS r9, r9, r12\n\t" - "MOV r12, #0x0\n\t" - "ADC r12, r12, #0x0\n\t" + "MOV r12, #0\n\t" + "ADC r12, r12, #0\n\t" "UMLAL r9, r12, r5, %[a]\n\t" "STM %[s], {r6, r7, r8, r9}\n\t" - "ADD %[s], %[s], #0x4\n\t" + "ADD %[s], %[s], #4\n\t" /* Add overflows at 4 * 32 */ "LDM %[s], {r6, r7, r8, r9}\n\t" "BFC r9, #28, #4\n\t" @@ -6891,7 +6890,7 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "SBCS r8, r8, r4\n\t" "SBCS r9, r9, r5\n\t" "SBC %[a], %[a], %[a]\n\t" - "SUB %[s], %[s], #0x10\n\t" + "SUB %[s], %[s], #16\n\t" "LDM %[s], {r2, r3, r4, r5}\n\t" "MOV r10, #0xd3ed\n\t" "MOVT r10, #0x5cf5\n\t" @@ -6909,10 +6908,10 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "ADCS %[c], %[c], r11\n\t" "ADCS r4, r4, r12\n\t" "ADCS r5, r5, lr\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADCS r7, r7, #0x0\n\t" + "ADCS r6, r6, #0\n\t" + "ADCS r7, r7, #0\n\t" "AND %[a], %[a], #0x10000000\n\t" - "ADCS r8, r8, #0x0\n\t" + "ADCS r8, r8, #0\n\t" "ADC r9, r9, %[a]\n\t" "BFC r9, #28, #4\n\t" "LDR %[s], [sp, #68]\n\t" @@ -6981,54 +6980,54 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "UMAAL r9, r10, %[b], r4\n\t" "UMAAL r10, r11, %[c], r4\n\t" "LDM lr, {r4, r5, r6, r7}\n\t" - "MOV r12, #0x0\n\t" + "MOV r12, #0\n\t" "UMLAL r8, r12, %[s], r4\n\t" "UMAAL r9, r12, %[a], r4\n\t" "UMAAL r10, r12, %[b], r4\n\t" "UMAAL r11, r12, %[c], r4\n\t" - "MOV r4, #0x0\n\t" + "MOV r4, #0\n\t" "UMLAL r9, r4, %[s], r5\n\t" "UMAAL r10, r4, %[a], r5\n\t" "UMAAL r11, r4, %[b], r5\n\t" "UMAAL r12, r4, %[c], r5\n\t" - "MOV r5, #0x0\n\t" + "MOV r5, #0\n\t" "UMLAL r10, r5, %[s], r6\n\t" "UMAAL r11, r5, %[a], r6\n\t" "UMAAL r12, r5, %[b], r6\n\t" "UMAAL r4, r5, %[c], r6\n\t" - "MOV r6, #0x0\n\t" + "MOV r6, #0\n\t" "UMLAL r11, r6, %[s], r7\n\t" "LDR %[s], [sp, #72]\n\t" "UMAAL r12, r6, %[a], r7\n\t" - "ADD %[s], %[s], #0x10\n\t" + "ADD %[s], %[s], #16\n\t" "UMAAL r4, r6, %[b], r7\n\t" - "SUB lr, lr, #0x10\n\t" + "SUB lr, lr, #16\n\t" "UMAAL r5, r6, %[c], r7\n\t" "LDM %[s], {r0, r1, r2, r3}\n\t" "STR r6, [sp, #64]\n\t" "LDM lr!, {r6}\n\t" - "MOV r7, #0x0\n\t" + "MOV r7, #0\n\t" "UMLAL r8, r7, %[s], r6\n\t" "UMAAL r9, r7, %[a], r6\n\t" "STR r8, [sp, #16]\n\t" "UMAAL r10, r7, %[b], r6\n\t" "UMAAL r11, r7, %[c], r6\n\t" "LDM lr!, {r6}\n\t" - "MOV r8, #0x0\n\t" + "MOV r8, #0\n\t" "UMLAL r9, r8, %[s], r6\n\t" "UMAAL r10, r8, %[a], r6\n\t" "STR r9, [sp, #20]\n\t" "UMAAL r11, r8, %[b], r6\n\t" "UMAAL r12, r8, %[c], r6\n\t" "LDM lr!, {r6}\n\t" - "MOV r9, #0x0\n\t" + "MOV r9, #0\n\t" "UMLAL r10, r9, %[s], r6\n\t" "UMAAL r11, r9, %[a], r6\n\t" "STR r10, [sp, #24]\n\t" "UMAAL r12, r9, %[b], r6\n\t" "UMAAL r4, r9, %[c], r6\n\t" "LDM lr!, {r6}\n\t" - "MOV r10, #0x0\n\t" + "MOV r10, #0\n\t" "UMLAL r11, r10, %[s], r6\n\t" "UMAAL r12, r10, %[a], r6\n\t" "STR r11, [sp, #28]\n\t" @@ -7055,7 +7054,7 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "UMAAL r8, r9, %[c], r11\n\t" "UMAAL r9, r10, %[c], lr\n\t" "MOV %[c], r12\n\t" - "ADD lr, sp, #0x20\n\t" + "ADD lr, sp, #32\n\t" "STM lr, {%[c], r4, r5, r6, r7, r8, r9, r10}\n\t" "MOV %[s], sp\n\t" /* Add c to a * b */ @@ -7074,15 +7073,15 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "MOV %[a], r9\n\t" "STM %[s]!, {%[b], %[c], r4, r5, r6, r7, r8, r9}\n\t" "LDM %[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" - "ADCS %[b], %[b], #0x0\n\t" - "ADCS %[c], %[c], #0x0\n\t" - "ADCS r4, r4, #0x0\n\t" - "ADCS r5, r5, #0x0\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADC r9, r9, #0x0\n\t" - "SUB %[s], %[s], #0x20\n\t" + "ADCS %[b], %[b], #0\n\t" + "ADCS %[c], %[c], #0\n\t" + "ADCS r4, r4, #0\n\t" + "ADCS r5, r5, #0\n\t" + "ADCS r6, r6, #0\n\t" + "ADCS r7, r7, #0\n\t" + "ADCS r8, r8, #0\n\t" + "ADC r9, r9, #0\n\t" + "SUB %[s], %[s], #32\n\t" /* Get 252..503 and 504..507 */ "LSR lr, r9, #24\n\t" "LSL r9, r9, #4\n\t" @@ -7107,7 +7106,7 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "MOVT r10, #0xa30a\n\t" "MOV r11, #0x9ce5\n\t" "MOVT r11, #0xa7ed\n\t" - "MOV %[a], #0x0\n\t" + "MOV %[a], #0\n\t" "UMLAL %[b], %[a], r10, lr\n\t" "UMAAL %[c], %[a], r11, lr\n\t" "MOV r10, #0x6329\n\t" @@ -7117,18 +7116,18 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "UMAAL r4, %[a], r10, lr\n\t" "UMAAL r5, %[a], r11, lr\n\t" "ADDS r6, r6, %[a]\n\t" - "ADCS r7, r7, #0x0\n\t" - "ADCS r8, r8, #0x0\n\t" - "ADC r9, r9, #0x0\n\t" + "ADCS r7, r7, #0\n\t" + "ADCS r8, r8, #0\n\t" + "ADC r9, r9, #0\n\t" "SUBS r6, r6, lr\n\t" - "SBCS r7, r7, #0x0\n\t" - "SBCS r8, r8, #0x0\n\t" - "SBC r9, r9, #0x0\n\t" + "SBCS r7, r7, #0\n\t" + "SBCS r8, r8, #0\n\t" + "SBC r9, r9, #0\n\t" /* Sub product of top 8 words and order */ "MOV r12, sp\n\t" "MOV %[a], #0x2c13\n\t" "MOVT %[a], #0xa30a\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM %[s]!, {r10, r11}\n\t" "UMLAL r10, lr, %[b], %[a]\n\t" "UMAAL r11, lr, %[c], %[a]\n\t" @@ -7146,11 +7145,11 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "BFC r11, #28, #4\n\t" "UMAAL r11, lr, r9, %[a]\n\t" "STM r12!, {r10, r11, lr}\n\t" - "SUB %[s], %[s], #0x10\n\t" - "SUB r12, r12, #0x20\n\t" + "SUB %[s], %[s], #16\n\t" + "SUB r12, r12, #32\n\t" "MOV %[a], #0x9ce5\n\t" "MOVT %[a], #0xa7ed\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM r12, {r10, r11}\n\t" "UMLAL r10, lr, %[b], %[a]\n\t" "UMAAL r11, lr, %[c], %[a]\n\t" @@ -7167,10 +7166,10 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "UMAAL r10, lr, r8, %[a]\n\t" "UMAAL r11, lr, r9, %[a]\n\t" "STM r12!, {r10, r11, lr}\n\t" - "SUB r12, r12, #0x20\n\t" + "SUB r12, r12, #32\n\t" "MOV %[a], #0x6329\n\t" "MOVT %[a], #0x5d08\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM r12, {r10, r11}\n\t" "UMLAL r10, lr, %[b], %[a]\n\t" "UMAAL r11, lr, %[c], %[a]\n\t" @@ -7187,10 +7186,10 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "UMAAL r10, lr, r8, %[a]\n\t" "UMAAL r11, lr, r9, %[a]\n\t" "STM r12!, {r10, r11, lr}\n\t" - "SUB r12, r12, #0x20\n\t" + "SUB r12, r12, #32\n\t" "MOV %[a], #0x621\n\t" "MOVT %[a], #0xeb21\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM r12, {r10, r11}\n\t" "UMLAL r10, lr, %[b], %[a]\n\t" "UMAAL r11, lr, %[c], %[a]\n\t" @@ -7207,7 +7206,7 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "UMAAL r10, lr, r8, %[a]\n\t" "UMAAL r11, lr, r9, %[a]\n\t" "STM r12!, {r10, r11, lr}\n\t" - "SUB r12, r12, #0x20\n\t" + "SUB r12, r12, #32\n\t" /* Subtract at 4 * 32 */ "LDM r12, {r10, r11}\n\t" "SUBS r10, r10, %[b]\n\t" @@ -7225,7 +7224,7 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "SBCS r10, r10, r8\n\t" "SBC r11, r11, r9\n\t" "STM r12!, {r10, r11}\n\t" - "SUB r12, r12, #0x24\n\t" + "SUB r12, r12, #36\n\t" "ASR lr, r11, #25\n\t" /* Conditionally subtract order starting at bit 125 */ "MOV %[a], #0xa0000000\n\t" @@ -7254,19 +7253,19 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" "ADCS r10, r10, r5\n\t" - "ADCS r11, r11, #0x0\n\t" + "ADCS r11, r11, #0\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10, r11}\n\t" - "ADCS r10, r10, #0x0\n\t" - "ADCS r11, r11, #0x0\n\t" + "ADCS r10, r10, #0\n\t" + "ADCS r11, r11, #0\n\t" "STM r12!, {r10, r11}\n\t" "LDM r12, {r10}\n\t" - "ADCS r10, r10, #0x0\n\t" + "ADCS r10, r10, #0\n\t" "STM r12!, {r10}\n\t" - "SUB %[s], %[s], #0x10\n\t" + "SUB %[s], %[s], #16\n\t" "MOV r12, sp\n\t" /* Load bits 252-376 */ - "ADD r12, r12, #0x1c\n\t" + "ADD r12, r12, #28\n\t" "LDM r12, {r1, r2, r3, r4, r5}\n\t" "LSL r5, r5, #4\n\t" "ORR r5, r5, r4, LSR #28\n\t" @@ -7277,53 +7276,53 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "LSL %[b], %[b], #4\n\t" "ORR %[b], %[b], %[a], LSR #28\n\t" "BFC r5, #29, #3\n\t" - "SUB r12, r12, #0x1c\n\t" + "SUB r12, r12, #28\n\t" /* Sub product of top 4 words and order */ "MOV %[s], sp\n\t" /* * -5cf5d3ed */ "MOV %[a], #0x2c13\n\t" "MOVT %[a], #0xa30a\n\t" - "MOV lr, #0x0\n\t" + "MOV lr, #0\n\t" "LDM %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, lr, %[b], %[a]\n\t" "UMAAL r7, lr, %[c], %[a]\n\t" "UMAAL r8, lr, r4, %[a]\n\t" "UMAAL r9, lr, r5, %[a]\n\t" "STM %[s], {r6, r7, r8, r9}\n\t" - "ADD %[s], %[s], #0x4\n\t" + "ADD %[s], %[s], #4\n\t" /* * -5812631b */ "MOV %[a], #0x9ce5\n\t" "MOVT %[a], #0xa7ed\n\t" - "MOV r10, #0x0\n\t" + "MOV r10, #0\n\t" "LDM %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r10, %[b], %[a]\n\t" "UMAAL r7, r10, %[c], %[a]\n\t" "UMAAL r8, r10, r4, %[a]\n\t" "UMAAL r9, r10, r5, %[a]\n\t" "STM %[s], {r6, r7, r8, r9}\n\t" - "ADD %[s], %[s], #0x4\n\t" + "ADD %[s], %[s], #4\n\t" /* * -a2f79cd7 */ "MOV %[a], #0x6329\n\t" "MOVT %[a], #0x5d08\n\t" - "MOV r11, #0x0\n\t" + "MOV r11, #0\n\t" "LDM %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r11, %[b], %[a]\n\t" "UMAAL r7, r11, %[c], %[a]\n\t" "UMAAL r8, r11, r4, %[a]\n\t" "UMAAL r9, r11, r5, %[a]\n\t" "STM %[s], {r6, r7, r8, r9}\n\t" - "ADD %[s], %[s], #0x4\n\t" + "ADD %[s], %[s], #4\n\t" /* * -14def9df */ "MOV %[a], #0x621\n\t" "MOVT %[a], #0xeb21\n\t" - "MOV r12, #0x0\n\t" + "MOV r12, #0\n\t" "LDM %[s], {r6, r7, r8, r9}\n\t" "UMLAL r6, r12, %[b], %[a]\n\t" "UMAAL r7, r12, %[c], %[a]\n\t" "UMAAL r8, r12, r4, %[a]\n\t" "UMAAL r9, r12, r5, %[a]\n\t" "STM %[s], {r6, r7, r8, r9}\n\t" - "ADD %[s], %[s], #0x4\n\t" + "ADD %[s], %[s], #4\n\t" /* Add overflows at 4 * 32 */ "LDM %[s], {r6, r7, r8, r9}\n\t" "BFC r9, #28, #4\n\t" @@ -7337,7 +7336,7 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "SBCS r8, r8, r4\n\t" "SBCS r9, r9, r5\n\t" "SBC %[a], %[a], %[a]\n\t" - "SUB %[s], %[s], #0x10\n\t" + "SUB %[s], %[s], #16\n\t" "LDM %[s], {r2, r3, r4, r5}\n\t" "MOV r10, #0xd3ed\n\t" "MOVT r10, #0x5cf5\n\t" @@ -7355,10 +7354,10 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "ADCS %[c], %[c], r11\n\t" "ADCS r4, r4, r12\n\t" "ADCS r5, r5, lr\n\t" - "ADCS r6, r6, #0x0\n\t" - "ADCS r7, r7, #0x0\n\t" + "ADCS r6, r6, #0\n\t" + "ADCS r7, r7, #0\n\t" "AND %[a], %[a], #0x10000000\n\t" - "ADCS r8, r8, #0x0\n\t" + "ADCS r8, r8, #0\n\t" "ADC r9, r9, %[a]\n\t" "BFC r9, #28, #4\n\t" "LDR %[s], [sp, #68]\n\t" diff --git a/wolfcrypt/src/port/arm/thumb2-mlkem-asm.S b/wolfcrypt/src/port/arm/thumb2-mlkem-asm.S index 49b638a67cf..936cd6e3195 100644 --- a/wolfcrypt/src/port/arm/thumb2-mlkem-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-mlkem-asm.S @@ -69,13 +69,13 @@ L_mlkem_thumb2_ntt_zetas: .type mlkem_thumb2_ntt, %function mlkem_thumb2_ntt: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} - SUB sp, sp, #0x8 + SUB sp, sp, #8 ADR r1, L_mlkem_thumb2_ntt_zetas #ifndef WOLFSSL_ARM_ARCH_7M MOV r12, #0xd01 MOVT r12, #0xcff #endif /* !WOLFSSL_ARM_ARCH_7M */ - MOV r2, #0x10 + MOV r2, #16 L_mlkem_thumb2_ntt_loop_123: STR r2, [sp] LDRH lr, [r1, #2] @@ -507,19 +507,19 @@ L_mlkem_thumb2_ntt_loop_123: STR r8, [r0, #384] STR r9, [r0, #448] LDR r2, [sp] - SUBS r2, r2, #0x1 - ADD r0, r0, #0x4 + SUBS r2, r2, #1 + ADD r0, r0, #4 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_mlkem_thumb2_ntt_loop_123 #else BNE.N L_mlkem_thumb2_ntt_loop_123 #endif SUB r0, r0, #0x40 - MOV r3, #0x0 + MOV r3, #0 L_mlkem_thumb2_ntt_loop_4_j: STR r3, [sp, #4] ADD lr, r1, r3, LSR #4 - MOV r2, #0x4 + MOV r2, #4 LDR lr, [lr, #16] L_mlkem_thumb2_ntt_loop_4_i: STR r2, [sp] @@ -676,8 +676,8 @@ L_mlkem_thumb2_ntt_loop_4_i: STR r8, [r0, #96] STR r9, [r0, #112] LDRD r2, r3, [sp] - SUBS r2, r2, #0x1 - ADD r0, r0, #0x4 + SUBS r2, r2, #1 + ADD r0, r0, #4 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_mlkem_thumb2_ntt_loop_4_i #else @@ -692,7 +692,7 @@ L_mlkem_thumb2_ntt_loop_4_i: BNE.N L_mlkem_thumb2_ntt_loop_4_j #endif SUB r0, r0, #0x200 - MOV r3, #0x0 + MOV r3, #0 L_mlkem_thumb2_ntt_loop_567: ADD lr, r1, r3, LSR #3 STR r3, [sp, #4] @@ -1310,15 +1310,15 @@ L_mlkem_thumb2_ntt_loop_567: STR r8, [r0, #24] STR r9, [r0, #28] LDR r3, [sp, #4] - ADD r3, r3, #0x10 + ADD r3, r3, #16 RSBS r10, r3, #0x100 - ADD r0, r0, #0x20 + ADD r0, r0, #32 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_mlkem_thumb2_ntt_loop_567 #else BNE.N L_mlkem_thumb2_ntt_loop_567 #endif - ADD sp, sp, #0x8 + ADD sp, sp, #8 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 1270 */ .size mlkem_thumb2_ntt,.-mlkem_thumb2_ntt @@ -1358,13 +1358,13 @@ L_mlkem_invntt_zetas_inv: .type mlkem_thumb2_invntt, %function mlkem_thumb2_invntt: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} - SUB sp, sp, #0x8 + SUB sp, sp, #8 ADR r1, L_mlkem_invntt_zetas_inv #ifndef WOLFSSL_ARM_ARCH_7M MOV r12, #0xd01 MOVT r12, #0xcff #endif /* !WOLFSSL_ARM_ARCH_7M */ - MOV r3, #0x0 + MOV r3, #0 L_mlkem_invntt_loop_765: ADD lr, r1, r3, LSR #1 STR r3, [sp, #4] @@ -1953,20 +1953,20 @@ L_mlkem_invntt_loop_765: STR r8, [r0, #24] STR r9, [r0, #28] LDR r3, [sp, #4] - ADD r3, r3, #0x10 + ADD r3, r3, #16 RSBS r10, r3, #0x100 - ADD r0, r0, #0x20 + ADD r0, r0, #32 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_mlkem_invntt_loop_765 #else BNE.N L_mlkem_invntt_loop_765 #endif SUB r0, r0, #0x200 - MOV r3, #0x0 + MOV r3, #0 L_mlkem_invntt_loop_4_j: STR r3, [sp, #4] ADD lr, r1, r3, LSR #4 - MOV r2, #0x4 + MOV r2, #4 LDR lr, [lr, #224] L_mlkem_invntt_loop_4_i: STR r2, [sp] @@ -2143,8 +2143,8 @@ L_mlkem_invntt_loop_4_i: STR r8, [r0, #96] STR r9, [r0, #112] LDRD r2, r3, [sp] - SUBS r2, r2, #0x1 - ADD r0, r0, #0x4 + SUBS r2, r2, #1 + ADD r0, r0, #4 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_mlkem_invntt_loop_4_i #else @@ -2159,7 +2159,7 @@ L_mlkem_invntt_loop_4_i: BNE.N L_mlkem_invntt_loop_4_j #endif SUB r0, r0, #0x200 - MOV r2, #0x10 + MOV r2, #16 L_mlkem_invntt_loop_321: STR r2, [sp] LDRH lr, [r1, #2] @@ -2967,14 +2967,14 @@ L_mlkem_invntt_loop_321: STR r8, [r0, #384] STR r9, [r0, #448] LDR r2, [sp] - SUBS r2, r2, #0x1 - ADD r0, r0, #0x4 + SUBS r2, r2, #1 + ADD r0, r0, #4 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_mlkem_invntt_loop_321 #else BNE.N L_mlkem_invntt_loop_321 #endif - ADD sp, sp, #0x8 + ADD sp, sp, #8 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 1629 */ .size mlkem_thumb2_invntt,.-mlkem_thumb2_invntt @@ -3020,12 +3020,12 @@ mlkem_thumb2_basemul_mont: MOV r12, #0xd01 MOVT r12, #0xcff #endif /* !WOLFSSL_ARM_ARCH_7M */ - MOV r8, #0x0 + MOV r8, #0 L_mlkem_basemul_mont_loop: LDM r1!, {r4, r5} LDM r2!, {r6, r7} LDR lr, [r3, r8] - ADD r8, r8, #0x2 + ADD r8, r8, #2 PUSH {r8} CMP r8, #0x80 #ifndef WOLFSSL_ARM_ARCH_7M @@ -3035,7 +3035,7 @@ L_mlkem_basemul_mont_loop: SMULTB r11, r12, r10 SMLABB r8, r12, r9, r8 SMLABB r10, r12, r11, r10 - RSB r11, lr, #0x0 + RSB r11, lr, #0 SMULBT r8, lr, r8 SMULBT r10, r11, r10 SMLABB r8, r4, r6, r8 @@ -3071,7 +3071,7 @@ L_mlkem_basemul_mont_loop: SBFX r11, r11, #0, #16 MLA r8, r12, r9, r8 MLA r10, r12, r11, r10 - RSB r11, lr, #0x0 + RSB r11, lr, #0 SBFX r9, lr, #0, #16 SBFX r11, r11, #0, #16 ASR r8, r8, #16 @@ -3143,12 +3143,12 @@ mlkem_thumb2_basemul_mont_add: MOV r12, #0xd01 MOVT r12, #0xcff #endif /* !WOLFSSL_ARM_ARCH_7M */ - MOV r8, #0x0 + MOV r8, #0 L_mlkem_thumb2_basemul_mont_add_loop: LDM r1!, {r4, r5} LDM r2!, {r6, r7} LDR lr, [r3, r8] - ADD r8, r8, #0x2 + ADD r8, r8, #2 PUSH {r8} CMP r8, #0x80 #ifndef WOLFSSL_ARM_ARCH_7M @@ -3158,7 +3158,7 @@ L_mlkem_thumb2_basemul_mont_add_loop: SMULTB r11, r12, r10 SMLABB r8, r12, r9, r8 SMLABB r10, r12, r11, r10 - RSB r11, lr, #0x0 + RSB r11, lr, #0 SMULBT r8, lr, r8 SMULBT r10, r11, r10 SMLABB r8, r4, r6, r8 @@ -3197,7 +3197,7 @@ L_mlkem_thumb2_basemul_mont_add_loop: SBFX r11, r11, #0, #16 MLA r8, r12, r9, r8 MLA r10, r12, r11, r10 - RSB r11, lr, #0x0 + RSB r11, lr, #0 SBFX r9, lr, #0, #16 SBFX r11, r11, #0, #16 ASR r8, r8, #16 @@ -3346,7 +3346,7 @@ L_mlkem_thumb2_csubq_loop: BFI r5, r10, #0, #16 #endif /* !WOLFSSL_ARM_ARCH_7M */ STM r0!, {r2, r3, r4, r5} - SUBS r1, r1, #0x8 + SUBS r1, r1, #8 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_mlkem_thumb2_csubq_loop #else @@ -3362,9 +3362,9 @@ L_mlkem_thumb2_csubq_loop: mlkem_thumb2_rej_uniform: PUSH {r4, r5, r6, r7, r8, r9, r10, lr} MOV r8, #0xd01 - MOV r9, #0x0 + MOV r9, #0 L_mlkem_thumb2_rej_uniform_loop_no_fail: - CMP r1, #0x8 + CMP r1, #8 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BLT L_mlkem_thumb2_rej_uniform_done_no_fail #else @@ -3421,7 +3421,7 @@ L_mlkem_thumb2_rej_uniform_loop_no_fail: LSR r10, r10, #31 SUB r1, r1, r10 ADD r9, r9, r10, LSL #1 - SUBS r3, r3, #0xc + SUBS r3, r3, #12 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_mlkem_thumb2_rej_uniform_loop_no_fail #else @@ -3433,7 +3433,7 @@ L_mlkem_thumb2_rej_uniform_loop_no_fail: B.N L_mlkem_thumb2_rej_uniform_done #endif L_mlkem_thumb2_rej_uniform_done_no_fail: - CMP r1, #0x0 + CMP r1, #0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_mlkem_thumb2_rej_uniform_done #else @@ -3449,8 +3449,8 @@ L_mlkem_thumb2_rej_uniform_loop: BGE.N L_mlkem_thumb2_rej_uniform_fail_0 #endif STRH r7, [r0, r9] - SUBS r1, r1, #0x1 - ADD r9, r9, #0x2 + SUBS r1, r1, #1 + ADD r9, r9, #2 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_mlkem_thumb2_rej_uniform_done #else @@ -3465,8 +3465,8 @@ L_mlkem_thumb2_rej_uniform_fail_0: BGE.N L_mlkem_thumb2_rej_uniform_fail_1 #endif STRH r7, [r0, r9] - SUBS r1, r1, #0x1 - ADD r9, r9, #0x2 + SUBS r1, r1, #1 + ADD r9, r9, #2 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_mlkem_thumb2_rej_uniform_done #else @@ -3482,8 +3482,8 @@ L_mlkem_thumb2_rej_uniform_fail_1: BGE.N L_mlkem_thumb2_rej_uniform_fail_2 #endif STRH r7, [r0, r9] - SUBS r1, r1, #0x1 - ADD r9, r9, #0x2 + SUBS r1, r1, #1 + ADD r9, r9, #2 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_mlkem_thumb2_rej_uniform_done #else @@ -3498,8 +3498,8 @@ L_mlkem_thumb2_rej_uniform_fail_2: BGE.N L_mlkem_thumb2_rej_uniform_fail_3 #endif STRH r7, [r0, r9] - SUBS r1, r1, #0x1 - ADD r9, r9, #0x2 + SUBS r1, r1, #1 + ADD r9, r9, #2 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_mlkem_thumb2_rej_uniform_done #else @@ -3514,8 +3514,8 @@ L_mlkem_thumb2_rej_uniform_fail_3: BGE.N L_mlkem_thumb2_rej_uniform_fail_4 #endif STRH r7, [r0, r9] - SUBS r1, r1, #0x1 - ADD r9, r9, #0x2 + SUBS r1, r1, #1 + ADD r9, r9, #2 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_mlkem_thumb2_rej_uniform_done #else @@ -3531,8 +3531,8 @@ L_mlkem_thumb2_rej_uniform_fail_4: BGE.N L_mlkem_thumb2_rej_uniform_fail_5 #endif STRH r7, [r0, r9] - SUBS r1, r1, #0x1 - ADD r9, r9, #0x2 + SUBS r1, r1, #1 + ADD r9, r9, #2 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_mlkem_thumb2_rej_uniform_done #else @@ -3547,8 +3547,8 @@ L_mlkem_thumb2_rej_uniform_fail_5: BGE.N L_mlkem_thumb2_rej_uniform_fail_6 #endif STRH r7, [r0, r9] - SUBS r1, r1, #0x1 - ADD r9, r9, #0x2 + SUBS r1, r1, #1 + ADD r9, r9, #2 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_mlkem_thumb2_rej_uniform_done #else @@ -3563,15 +3563,15 @@ L_mlkem_thumb2_rej_uniform_fail_6: BGE.N L_mlkem_thumb2_rej_uniform_fail_7 #endif STRH r7, [r0, r9] - SUBS r1, r1, #0x1 - ADD r9, r9, #0x2 + SUBS r1, r1, #1 + ADD r9, r9, #2 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_mlkem_thumb2_rej_uniform_done #else BEQ.N L_mlkem_thumb2_rej_uniform_done #endif L_mlkem_thumb2_rej_uniform_fail_7: - SUBS r3, r3, #0xc + SUBS r3, r3, #12 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BGT L_mlkem_thumb2_rej_uniform_loop #else diff --git a/wolfcrypt/src/port/arm/thumb2-mlkem-asm_c.c b/wolfcrypt/src/port/arm/thumb2-mlkem-asm_c.c index 30a403959c4..6eac784c4de 100644 --- a/wolfcrypt/src/port/arm/thumb2-mlkem-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-mlkem-asm_c.c @@ -85,13 +85,13 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "SUB sp, sp, #0x8\n\t" + "SUB sp, sp, #8\n\t" "MOV r1, %[L_mlkem_thumb2_ntt_zetas]\n\t" #ifndef WOLFSSL_ARM_ARCH_7M "MOV r12, #0xd01\n\t" "MOVT r12, #0xcff\n\t" #endif /* !WOLFSSL_ARM_ARCH_7M */ - "MOV r2, #0x10\n\t" + "MOV r2, #16\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_mlkem_thumb2_ntt_loop_123:\n\t" @@ -528,8 +528,8 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r) "STR r8, [%[r], #384]\n\t" "STR r9, [%[r], #448]\n\t" "LDR r2, [sp]\n\t" - "SUBS r2, r2, #0x1\n\t" - "ADD %[r], %[r], #0x4\n\t" + "SUBS r2, r2, #1\n\t" + "ADD %[r], %[r], #4\n\t" #if defined(__GNUC__) "BNE L_mlkem_thumb2_ntt_loop_123_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -538,7 +538,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r) "BNE.N L_mlkem_thumb2_ntt_loop_123_%=\n\t" #endif "SUB %[r], %[r], #0x40\n\t" - "MOV r3, #0x0\n\t" + "MOV r3, #0\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_mlkem_thumb2_ntt_loop_4_j:\n\t" @@ -547,7 +547,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r) #endif "STR r3, [sp, #4]\n\t" "ADD lr, r1, r3, LSR #4\n\t" - "MOV r2, #0x4\n\t" + "MOV r2, #4\n\t" "LDR lr, [lr, #16]\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -709,8 +709,8 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r) "STR r8, [%[r], #96]\n\t" "STR r9, [%[r], #112]\n\t" "LDRD r2, r3, [sp]\n\t" - "SUBS r2, r2, #0x1\n\t" - "ADD %[r], %[r], #0x4\n\t" + "SUBS r2, r2, #1\n\t" + "ADD %[r], %[r], #4\n\t" #if defined(__GNUC__) "BNE L_mlkem_thumb2_ntt_loop_4_i_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -729,7 +729,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r) "BNE.N L_mlkem_thumb2_ntt_loop_4_j_%=\n\t" #endif "SUB %[r], %[r], #0x200\n\t" - "MOV r3, #0x0\n\t" + "MOV r3, #0\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_mlkem_thumb2_ntt_loop_567:\n\t" @@ -1352,9 +1352,9 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r) "STR r8, [%[r], #24]\n\t" "STR r9, [%[r], #28]\n\t" "LDR r3, [sp, #4]\n\t" - "ADD r3, r3, #0x10\n\t" + "ADD r3, r3, #16\n\t" "RSBS r10, r3, #0x100\n\t" - "ADD %[r], %[r], #0x20\n\t" + "ADD %[r], %[r], #32\n\t" #if defined(__GNUC__) "BNE L_mlkem_thumb2_ntt_loop_567_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1362,7 +1362,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r) #else "BNE.N L_mlkem_thumb2_ntt_loop_567_%=\n\t" #endif - "ADD sp, sp, #0x8\n\t" + "ADD sp, sp, #8\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [L_mlkem_thumb2_ntt_zetas] "+r" (L_mlkem_thumb2_ntt_zetas_c) @@ -1412,13 +1412,13 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "SUB sp, sp, #0x8\n\t" + "SUB sp, sp, #8\n\t" "MOV r1, %[L_mlkem_invntt_zetas_inv]\n\t" #ifndef WOLFSSL_ARM_ARCH_7M "MOV r12, #0xd01\n\t" "MOVT r12, #0xcff\n\t" #endif /* !WOLFSSL_ARM_ARCH_7M */ - "MOV r3, #0x0\n\t" + "MOV r3, #0\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_mlkem_invntt_loop_765:\n\t" @@ -2012,9 +2012,9 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r) "STR r8, [%[r], #24]\n\t" "STR r9, [%[r], #28]\n\t" "LDR r3, [sp, #4]\n\t" - "ADD r3, r3, #0x10\n\t" + "ADD r3, r3, #16\n\t" "RSBS r10, r3, #0x100\n\t" - "ADD %[r], %[r], #0x20\n\t" + "ADD %[r], %[r], #32\n\t" #if defined(__GNUC__) "BNE L_mlkem_invntt_loop_765_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2023,7 +2023,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r) "BNE.N L_mlkem_invntt_loop_765_%=\n\t" #endif "SUB %[r], %[r], #0x200\n\t" - "MOV r3, #0x0\n\t" + "MOV r3, #0\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_mlkem_invntt_loop_4_j:\n\t" @@ -2032,7 +2032,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r) #endif "STR r3, [sp, #4]\n\t" "ADD lr, r1, r3, LSR #4\n\t" - "MOV r2, #0x4\n\t" + "MOV r2, #4\n\t" "LDR lr, [lr, #224]\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2214,8 +2214,8 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r) "STR r8, [%[r], #96]\n\t" "STR r9, [%[r], #112]\n\t" "LDRD r2, r3, [sp]\n\t" - "SUBS r2, r2, #0x1\n\t" - "ADD %[r], %[r], #0x4\n\t" + "SUBS r2, r2, #1\n\t" + "ADD %[r], %[r], #4\n\t" #if defined(__GNUC__) "BNE L_mlkem_invntt_loop_4_i_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2234,7 +2234,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r) "BNE.N L_mlkem_invntt_loop_4_j_%=\n\t" #endif "SUB %[r], %[r], #0x200\n\t" - "MOV r2, #0x10\n\t" + "MOV r2, #16\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_mlkem_invntt_loop_321:\n\t" @@ -3047,8 +3047,8 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r) "STR r8, [%[r], #384]\n\t" "STR r9, [%[r], #448]\n\t" "LDR r2, [sp]\n\t" - "SUBS r2, r2, #0x1\n\t" - "ADD %[r], %[r], #0x4\n\t" + "SUBS r2, r2, #1\n\t" + "ADD %[r], %[r], #4\n\t" #if defined(__GNUC__) "BNE L_mlkem_invntt_loop_321_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3056,7 +3056,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r) #else "BNE.N L_mlkem_invntt_loop_321_%=\n\t" #endif - "ADD sp, sp, #0x8\n\t" + "ADD sp, sp, #8\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [L_mlkem_invntt_zetas_inv] "+r" (L_mlkem_invntt_zetas_inv_c) @@ -3116,7 +3116,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont(sword16* r, "MOV r12, #0xd01\n\t" "MOVT r12, #0xcff\n\t" #endif /* !WOLFSSL_ARM_ARCH_7M */ - "MOV r8, #0x0\n\t" + "MOV r8, #0\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_mlkem_basemul_mont_loop:\n\t" @@ -3126,7 +3126,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont(sword16* r, "LDM %[a]!, {r4, r5}\n\t" "LDM %[b]!, {r6, r7}\n\t" "LDR lr, [r3, r8]\n\t" - "ADD r8, r8, #0x2\n\t" + "ADD r8, r8, #2\n\t" "PUSH {r8}\n\t" "CMP r8, #0x80\n\t" #ifndef WOLFSSL_ARM_ARCH_7M @@ -3136,7 +3136,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont(sword16* r, "SMULTB r11, r12, r10\n\t" "SMLABB r8, r12, r9, r8\n\t" "SMLABB r10, r12, r11, r10\n\t" - "RSB r11, lr, #0x0\n\t" + "RSB r11, lr, #0\n\t" "SMULBT r8, lr, r8\n\t" "SMULBT r10, r11, r10\n\t" "SMLABB r8, r4, r6, r8\n\t" @@ -3172,7 +3172,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont(sword16* r, "SBFX r11, r11, #0, #16\n\t" "MLA r8, r12, r9, r8\n\t" "MLA r10, r12, r11, r10\n\t" - "RSB r11, lr, #0x0\n\t" + "RSB r11, lr, #0\n\t" "SBFX r9, lr, #0, #16\n\t" "SBFX r11, r11, #0, #16\n\t" "ASR r8, r8, #16\n\t" @@ -3271,7 +3271,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont_add(sword16* r, "MOV r12, #0xd01\n\t" "MOVT r12, #0xcff\n\t" #endif /* !WOLFSSL_ARM_ARCH_7M */ - "MOV r8, #0x0\n\t" + "MOV r8, #0\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_mlkem_thumb2_basemul_mont_add_loop:\n\t" @@ -3281,7 +3281,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont_add(sword16* r, "LDM %[a]!, {r4, r5}\n\t" "LDM %[b]!, {r6, r7}\n\t" "LDR lr, [r3, r8]\n\t" - "ADD r8, r8, #0x2\n\t" + "ADD r8, r8, #2\n\t" "PUSH {r8}\n\t" "CMP r8, #0x80\n\t" #ifndef WOLFSSL_ARM_ARCH_7M @@ -3291,7 +3291,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont_add(sword16* r, "SMULTB r11, r12, r10\n\t" "SMLABB r8, r12, r9, r8\n\t" "SMLABB r10, r12, r11, r10\n\t" - "RSB r11, lr, #0x0\n\t" + "RSB r11, lr, #0\n\t" "SMULBT r8, lr, r8\n\t" "SMULBT r10, r11, r10\n\t" "SMLABB r8, r4, r6, r8\n\t" @@ -3330,7 +3330,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont_add(sword16* r, "SBFX r11, r11, #0, #16\n\t" "MLA r8, r12, r9, r8\n\t" "MLA r10, r12, r11, r10\n\t" - "RSB r11, lr, #0x0\n\t" + "RSB r11, lr, #0\n\t" "SBFX r9, lr, #0, #16\n\t" "SBFX r11, r11, #0, #16\n\t" "ASR r8, r8, #16\n\t" @@ -3507,7 +3507,7 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_csubq(sword16* p) "BFI r5, r10, #0, #16\n\t" #endif /* !WOLFSSL_ARM_ARCH_7M */ "STM %[p]!, {r2, r3, r4, r5}\n\t" - "SUBS r1, r1, #0x8\n\t" + "SUBS r1, r1, #8\n\t" #if defined(__GNUC__) "BNE L_mlkem_thumb2_csubq_loop_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3551,14 +3551,14 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p, __asm__ __volatile__ ( "MOV r8, #0xd01\n\t" - "MOV r9, #0x0\n\t" + "MOV r9, #0\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_mlkem_thumb2_rej_uniform_loop_no_fail:\n\t" #else "L_mlkem_thumb2_rej_uniform_loop_no_fail_%=:\n\t" #endif - "CMP %[len], #0x8\n\t" + "CMP %[len], #8\n\t" #if defined(__GNUC__) "BLT L_mlkem_thumb2_rej_uniform_done_no_fail_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3617,7 +3617,7 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p, "LSR r10, r10, #31\n\t" "SUB %[len], %[len], r10\n\t" "ADD r9, r9, r10, LSL #1\n\t" - "SUBS %[rLen], %[rLen], #0xc\n\t" + "SUBS %[rLen], %[rLen], #12\n\t" #if defined(__GNUC__) "BNE L_mlkem_thumb2_rej_uniform_loop_no_fail_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3638,7 +3638,7 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p, #else "L_mlkem_thumb2_rej_uniform_done_no_fail_%=:\n\t" #endif - "CMP %[len], #0x0\n\t" + "CMP %[len], #0\n\t" #if defined(__GNUC__) "BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3663,8 +3663,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p, "BGE.N L_mlkem_thumb2_rej_uniform_fail_0_%=\n\t" #endif "STRH r7, [%[p], r9]\n\t" - "SUBS %[len], %[len], #0x1\n\t" - "ADD r9, r9, #0x2\n\t" + "SUBS %[len], %[len], #1\n\t" + "ADD r9, r9, #2\n\t" #if defined(__GNUC__) "BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3688,8 +3688,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p, "BGE.N L_mlkem_thumb2_rej_uniform_fail_1_%=\n\t" #endif "STRH r7, [%[p], r9]\n\t" - "SUBS %[len], %[len], #0x1\n\t" - "ADD r9, r9, #0x2\n\t" + "SUBS %[len], %[len], #1\n\t" + "ADD r9, r9, #2\n\t" #if defined(__GNUC__) "BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3714,8 +3714,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p, "BGE.N L_mlkem_thumb2_rej_uniform_fail_2_%=\n\t" #endif "STRH r7, [%[p], r9]\n\t" - "SUBS %[len], %[len], #0x1\n\t" - "ADD r9, r9, #0x2\n\t" + "SUBS %[len], %[len], #1\n\t" + "ADD r9, r9, #2\n\t" #if defined(__GNUC__) "BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3739,8 +3739,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p, "BGE.N L_mlkem_thumb2_rej_uniform_fail_3_%=\n\t" #endif "STRH r7, [%[p], r9]\n\t" - "SUBS %[len], %[len], #0x1\n\t" - "ADD r9, r9, #0x2\n\t" + "SUBS %[len], %[len], #1\n\t" + "ADD r9, r9, #2\n\t" #if defined(__GNUC__) "BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3764,8 +3764,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p, "BGE.N L_mlkem_thumb2_rej_uniform_fail_4_%=\n\t" #endif "STRH r7, [%[p], r9]\n\t" - "SUBS %[len], %[len], #0x1\n\t" - "ADD r9, r9, #0x2\n\t" + "SUBS %[len], %[len], #1\n\t" + "ADD r9, r9, #2\n\t" #if defined(__GNUC__) "BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3790,8 +3790,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p, "BGE.N L_mlkem_thumb2_rej_uniform_fail_5_%=\n\t" #endif "STRH r7, [%[p], r9]\n\t" - "SUBS %[len], %[len], #0x1\n\t" - "ADD r9, r9, #0x2\n\t" + "SUBS %[len], %[len], #1\n\t" + "ADD r9, r9, #2\n\t" #if defined(__GNUC__) "BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3815,8 +3815,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p, "BGE.N L_mlkem_thumb2_rej_uniform_fail_6_%=\n\t" #endif "STRH r7, [%[p], r9]\n\t" - "SUBS %[len], %[len], #0x1\n\t" - "ADD r9, r9, #0x2\n\t" + "SUBS %[len], %[len], #1\n\t" + "ADD r9, r9, #2\n\t" #if defined(__GNUC__) "BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3840,8 +3840,8 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p, "BGE.N L_mlkem_thumb2_rej_uniform_fail_7_%=\n\t" #endif "STRH r7, [%[p], r9]\n\t" - "SUBS %[len], %[len], #0x1\n\t" - "ADD r9, r9, #0x2\n\t" + "SUBS %[len], %[len], #1\n\t" + "ADD r9, r9, #2\n\t" #if defined(__GNUC__) "BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -3855,7 +3855,7 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p, #else "L_mlkem_thumb2_rej_uniform_fail_7_%=:\n\t" #endif - "SUBS %[rLen], %[rLen], #0xc\n\t" + "SUBS %[rLen], %[rLen], #12\n\t" #if defined(__GNUC__) "BGT L_mlkem_thumb2_rej_uniform_loop_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) diff --git a/wolfcrypt/src/port/arm/thumb2-poly1305-asm.S b/wolfcrypt/src/port/arm/thumb2-poly1305-asm.S index 2bd38c14b7e..26880f21add 100644 --- a/wolfcrypt/src/port/arm/thumb2-poly1305-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-poly1305-asm.S @@ -39,17 +39,17 @@ .type poly1305_blocks_thumb2_16, %function poly1305_blocks_thumb2_16: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} - SUB sp, sp, #0x1c - CMP r2, #0x0 + SUB sp, sp, #28 + CMP r2, #0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_poly1305_thumb2_16_done #else BEQ.N L_poly1305_thumb2_16_done #endif - ADD lr, sp, #0xc + ADD lr, sp, #12 STM lr, {r0, r1, r2, r3} /* Get h pointer */ - ADD lr, r0, #0x10 + ADD lr, r0, #16 LDM lr, {r4, r5, r6, r7, r8} L_poly1305_thumb2_16_loop: /* Add m to h */ @@ -63,7 +63,7 @@ L_poly1305_thumb2_16_loop: ADCS r5, r5, r3 ADCS r6, r6, r9 ADCS r7, r7, r10 - ADD r1, r1, #0x10 + ADD r1, r1, #16 ADC r8, r8, r11 #ifdef WOLFSSL_ARM_ARCH_7M STM lr, {r4, r5, r6, r7, r8} @@ -205,7 +205,7 @@ L_poly1305_thumb2_16_loop: LDR r5, [lr, #16] /* r[3] * h[3] */ UMAAL r10, r11, r3, r4 - MOV r12, #0x0 + MOV r12, #0 /* r[0] * h[4] */ UMAAL r8, r12, r0, r5 /* r[1] * h[4] */ @@ -221,8 +221,8 @@ L_poly1305_thumb2_16_loop: /* Load length */ LDR r2, [sp, #20] /* Reduce mod 2^130 - 5 */ - BIC r3, r8, #0x3 - AND r8, r8, #0x3 + BIC r3, r8, #3 + AND r8, r8, #3 ADDS r4, r4, r3 LSR r3, r3, #2 ADCS r5, r5, r9 @@ -241,7 +241,7 @@ L_poly1305_thumb2_16_loop: ADCS r7, r7, r11 ADC r8, r8, r12 /* Sub 16 from length. */ - SUBS r2, r2, #0x10 + SUBS r2, r2, #16 /* Store length. */ STR r2, [sp, #20] /* Loop again if more message to do. */ @@ -252,7 +252,7 @@ L_poly1305_thumb2_16_loop: #endif STM lr, {r4, r5, r6, r7, r8} L_poly1305_thumb2_16_done: - ADD sp, sp, #0x1c + ADD sp, sp, #28 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 250 */ .size poly1305_blocks_thumb2_16,.-poly1305_blocks_thumb2_16 @@ -285,7 +285,7 @@ poly1305_set_key: LDR r3, [r1, #20] LDR r4, [r1, #24] LDR r5, [r1, #28] - ADD r10, r0, #0x24 + ADD r10, r0, #36 STM r10, {r2, r3, r4, r5} /* Load, mask and store r. */ LDR r2, [r1] @@ -296,14 +296,14 @@ poly1305_set_key: AND r3, r3, r7 AND r4, r4, r8 AND r5, r5, r9 - ADD r10, r0, #0x0 + ADD r10, r0, #0 STM r10, {r2, r3, r4, r5} /* h (accumulator) = 0 */ EOR r6, r6, r6 EOR r7, r7, r7 EOR r8, r8, r8 EOR r9, r9, r9 - ADD r10, r0, #0x10 + ADD r10, r0, #16 EOR r5, r5, r5 STM r10, {r5, r6, r7, r8, r9} /* Zero leftover */ @@ -317,25 +317,25 @@ poly1305_set_key: .type poly1305_final, %function poly1305_final: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} - ADD r11, r0, #0x10 + ADD r11, r0, #16 LDM r11, {r2, r3, r4, r5, r6} /* Add 5 and check for h larger than p. */ - ADDS r7, r2, #0x5 - ADCS r7, r3, #0x0 - ADCS r7, r4, #0x0 - ADCS r7, r5, #0x0 - ADC r7, r6, #0x0 - SUB r7, r7, #0x4 + ADDS r7, r2, #5 + ADCS r7, r3, #0 + ADCS r7, r4, #0 + ADCS r7, r5, #0 + ADC r7, r6, #0 + SUB r7, r7, #4 LSR r7, r7, #31 - SUB r7, r7, #0x1 - AND r7, r7, #0x5 + SUB r7, r7, #1 + AND r7, r7, #5 /* Add 0/5 to h. */ ADDS r2, r2, r7 - ADCS r3, r3, #0x0 - ADCS r4, r4, #0x0 - ADC r5, r5, #0x0 + ADCS r3, r3, #0 + ADCS r4, r4, #0 + ADC r5, r5, #0 /* Add padding */ - ADD r11, r0, #0x24 + ADD r11, r0, #36 LDM r11, {r7, r8, r9, r10} ADDS r2, r2, r7 ADCS r3, r3, r8 @@ -352,13 +352,13 @@ poly1305_final: EOR r4, r4, r4 EOR r5, r5, r5 EOR r6, r6, r6 - ADD r11, r0, #0x10 + ADD r11, r0, #16 STM r11, {r2, r3, r4, r5, r6} /* Zero out r. */ - ADD r11, r0, #0x0 + ADD r11, r0, #0 STM r11, {r2, r3, r4, r5} /* Zero out padding. */ - ADD r11, r0, #0x24 + ADD r11, r0, #36 STM r11, {r2, r3, r4, r5} POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 82 */ diff --git a/wolfcrypt/src/port/arm/thumb2-poly1305-asm_c.c b/wolfcrypt/src/port/arm/thumb2-poly1305-asm_c.c index c4b607a5c06..b8ff2f7c2ec 100644 --- a/wolfcrypt/src/port/arm/thumb2-poly1305-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-poly1305-asm_c.c @@ -66,8 +66,8 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx, #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "SUB sp, sp, #0x1c\n\t" - "CMP %[len], #0x0\n\t" + "SUB sp, sp, #28\n\t" + "CMP %[len], #0\n\t" #if defined(__GNUC__) "BEQ L_poly1305_thumb2_16_done_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -75,10 +75,10 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx, #else "BEQ.N L_poly1305_thumb2_16_done_%=\n\t" #endif - "ADD lr, sp, #0xc\n\t" + "ADD lr, sp, #12\n\t" "STM lr, {%[ctx], %[m], %[len], %[notLast]}\n\t" /* Get h pointer */ - "ADD lr, %[ctx], #0x10\n\t" + "ADD lr, %[ctx], #16\n\t" "LDM lr, {r4, r5, r6, r7, r8}\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -97,7 +97,7 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx, "ADCS r5, r5, %[notLast]\n\t" "ADCS r6, r6, r9\n\t" "ADCS r7, r7, r10\n\t" - "ADD %[m], %[m], #0x10\n\t" + "ADD %[m], %[m], #16\n\t" "ADC r8, r8, r11\n\t" #ifdef WOLFSSL_ARM_ARCH_7M "STM lr, {r4, r5, r6, r7, r8}\n\t" @@ -239,7 +239,7 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx, "LDR r5, [lr, #16]\n\t" /* r[3] * h[3] */ "UMAAL r10, r11, %[notLast], r4\n\t" - "MOV r12, #0x0\n\t" + "MOV r12, #0\n\t" /* r[0] * h[4] */ "UMAAL r8, r12, %[ctx], r5\n\t" /* r[1] * h[4] */ @@ -255,8 +255,8 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx, /* Load length */ "LDR %[len], [sp, #20]\n\t" /* Reduce mod 2^130 - 5 */ - "BIC %[notLast], r8, #0x3\n\t" - "AND r8, r8, #0x3\n\t" + "BIC %[notLast], r8, #3\n\t" + "AND r8, r8, #3\n\t" "ADDS r4, r4, %[notLast]\n\t" "LSR %[notLast], %[notLast], #2\n\t" "ADCS r5, r5, r9\n\t" @@ -275,7 +275,7 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx, "ADCS r7, r7, r11\n\t" "ADC r8, r8, r12\n\t" /* Sub 16 from length. */ - "SUBS %[len], %[len], #0x10\n\t" + "SUBS %[len], %[len], #16\n\t" /* Store length. */ "STR %[len], [sp, #20]\n\t" /* Loop again if more message to do. */ @@ -293,7 +293,7 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx, #else "L_poly1305_thumb2_16_done_%=:\n\t" #endif - "ADD sp, sp, #0x1c\n\t" + "ADD sp, sp, #28\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [ctx] "+r" (ctx), [m] "+r" (m), [len] "+r" (len), [notLast] "+r" (notLast) @@ -337,7 +337,7 @@ WC_OMIT_FRAME_POINTER void poly1305_set_key(Poly1305* ctx, const byte* key) "LDR r3, [%[key], #20]\n\t" "LDR r4, [%[key], #24]\n\t" "LDR r5, [%[key], #28]\n\t" - "ADD r10, %[ctx], #0x24\n\t" + "ADD r10, %[ctx], #36\n\t" "STM r10, {r2, r3, r4, r5}\n\t" /* Load, mask and store r. */ "LDR r2, [%[key]]\n\t" @@ -348,14 +348,14 @@ WC_OMIT_FRAME_POINTER void poly1305_set_key(Poly1305* ctx, const byte* key) "AND r3, r3, r7\n\t" "AND r4, r4, r8\n\t" "AND r5, r5, r9\n\t" - "ADD r10, %[ctx], #0x0\n\t" + "ADD r10, %[ctx], #0\n\t" "STM r10, {r2, r3, r4, r5}\n\t" /* h (accumulator) = 0 */ "EOR r6, r6, r6\n\t" "EOR r7, r7, r7\n\t" "EOR r8, r8, r8\n\t" "EOR r9, r9, r9\n\t" - "ADD r10, %[ctx], #0x10\n\t" + "ADD r10, %[ctx], #16\n\t" "EOR r5, r5, r5\n\t" "STM r10, {r5, r6, r7, r8, r9}\n\t" /* Zero leftover */ @@ -385,25 +385,25 @@ WC_OMIT_FRAME_POINTER void poly1305_final(Poly1305* ctx, byte* mac) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( - "ADD r11, %[ctx], #0x10\n\t" + "ADD r11, %[ctx], #16\n\t" "LDM r11, {r2, r3, r4, r5, r6}\n\t" /* Add 5 and check for h larger than p. */ - "ADDS r7, r2, #0x5\n\t" - "ADCS r7, r3, #0x0\n\t" - "ADCS r7, r4, #0x0\n\t" - "ADCS r7, r5, #0x0\n\t" - "ADC r7, r6, #0x0\n\t" - "SUB r7, r7, #0x4\n\t" + "ADDS r7, r2, #5\n\t" + "ADCS r7, r3, #0\n\t" + "ADCS r7, r4, #0\n\t" + "ADCS r7, r5, #0\n\t" + "ADC r7, r6, #0\n\t" + "SUB r7, r7, #4\n\t" "LSR r7, r7, #31\n\t" - "SUB r7, r7, #0x1\n\t" - "AND r7, r7, #0x5\n\t" + "SUB r7, r7, #1\n\t" + "AND r7, r7, #5\n\t" /* Add 0/5 to h. */ "ADDS r2, r2, r7\n\t" - "ADCS r3, r3, #0x0\n\t" - "ADCS r4, r4, #0x0\n\t" - "ADC r5, r5, #0x0\n\t" + "ADCS r3, r3, #0\n\t" + "ADCS r4, r4, #0\n\t" + "ADC r5, r5, #0\n\t" /* Add padding */ - "ADD r11, %[ctx], #0x24\n\t" + "ADD r11, %[ctx], #36\n\t" "LDM r11, {r7, r8, r9, r10}\n\t" "ADDS r2, r2, r7\n\t" "ADCS r3, r3, r8\n\t" @@ -420,13 +420,13 @@ WC_OMIT_FRAME_POINTER void poly1305_final(Poly1305* ctx, byte* mac) "EOR r4, r4, r4\n\t" "EOR r5, r5, r5\n\t" "EOR r6, r6, r6\n\t" - "ADD r11, %[ctx], #0x10\n\t" + "ADD r11, %[ctx], #16\n\t" "STM r11, {r2, r3, r4, r5, r6}\n\t" /* Zero out r. */ - "ADD r11, %[ctx], #0x0\n\t" + "ADD r11, %[ctx], #0\n\t" "STM r11, {r2, r3, r4, r5}\n\t" /* Zero out padding. */ - "ADD r11, %[ctx], #0x24\n\t" + "ADD r11, %[ctx], #36\n\t" "STM r11, {r2, r3, r4, r5}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [ctx] "+r" (ctx), [mac] "+r" (mac) diff --git a/wolfcrypt/src/port/arm/thumb2-sha256-asm.S b/wolfcrypt/src/port/arm/thumb2-sha256-asm.S index 3daa7a93e1b..3b46daf07b1 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha256-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-sha256-asm.S @@ -128,7 +128,7 @@ L_SHA256_transform_len_begin: LDR r4, [r0, #8] EOR r11, r11, r4 #ifndef WOLFSSL_ARMASM_SHA256_SMALL - MOV r3, #0x3 + MOV r3, #3 /* Start of 16 rounds */ L_SHA256_transform_len_start_fast: /* Round 0 */ @@ -884,7 +884,7 @@ L_SHA256_transform_len_start_fast: ADD r9, r9, r4 STR r9, [sp, #60] ADD r12, r12, #0x40 - SUBS r3, r3, #0x1 + SUBS r3, r3, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_SHA256_transform_len_start_fast #else @@ -1403,10 +1403,10 @@ L_SHA256_transform_len_start_fast: STR r8, [r0, #16] STR r9, [r0] #else - MOV r3, #0x4 + MOV r3, #4 /* Start of 16 rounds */ L_SHA256_transform_len_start_small: - SUB r3, r3, #0x1 + SUB r3, r3, #1 /* Round 0 */ LDR r5, [r0, #16] LDR r6, [r0, #20] @@ -1439,7 +1439,7 @@ L_SHA256_transform_len_start_small: ADD r9, r9, r11 STR r8, [r0, #12] STR r9, [r0, #28] - CMP r3, #0x0 + CMP r3, #0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_SHA256_transform_len_blk_end_0 #else @@ -1493,7 +1493,7 @@ L_SHA256_transform_len_blk_end_0: ADD r9, r9, r10 STR r8, [r0, #8] STR r9, [r0, #24] - CMP r3, #0x0 + CMP r3, #0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_SHA256_transform_len_blk_end_1 #else @@ -1547,7 +1547,7 @@ L_SHA256_transform_len_blk_end_1: ADD r9, r9, r11 STR r8, [r0, #4] STR r9, [r0, #20] - CMP r3, #0x0 + CMP r3, #0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_SHA256_transform_len_blk_end_2 #else @@ -1601,7 +1601,7 @@ L_SHA256_transform_len_blk_end_2: ADD r9, r9, r10 STR r8, [r0] STR r9, [r0, #16] - CMP r3, #0x0 + CMP r3, #0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_SHA256_transform_len_blk_end_3 #else @@ -1655,7 +1655,7 @@ L_SHA256_transform_len_blk_end_3: ADD r9, r9, r11 STR r8, [r0, #28] STR r9, [r0, #12] - CMP r3, #0x0 + CMP r3, #0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_SHA256_transform_len_blk_end_4 #else @@ -1709,7 +1709,7 @@ L_SHA256_transform_len_blk_end_4: ADD r9, r9, r10 STR r8, [r0, #24] STR r9, [r0, #8] - CMP r3, #0x0 + CMP r3, #0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_SHA256_transform_len_blk_end_5 #else @@ -1763,7 +1763,7 @@ L_SHA256_transform_len_blk_end_5: ADD r9, r9, r11 STR r8, [r0, #20] STR r9, [r0, #4] - CMP r3, #0x0 + CMP r3, #0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_SHA256_transform_len_blk_end_6 #else @@ -1817,7 +1817,7 @@ L_SHA256_transform_len_blk_end_6: ADD r9, r9, r10 STR r8, [r0, #16] STR r9, [r0] - CMP r3, #0x0 + CMP r3, #0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_SHA256_transform_len_blk_end_7 #else @@ -1871,7 +1871,7 @@ L_SHA256_transform_len_blk_end_7: ADD r9, r9, r11 STR r8, [r0, #12] STR r9, [r0, #28] - CMP r3, #0x0 + CMP r3, #0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_SHA256_transform_len_blk_end_8 #else @@ -1925,7 +1925,7 @@ L_SHA256_transform_len_blk_end_8: ADD r9, r9, r10 STR r8, [r0, #8] STR r9, [r0, #24] - CMP r3, #0x0 + CMP r3, #0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_SHA256_transform_len_blk_end_9 #else @@ -1979,7 +1979,7 @@ L_SHA256_transform_len_blk_end_9: ADD r9, r9, r11 STR r8, [r0, #4] STR r9, [r0, #20] - CMP r3, #0x0 + CMP r3, #0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_SHA256_transform_len_blk_end_10 #else @@ -2033,7 +2033,7 @@ L_SHA256_transform_len_blk_end_10: ADD r9, r9, r10 STR r8, [r0] STR r9, [r0, #16] - CMP r3, #0x0 + CMP r3, #0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_SHA256_transform_len_blk_end_11 #else @@ -2087,7 +2087,7 @@ L_SHA256_transform_len_blk_end_11: ADD r9, r9, r11 STR r8, [r0, #28] STR r9, [r0, #12] - CMP r3, #0x0 + CMP r3, #0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_SHA256_transform_len_blk_end_12 #else @@ -2141,7 +2141,7 @@ L_SHA256_transform_len_blk_end_12: ADD r9, r9, r10 STR r8, [r0, #24] STR r9, [r0, #8] - CMP r3, #0x0 + CMP r3, #0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_SHA256_transform_len_blk_end_13 #else @@ -2195,7 +2195,7 @@ L_SHA256_transform_len_blk_end_13: ADD r9, r9, r11 STR r8, [r0, #20] STR r9, [r0, #4] - CMP r3, #0x0 + CMP r3, #0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_SHA256_transform_len_blk_end_14 #else @@ -2249,7 +2249,7 @@ L_SHA256_transform_len_blk_end_14: ADD r9, r9, r10 STR r8, [r0, #16] STR r9, [r0] - CMP r3, #0x0 + CMP r3, #0 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BEQ L_SHA256_transform_len_blk_end_15 #else @@ -2271,7 +2271,7 @@ L_SHA256_transform_len_blk_end_14: ADD r9, r9, r4 STR r9, [sp, #60] L_SHA256_transform_len_blk_end_15: - CMP r3, #0x0 + CMP r3, #0 ADD r12, r12, #0x40 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_SHA256_transform_len_start_small diff --git a/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c index cb657fcc71c..5f26fbf4f9c 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c @@ -155,7 +155,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "LDR r4, [%[sha256], #8]\n\t" "EOR r11, r11, r4\n\t" #ifndef WOLFSSL_ARMASM_SHA256_SMALL - "MOV r3, #0x3\n\t" + "MOV r3, #3\n\t" /* Start of 16 rounds */ "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -916,7 +916,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "ADD r9, r9, r4\n\t" "STR r9, [sp, #60]\n\t" "ADD r12, r12, #0x40\n\t" - "SUBS r3, r3, #0x1\n\t" + "SUBS r3, r3, #1\n\t" #if defined(__GNUC__) "BNE L_SHA256_transform_len_start_fast_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1437,7 +1437,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "STR r8, [%[sha256], #16]\n\t" "STR r9, [%[sha256]]\n\t" #else - "MOV r3, #0x4\n\t" + "MOV r3, #4\n\t" /* Start of 16 rounds */ "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1445,7 +1445,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, #else "L_SHA256_transform_len_start_small_%=:\n\t" #endif - "SUB r3, r3, #0x1\n\t" + "SUB r3, r3, #1\n\t" /* Round 0 */ "LDR r5, [%[sha256], #16]\n\t" "LDR r6, [%[sha256], #20]\n\t" @@ -1478,7 +1478,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "ADD r9, r9, r11\n\t" "STR r8, [%[sha256], #12]\n\t" "STR r9, [%[sha256], #28]\n\t" - "CMP r3, #0x0\n\t" + "CMP r3, #0\n\t" #if defined(__GNUC__) "BEQ L_SHA256_transform_len_blk_end_0_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1539,7 +1539,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "ADD r9, r9, r10\n\t" "STR r8, [%[sha256], #8]\n\t" "STR r9, [%[sha256], #24]\n\t" - "CMP r3, #0x0\n\t" + "CMP r3, #0\n\t" #if defined(__GNUC__) "BEQ L_SHA256_transform_len_blk_end_1_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1600,7 +1600,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "ADD r9, r9, r11\n\t" "STR r8, [%[sha256], #4]\n\t" "STR r9, [%[sha256], #20]\n\t" - "CMP r3, #0x0\n\t" + "CMP r3, #0\n\t" #if defined(__GNUC__) "BEQ L_SHA256_transform_len_blk_end_2_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1661,7 +1661,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "ADD r9, r9, r10\n\t" "STR r8, [%[sha256]]\n\t" "STR r9, [%[sha256], #16]\n\t" - "CMP r3, #0x0\n\t" + "CMP r3, #0\n\t" #if defined(__GNUC__) "BEQ L_SHA256_transform_len_blk_end_3_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1722,7 +1722,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "ADD r9, r9, r11\n\t" "STR r8, [%[sha256], #28]\n\t" "STR r9, [%[sha256], #12]\n\t" - "CMP r3, #0x0\n\t" + "CMP r3, #0\n\t" #if defined(__GNUC__) "BEQ L_SHA256_transform_len_blk_end_4_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1783,7 +1783,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "ADD r9, r9, r10\n\t" "STR r8, [%[sha256], #24]\n\t" "STR r9, [%[sha256], #8]\n\t" - "CMP r3, #0x0\n\t" + "CMP r3, #0\n\t" #if defined(__GNUC__) "BEQ L_SHA256_transform_len_blk_end_5_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1844,7 +1844,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "ADD r9, r9, r11\n\t" "STR r8, [%[sha256], #20]\n\t" "STR r9, [%[sha256], #4]\n\t" - "CMP r3, #0x0\n\t" + "CMP r3, #0\n\t" #if defined(__GNUC__) "BEQ L_SHA256_transform_len_blk_end_6_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1905,7 +1905,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "ADD r9, r9, r10\n\t" "STR r8, [%[sha256], #16]\n\t" "STR r9, [%[sha256]]\n\t" - "CMP r3, #0x0\n\t" + "CMP r3, #0\n\t" #if defined(__GNUC__) "BEQ L_SHA256_transform_len_blk_end_7_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -1966,7 +1966,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "ADD r9, r9, r11\n\t" "STR r8, [%[sha256], #12]\n\t" "STR r9, [%[sha256], #28]\n\t" - "CMP r3, #0x0\n\t" + "CMP r3, #0\n\t" #if defined(__GNUC__) "BEQ L_SHA256_transform_len_blk_end_8_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2027,7 +2027,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "ADD r9, r9, r10\n\t" "STR r8, [%[sha256], #8]\n\t" "STR r9, [%[sha256], #24]\n\t" - "CMP r3, #0x0\n\t" + "CMP r3, #0\n\t" #if defined(__GNUC__) "BEQ L_SHA256_transform_len_blk_end_9_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2088,7 +2088,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "ADD r9, r9, r11\n\t" "STR r8, [%[sha256], #4]\n\t" "STR r9, [%[sha256], #20]\n\t" - "CMP r3, #0x0\n\t" + "CMP r3, #0\n\t" #if defined(__GNUC__) "BEQ L_SHA256_transform_len_blk_end_10_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2149,7 +2149,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "ADD r9, r9, r10\n\t" "STR r8, [%[sha256]]\n\t" "STR r9, [%[sha256], #16]\n\t" - "CMP r3, #0x0\n\t" + "CMP r3, #0\n\t" #if defined(__GNUC__) "BEQ L_SHA256_transform_len_blk_end_11_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2210,7 +2210,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "ADD r9, r9, r11\n\t" "STR r8, [%[sha256], #28]\n\t" "STR r9, [%[sha256], #12]\n\t" - "CMP r3, #0x0\n\t" + "CMP r3, #0\n\t" #if defined(__GNUC__) "BEQ L_SHA256_transform_len_blk_end_12_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2271,7 +2271,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "ADD r9, r9, r10\n\t" "STR r8, [%[sha256], #24]\n\t" "STR r9, [%[sha256], #8]\n\t" - "CMP r3, #0x0\n\t" + "CMP r3, #0\n\t" #if defined(__GNUC__) "BEQ L_SHA256_transform_len_blk_end_13_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2332,7 +2332,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "ADD r9, r9, r11\n\t" "STR r8, [%[sha256], #20]\n\t" "STR r9, [%[sha256], #4]\n\t" - "CMP r3, #0x0\n\t" + "CMP r3, #0\n\t" #if defined(__GNUC__) "BEQ L_SHA256_transform_len_blk_end_14_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2393,7 +2393,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "ADD r9, r9, r10\n\t" "STR r8, [%[sha256], #16]\n\t" "STR r9, [%[sha256]]\n\t" - "CMP r3, #0x0\n\t" + "CMP r3, #0\n\t" #if defined(__GNUC__) "BEQ L_SHA256_transform_len_blk_end_15_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2422,7 +2422,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, #else "L_SHA256_transform_len_blk_end_15_%=:\n\t" #endif - "CMP r3, #0x0\n\t" + "CMP r3, #0\n\t" "ADD r12, r12, #0x40\n\t" #if defined(__GNUC__) "BNE L_SHA256_transform_len_start_small_%=\n\t" diff --git a/wolfcrypt/src/port/arm/thumb2-sha3-asm.S b/wolfcrypt/src/port/arm/thumb2-sha3-asm.S index ab4254dee9d..27c111674bb 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha3-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-sha3-asm.S @@ -67,7 +67,7 @@ BlockSha3: PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} SUB sp, sp, #0xcc ADR r1, L_sha3_thumb2_rt - MOV r2, #0xc + MOV r2, #12 L_sha3_thumb2_begin: STR r2, [sp, #200] /* Round even */ @@ -336,7 +336,7 @@ L_sha3_thumb2_begin: STR lr, [sp, #36] /* Get constant */ LDRD r10, r11, [r1] - ADD r1, r1, #0x8 + ADD r1, r1, #8 BIC r12, r6, r4 BIC lr, r7, r5 EOR r12, r12, r2 @@ -864,7 +864,7 @@ L_sha3_thumb2_begin: STR lr, [r0, #36] /* Get constant */ LDRD r10, r11, [r1] - ADD r1, r1, #0x8 + ADD r1, r1, #8 BIC r12, r6, r4 BIC lr, r7, r5 EOR r12, r12, r2 @@ -1127,7 +1127,7 @@ L_sha3_thumb2_begin: STR r12, [r0, #160] STR lr, [r0, #164] LDR r2, [sp, #200] - SUBS r2, r2, #0x1 + SUBS r2, r2, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_sha3_thumb2_begin #else diff --git a/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c index e0c6d065d1a..85ea9b4e2fb 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c @@ -82,7 +82,7 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state) __asm__ __volatile__ ( "SUB sp, sp, #0xcc\n\t" "MOV r1, %[L_sha3_thumb2_rt]\n\t" - "MOV r2, #0xc\n\t" + "MOV r2, #12\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) "L_sha3_thumb2_begin:\n\t" @@ -356,7 +356,7 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state) "STR lr, [sp, #36]\n\t" /* Get constant */ "LDRD r10, r11, [r1]\n\t" - "ADD r1, r1, #0x8\n\t" + "ADD r1, r1, #8\n\t" "BIC r12, r6, r4\n\t" "BIC lr, r7, r5\n\t" "EOR r12, r12, r2\n\t" @@ -884,7 +884,7 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state) "STR lr, [%[state], #36]\n\t" /* Get constant */ "LDRD r10, r11, [r1]\n\t" - "ADD r1, r1, #0x8\n\t" + "ADD r1, r1, #8\n\t" "BIC r12, r6, r4\n\t" "BIC lr, r7, r5\n\t" "EOR r12, r12, r2\n\t" @@ -1147,7 +1147,7 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state) "STR r12, [%[state], #160]\n\t" "STR lr, [%[state], #164]\n\t" "LDR r2, [sp, #200]\n\t" - "SUBS r2, r2, #0x1\n\t" + "SUBS r2, r2, #1\n\t" #if defined(__GNUC__) "BNE L_sha3_thumb2_begin_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) diff --git a/wolfcrypt/src/port/arm/thumb2-sha512-asm.S b/wolfcrypt/src/port/arm/thumb2-sha512-asm.S index f05da4f8cad..3f8e1de9d9c 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha512-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-sha512-asm.S @@ -217,7 +217,7 @@ L_SHA512_transform_len_begin: LDRD r4, r5, [r0, #16] EOR r10, r10, r4 EOR r11, r11, r5 - MOV r12, #0x4 + MOV r12, #4 /* Start of 16 rounds */ L_SHA512_transform_len_start: /* Round 0 */ @@ -2205,7 +2205,7 @@ L_SHA512_transform_len_start: ADC r5, r5, r7 STRD r4, r5, [sp, #120] ADD r3, r3, #0x80 - SUBS r12, r12, #0x1 + SUBS r12, r12, #1 #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) BNE L_SHA512_transform_len_start #else diff --git a/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c index 4f87445f522..768a61c06c6 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c @@ -244,7 +244,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha512_Len_base(wc_Sha512* sha512, "LDRD r4, r5, [%[sha512], #16]\n\t" "EOR r10, r10, r4\n\t" "EOR r11, r11, r5\n\t" - "MOV r12, #0x4\n\t" + "MOV r12, #4\n\t" /* Start of 16 rounds */ "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) @@ -2237,7 +2237,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha512_Len_base(wc_Sha512* sha512, "ADC r5, r5, r7\n\t" "STRD r4, r5, [sp, #120]\n\t" "ADD r3, r3, #0x80\n\t" - "SUBS r12, r12, #0x1\n\t" + "SUBS r12, r12, #1\n\t" #if defined(__GNUC__) "BNE L_SHA512_transform_len_start_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)