Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions BIBLIOGRAPHY.md
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,8 @@ source code and documentation.
- [dev/x86_64/src/poly_chknorm_avx2.c](dev/x86_64/src/poly_chknorm_avx2.c)
- [dev/x86_64/src/poly_decompose_32_avx2.c](dev/x86_64/src/poly_decompose_32_avx2.c)
- [dev/x86_64/src/poly_decompose_88_avx2.c](dev/x86_64/src/poly_decompose_88_avx2.c)
- [dev/x86_64/src/poly_use_hint_32_avx2.c](dev/x86_64/src/poly_use_hint_32_avx2.c)
- [dev/x86_64/src/poly_use_hint_88_avx2.c](dev/x86_64/src/poly_use_hint_88_avx2.c)
- [dev/x86_64/src/poly_use_hint_32_avx2.S](dev/x86_64/src/poly_use_hint_32_avx2.S)
- [dev/x86_64/src/poly_use_hint_88_avx2.S](dev/x86_64/src/poly_use_hint_88_avx2.S)
- [dev/x86_64/src/polyz_unpack_17_avx2.c](dev/x86_64/src/polyz_unpack_17_avx2.c)
- [dev/x86_64/src/polyz_unpack_19_avx2.c](dev/x86_64/src/polyz_unpack_19_avx2.c)
- [dev/x86_64/src/rej_uniform_avx2.c](dev/x86_64/src/rej_uniform_avx2.c)
Expand All @@ -241,8 +241,8 @@ source code and documentation.
- [mldsa/src/native/x86_64/src/poly_chknorm_avx2.c](mldsa/src/native/x86_64/src/poly_chknorm_avx2.c)
- [mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c](mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c)
- [mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c](mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c)
- [mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c](mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c)
- [mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c](mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c)
- [mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.S](mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.S)
- [mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.S](mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.S)
- [mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c](mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c)
- [mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c](mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c)
- [mldsa/src/native/x86_64/src/rej_uniform_avx2.c](mldsa/src/native/x86_64/src/rej_uniform_avx2.c)
Expand Down
139 changes: 139 additions & 0 deletions dev/x86_64/src/poly_use_hint_32_avx2.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/*
* Copyright (c) The mldsa-native project authors
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
*/

/* References
* ==========
*
* - [REF_AVX2]
* CRYSTALS-Dilithium optimized AVX2 implementation
* Bai, Ducas, Kiltz, Lepoint, Lyubashevsky, Schwabe, Seiler, Stehlé
* https://github.com/pq-crystals/dilithium/tree/master/avx2
*/

/*
* This file is derived from the public domain
* AVX2 Dilithium implementation @[REF_AVX2].
*/


/*************************************************
* Name: mld_poly_use_hint_32_avx2
*
* Description: Use hint polynomial to correct the high bits of a polynomial.
*
* Arguments: - mld_poly *b: pointer to output polynomial with corrected high
* bits
* - const mld_poly *a: pointer to input polynomial
* - const mld_poly *hint: pointer to input hint polynomial
**************************************************/



#include "../../../common.h"

#if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \
!defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
(defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || \
(MLD_CONFIG_PARAMETER_SET == 65 || MLD_CONFIG_PARAMETER_SET == 87))


/* simpasm: header-end */

/* Reference:
* - @[REF_AVX2] calls poly_decompose to compute all a1, a0 before the loop.
* - Our implementation of decompose() is slightly different from that in
* @[REF_AVX2]. See poly_decompose_32_avx2 for more information.
*/

/*
 * decompose32_avx2 a1, a, temp1, temp2, temp3
 *
 * Splits each 32-bit lane of a into a high part (written to a1) and a
 * remainder a0 (written back over a).
 *
 * Operands (all ymm registers):
 *   a1     out     high part; forced to 0 in lanes where a > ymm4
 *   a      in/out  input value on entry, remainder a0 on exit
 *   temp1  scratch clobbered
 *   temp2  scratch clobbered (holds the wrap-around mask on exit)
 *   temp3  scratch clobbered
 *
 * Constants read but not modified: ymm5 (rounding offset), ymm8 and ymm7
 * (multipliers), ymm4 (wrap-around threshold). The function in this file
 * that uses the macro loads 127, 1025 and 512 into ymm5, ymm8 and ymm7.
 */
// a aliased with a0
.macro decompose32_avx2 a1, a, temp1, temp2, temp3
/* a1, a0 = decompose(a)
* See poly_decompose_32_avx2 for more information. */
/* temp1 = (a + ymm5) >> 7, logical shift, per 32-bit lane */
vpaddd \a, %ymm5, \temp1
vpsrld $7, \temp1, \temp1
/* Per 16-bit lane: unsigned high half of temp1 * ymm8 */
vpmulhuw %ymm8, \temp1, \temp1
/* Per 16-bit lane: rounded, scaled signed multiply by ymm7
 * (round(x * ymm7 / 2^15)); temp1 now holds the unmasked high part */
vpmulhrsw %ymm7, \temp1, \temp1
/* Check for wrap-around; set a1 = 0 if required */
/* temp2 = all-ones in lanes where a > ymm4 (signed compare), else 0 */
vpcmpgtd %ymm4, \a, \temp2
/* a1 = ~temp2 & temp1; temp1 itself stays unmasked for the remainder */
vpandn \temp1, \temp2, \a1
/* Compute remainder a0 */
/* temp1 = ((temp1 << 10) - temp1) << 9 = 523776 * temp1
 * (523776 = 1023 * 512; presumably 2 * GAMMA2 with GAMMA2 = (Q-1)/32) */
vpslld $10, \temp1, \temp3
vpsubd \temp1, \temp3, \temp1
vpslld $9, \temp1, \temp1
/* a = a - 523776 * (unmasked high part) */
vpsubd \temp1, \a, \a
/* If wrap-around is required, a0 -= 1 */
/* temp2 is -1 in wrap-around lanes and 0 elsewhere, so adding it
 * subtracts 1 only where needed */
vpaddd \temp2, \a, \a
.endm

/* Reference: The reference avx2 implementation checks a0 >= 0, which is
* different from the specification and the reference C implementation. We
* follow the specification and check a0 > 0.
*/

/*
 * use_hint32_avx2 b, a, h, a1, temp1, temp2, temp3
 *
 * Per 32-bit lane: decomposes a into (a1, a0) via decompose32_avx2, flips
 * the sign of the hint h in lanes where a0 is not greater than ymm6, and
 * writes (a1 + h) & ymm3 to b.
 *
 * Operands (all ymm registers):
 *   b      out     result; written only by the last two instructions, so
 *                  it may be the same register as h (the call site in this
 *                  file passes the same register for both)
 *   a      in      input value; clobbered (a0, then a mask, then 2*h or 0)
 *   h      in      hint; clobbered (overwritten with the signed hint)
 *   a1     scratch high part from the decompose step
 *   temp1-temp3    scratch for decompose32_avx2
 *
 * Constants read but not modified: ymm6 (compared against a0; zeroed by
 * the function in this file), ymm3 (result mask; 15 in this file), plus
 * ymm4, ymm5, ymm7 and ymm8 via decompose32_avx2.
 */
// a aliased with delta
.macro use_hint32_avx2 b, a, h, a1, temp1, temp2, temp3
decompose32_avx2 \a1, \a, \temp1, \temp2, \temp3

/* h = (a0 > 0) ? h : -h */
/* a = all-ones where a0 > ymm6 (signed), else 0 */
vpcmpgtd %ymm6, \a, \a
/* a = ~a & h: keeps h only in lanes where a0 <= ymm6 */
vpandn \h, \a, \a
/* a = 2*h in those lanes, 0 elsewhere */
vpslld $1, \a, \a
/* h = h - a: h - 2*h = -h where a0 <= ymm6, unchanged elsewhere */
vpsubd \a, \h, \h

/* b = (a1 + h) % 16 */
vpaddd \a1, \h, \b
/* The reduction is done by masking with ymm3 */
vpand %ymm3, \b, \b
.endm

/*
 * mld_poly_use_hint_32_avx2  (GAS, AT&T syntax)
 *
 * Processes 1024 bytes in 32 iterations of 32 bytes (8 x 32-bit lanes):
 * loads a block of coefficients from (rsi) and a block of hints from (rdx),
 * applies use_hint32_avx2, and stores the result to (rdi).
 *
 * In:    rdi = output, rsi = input polynomial, rdx = hint polynomial
 *        (presumably the b / a / hint arguments of the file header in
 *        System V AMD64 argument order - confirm against the C prototype).
 *        All three buffers are accessed with vmovdqa and must therefore be
 *        32-byte aligned.
 * Out:   result stored through rdi; rax holds 1024 on return, which is the
 *        final loop offset and not a meaningful return value.
 * Clobb: rax, rcx, r8, r9, ymm0-ymm11, flags
 * Stack: not used
 *
 * Constant registers set up before the loop:
 *   ymm3 = 15       mask for the final reduction mod 16
 *   ymm4 = 8118528  wrap-around threshold, 31 * ((Q-1) / 32)
 *   ymm5 = 127      rounding offset added before the shift by 7
 *   ymm6 = 0        comparison value for a0 > 0
 *   ymm7 = 512      vpmulhrsw multiplier
 *   ymm8 = 1025     vpmulhuw multiplier
 */
.text
.balign 16
.global MLD_ASM_NAMESPACE(mld_poly_use_hint_32_avx2)
MLD_ASM_FN_SYMBOL(mld_poly_use_hint_32_avx2)

    // Initialize constants
    movl $127, %ecx

    /* check-magic: 1025 == floor(2^22 / 4092) */
    movl $1025, %r8d
    vmovd %r8d, %xmm8
    vpbroadcastd %xmm8, %ymm8

    /* rax = byte offset into all three buffers */
    xorl %eax, %eax
    /* VEX-encoded xmm write also clears the upper half, so ymm6 = 0 */
    vpxor %xmm6, %xmm6, %xmm6
    vmovd %ecx, %xmm5

    /* 31 * ((Q-1) / 32) = 31 * 261888, wrap-around threshold.
     * Inputs above this value fall into the top bucket, whose high part
     * must wrap to 0 and whose remainder is reduced by 1. The multiplier
     * is 31 for GAMMA2 = (Q-1)/32; 87 is the multiplier of the
     * GAMMA2 = (Q-1)/88 variant. A threshold of 87 * 261888 would exceed
     * Q = 8380417, so the wrap-around compare could never fire. */
    movl $8118528, %ecx

    /* round(x * 2^9 / 2^15) => round(x / 2^6), for f1 = round(f1'' / 2^6) */
    movl $512, %r9d
    vmovd %r9d, %xmm7
    vpbroadcastd %xmm7, %ymm7

    vmovd %ecx, %xmm4
    /* Mask for the final reduction mod 16 */
    movl $15, %ecx
    vpbroadcastd %xmm5, %ymm5
    vmovd %ecx, %xmm3
    vpbroadcastd %xmm4, %ymm4
    vpbroadcastd %xmm3, %ymm3


mld_poly_use_hint_32_avx2_loop:
    /* ymm0 = 8 input coefficients, ymm2 = 8 hints */
    vmovdqa (%rsi,%rax), %ymm0
    vmovdqa (%rdx,%rax), %ymm2

    /* Result replaces the hints in ymm2 (b and h share the register) */
    use_hint32_avx2 %ymm2, %ymm0, %ymm2, %ymm9, %ymm1, %ymm11, %ymm10

    vmovdqa %ymm2, (%rdi,%rax)
    /* Advance by one 32-byte block; stop after 1024 bytes */
    addq $32, %rax
    cmpq $1024, %rax
    jne mld_poly_use_hint_32_avx2_loop
    ret

/* simpasm: footer-start */

#endif /* MLD_ARITH_BACKEND_X86_64_DEFAULT && !MLD_CONFIG_MULTILEVEL_NO_SHARED \
&& (MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLD_CONFIG_PARAMETER_SET == \
65 || MLD_CONFIG_PARAMETER_SET == 87) */
102 changes: 0 additions & 102 deletions dev/x86_64/src/poly_use_hint_32_avx2.c

This file was deleted.

Loading
Loading