/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-Ofast -mavx512vl" } */

/* Compile-only test: every rounding operation below has its operands
   pinned to xmm16/xmm17 through explicit register variables, and the
   generated assembly is then scanned for the instruction mnemonics
   listed at the end of the file.  */

#include <x86intrin.h>

/* Round the scalar double X with __builtin_round.  The result lives in
   a register variable bound to xmm16; the empty asm with a "+v" operand
   marks it as both read and written in that register.  */
__attribute__((noinline, noclone)) double
f1 (double x)
{
  register double a __asm__ ("xmm16") = __builtin_round (x);
  __asm__ ("" : "+v" (a));
  return a;
}

/* Single-precision counterpart of f1, using __builtin_roundf.  */
__attribute__((noinline, noclone)) float
f2 (float x)
{
  register float a __asm__ ("xmm16") = __builtin_roundf (x);
  __asm__ ("" : "+v" (a));
  return a;
}

/* _mm_round_sd with _MM_FROUND_NINT; X is bound to xmm16 and Y to xmm17
   before the intrinsic, and the result is written back to xmm16.  */
__attribute__((noinline, noclone)) __m128d
f3 (__m128d x, __m128d y)
{
  register __m128d a __asm__ ("xmm16") = x, b __asm__ ("xmm17") = y;
  __asm__ ("" : "+v" (a), "+v" (b));
  a = _mm_round_sd (a, b, _MM_FROUND_NINT);
  __asm__ ("" : "+v" (a));
  return a;
}

/* Single-precision counterpart of f3, using _mm_round_ss.  */
__attribute__((noinline, noclone)) __m128
f4 (__m128 x, __m128 y)
{
  register __m128 a __asm__ ("xmm16") = x, b __asm__ ("xmm17") = y;
  __asm__ ("" : "+v" (a), "+v" (b));
  a = _mm_round_ss (a, b, _MM_FROUND_NINT);
  __asm__ ("" : "+v" (a));
  return a;
}

/* _mm_round_pd with _MM_FROUND_NINT on a 128-bit vector held in the
   register variable bound to xmm16.  */
__attribute__((noinline, noclone)) __m128d
f5 (__m128d x)
{
  register __m128d a __asm__ ("xmm16") = x;
  __asm__ ("" : "+v" (a));
  a = _mm_round_pd (a, _MM_FROUND_NINT);
  __asm__ ("" : "+v" (a));
  return a;
}

/* Single-precision counterpart of f5, using _mm_round_ps.  */
__attribute__((noinline, noclone)) __m128
f6 (__m128 x)
{
  register __m128 a __asm__ ("xmm16") = x;
  __asm__ ("" : "+v" (a));
  a = _mm_round_ps (a, _MM_FROUND_NINT);
  __asm__ ("" : "+v" (a));
  return a;
}

/* _mm256_round_pd with _MM_FROUND_NINT on a 256-bit vector.  The
   register variable is still declared with the name "xmm16"; the scan
   patterns at the end of the file look for ymm operands for this
   case.  */
__attribute__((noinline, noclone)) __m256d
f7 (__m256d x)
{
  register __m256d a __asm__ ("xmm16") = x;
  __asm__ ("" : "+v" (a));
  a = _mm256_round_pd (a, _MM_FROUND_NINT);
  __asm__ ("" : "+v" (a));
  return a;
}

/* Single-precision counterpart of f7, using _mm256_round_ps.  */
__attribute__((noinline, noclone)) __m256
f8 (__m256 x)
{
  register __m256 a __asm__ ("xmm16") = x;
  __asm__ ("" : "+v" (a));
  a = _mm256_round_ps (a, _MM_FROUND_NINT);
  __asm__ ("" : "+v" (a));
  return a;
}

/* Instead of vround{sd,ss,pd,ps} this should use vrndscale{sd,ss,pd,ps}
   counterparts, so that [xy]mm1[67] can be referenced directly in the
   instructions.  The scalar patterns are expected twice each
   (presumably once for the builtin form in f1/f2 and once for the
   intrinsic form in f3/f4); every packed pattern is expected once.  */
/* { dg-final { scan-assembler-times "vrndscalesd\[^\n\r\]*xmm" 2 } } */
/* { dg-final { scan-assembler-times "vrndscaless\[^\n\r\]*xmm" 2 } } */
/* { dg-final { scan-assembler-times "vrndscalepd\[^\n\r\]*xmm" 1 } } */
/* { dg-final { scan-assembler-times "vrndscaleps\[^\n\r\]*xmm" 1 } } */
/* { dg-final { scan-assembler-times "vrndscalepd\[^\n\r\]*ymm" 1 } } */
/* { dg-final { scan-assembler-times "vrndscaleps\[^\n\r\]*ymm" 1 } } */
/* { dg-final { scan-assembler-not "vroundsd\[^\n\r\]*xmm" } } */
/* { dg-final { scan-assembler-not "vroundss\[^\n\r\]*xmm" } } */
/* { dg-final { scan-assembler-not "vroundpd\[^\n\r\]*xmm" } } */
/* { dg-final { scan-assembler-not "vroundps\[^\n\r\]*xmm" } } */
/* { dg-final { scan-assembler-not "vroundpd\[^\n\r\]*ymm" } } */
/* { dg-final { scan-assembler-not "vroundps\[^\n\r\]*ymm" } } */