Brief: Binary ANDs two vector registers.
Call signature: binary_and<Vec>(const typename Vec::register_type a, const typename Vec::register_type b) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Binary ORs two vector registers.
Call signature: binary_or<Vec>(const typename Vec::register_type a, const typename Vec::register_type b) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Binary XORs two vector registers.
Call signature: binary_xor<Vec>(const typename Vec::register_type a, const typename Vec::register_type b) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Arithmetic shift of data to the left by n bits.
Call signature: shift_left<Vec>(const typename Vec::register_type data, const unsigned int shift) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | - | + | - | - | - | - | - | - |
double | - | + | - | - | - | - | - | - |
Brief: Shifts data to the left by n bits (shifting in zeros).
Call signature: shift_left<Vec>(const typename Vec::register_type data, const typename Vec::register_type shift) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Arithmetic shift of data to the right by n bits.
Call signature: shift_right<Vec>(const typename Vec::register_type data, const unsigned int shift) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Arithmetic shift of data to the right by n bits.
Call signature: shift_right<Vec>(const typename Vec::register_type data, const typename Vec::register_type shift) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Arithmetic shift of data to the right by n bits.
Call signature: shift_right<Vec>(const typename Vec::imask_type data, const unsigned int shift) -> typename Vec::imask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Population counter.
Call signature: popcnt<Vec>(const typename Vec::register_type data) -> typename Vec::offset_base_register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Leading zeros counter for a single scalar value.
Call signature: lzc<Vec>(const typename Vec::base_type data) -> typename Vec::offset_base_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Leading zeros counter.
Call signature: lzc<Vec>(const typename Vec::register_type data) -> typename Vec::offset_base_register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | + | - |
int8_t | + | - | - | + | + | + | + | - |
uint16_t | + | - | - | + | + | + | + | - |
int16_t | + | - | - | + | + | + | + | - |
uint32_t | + | - | - | + | + | + | + | + |
int32_t | + | - | - | + | + | + | + | + |
uint64_t | + | - | - | + | + | + | + | - |
int64_t | + | - | - | + | + | + | + | - |
float | - | - | - | - | - | - | + | + |
double | - | - | - | - | - | - | + | - |
Brief: Leading zeros counter.
Call signature: lzc_alt<Vec>(const typename Vec::register_type data) -> typename Vec::offset_base_register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | + | - |
int8_t | - | - | - | - | - | - | + | - |
uint16_t | - | - | - | - | - | - | + | - |
int16_t | - | - | - | - | - | - | + | - |
uint32_t | - | - | - | - | - | - | + | - |
int32_t | - | - | - | - | - | - | + | - |
uint64_t | - | - | - | - | - | - | + | - |
int64_t | - | - | - | - | - | - | + | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Leading zeros counter.
Call signature: lzc_alt1<Vec>(const typename Vec::register_type data) -> typename Vec::offset_base_register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | + | - |
int8_t | - | - | - | - | - | - | + | - |
uint16_t | - | - | - | - | - | - | + | - |
int16_t | - | - | - | - | - | - | + | - |
uint32_t | - | - | - | - | - | - | + | - |
int32_t | - | - | - | - | - | - | + | - |
uint64_t | - | - | - | - | - | - | + | - |
int64_t | - | - | - | - | - | - | + | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Leading zeros counter.
Call signature: lzc_alt2<Vec>(const typename Vec::register_type data) -> typename Vec::offset_base_register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | + | - |
int8_t | - | - | - | - | - | - | + | - |
uint16_t | - | - | - | - | - | - | + | - |
int16_t | - | - | - | - | - | - | + | - |
uint32_t | - | - | - | - | - | - | + | - |
int32_t | - | - | - | - | - | - | + | - |
uint64_t | - | - | - | - | - | - | + | - |
int64_t | - | - | - | - | - | - | + | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Leading zeros counter.
Call signature: lzc_alt3<Vec>(const typename Vec::register_type data) -> typename Vec::offset_base_register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | + | - |
int8_t | - | - | - | - | - | - | + | - |
uint16_t | - | - | - | - | - | - | + | - |
int16_t | - | - | - | - | - | - | + | - |
uint32_t | - | - | - | - | - | - | + | - |
int32_t | - | - | - | - | - | - | + | - |
uint64_t | - | - | - | - | - | - | + | - |
int64_t | - | - | - | - | - | - | + | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Leading zeros counter for an integral mask.
Call signature: lzc<Vec>(const typename Vec::imask_type data) -> typename Vec::imask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Trailing zeros counter for an integral mask.
Call signature: tzc<Vec>(const typename Vec::imask_type data) -> typename Vec::imask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Performs a horizontal OR across the elements of a vector register.
Call signature: hor<Vec>(const typename Vec::register_type vec) -> typename Vec::base_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | + | + |
int8_t | + | - | - | + | + | + | + | + |
uint16_t | + | - | - | + | + | + | + | + |
int16_t | + | - | - | + | + | + | + | + |
uint32_t | + | - | - | + | + | + | + | + |
int32_t | + | - | - | + | + | + | + | + |
uint64_t | + | - | - | + | + | + | + | + |
int64_t | + | - | - | + | + | + | + | + |
float | + | - | - | + | + | + | + | + |
double | + | - | - | + | + | + | + | + |
Brief: Bitwise inversion of the values in a vector register.
Call signature: inv<Vec>(const typename Vec::register_type vec) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | + | + |
int8_t | + | - | - | + | + | + | + | + |
uint16_t | + | - | - | + | + | + | + | + |
int16_t | + | - | - | + | + | + | + | + |
uint32_t | + | - | - | + | + | + | + | + |
int32_t | + | - | - | + | + | + | + | + |
uint64_t | + | - | - | + | + | + | + | + |
int64_t | + | - | - | + | + | + | + | + |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Adds two vector registers.
Call signature: add<Vec>(const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | + | + | + | + | + | + |
int8_t | + | + | + | + | + | + | + | + |
uint16_t | + | + | + | + | + | + | + | + |
int16_t | + | + | + | + | + | + | + | + |
uint32_t | + | + | + | + | + | + | + | + |
int32_t | + | + | + | + | + | + | + | + |
uint64_t | + | + | + | + | + | + | + | + |
int64_t | + | + | + | + | + | + | + | + |
float | + | + | + | + | + | + | + | + |
double | + | + | + | + | + | + | + | + |
Brief: Subtracts two vector registers.
Call signature: sub<Vec>(const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | + | + | + | + | + | + |
int8_t | + | + | + | + | + | + | + | + |
uint16_t | + | + | + | + | + | + | + | + |
int16_t | + | + | + | + | + | + | + | + |
uint32_t | + | + | + | + | + | + | + | + |
int32_t | + | + | + | + | + | + | + | + |
uint64_t | + | + | + | + | + | + | + | + |
int64_t | + | + | + | + | + | + | + | + |
float | + | + | + | + | + | + | + | + |
double | + | + | + | + | + | + | + | + |
Brief: Adds two vector registers, depending on a mask: result[*] = (m[*])? vec_a[*]+vec_b[*] : vec_a[*].
Call signature: add<Vec>(const typename Vec::mask_type mask, const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Adds two vector registers, depending on a mask: result[*] = (m[*])? vec_a[*]+vec_b[*] : vec_a[*].
Call signature: add<Vec>(const typename Vec::imask_type mask, const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Subtracts two vector registers, depending on a mask: result[*] = (m[*])? vec_a[*]-vec_b[*] : vec_a[*].
Call signature: sub<Vec>(const typename Vec::mask_type mask, const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Subtracts two vector registers, depending on a mask: result[*] = (m[*])? vec_a[*]-vec_b[*] : vec_a[*].
Call signature: sub<Vec>(const typename Vec::imask_type mask, const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Multiplies two vector registers.
Call signature: mul<Vec>(const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Multiplies a vector register with a constant.
Call signature: mul<Vec>(const typename Vec::register_type vec_a, const typename Vec::base_type mul_var) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | + | + |
int8_t | + | - | - | + | + | + | + | + |
uint16_t | + | - | - | + | + | + | + | + |
int16_t | + | - | - | + | + | + | + | + |
uint32_t | + | - | - | + | + | + | + | + |
int32_t | + | - | - | + | + | + | + | + |
uint64_t | + | - | - | + | + | + | + | + |
int64_t | + | - | - | + | + | + | + | + |
float | + | - | - | + | + | + | + | + |
double | + | - | - | + | + | + | + | + |
Brief: Reduces the elements to a sum.
Call signature: hadd<Vec>(const typename Vec::register_type value) -> typename Vec::base_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Compares the values of two vectors and returns a vector with the minimum of each pair of corresponding values.
Call signature: min<Vec>(const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | - | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | - | - | + | + | + | + | + |
int64_t | + | - | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Divides two vector registers.
Call signature: div<Vec>(const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Performs the element-wise modulo operation of one data vector by another data vector.
Call signature: mod<Vec>(const typename Vec::register_type vec_data, const typename Vec::register_type vec_mod) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | + | + |
int8_t | + | - | - | + | + | + | + | + |
uint16_t | + | - | - | + | + | + | + | + |
int16_t | + | - | - | + | + | + | + | + |
uint32_t | + | - | - | + | + | + | + | + |
int32_t | + | - | - | + | + | + | + | + |
uint64_t | + | - | - | + | + | + | + | + |
int64_t | + | - | - | + | + | + | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Performs the modulo operation of each element of a data vector by a single input value.
Call signature: mod<Vec>(const typename Vec::register_type vec, const typename Vec::base_type val) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | + | + |
int8_t | + | - | - | + | + | + | + | + |
uint16_t | + | - | - | + | + | + | + | + |
int16_t | + | - | - | + | + | + | + | + |
uint32_t | + | - | - | + | + | + | + | + |
int32_t | + | - | - | + | + | + | + | + |
uint64_t | + | - | - | + | + | + | + | + |
int64_t | + | - | - | + | + | + | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Performs the modulo operation of each element of a data vector by a single input value.
Call signature: mod_safe<Vec>(const typename Vec::register_type vec, const typename Vec::base_type val) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Reduces the elements to the maximum value.
Call signature: hmax<Vec>(const typename Vec::register_type data) -> typename Vec::base_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | + | + |
int8_t | + | - | - | + | + | + | + | + |
uint16_t | + | - | - | + | + | + | + | + |
int16_t | + | - | - | + | + | + | + | + |
uint32_t | + | - | - | + | + | + | + | + |
int32_t | + | - | - | + | + | + | + | + |
uint64_t | + | - | - | + | + | + | + | + |
int64_t | + | - | - | + | + | + | + | + |
float | + | - | - | + | + | + | + | + |
double | + | - | - | + | + | + | + | + |
Brief: Reduces the elements to the minimum value.
Call signature: hmin<Vec>(const typename Vec::register_type data) -> typename Vec::base_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | + | + |
int8_t | + | - | - | + | + | + | + | + |
uint16_t | + | - | - | + | + | + | + | + |
int16_t | + | - | - | + | + | + | + | + |
uint32_t | + | - | - | + | + | + | + | + |
int32_t | + | - | - | + | + | + | + | + |
uint64_t | + | - | - | + | + | + | + | + |
int64_t | + | - | - | + | + | + | + | + |
float | + | - | - | + | + | + | + | + |
double | + | - | - | + | + | + | + | + |
Brief: Compares the values of two vectors and returns a vector with the maximum of each pair of corresponding values.
Call signature: max<Vec>(const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Compares two vector registers for equality.
Call signature: equal<Vec>(const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::mask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Compares two vector registers for equality.
Call signature: equal<Vec>(const typename Vec::mask_type mask, const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::mask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Compares two imasks for equality.
Call signature: equal<Vec>(const typename Vec::imask_type mask_a, const typename Vec::imask_type mask_b) -> bool
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Compares two registers for equality and returns an integral mask.
Call signature: equal_as_imask<Vec>(const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::imask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Compares two registers for equality and returns an integral mask.
Call signature: equal_as_imask<Vec>(const typename Vec::imask_type mask, const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::imask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Compares two vector registers for inequality.
Call signature: nequal<Vec>(const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::mask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | - | + | + | + | + |
double | + | + | - | - | + | + | + | + |
Brief: Compares two vector registers for inequality.
Call signature: nequal<Vec>(const typename Vec::mask_type mask, const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::mask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Compares two imasks for inequality.
Call signature: nequal<Vec>(const typename Vec::imask_type mask_a, const typename Vec::imask_type mask_b) -> bool
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Compares two registers for inequality and returns an integral mask.
Call signature: nequal_as_imask<Vec>(const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::imask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Compares two registers for inequality and returns an integral mask.
Call signature: nequal_as_imask<Vec>(const typename Vec::imask_type mask, const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::imask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Tests whether left elements are smaller than the corresponding right ones.
Call signature: less_than<Vec>(const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::mask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Tests whether left elements are larger than the corresponding right ones.
Call signature: greater_than<Vec>(const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::mask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Tests whether left elements are smaller than or equal to the corresponding right ones.
Call signature: less_than_or_equal<Vec>(const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::mask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Tests whether left elements are larger than or equal to the corresponding right ones.
Call signature: greater_than_or_equal<Vec>(const typename Vec::register_type vec_a, const typename Vec::register_type vec_b) -> typename Vec::mask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Checks if the values of a vector are in a specific range (min[*] <= d[*] <= max[*]).
Call signature: between_inclusive<Vec>(const typename Vec::register_type vec_data, const typename Vec::register_type vec_min, typename Vec::register_type vec_max) -> typename Vec::mask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Checks if the vector register contains at least one value not equal to zero.
Call signature: unequal_zero<Vec>(const typename Vec::register_type vec) -> bool
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Counts number of matches of a chosen value within a vector register.
Call signature: count_matches<Vec>(const typename Vec::register_type vec, const typename Vec::base_type val) -> typename Vec::base_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Unpacks a coherent AC_INT.
Call signature: unpack_acint<Vec>(ac_int<Vec::vector_size_b(), false> data) -> ac_int<Vec::vector_size_b(), false>
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | - | - |
int8_t | - | - | - | - | - | - | - | - |
uint16_t | - | - | - | - | - | - | - | - |
int16_t | - | - | - | - | - | - | - | - |
uint32_t | - | - | - | - | - | - | + | + |
int32_t | - | - | - | - | - | - | + | + |
uint64_t | - | - | - | - | - | - | + | + |
int64_t | - | - | - | - | - | - | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief:
Call signature: unpack_merge_acint<Vec>(ac_int<Vec::vector_size_b(), false> source, ac_int<Vec::vector_size_b(), false> const data) -> ac_int<Vec::vector_size_b(), false>
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | - | - |
int8_t | - | - | - | - | - | - | - | - |
uint16_t | - | - | - | - | - | - | - | - |
int16_t | - | - | - | - | - | - | - | - |
uint32_t | - | - | - | - | - | - | + | + |
int32_t | - | - | - | - | - | - | + | + |
uint64_t | - | - | - | - | - | - | + | + |
int64_t | - | - | - | - | - | - | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Loads consecutive data from memory into a coherent AC_INT.
Call signature: load_acint<Vec>(const typename Vec::base_type* memory) -> ac_int<Vec::vector_size_b(), false>
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | - | - |
int8_t | - | - | - | - | - | - | - | - |
uint16_t | - | - | - | - | - | - | - | - |
int16_t | - | - | - | - | - | - | - | - |
uint32_t | - | - | - | - | - | - | + | + |
int32_t | - | - | - | - | - | - | + | + |
uint64_t | - | - | - | - | - | - | + | + |
int64_t | - | - | - | - | - | - | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: todo.
Call signature: append_acint<Vec>(ac_int<Vec::vector_size_b(), false> source, ac_int<Vec::vector_size_b(), false> const data) -> ac_int<Vec::vector_size_b(), false>
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | - | - |
int8_t | - | - | - | - | - | - | - | - |
uint16_t | - | - | - | - | - | - | - | - |
int16_t | - | - | - | - | - | - | - | - |
uint32_t | - | - | - | - | - | - | + | + |
int32_t | - | - | - | - | - | - | + | + |
uint64_t | - | - | - | - | - | - | + | + |
int64_t | - | - | - | - | - | - | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Loads data from memory and packs n-bits from all elements within a vector together.
Call signature: packed_load_acint<Vec>(const typename Vec::base_type* memory) -> ac_int<Vec::vector_size_b(), false>
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | - | - |
int8_t | - | - | - | - | - | - | - | - |
uint16_t | - | - | - | - | - | - | - | - |
int16_t | - | - | - | - | - | - | - | - |
uint32_t | - | - | - | - | - | - | + | + |
int32_t | - | - | - | - | - | - | + | + |
uint64_t | - | - | - | - | - | - | + | + |
int64_t | - | - | - | - | - | - | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Stores N bits of data. N must be a multiple of sizeof(Vec::base_type).
Call signature: packed_store_acint<Vec>(typename Vec::base_type* memory, ac_int<Vec::vector_size_b(), false> data) -> void
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | - | - |
int8_t | - | - | - | - | - | - | - | - |
uint16_t | - | - | - | - | - | - | - | - |
int16_t | - | - | - | - | - | - | - | - |
uint32_t | - | - | - | - | - | - | + | + |
int32_t | - | - | - | - | - | - | + | + |
uint64_t | - | - | - | - | - | - | + | + |
int64_t | - | - | - | - | - | - | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief:
Call signature: store_acint<Vec>(typename Vec::base_type* memory, ac_int<Vec::vector_size_b(), false> data) -> void
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | - | - |
int8_t | - | - | - | - | - | - | - | - |
uint16_t | - | - | - | - | - | - | - | - |
int16_t | - | - | - | - | - | - | - | - |
uint32_t | - | - | - | - | - | - | + | + |
int32_t | - | - | - | - | - | - | + | + |
uint64_t | - | - | - | - | - | - | + | + |
int64_t | - | - | - | - | - | - | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief:
Call signature: packed_shift_left_acint<Vec>(ac_int<Vec::vector_size_b(), false> data, int shift_value) -> ac_int<Vec::vector_size_b(), false>
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | - | - |
int8_t | - | - | - | - | - | - | - | - |
uint16_t | - | - | - | - | - | - | - | - |
int16_t | - | - | - | - | - | - | - | - |
uint32_t | - | - | - | - | - | - | + | + |
int32_t | - | - | - | - | - | - | + | + |
uint64_t | - | - | - | - | - | - | + | + |
int64_t | - | - | - | - | - | - | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief:
Call signature: slice_packed_shift_left_acint_by_N<Vec>(ac_int<Vec::vector_size_b(), false> data) -> ac_int<Vec::vector_size_b(), false>
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | - | - |
int8_t | - | - | - | - | - | - | - | - |
uint16_t | - | - | - | - | - | - | - | - |
int16_t | - | - | - | - | - | - | - | - |
uint32_t | - | - | - | - | - | - | + | + |
int32_t | - | - | - | - | - | - | + | + |
uint64_t | - | - | - | - | - | - | + | + |
int64_t | - | - | - | - | - | - | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief:
Call signature: packed_shift_right_acint<Vec>(ac_int<Vec::vector_size_b(), false> data, int shift_value) -> ac_int<Vec::vector_size_b(), false>
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | - | - |
int8_t | - | - | - | - | - | - | - | - |
uint16_t | - | - | - | - | - | - | - | - |
int16_t | - | - | - | - | - | - | - | - |
uint32_t | - | - | - | - | - | - | + | + |
int32_t | - | - | - | - | - | - | + | + |
uint64_t | - | - | - | - | - | - | + | + |
int64_t | - | - | - | - | - | - | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief:
Call signature: slice_packed_shift_right_acint_by_N<Vec>(ac_int<Vec::vector_size_b(), false> data) -> ac_int<Vec::vector_size_b(), false>
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | - | - |
int8_t | - | - | - | - | - | - | - | - |
uint16_t | - | - | - | - | - | - | - | - |
int16_t | - | - | - | - | - | - | - | - |
uint32_t | - | - | - | - | - | - | + | + |
int32_t | - | - | - | - | - | - | + | + |
uint64_t | - | - | - | - | - | - | + | + |
int64_t | - | - | - | - | - | - | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief:
Call signature: packed_or_acint<Vec>(ac_int<Vec::vector_size_b(), false> a, ac_int<Vec::vector_size_b(), false> b) -> ac_int<Vec::vector_size_b(), false>
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | - | - |
int8_t | - | - | - | - | - | - | - | - |
uint16_t | - | - | - | - | - | - | - | - |
int16_t | - | - | - | - | - | - | - | - |
uint32_t | - | - | - | - | - | - | + | + |
int32_t | - | - | - | - | - | - | + | + |
uint64_t | - | - | - | - | - | - | + | + |
int64_t | - | - | - | - | - | - | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Packs elements from a vector together using a fixed bitwidth.
Call signature: pack_bits_linear<Vec>(const typename Vec::register_type data, const unsigned bitwidth) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | + | + |
int8_t | - | - | - | - | - | - | + | + |
uint16_t | - | - | - | - | - | - | + | + |
int16_t | - | - | - | - | - | - | + | + |
uint32_t | - | - | - | - | - | - | + | + |
int32_t | - | - | - | - | - | - | + | + |
uint64_t | - | - | - | - | - | - | + | + |
int64_t | - | - | - | - | - | - | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Packs elements from a vector together using a fixed bitwidth.
Call signature: pack_bits_treelike<Vec>(const typename Vec::register_type data, const unsigned bitwidth) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | + | + |
int8_t | - | - | - | - | - | - | + | + |
uint16_t | - | - | - | - | - | - | + | + |
int16_t | - | - | - | - | - | - | + | + |
uint32_t | - | - | - | - | - | - | + | + |
int32_t | - | - | - | - | - | - | + | + |
uint64_t | - | - | - | - | - | - | + | + |
int64_t | - | - | - | - | - | - | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief:
Call signature: pack_bits_linear_merge<Vec>(const typename Vec::register_type src, const unsigned bit_offset, const typename Vec::register_type data, const unsigned bitwidth) -> std::tuple<typename Vec::register_type, int, typename Vec::register_type>
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | + | + |
int8_t | - | - | - | - | - | - | + | + |
uint16_t | - | - | - | - | - | - | + | + |
int16_t | - | - | - | - | - | - | + | + |
uint32_t | - | - | - | - | - | - | + | + |
int32_t | - | - | - | - | - | - | + | + |
uint64_t | - | - | - | - | - | - | + | + |
int64_t | - | - | - | - | - | - | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief:
Call signature: pack_bits_treelike_merge<Vec>(const typename Vec::register_type src, const unsigned bit_offset, const typename Vec::register_type data, const unsigned bitwidth) -> std::tuple<typename Vec::register_type, int, typename Vec::register_type>
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | + | + |
int8_t | - | - | - | - | - | - | + | + |
uint16_t | - | - | - | - | - | - | + | + |
int16_t | - | - | - | - | - | - | + | + |
uint32_t | - | - | - | - | - | - | + | + |
int32_t | - | - | - | - | - | - | + | + |
uint64_t | - | - | - | - | - | - | + | + |
int64_t | - | - | - | - | - | - | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: todo.
Call signature: reinterpret<Vec, ToType>(const typename Vec::register_type data) -> typename ToType::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: todo.
Call signature: cast<Vec, ToType>(const typename Vec::register_type data) -> typename ToType::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | + | + | - | - | - |
int8_t | - | - | - | + | + | - | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: todo.
Call signature: split<Vec, ToType>(const typename Vec::register_type data) -> std::array<typename ToType::register_type, sizeof(typename ToType::base_type)/sizeof(typename Vec::base_type) * Vec::vector_element_count() / ToType::vector_element_count()>
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | - | - |
int8_t | - | - | - | - | - | - | - | - |
uint16_t | - | - | - | - | - | - | - | - |
int16_t | - | - | - | - | - | - | - | - |
uint32_t | - | - | - | - | - | + | - | - |
int32_t | - | - | - | - | - | + | - | - |
uint64_t | - | - | - | - | - | - | - | - |
int64_t | - | - | - | - | - | - | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: todo.
Call signature: merge<Vec, ToType>(std::array<typename Vec::register_type, sizeof(typename Vec::base_type)/sizeof(typename ToType::base_type) * ToType::vector_element_count() / Vec::vector_element_count()> data) -> typename ToType::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | - | - |
int8_t | - | - | - | - | - | - | - | - |
uint16_t | - | - | - | - | - | - | - | - |
int16_t | - | - | - | - | - | - | - | - |
uint32_t | - | - | - | - | + | - | - | - |
int32_t | - | - | - | - | + | - | - | - |
uint64_t | - | - | - | - | - | - | - | - |
int64_t | - | - | - | - | - | - | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Converts a SIMD register to an array of SIMD registers with a larger base type.
Call signature: convert_up<Vec, ToType>(const typename Vec::register_type data) -> std::array<typename ToType::register_type, Vec::vector_element_count() / ToType::vector_element_count()>
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | - | - | - |
int8_t | + | + | - | + | + | - | - | - |
uint16_t | + | + | - | + | + | - | - | - |
int16_t | + | + | - | + | + | - | - | - |
uint32_t | + | + | - | + | + | - | - | - |
int32_t | + | + | - | + | + | - | - | - |
uint64_t | - | - | - | - | + | - | - | - |
int64_t | - | - | - | - | + | - | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: todo.
Call signature: convert_down<Vec, ToType>(std::array<typename Vec::register_type, sizeof(typename Vec::base_type)/sizeof(typename ToType::base_type)> data) -> typename ToType::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | - | - | - | - |
int8_t | - | - | - | - | - | - | - | - |
uint16_t | - | + | - | - | - | - | - | - |
int16_t | - | + | - | - | - | - | - | - |
uint32_t | - | + | - | - | + | - | - | - |
int32_t | - | + | - | - | + | - | - | - |
uint64_t | - | + | - | - | + | - | - | - |
int64_t | - | + | - | - | + | - | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Writes the contents of a vector register to an output stream.
Call signature: to_ostream<Vec>(std::ostream & out, typename Vec::register_type const data, modifier ostream_modifier) -> std::ostream &
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | + | + | + | - | - |
int8_t | - | - | - | + | + | + | - | - |
uint16_t | - | - | - | + | + | + | - | - |
int16_t | - | - | - | + | + | + | - | - |
uint32_t | - | - | - | + | + | + | - | - |
int32_t | - | - | - | + | + | + | - | - |
uint64_t | - | - | - | + | + | + | - | - |
int64_t | - | + | - | + | + | + | - | - |
float | - | - | - | + | + | + | - | - |
double | - | - | - | + | + | + | - | - |
Brief: Loads data from aligned memory into a vector register.
Call signature: load<Vec>(const typename Vec::base_type* memory) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Loads data from (un)aligned memory into a vector register.
Call signature: loadu<Vec>(const typename Vec::base_type* memory) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Stores data from a vector register to aligned memory.
Call signature: store<Vec>(typename Vec::base_type* memory, const typename Vec::register_type data) -> void
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Stores data from a vector register to (un)aligned memory.
Call signature: storeu<Vec>(typename Vec::base_type* memory, const typename Vec::register_type data) -> void
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Stores SIMD register to array.
Call signature: to_array<Vec>(const typename Vec::register_type data) -> __attribute__((__aligned__(Vec::vector_alignment()))) std::array<typename Vec::base_type, Vec::vector_element_count()>
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Broadcasts a single value into all lanes of a vector register.
Call signature: set1<Vec>(const typename Vec::base_type value) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Set all lanes to zero.
Call signature: set_zero<Vec>() -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | + | + | - | + | + | + | + | + |
double | + | + | - | + | + | + | + | + |
Brief: Transfers provided elements into a vector register. This implementation is currently (erroneously) in reverse order.
Call signature: set<Vec>(Ts args) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Creates a sequence [0..SIMD-Reg-Element-Count].
Call signature: sequence<Vec>() -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Creates a sequence.
Call signature: custom_sequence<Vec>(typename Vec::base_type start, typename Vec::base_type stepwidth) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Transfers data from arbitrary locations into a vector register.
Call signature: gather<Vec, IndicesType>(const void* memory, const typename IndicesType::register_type index, std::integral_constant<int, N> scale) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Transfers data from a vector register to arbitrary locations.
Call signature: scatter<Vec>(const typename Vec::register_type data, void* memory, const typename Vec::offset_base_register_type index, std::integral_constant<int, N> scale) -> void
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | - | - | - |
int8_t | + | + | - | + | + | - | - | - |
uint16_t | + | + | - | + | + | - | - | - |
int16_t | + | + | - | + | + | - | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Stores elements from data consecutively, if the corresponding bit in mask is set to 1.
Call signature: compress_store<Vec>(const typename Vec::imask_type mask, typename Vec::base_type* memory, const typename Vec::register_type data) -> void
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Loads contiguous data from a specified memory location and places the elements using a write mask.
Call signature: expand_load<Vec>(const typename Vec::imask_type mask, const typename Vec::register_type src, typename Vec::base_type* memory) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | + | + | - | - |
int8_t | - | - | - | - | + | + | - | - |
uint16_t | - | - | - | - | + | + | - | - |
int16_t | - | - | - | - | + | + | - | - |
uint32_t | - | - | - | - | + | + | - | - |
int32_t | - | - | - | - | + | + | - | - |
uint64_t | - | - | - | - | + | + | - | - |
int64_t | - | - | - | - | + | + | - | - |
float | - | - | - | - | + | + | - | - |
double | - | - | - | - | + | + | - | - |
Brief: todo.
Call signature: load_convert_up<Vec, ToType>(typename Vec::base_type const * memory) -> typename ToType::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | + | - | - | - |
int8_t | - | - | - | - | + | - | - | - |
uint16_t | - | - | - | - | + | - | - | - |
int16_t | - | - | - | - | + | - | - | - |
uint32_t | - | - | - | - | + | - | - | - |
int32_t | - | - | - | - | + | - | - | - |
uint64_t | - | - | - | - | - | - | - | - |
int64_t | - | - | - | - | - | - | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Extracts the value at a given index.
Call signature: extract_value<Vec>(const typename Vec::register_type data) -> typename Vec::base_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Forms a mask type from an integral.
Call signature: to_mask<Vec>(const typename Vec::imask_type mask) -> typename Vec::mask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Binary NOT of a vector mask type.
Call signature: mask_binary_not<Vec>(const typename Vec::mask_type mask) -> typename Vec::mask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | + | + | - | - |
int8_t | - | - | - | - | + | + | - | - |
uint16_t | - | - | - | - | + | + | - | - |
int16_t | - | - | - | - | + | + | - | - |
uint32_t | - | - | - | - | + | + | - | - |
int32_t | - | - | - | - | + | + | - | - |
uint64_t | - | - | - | - | + | + | - | - |
int64_t | - | - | - | - | + | + | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Binary NOT of a vector integral mask type.
Call signature: mask_binary_not<Vec>(const typename Vec::imask_type mask) -> typename Vec::imask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Binary AND of two vector mask types.
Call signature: mask_binary_and<Vec>(const typename Vec::mask_type first, const typename Vec::mask_type second) -> typename Vec::mask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Binary AND of two vector integral mask types.
Call signature: mask_binary_and<Vec>(const typename Vec::imask_type first, const typename Vec::imask_type second) -> typename Vec::imask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Binary OR of two vector mask types.
Call signature: mask_binary_or<Vec>(const typename Vec::mask_type first, const typename Vec::mask_type second) -> typename Vec::mask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | + | - | + | + | + | - | - |
int8_t | - | + | - | + | + | + | - | - |
uint16_t | - | + | - | + | + | + | - | - |
int16_t | - | + | - | + | + | + | - | - |
uint32_t | - | + | - | + | + | + | - | - |
int32_t | - | + | - | + | + | + | - | - |
uint64_t | - | + | - | + | + | + | - | - |
int64_t | - | + | - | + | + | + | - | - |
float | - | + | - | + | + | + | - | - |
double | - | + | - | + | + | + | - | - |
Brief: Binary OR of two vector integral mask types.
Call signature: mask_binary_or<Vec>(const typename Vec::imask_type first, const typename Vec::imask_type second) -> typename Vec::imask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | + | - | + | + | - | - | - |
int8_t | - | + | - | + | + | - | - | - |
uint16_t | - | + | - | + | + | - | - | - |
int16_t | - | + | - | + | + | - | - | - |
uint32_t | - | + | - | + | + | - | - | - |
int32_t | - | + | - | + | + | - | - | - |
uint64_t | - | + | - | + | + | - | - | - |
int64_t | - | + | - | + | + | - | - | - |
float | - | + | - | + | + | - | - | - |
double | - | + | - | + | + | - | - | - |
Brief: Binary XOR of two vector mask types.
Call signature: mask_binary_xor<Vec>(const typename Vec::mask_type first, const typename Vec::mask_type second) -> typename Vec::mask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | + | - | + | + | + | - | - |
int8_t | - | + | - | + | + | + | - | - |
uint16_t | - | + | - | + | + | + | - | - |
int16_t | - | + | - | + | + | + | - | - |
uint32_t | - | + | - | + | + | + | - | - |
int32_t | - | + | - | + | + | + | - | - |
uint64_t | - | + | - | + | + | + | - | - |
int64_t | - | + | - | + | + | + | - | - |
float | - | + | - | + | + | + | - | - |
double | - | + | - | + | + | + | - | - |
Brief: Binary XOR of two vector integral mask types.
Call signature: mask_binary_xor<Vec>(const typename Vec::imask_type first, const typename Vec::imask_type second) -> typename Vec::imask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | + | - | + | + | - | - | - |
int8_t | - | + | - | + | + | - | - | - |
uint16_t | - | + | - | + | + | - | - | - |
int16_t | - | + | - | + | + | - | - | - |
uint32_t | - | + | - | + | + | - | - | - |
int32_t | - | + | - | + | + | - | - | - |
uint64_t | - | + | - | + | + | - | - | - |
int64_t | - | + | - | + | + | - | - | - |
float | - | + | - | + | + | - | - | - |
double | - | + | - | + | + | - | - | - |
Brief: Counts the number of set bits in an integral mask.
Call signature: mask_population_count<Vec>(const typename Vec::imask_type mask) -> unsigned int
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Returns an integral mask in which every lane is marked true.
Call signature: integral_all_true<Vec>() -> typename Vec::imask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Returns an integral mask in which every lane is marked false.
Call signature: integral_all_false<Vec>() -> typename Vec::imask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Tests whether a specific bit is set to 1.
Call signature: test_mask<Vec>(typename Vec::imask_type mask, int position) -> bool
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Merges two masks. result[0:i-1] = mask_a[0:i-1]; result[i:N-1] = mask_b[0:N-1], where i is the given position and N is the number of effective bits in the mask.
Call signature: insert_mask<Vec>(typename Vec::imask_type mask_a, typename Vec::imask_type mask_b, int position) -> typename Vec::imask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Extracts one mask from another.
Call signature: extract_mask<Vec>(typename Vec::imask_type mask, int position) -> typename Vec::imask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Forms an integral value from the most significant bits of every lane in a vector mask register.
Call signature: to_integral<Vec>(const typename Vec::mask_type vec_mask) -> typename Vec::imask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Forms a vector register from an integral where all bits are set in a lane if the corresponding mask bit is set to 1.
Call signature: to_vector<Vec>(const typename Vec::mask_type mask) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | + | + | + | + | + |
int8_t | - | - | - | + | + | + | + | + |
uint16_t | - | - | - | + | + | + | + | + |
int16_t | - | - | - | + | + | + | + | + |
uint32_t | - | - | - | + | + | + | + | + |
int32_t | - | - | - | + | + | + | + | + |
uint64_t | - | - | - | + | + | + | + | + |
int64_t | - | + | - | + | + | + | + | + |
float | - | - | - | + | + | + | + | + |
double | - | - | - | + | + | + | + | + |
Brief: Stores data from a vector register to (un)aligned memory.
Call signature: storeu<Vec>(const typename Vec::mask_type mask, typename Vec::base_type* memory, const typename Vec::register_type data) -> void
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Stores data from a vector register to (un)aligned memory.
Call signature: storeu<Vec>(const typename Vec::imask_type mask, typename Vec::base_type* memory, const typename Vec::register_type data) -> void
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: If mask[i] is 1, load memory[index[i] * scale], otherwise use source[i]
Call signature: gather<Vec, IndicesType>(const typename Vec::mask_type mask, const typename Vec::register_type source, const void* memory, const typename IndicesType::offset_base_register_type index, std::integral_constant<int, N> scale) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | - | - | - | - |
int8_t | + | + | - | + | - | - | - | - |
uint16_t | + | + | - | + | - | - | - | - |
int16_t | + | + | - | + | - | - | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Transfers data from a vector register to arbitrary locations.
Call signature: scatter<Vec>(const typename Vec::mask_type mask, const typename Vec::register_type data, void* memory, const typename Vec::offset_base_register_type index, std::integral_constant<int, N> scale) -> void
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | - | - | - |
int8_t | + | + | - | + | + | - | - | - |
uint16_t | + | + | - | + | + | - | - | - |
int16_t | + | + | - | + | + | - | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Partially override a Vector with a single value.
Call signature: masked_set1<Vec>(const typename Vec::register_type src, const typename Vec::imask_type mask, const typename Vec::base_type value) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Loads data from memory to a mask.
Call signature: load_mask<Vec>(typename Vec::imask_type const* memory) -> typename Vec::mask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Loads data from memory to a mask.
Call signature: load_imask<Vec>(typename Vec::imask_type const* memory) -> typename Vec::imask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Stores an integral mask to memory.
Call signature: store_imask<Vec>(typename Vec::imask_type * memory, typename Vec::imask_type mask) -> void
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Allocates (unaligned) contiguous memory.
Call signature: allocate<Vec>(std::size_t count_bytes) -> typename Vec::base_type*
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Allocates aligned contiguous memory.
Call signature: allocate_aligned<Vec>(std::size_t count_bytes, std::size_t alignment) -> typename Vec::base_type*
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Deallocates (possibly aligned) contiguous memory.
Call signature: deallocate<Vec>(typename Vec::base_type* ptr) -> void
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Copy memory.
Call signature: memory_cp<Vec>(typename Vec::base_type* dst, typename Vec::base_type const* src, std::size_t count_bytes, int copy_kind) -> void
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Checks whether all elements are unique in a register.
Call signature: conflict<Vec>(const typename Vec::register_type data) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | + | + |
int8_t | + | + | - | + | + | + | + | + |
uint16_t | + | + | - | + | + | + | + | + |
int16_t | + | + | - | + | + | + | + | + |
uint32_t | + | + | - | + | + | + | + | + |
int32_t | + | + | - | + | + | + | + | + |
uint64_t | + | + | - | + | + | + | + | + |
int64_t | + | + | - | + | + | + | + | + |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Checks whether all elements are unique in a register and returns a mask indicating which elements don't have preceding conflicts.
Call signature: conflict_free<Vec>(const typename Vec::imask_type mask, const typename Vec::register_type data) -> typename Vec::imask_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Blends two registers using provided bitmask.
Call signature: blend<Vec>(const typename Vec::mask_type control, const typename Vec::register_type left, const typename Vec::register_type right) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |
Brief: Blends or adds two registers using the provided bitmask.
Call signature: blend_add<Vec>(const typename Vec::mask_type control, const typename Vec::register_type left, const typename Vec::register_type right, const typename Vec::register_type adder) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | - | + | + | - | - |
int8_t | - | - | - | - | + | + | - | - |
uint16_t | - | - | - | - | + | + | - | - |
int16_t | - | - | - | - | + | + | - | - |
uint32_t | - | - | - | - | + | + | - | - |
int32_t | - | - | - | - | + | + | - | - |
uint64_t | - | - | - | - | + | + | - | - |
int64_t | - | - | - | - | + | + | - | - |
float | - | - | - | - | - | - | - | - |
double | - | - | - | - | - | - | - | - |
Brief: Returns a vector register with undefined data inside.
Call signature: undefined<Vec>() -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | + | + | + | - | - |
int8_t | - | - | - | + | + | + | - | - |
uint16_t | - | - | - | + | + | + | - | - |
int16_t | - | - | - | + | + | + | - | - |
uint32_t | - | - | - | + | + | + | - | - |
int32_t | - | - | - | + | + | + | - | - |
uint64_t | - | - | - | + | + | + | - | - |
int64_t | - | - | - | + | + | + | - | - |
float | - | - | - | + | + | + | - | - |
double | - | - | - | + | + | + | - | - |
Brief: Copy elements from a vector where the mask bit is set; otherwise write zero.
Call signature: maskz_mov<Vec>(const typename Vec::mask_type mask, const typename Vec::register_type src) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | - | - | - | + | + | - | - | - |
int8_t | - | - | - | + | + | - | - | - |
uint16_t | - | - | - | + | + | - | - | - |
int16_t | - | - | - | + | + | - | - | - |
uint32_t | - | - | - | + | + | - | - | - |
int32_t | - | - | - | + | + | - | - | - |
uint64_t | - | - | - | + | + | - | - | - |
int64_t | - | - | - | + | + | - | - | - |
float | - | - | - | + | + | - | - | - |
double | - | - | - | + | + | - | - | - |
Brief: Copy elements from a vector where the mask bit is set; otherwise write zero.
Call signature: maskz_mov<Vec>(const typename Vec::imask_type mask, const typename Vec::register_type src) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Merge two vectors while picking the source of each element based on the corresponding mask bit
Call signature: mask_mov<Vec>(const typename Vec::register_type src, const typename Vec::imask_type mask, const typename Vec::register_type data) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | - | - | + | + | + | - | - |
int8_t | + | - | - | + | + | + | - | - |
uint16_t | + | - | - | + | + | + | - | - |
int16_t | + | - | - | + | + | + | - | - |
uint32_t | + | - | - | + | + | + | - | - |
int32_t | + | - | - | + | + | + | - | - |
uint64_t | + | - | - | + | + | + | - | - |
int64_t | + | - | - | + | + | + | - | - |
float | + | - | - | + | + | + | - | - |
double | + | - | - | + | + | + | - | - |
Brief: Compress valid elements in a register (if the corresponding bit in mask is set to 1).
Call signature: compress<Vec>(const typename Vec::imask_type mask, const typename Vec::register_type data) -> typename Vec::register_type
scalar | neon | cuda | sse | avx2 | avx512 | oneAPIfpga | oneAPIfpgaRTL | |
uint8_t | + | + | - | + | + | + | - | - |
int8_t | + | + | - | + | + | + | - | - |
uint16_t | + | + | - | + | + | + | - | - |
int16_t | + | + | - | + | + | + | - | - |
uint32_t | + | + | - | + | + | + | - | - |
int32_t | + | + | - | + | + | + | - | - |
uint64_t | + | + | - | + | + | + | - | - |
int64_t | + | + | - | + | + | + | - | - |
float | + | + | - | + | + | + | - | - |
double | + | + | - | + | + | + | - | - |