77 lines
3.5 KiB
C
77 lines
3.5 KiB
C
#include "vec.h"
|
|
|
|
#define IMPLEMENT_FUNCTIONS(MACRO) \
|
|
MACRO(u8) MACRO(u16) MACRO(u32) MACRO(u64) \
|
|
MACRO(i8) MACRO(i16) MACRO(i32) MACRO(i64) \
|
|
MACRO(f32) MACRO(f64)
|
|
|
|
/**
|
|
* vectorized addition. each element of vector `b` will be added to its
|
|
* counterpart element in vector `a` (i.e. `a[i] += b[i]`). the result will be
|
|
* stored in `a`.
|
|
*
|
|
* @param n: number of elements to add
|
|
* @param a: first vector, result destination
|
|
* @param b: second vector
|
|
*
|
|
* @usage either through `vec_add_<type> (e.g. `vec_add_u32`), or through the
|
|
* generic macro `vec_add`
|
|
*/
|
|
#define IMPL_VEC_ADD(TYPE) \
|
|
void \
|
|
vec_add_##TYPE (usize n, TYPE a[restrict n], const TYPE b[restrict n]) \
|
|
{ \
|
|
usize i; \
|
|
for (i = 0; i < n; i++) \
|
|
a[i] += b[i]; \
|
|
}
|
|
|
|
/**
|
|
* vectorized subtraction. each element of vector `b` will be subtracted from
|
|
* its counterpart element in vector `a` (i.e. `a[i] -= b[i]`). the result will
|
|
* be stored in `a`.
|
|
*
|
|
* @param n: number of elements to subtract
|
|
* @param a: first vector, result destination
|
|
* @param b: second vector
|
|
*
|
|
* @usage either through `vec_sub_<type> (e.g. `vec_sub_u32`), or through the
|
|
* generic macro `vec_sub`
|
|
*/
|
|
#define IMPL_VEC_SUB(TYPE) \
|
|
void \
|
|
vec_sub_##TYPE (usize n, TYPE a[restrict n], const TYPE b[restrict n]) \
|
|
{ \
|
|
usize i; \
|
|
for (i = 0; i < n; i++) \
|
|
a[i] -= b[i]; \
|
|
}
|
|
|
|
/**
|
|
* vectorized fused multiply-add. each element of vector `a` will be multiplied
|
|
* by its counterpart element in vector `b`, then the corresponding element in
|
|
* vector `c` will be added to a (i.e. `a[i] = a[i] * b[i] + c[i]`). the result
|
|
* will be stored in `a`.
|
|
*
|
|
* @param n: number of elements to subtract
|
|
* @param a: first vector, result destination
|
|
* @param b: second vector (for multiplication)
|
|
* @param c: third vector (final summand)
|
|
*
|
|
* @usage either through `vec_fma_<type> (e.g. `vec_fma_u32`), or through the
|
|
* generic macro `vec_fma`
|
|
*/
|
|
#define IMPL_VEC_FMA(TYPE) \
|
|
void \
|
|
vec_fma_##TYPE (usize n, TYPE a[restrict n], const TYPE b[restrict n], \
|
|
const TYPE c[restrict n]) \
|
|
{ \
|
|
usize i; \
|
|
for (i = 0; i < n; i++) \
|
|
a[i] = (TYPE) (a[i] * b[i]) + c[i]; \
|
|
}
|
|
|
|
IMPLEMENT_FUNCTIONS(IMPL_VEC_ADD)
|
|
IMPLEMENT_FUNCTIONS(IMPL_VEC_SUB)
|
|
IMPLEMENT_FUNCTIONS(IMPL_VEC_FMA)
|