#ifndef GDS_MATH_ARCH_HH
#define GDS_MATH_ARCH_HH

/*  Test the compiler/platform architecture to see if it supports 
 *  vectorized calculations using the SSE2-4 and/or avx instruction
 *  sets.
 */

//==================================  Figure out the platform
// Picks exactly one branch of the chain below and defines the GDS_USE_*
// macros listed in that branch. Only GNU C++ (__GNUG__) targeting x86 or
// x86-64 is probed; any other compiler/target gets GDS_USE_BASE and a
// compile-time warning.
#if defined(__GNUG__) && (defined(__i386__) || defined(__x86_64__))
  // Tiers are tested from most to least capable and the first match wins,
  // so macros of lower tiers are NOT defined unless listed explicitly
  // (e.g. GDS_USE_SSE2 is undefined when the AVX branch is taken).
  #if defined(__AVX__)
    #define GDS_USE_AVX 1
    #define GDS_USE_SSE4 1
    #define GDS_USE_SSE4_2 1
  #elif defined(__SSE4_2__)
    #define GDS_USE_SSE4_2 1
    #define GDS_USE_SSE4 1
  #elif defined(__SSE4_1__)
    #define GDS_USE_SSE4 1
  // NOTE(review): this tests __SSSE3__ (supplemental SSE3) but defines
  // GDS_USE_SSE3; plain __SSE3__ is never tested, so an SSE3-only build
  // falls through to the SSE2 branch -- confirm this is intended.
  #elif defined(__SSSE3__)
    #define GDS_USE_SSE3 1
  #elif defined(__SSE2__)
    #define GDS_USE_SSE2 1
  #else
    // x86 GNU build with none of the probed instruction sets enabled.
    #warning "SSE2 not enabled"
    #define GDS_USE_BASE 1
  #endif
#else 
  // Non-GNU compiler or non-x86 target: no vector instruction set selected.
  #warning "Not a gnu compilation"
  #define GDS_USE_BASE 1
#endif // end test of i386/gcc

//==================================  Useful data types
// 128-bit vector types declared with the GCC vector_size attribute.
// NOTE(review): these declarations sit outside the platform #if above, so
// they are compiled even when GDS_USE_BASE was selected.
//
// Size in bytes of a 128-bit vector.
#define SSE_VEC_LENGTH 16
// 16-byte vector of double.
typedef double vecd128 __attribute__ ((vector_size(SSE_VEC_LENGTH)));
// 16-byte vector of float.
typedef float  vecf128 __attribute__ ((vector_size(SSE_VEC_LENGTH)));
// 16-byte vector of int.
typedef int    veci128 __attribute__ ((vector_size(SSE_VEC_LENGTH)));
/*---- Mask to test pointer alignment  */
// sizeof(vecd128) - 1, i.e. 15: an address ANDed with this mask is zero
// exactly when the address is a multiple of the vector size.
#define VEC128_ADMASK (sizeof(vecd128)-1)
// Overlays a vecd128 with an array of two doubles so the same storage can
// be accessed either as a whole vector (v) or per element (d).
union vd2map {
  vecd128 v;
  double d[2];
};

// 256-bit vector types declared with the GCC vector_size attribute.
// Size in bytes of a 256-bit vector.
#define AVX_VEC_LENGTH 32
// 32-byte vector of double.
typedef double vecd256 __attribute__ ((vector_size(AVX_VEC_LENGTH)));
// 32-byte vector of float.
typedef float  vecf256 __attribute__ ((vector_size(AVX_VEC_LENGTH)));

/*---- Mask to test pointer alignment  */
// sizeof(vecd256) - 1, i.e. 31: an address ANDed with this mask is zero
// exactly when the address is a multiple of the vector size.
#define VEC256_ADMASK (sizeof(vecd256)-1)
// Overlays a vecd256 with an array of four doubles so the same storage can
// be accessed either as a whole vector (v) or per element (d).
union vd4map {
  vecd256 v;
  double d[4];
};

/* ------------------  Register load helpers  ----------------------*/


// Declares a vecd256 variable named r with all four elements set to x.
// x appears four times in the expansion, so pass an expression without
// side effects.
#define AVX_REG_LOAD_DUP(r,x) vecd256 r = {x, x, x, x}
// Disabled plain-initializer variant of the macro defined on the next line:
// #define SSE_REG_LOAD_DUP(r,x) vecd128 r = {x, x}
// Declares a vecd128 variable named r and fills it with a movddup
// instruction, which copies one double into both halves of the register.
//  - x is passed with the "m" (memory) constraint, so it has to be an
//    object the compiler can address, not an arbitrary expression.
//  - The expansion is two statements (a declaration and an asm), so it
//    cannot serve as the sole body of an unbraced if/for/while.
//  - NOTE(review): movddup belongs to SSE3; the macro is defined
//    regardless of which GDS_USE_* tier was selected -- confirm callers
//    only use it where SSE3 is available.
#define SSE_REG_LOAD_DUP(r,x) vecd128 r; asm("movddup %1, %0" : "=x" (r) : "m" (x): )

//======================================  Alignment tests
//
// ---  Test alignment of single operand on 8-byte boundary
//      Returns true when the three low-order bits of the address in a
//      are all zero.
inline bool
align8_abs(const void* a) {
   const long addr = reinterpret_cast<long>(a);
   return !(addr & 7);
}
//
//       Test absolute alignment relative to a 16-byte boundary for:
//  ---  One argument
//       Returns true when the address in a is a multiple of 16.
inline bool
align16_abs(const void* a) {
   const long low_bits = reinterpret_cast<long>(a) & 15;
   return low_bits == 0;
}
//  ---  Two operands
//       Returns true only when both a and b are multiples of 16.
inline bool
align16_abs(const void* a, const void* b) {
   const long addr_a = reinterpret_cast<long>(a);
   const long addr_b = reinterpret_cast<long>(b);
   // OR-ing merges the low bits of both addresses, so a single mask test
   // covers the pair.
   return !((addr_a | addr_b) & 15);
}
//
//      Test relative alignment to 16-byte boundaries for:
//  ---  Two operands
//       Returns true when a and b share the same four low-order address
//       bits, i.e. the same offset from a 16-byte boundary.
inline bool
align16_rel(const void* a, const void* b) {
   // XOR leaves a 1 in every bit position where the two addresses differ.
   const long differing = reinterpret_cast<long>(a) ^ reinterpret_cast<long>(b);
   return (differing & 15) == 0;
}
//  ---  Three operands
//       Returns true when a, b and c all share the same four low-order
//       address bits.
inline bool
align16_rel(const void* a, const void* b, const void* c) {
   const long base = reinterpret_cast<long>(a);
   // Compare b and c against a; any differing bit survives the OR.
   const long differing = (base ^ reinterpret_cast<long>(b))
                        | (base ^ reinterpret_cast<long>(c));
   return (differing & 15) == 0;
}
//
//      Test absolute alignment on a 32-byte boundary for:
// ---  One argument
//      Returns true when the address in a is a multiple of 32.
inline bool
align32_abs(const void* a) {
   const long low_bits = reinterpret_cast<long>(a) & 31;
   return low_bits == 0;
}
// ---  Two operands
//      Returns true only when both a and b are multiples of 32.
inline bool
align32_abs(const void* a, const void* b) {
   const long addr_a = reinterpret_cast<long>(a);
   const long addr_b = reinterpret_cast<long>(b);
   // OR-ing merges the low bits of both addresses, so a single mask test
   // covers the pair.
   return !((addr_a | addr_b) & 31);
}
//
//      Test relative alignment to a 32-byte boundary for
//  --- Two operands
//      Returns true when a and b share the same five low-order address
//      bits, i.e. the same offset from a 32-byte boundary.
inline bool
align32_rel(const void* a, const void* b) {
   // XOR leaves a 1 in every bit position where the two addresses differ.
   const long differing = reinterpret_cast<long>(a) ^ reinterpret_cast<long>(b);
   return (differing & 31) == 0;
}


#endif // !defined(GDS_MATH_ARCH_HH)
