From 7cf4634e668730749aa8b7fa9ff16cf4234958fa Mon Sep 17 00:00:00 2001
From: rodri
Date: Fri, 24 Nov 2023 16:48:14 +0000
Subject: clean and organize things up. implement VZEROUPPER.

---
 avx.h  | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 dppd.s |  2 ++
 mkfile |  2 ++
 regs.h | 19 +++++++++++++++++++
 sse.h  | 53 -----------------------------------------------------
 5 files changed, 80 insertions(+), 53 deletions(-)
 create mode 100644 avx.h
 create mode 100644 regs.h

diff --git a/avx.h b/avx.h
new file mode 100644
index 0000000..ef0b2c3
--- /dev/null
+++ b/avx.h
@@ -0,0 +1,57 @@
+/*
+ * VEX prefix and AVX instruction encodings, emitted as raw BYTE directives.
+ */
+
+/* VEX.mmmmm: opcode map */
+#define VEX_m_0F (1)
+#define VEX_m_0F38 (2)
+#define VEX_m_0F3A (3)
+/* VEX.L: vector length */
+#define VEX_L_128 (0)
+#define VEX_L_256 (1)
+/* VEX.pp: implied SIMD prefix */
+#define VEX_p_NO (0)
+#define VEX_p_66 (1)
+#define VEX_p_F3 (2)
+#define VEX_p_F2 (3)
+
+/*
+ * 3-byte VEX prefix. r, x, b and v are given uninverted; the prefix
+ * holds their complement. Each complemented field is masked to its
+ * width, since ~ also sets every bit above the field: unmasked,
+ * (~v)<<3 would turn the W bit on whatever w is.
+ */
+#define VEX3(r, x, b, m, w, v, l, p) BYTE $0xC4; \
+	BYTE $((((~(r))&1)<<7)|(((~(x))&1)<<6)|(((~(b))&1)<<5)|(m)); \
+	BYTE $(((w)<<7)|(((~(v))&0xF)<<3)|((l)<<2)|(p))
+/* 2-byte VEX prefix: same r, v, l, p fields, no x, b, m or w. */
+#define VEX2(r, v, l, p) BYTE $0xC5; \
+	BYTE $((((~(r))&1)<<7)|(((~(v))&0xF)<<3)|((l)<<2)|(p))
+/* opcode byte o, then ModRM with mod=m, reg=ro, rm=rm */
+#define VOP(o, m, ro, rm) BYTE $(o); \
+	BYTE $(((m)<<6)|((ro)<<3)|(rm))
+/* VOP followed by one extra byte i (immediate or 8-bit displacement) */
+#define VOPi(o, m, ro, rm, i) VOP((o), (m), (ro), (rm)); \
+	BYTE $(i)
+
+
+/* VZEROUPPER */
+#define VZEROUPPER VEX3(0,0,0,VEX_m_0F,0,0,VEX_L_128,VEX_p_NO); BYTE $0x77
+
+/* VMOVUPD (128 bit): memory at off(s) to register d; off is a single byte */
+#define VMOVUPD_128mr(off, s, d) VEX3(0,0,0,VEX_m_0F,0,0,VEX_L_128,VEX_p_66); \
+	VOPi(0x10, 0x1, (d), (s), (off))
+/* VMOVAPD (128 bit): register s to register d */
+#define VMOVAPD_128rr(s, d) VEX3(0,0,0,VEX_m_0F,0,0,VEX_L_128,VEX_p_66); \
+	VOP(0x28, 0x3, (d), (s))
+/* VDPPD (128 bit); the immediate is fixed at 0x31 */
+#define VDPPD(s0, s1, d) VEX3(0,0,0,VEX_m_0F3A,0,(s0),VEX_L_128,VEX_p_66); \
+	VOPi(0x41, 0x3, (d), (s1), 0x31)
+
+/* VFMADD231SD (128 bit) */
+#define VFMADD231SD(s0, s1, d) VEX3(0,0,0,VEX_m_0F38,1,(s0),VEX_L_128,VEX_p_66); \
+	VOP(0xB9, 0x3, (d), (s1))
+
+/* VFMADD231PD (128 bit) */
+#define VFMADD231PD(s0, s1, d) VEX3(0,0,0,VEX_m_0F38,1,(s0),VEX_L_128,VEX_p_66); \
+	VOP(0xB8, 0x3, (d), (s1))
diff --git a/dppd.s b/dppd.s
index 7239d9f..e738dde 100644
--- a/dppd.s
+++ b/dppd.s
@@ -1,4 +1,6 
@@ +#include "regs.h" #include "sse.h" +#include "avx.h" DATA one(SB)/8,$1.0 GLOBL one(SB), $8 diff --git a/mkfile b/mkfile index 3273545..7e94b16 100644 --- a/mkfile +++ b/mkfile @@ -9,6 +9,8 @@ OFILES=\ nanosec.$O\ HFILES=\ + regs.h\ sse.h\ + avx.h\