diff options
author | rodri <rgl@antares-labs.eu> | 2023-11-24 16:48:14 +0000 |
---|---|---|
committer | rodri <rgl@antares-labs.eu> | 2023-11-24 16:48:14 +0000 |
commit | 7cf4634e668730749aa8b7fa9ff16cf4234958fa (patch) | |
tree | 4489eb93c162ab2b24b78b5261d6f404c398602c | |
parent | d850c3b7f47e58556c160f9d03ea20aa52452020 (diff) | |
download | amd64-simd-7cf4634e668730749aa8b7fa9ff16cf4234958fa.tar.gz amd64-simd-7cf4634e668730749aa8b7fa9ff16cf4234958fa.tar.bz2 amd64-simd-7cf4634e668730749aa8b7fa9ff16cf4234958fa.zip |
clean and organize things up. implement VZEROUPPER.
-rw-r--r-- | avx.h | 40 | ||||
-rw-r--r-- | dppd.s | 2 | ||||
-rw-r--r-- | mkfile | 2 | ||||
-rw-r--r-- | regs.h | 19 | ||||
-rw-r--r-- | sse.h | 53 |
5 files changed, 63 insertions, 53 deletions
@@ -0,0 +1,40 @@ +#define VEX_m_0F (1) +#define VEX_m_0F38 (2) +#define VEX_m_0F3A (3) +#define VEX_L_128 (0) +#define VEX_L_256 (1) +#define VEX_p_NO (0) +#define VEX_p_66 (1) +#define VEX_p_F3 (2) +#define VEX_p_F2 (3) + +#define VEX3(r, x, b, m, w, v, l, p) BYTE $0xC4; \ + BYTE $(((~r)<<7)|((~x)<<6)|((~b)<<5)|(m)); \ + BYTE $(((w)<<7)|((~v)<<3)|((l)<<2)|(p)) +#define VEX2(r, b, l, p) BYTE $0xC5; \ + BYTE $(((~r)<<7)|((~v)<<3)|((l)<<2)|(p)) +#define VOP(o, m, ro, rm) BYTE $(o); \ + BYTE $(((m)<<6)|((ro)<<3)|(rm)) +#define VOPi(o, m, ro, rm, i) VOP((o), (m), (ro), (rm)); \ + BYTE $(i) + + +/* VZEROUPPER */ +#define VZEROUPPER VEX3(0,0,0,VEX_m_0F,0,0,VEX_L_128,VEX_p_NO); BYTE $0x77 + +/* VMOVAPD */ +#define VMOVUPD_128mr(off, s, d) VEX3(0,0,0,VEX_m_0F,0,0,VEX_L_128,VEX_p_66); \ + VOPi(0x10, 0x1, (d), (s), (off)) +#define VMOVAPD_128rr(s, d) VEX3(0,0,0,VEX_m_0F,0,0,VEX_L_128,VEX_p_66); \ + VOP(0x28, 0x3, (d), (s)) +/* VDPPD */ +#define VDPPD(s0, s1, d) VEX3(0,0,0,VEX_m_0F3A,0,(s0),VEX_L_128,VEX_p_66); \ + VOPi(0x41, 0x3, (d), (s1), 0x31) + +/* VFMADD231SD (128 bit) */ +#define VFMADD231SD(s0, s1, d) VEX3(0,0,0,VEX_m_0F38,1,(s0),VEX_L_128,VEX_p_66); \ + VOP(0xB9, 0x3, (d), (s1)) + +/* VFMADD231PD (128 bit) */ +#define VFMADD231PD(s0, s1, d) VEX3(0,0,0,VEX_m_0F38,1,(s0),VEX_L_128,VEX_p_66); \ + VOP(0xB8, 0x3, (d), (s1)) @@ -1,4 +1,6 @@ +#include "regs.h" #include "sse.h" +#include "avx.h" DATA one(SB)/8,$1.0 GLOBL one(SB), $8 @@ -9,6 +9,8 @@ OFILES=\ nanosec.$O\ HFILES=\ + regs.h\ sse.h\ + avx.h\ </sys/src/cmd/mkone @@ -0,0 +1,19 @@ +/* GPRs */ +#define rAX 0 +#define rCX 1 +#define rDX 2 +#define rBX 3 +#define rSP 4 +#define rBP 5 +#define rSI 6 +#define rDI 7 + +/* SSE and AVX (represent [XYZ]MM) */ +#define rX0 0 /* X8 */ +#define rX1 1 /* X9 */ +#define rX2 2 /* X10 */ +#define rX3 3 /* X11 */ +#define rX4 4 /* X12 */ +#define rX5 5 /* X13 */ +#define rX6 6 /* X14 */ +#define rX7 7 /* X15 */ @@ -1,30 +1,3 @@ -#define rAX 0 -#define rCX 1 -#define rDX 2 -#define rBX 3 -#define rSP 4 -#define rBP 5 -#define rSI 6 -#define rDI 7 - -#define rX0 0 -#define rX1 1 -#define rX2 2 -#define rX3 3 -#define rX4 4 -#define rX5 5 -#define rX6 6 - -#define VEX_m_0F (1) -#define VEX_m_0F38 (2) -#define VEX_m_0F3A (3) -#define VEX_L_128 (0) -#define VEX_L_256 (1) -#define VEX_p_NO (0) -#define VEX_p_66 (1) -#define VEX_p_F3 (2) -#define VEX_p_F2 (3) - #define OP(o, m, ro, rm) WORD $0x0F66; BYTE $(o); \ BYTE $(((m)<<6)|((ro)<<3)|(rm)) #define OPi(o, m, ro, rm, i) OP((o), (m), (ro), (rm)); \ @@ -34,15 +7,6 @@ #define OP4i(o, m, ro, rm, i) OP4((o), (m), (ro), (rm)); \ BYTE $(i) -#define VEX3(r, x, b, m, w, v, l, p) BYTE $0xC4; \ - BYTE $(((~r)<<7)|((~x)<<6)|((~b)<<5)|(m)); \ - BYTE $(((w)<<7)|((~v)<<3)|((l)<<2)|(p)) -#define VEX2(r, b, l, p) BYTE $0xC5; \ - BYTE $(((~r)<<7)|((~v)<<3)|((l)<<2)|(p)) -#define VOP(o, m, ro, rm) BYTE $(o); \ - BYTE $(((m)<<6)|((ro)<<3)|(rm)) -#define VOPi(o, m, ro, rm, i) VOP((o), (m), (ro), (rm)); \ - BYTE $(i) /* MOVLPD */ //opcode = 660F12 @@ -66,20 +30,3 @@ //modrm = 11 000 001 [X1 → X0] //imm8 = 0011 0001 #define DPPD(s, d) OP4i(0x413A, 0x3, (d), (s), 0x31) - -/* VMOVAPD */ -#define VMOVUPD_128mr(off, s, d) VEX3(0,0,0,VEX_m_0F,0,0,VEX_L_128,VEX_p_66); \ - VOPi(0x10, 0x1, (d), (s), (off)) -#define VMOVAPD_128rr(s, d) VEX3(0,0,0,VEX_m_0F,0,0,VEX_L_128,VEX_p_66); \ - VOP(0x28, 0x3, (d), (s)) -/* VDPPD */ -#define VDPPD(s0, s1, d) VEX3(0,0,0,VEX_m_0F3A,0,(s0),VEX_L_128,VEX_p_66); \ - VOPi(0x41, 0x3, (d), (s1), 0x31) - -/* VFMADD231SD (128 bit) */ -#define VFMADD231SD(s0, s1, d) VEX3(0,0,0,VEX_m_0F38,1,(s0),VEX_L_128,VEX_p_66); \ - VOP(0xB9, 0x3, (d), (s1)) - -/* VFMADD231PD (128 bit) */ -#define VFMADD231PD(s0, s1, d) VEX3(0,0,0,VEX_m_0F38,1,(s0),VEX_L_128,VEX_p_66); \ - VOP(0xB8, 0x3, (d), (s1)) |