aboutsummaryrefslogtreecommitdiff
path: root/readme.md
blob: d6e2ed23e02a3e63263fd2213f97deb2bd2165f7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# Running a benchmark

	% mk pulldeps && mk; cd bench9 && mk; cd ../bench && mk && 6.out

# Results so far

The Plan 9 system already uses SSE instructions for floating-point (FP) operations, but they are all from the `*SD` (scalar double-precision) subset. The tests suffixed with `_sse` below use instructions from the `*PD` (packed double-precision) subset, which perform actual SIMD operations on the data using the 128-bit XMM registers. Those suffixed with `_avx` do the same, but use AVX instructions that operate on the 256-bit wide YMM registers, even when performing 128-bit operations. A trailing `_a` means the test loads its operands from memory that is aligned as each instruction requires. In each table, `op/s` is the number of operations per second derived from the average time (10⁹ / `avg`), so the remaining columns are times in nanoseconds.

## AMD Ryzen 3 1200

```
min
                  op/s      98%      96%      75%      med      avg      min      max  
fmin               361  3044124  2985684  2939465  2680777  2764144  2413529  3075293  
fmin_sse           372  2896115  2779496  2703976  2677661  2687187  2478868  3335431  

2d dot product
                  op/s       98%       96%       75%       med       avg       min       max  
dotvec2            287   3780198   3604559   3499310   3455915   3473738   3333691   3835117  
dotvec2_sse         71  14434822  14355733  14107845  14021745  14020526  13589009  14477322  
dotvec2_sse4        71  14625530  14403252  13963576  13875276  13903761  13520649  14845119  
dotvec2_avx         73  14076825  13895586  13650538  13555359  13579508  13187532  14766759  
dotvec2_sse_a      377   2830075   2802426   2673797   2638162   2648806   2497038   2835175  
dotvec2_sse4_a     376   2821095   2797976   2683037   2647902   2658576   2517738   2853045  
dotvec2_avx_a      274   3914877   3809088   3664309   3625289   3646672   3480450   3929327  

3d dot product
                  op/s       98%       96%       75%       med       avg       min       max  
dotvec3            224   4671051   4632761   4506772   4448877   4460460   4248664   4706200  
dotvec3_sse4        61  16951792  16523715  16307767  16199238  16224074  15851190  17695886  
dotvec3_avx         60  17148110  16819313  16621474  16501135  16533685  16026849  17921144  
dotvec3_sse4_a     375   2871885   2793346   2701906   2641992   2661341   2493918   3019034  

3d cross product
                  op/s       98%       96%       75%       med       avg       min       max  
crossvec3          103  10677972  10357675   9631911   9557251   9621273   9300243  10730622  
crossvec3_sse      174   6159839   5905261   5786872   5727122   5746890   5454484   6776764  

Pt2
                  op/s       98%       96%       75%       med       avg       min       max  
Pt2                256   4403353   4060095   3924147   3877597   3901523   3713348   4504072  
Pt2b               353   3218812   2964274   2842645   2798780   2829463   2542988   4211884  

multiply + add
                  op/s       98%       96%       75%       med       avg       min       max  
madd               374   2862275   2771706   2680487   2635897   2666921   2464438   4505202  
fma_avx            377   2939275   2776556   2670487   2629482   2648270   2453488   3007364  

2d point sum
                  op/s       98%       96%       75%       med       avg       min       max  
addpt2             139   7414438   7376269   7237740   7154955   7158222   6895843   7649357  
addpt2_sse          77  13324941  13153272  12959924  12803215  12856458  12526487  14520431  
addpt2_avx          74  14000655  13705698  13478190  13381560  13392993  13009893  14768289  

3d point sum
                  op/s       98%       96%       75%       med       avg       min       max  
addpt3             110  10508294   9225684   9030185   8937311   9014848   8688758  13812397  
addpt3_avx          74  13747217  13686208  13453110  13313956  13354783  13000193  14005195  

```

## AMD Ryzen 5 1600

```
min
                  op/s      98%      96%      75%      med      avg      min      max  
fmin               381  3078249  3078041  2952450  2423244  2620317  2290029  3078428  
fmin_sse           441  2375831  2373577  2248324  2247143  2264664  2247073  2383068  

2d dot product
                  op/s       98%       96%       75%       med       avg       min       max  
dotvec2            246   4152258   4151970   4149845   4025763   4058215   4024602   4152843  
dotvec2_sse         90  11131322  11129793  11128671  11127301  11083286  11001502  11131660  
dotvec2_sse4        90  11130389  11129466  11128572  11127614  11088122  11001512  11130697  
dotvec2_avx         89  11303035  11273362  11263197  11262283  11230664  11135710  11317291  
dotvec2_sse_a      385   2736729   2727328   2615625   2582562   2596861   2522457   2741514  
dotvec2_sse4_a     413   2566356   2544933   2445620   2427428   2421156   2280628   2619497  
dotvec2_avx_a      270   3794864   3793861   3670891   3668062   3696267   3666861   3796492  
dotvec2_p          396   2637237   2629504   2513364   2503983   2524380   2486908   2642032  

3d dot product
                  op/s       98%       96%       75%       med       avg       min       max  
dotvec3            205   4958347   4957275   4954992   4830662   4865361   4793732   4958407  
dotvec3_sse4        77  13009365  13008769  13007558  13006531  12984386  12879694  13012780  
dotvec3_avx         73  13546345  13545521  13544051  13543108  13514745  13416326  13549015  
dotvec3_sse4_a     339   3103713   3091482   2987682   2908482   2943368   2884299   3104656  

3d cross product
                  op/s       98%       96%       75%       med       avg       min       max  
crossvec3          104  10100627  10009077   9554613   9527666   9575022   9392093  10430234  
crossvec3_sse      127   7956531   7914250   7908721   7793683   7853727   7780996   8882017  

Pt2
                  op/s       98%       96%       75%       med       avg       min       max  
Pt2                249   4172470   4167526   4051644   4015052   4015541   3845771   4177711  
Pt2b               277   3705885   3704515   3583610   3578537   3607662   3577415   3707930  

multiply + add
                  op/s       98%       96%       75%       med       avg       min       max  
madd               364   2942989   2871234   2805496   2736987   2741434   2592991   2944081  
fma_avx            370   2841929   2821290   2717142   2688333   2697838   2638349   2864404  

2d point sum
                  op/s       98%       96%       75%       med       avg       min       max  
addpt2             130   7934552   7872437   7716697   7677146   7666291   7523143   7938354  
addpt2_sse          95  10593092  10591841  10534997  10451245  10433652  10196594  10593618  
addpt2_avx          95  10602751  10592873  10590342  10464975  10475812  10260238  10647474  

3d point sum
                  op/s       98%       96%       75%       med       avg       min       max  
addpt3             107   9698450   9697567   9572244   9149533   9309953   8982729   9698768  
addpt3_avx          92  10863383  10861795  10860365  10859209  10816886  10733246  10912534  

```

## AMD Ryzen 3 5400U

> provided by **llamaa**

```
min
                  op/s      98%      96%      75%      med      avg      min      max  
fmin               494  2382435  2038636  2032938  2002998  2023068  1995669  2719499  
fmin_sse           500  2006126  2003791  2001287  1999331  1999632  1995669  2054751  

2d dot product
                  op/s      98%      96%      75%      med      avg      min      max  
dotvec2            484  2258333  2251627  2096242  2011714  2063398  1995689  2258482  
dotvec2_sse        108  9269760  9267864  9265149  9261662  9253332  9230724  9271536  
dotvec2_sse4       108  9261647  9256628  9250741  9245292  9245148  9231083  9262984  
dotvec2_avx        108  9262076  9255101  9247438  9241221  9242259  9231502  9265459  
dotvec2_sse_a      499  2007793  2003033  2000349  1999331  2001089  1995669  2250011  
dotvec2_sse4_a     516  2003063  2000977  1999361  1992919  1937670  1746216  2003083  
dotvec2_avx_a      500  2004989  2003312  2000758  1999371  1999161  1995669  2006156  

3d dot product
                  op/s       98%       96%       75%       med       avg       min       max  
dotvec3            384   2609136   2605464   2601782   2599278   2598953   2591854   2613926  
dotvec3_sse4        99  10018456  10017478  10011281  10007719  10001388   9979919  10018635  
dotvec3_avx         95  10505566  10503021  10495956  10490224  10491056  10479233  10506145  
dotvec3_sse4_a     500   2005158   2003492   1999850   1999276   1998858   1995669   2018011  

3d cross product
                  op/s       98%       96%       75%       med       avg       min       max  
crossvec3          185   5412204   5406706   5400679   5395620   5395606   5377330   5417363  
crossvec3_sse      235   4255667   4254091   4248154   4245394   4246084   4240829   4255677  

Pt2
                  op/s       98%       96%       75%       med       avg       min       max  
Pt2                444   2294315   2271873   2252545   2248803   2252010   2245131   2310360  
Pt2b               489   2079758   2073401   2061986   2058149   2043489   1995669   2081534  

multiply + add
                  op/s       98%       96%       75%       med       avg       min       max  
madd               496   2016813   2014618   2012503   2010926   2014984   1999371   2504193  
fma_avx            494   2040792   2037179   2033457   2028159   2021563   1995669   2041171  

2d point sum
                  op/s       98%       96%       75%       med       avg       min       max  
addpt2             231   4341951   4336353   4329379   4325162   4325468   4315289   4367826  
addpt2_sse         114   8758561   8745859   8719007   8701080   8699660   8633920   8759040  
addpt2_avx         114   8748343   8740161   8718897   8705890   8705307   8669034   8748852  

3d point sum
                  op/s       98%       96%       75%       med       avg       min       max  
addpt3             190   5260970   5259025   5254235   5252459   5251446   5238679   5262717  
addpt3_avx         108   9261707   9256249   9229686   9195514   9200410   9131717   9280945  

```

## Intel Core i5-10300H

> provided by **uramekus**

```
min
                  op/s      98%      96%      75%      med      avg      min      max  
fmin               283  4568225  4565163  4563893  3543909  3527765  2488743  4571387  
fmin_sse           414  2424605  2420142  2417015  2414723  2415306  2409636  2426538  

2d dot product
                  op/s       98%       96%       75%       med       avg       min       max  
dotvec2            267   3744010   3742209   3737007   3735923   3736518   3731135   3767934  
dotvec2_sse         73  13698430  13695892  13693351  13692214  13692714  13690210  13698510  
dotvec2_sse4        75  13283629  13280936  13278169  13277493  13277783  13275146  13286098  
dotvec2_avx         75  13283761  13279452  13278088  13277284  13277534  13275028  13287380  
dotvec2_sse_a      373   2881463   2669431   2667139   2665984   2679081   2627220   3743409  
dotvec2_sse4_a     393   2569252   2568403   2563474   2531893   2540101   2488852   3467011  
dotvec2_avx_a      342   2910797   2906958   2905773   2905043   2915722   2903536   3964388  

3d dot product
                  op/s       98%       96%       75%       med       avg       min       max  
dotvec3            206   5304768   5210421   4784381   4760076   4833173   4756544   5677615  
dotvec3_sse4        65  15355526  15353759  15352531  15351795  15351913  15349422  15357083  
dotvec3_avx         65  15354566  15354048  15352757  15351779  15351973  15349416  15357971  
dotvec3_sse4_a     337   2959474   2957670   2956259   2955527   2963219   2926466   3756403  

3d cross product
                  op/s       98%       96%       75%       med       avg       min       max  
crossvec3           91  10986157  10983141  10971494  10968571  10969827  10962082  10989235  
crossvec3_sse      143   7001828   7000719   6998587   6996428   6974455   6863747   7199104  

Pt2
                  op/s       98%       96%       75%       med       avg       min       max  
Pt2                227   4415172   4408012   4398711   4397851   4399747   4381089   4559482  
Pt2b               277   3608261   3607181   3604815   3603066   3603403   3599702   3609269  

multiply + add
                  op/s       98%       96%       75%       med       avg       min       max  
madd               347   2878112   2877760   2876937   2876112   2876230   2874718   2883503  
fma_avx            341   2938089   2937966   2936871   2935967   2926577   2802398   2938710  

2d point sum
                  op/s       98%       96%       75%       med       avg       min       max  
addpt2             120   8314705   8311729   8310052   8308082   8308210   8285693   8353482  
addpt2_sse          89  11204824  11203825  11203201  11202634  11202605  11201048  11205531  
addpt2_avx          73  13696975  13696348  13695183  13694241  13694325  13691704  13697637  

3d point sum
                  op/s       98%       96%       75%       med       avg       min       max  
addpt3              94  10560908  10537671  10530696  10525254  10528137  10520659  10579957  
addpt3_avx          77  12873349  12868526  12867472  12866408  12868654  12863977  13081995
```