diff options
author | rodri <rgl@antares-labs.eu> | 2023-12-02 22:12:36 +0000 |
---|---|---|
committer | rodri <rgl@antares-labs.eu> | 2023-12-02 22:12:36 +0000 |
commit | 499e44ebfde8c649d48d4c05093a8e1819be5349 (patch) | |
tree | 54a6e67c7473daa215db5bf93126723fef3fa6ff /readme.md | |
parent | cdfd06439c4c4354e9fdc4f4124149a8d7abdfe5 (diff) | |
download | amd64-simd-499e44ebfde8c649d48d4c05093a8e1819be5349.tar.gz amd64-simd-499e44ebfde8c649d48d4c05093a8e1819be5349.tar.bz2 amd64-simd-499e44ebfde8c649d48d4c05093a8e1819be5349.zip |
Diffstat (limited to 'readme.md')
-rw-r--r-- | readme.md | 223 |
1 files changed, 223 insertions, 0 deletions
diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..d6e2ed2 --- /dev/null +++ b/readme.md @@ -0,0 +1,223 @@ +# Running a benchmark + + % mk pulldeps && mk; cd bench9 && mk; cd ../bench && mk && 6.out + +# Results so far + +Plan 9 already uses SSE instructions for floating-point operations, but only those from the `*SD` (scalar double-precision) subset. The tests suffixed with `_sse` below use instructions from the `*PD` (packed double-precision) subset, which perform actual SIMD operations on the data using the 128-bit XMM registers. Those suffixed with `_avx` do the same but using AVX instructions, which operate on the 256-bit-wide YMM registers even when performing 128-bit operations. A trailing `_a` means the test loads its operands from aligned memory, conforming to the alignment requirements of each instruction. + +## AMD Ryzen 3 1200 + +``` +min + op/s 98% 96% 75% med avg min max +fmin 361 3044124 2985684 2939465 2680777 2764144 2413529 3075293 +fmin_sse 372 2896115 2779496 2703976 2677661 2687187 2478868 3335431 + +2d dot product + op/s 98% 96% 75% med avg min max +dotvec2 287 3780198 3604559 3499310 3455915 3473738 3333691 3835117 +dotvec2_sse 71 14434822 14355733 14107845 14021745 14020526 13589009 14477322 +dotvec2_sse4 71 14625530 14403252 13963576 13875276 13903761 13520649 14845119 +dotvec2_avx 73 14076825 13895586 13650538 13555359 13579508 13187532 14766759 +dotvec2_sse_a 377 2830075 2802426 2673797 2638162 2648806 2497038 2835175 +dotvec2_sse4_a 376 2821095 2797976 2683037 2647902 2658576 2517738 2853045 +dotvec2_avx_a 274 3914877 3809088 3664309 3625289 3646672 3480450 3929327 + +3d dot product + op/s 98% 96% 75% med avg min max +dotvec3 224 4671051 4632761 4506772 4448877 4460460 4248664 4706200 +dotvec3_sse4 61 16951792 16523715 16307767 16199238 16224074 15851190 17695886 +dotvec3_avx 60 17148110 16819313 16621474 16501135 16533685 16026849 17921144 +dotvec3_sse4_a 375 2871885 2793346 2701906 2641992 2661341 2493918 3019034 
+ +3d cross product + op/s 98% 96% 75% med avg min max +crossvec3 103 10677972 10357675 9631911 9557251 9621273 9300243 10730622 +crossvec3_sse 174 6159839 5905261 5786872 5727122 5746890 5454484 6776764 + +Pt2 + op/s 98% 96% 75% med avg min max +Pt2 256 4403353 4060095 3924147 3877597 3901523 3713348 4504072 +Pt2b 353 3218812 2964274 2842645 2798780 2829463 2542988 4211884 + +multiply + add + op/s 98% 96% 75% med avg min max +madd 374 2862275 2771706 2680487 2635897 2666921 2464438 4505202 +fma_avx 377 2939275 2776556 2670487 2629482 2648270 2453488 3007364 + +2d point sum + op/s 98% 96% 75% med avg min max +addpt2 139 7414438 7376269 7237740 7154955 7158222 6895843 7649357 +addpt2_sse 77 13324941 13153272 12959924 12803215 12856458 12526487 14520431 +addpt2_avx 74 14000655 13705698 13478190 13381560 13392993 13009893 14768289 + +3d point sum + op/s 98% 96% 75% med avg min max +addpt3 110 10508294 9225684 9030185 8937311 9014848 8688758 13812397 +addpt3_avx 74 13747217 13686208 13453110 13313956 13354783 13000193 14005195 + +``` + +## AMD Ryzen 5 1600 + +``` +min + op/s 98% 96% 75% med avg min max +fmin 381 3078249 3078041 2952450 2423244 2620317 2290029 3078428 +fmin_sse 441 2375831 2373577 2248324 2247143 2264664 2247073 2383068 + +2d dot product + op/s 98% 96% 75% med avg min max +dotvec2 246 4152258 4151970 4149845 4025763 4058215 4024602 4152843 +dotvec2_sse 90 11131322 11129793 11128671 11127301 11083286 11001502 11131660 +dotvec2_sse4 90 11130389 11129466 11128572 11127614 11088122 11001512 11130697 +dotvec2_avx 89 11303035 11273362 11263197 11262283 11230664 11135710 11317291 +dotvec2_sse_a 385 2736729 2727328 2615625 2582562 2596861 2522457 2741514 +dotvec2_sse4_a 413 2566356 2544933 2445620 2427428 2421156 2280628 2619497 +dotvec2_avx_a 270 3794864 3793861 3670891 3668062 3696267 3666861 3796492 +dotvec2_p 396 2637237 2629504 2513364 2503983 2524380 2486908 2642032 + +3d dot product + op/s 98% 96% 75% med avg min max +dotvec3 205 4958347 4957275 4954992 
4830662 4865361 4793732 4958407 +dotvec3_sse4 77 13009365 13008769 13007558 13006531 12984386 12879694 13012780 +dotvec3_avx 73 13546345 13545521 13544051 13543108 13514745 13416326 13549015 +dotvec3_sse4_a 339 3103713 3091482 2987682 2908482 2943368 2884299 3104656 + +3d cross product + op/s 98% 96% 75% med avg min max +crossvec3 104 10100627 10009077 9554613 9527666 9575022 9392093 10430234 +crossvec3_sse 127 7956531 7914250 7908721 7793683 7853727 7780996 8882017 + +Pt2 + op/s 98% 96% 75% med avg min max +Pt2 249 4172470 4167526 4051644 4015052 4015541 3845771 4177711 +Pt2b 277 3705885 3704515 3583610 3578537 3607662 3577415 3707930 + +multiply + add + op/s 98% 96% 75% med avg min max +madd 364 2942989 2871234 2805496 2736987 2741434 2592991 2944081 +fma_avx 370 2841929 2821290 2717142 2688333 2697838 2638349 2864404 + +2d point sum + op/s 98% 96% 75% med avg min max +addpt2 130 7934552 7872437 7716697 7677146 7666291 7523143 7938354 +addpt2_sse 95 10593092 10591841 10534997 10451245 10433652 10196594 10593618 +addpt2_avx 95 10602751 10592873 10590342 10464975 10475812 10260238 10647474 + +3d point sum + op/s 98% 96% 75% med avg min max +addpt3 107 9698450 9697567 9572244 9149533 9309953 8982729 9698768 +addpt3_avx 92 10863383 10861795 10860365 10859209 10816886 10733246 10912534 + +``` + +## AMD Ryzen 3 5400U + +> provided by **llamaa** + +``` +min + op/s 98% 96% 75% med avg min max +fmin 494 2382435 2038636 2032938 2002998 2023068 1995669 2719499 +fmin_sse 500 2006126 2003791 2001287 1999331 1999632 1995669 2054751 + +2d dot product + op/s 98% 96% 75% med avg min max +dotvec2 484 2258333 2251627 2096242 2011714 2063398 1995689 2258482 +dotvec2_sse 108 9269760 9267864 9265149 9261662 9253332 9230724 9271536 +dotvec2_sse4 108 9261647 9256628 9250741 9245292 9245148 9231083 9262984 +dotvec2_avx 108 9262076 9255101 9247438 9241221 9242259 9231502 9265459 +dotvec2_sse_a 499 2007793 2003033 2000349 1999331 2001089 1995669 2250011 +dotvec2_sse4_a 516 2003063 2000977 
1999361 1992919 1937670 1746216 2003083 +dotvec2_avx_a 500 2004989 2003312 2000758 1999371 1999161 1995669 2006156 + +3d dot product + op/s 98% 96% 75% med avg min max +dotvec3 384 2609136 2605464 2601782 2599278 2598953 2591854 2613926 +dotvec3_sse4 99 10018456 10017478 10011281 10007719 10001388 9979919 10018635 +dotvec3_avx 95 10505566 10503021 10495956 10490224 10491056 10479233 10506145 +dotvec3_sse4_a 500 2005158 2003492 1999850 1999276 1998858 1995669 2018011 + +3d cross product + op/s 98% 96% 75% med avg min max +crossvec3 185 5412204 5406706 5400679 5395620 5395606 5377330 5417363 +crossvec3_sse 235 4255667 4254091 4248154 4245394 4246084 4240829 4255677 + +Pt2 + op/s 98% 96% 75% med avg min max +Pt2 444 2294315 2271873 2252545 2248803 2252010 2245131 2310360 +Pt2b 489 2079758 2073401 2061986 2058149 2043489 1995669 2081534 + +multiply + add + op/s 98% 96% 75% med avg min max +madd 496 2016813 2014618 2012503 2010926 2014984 1999371 2504193 +fma_avx 494 2040792 2037179 2033457 2028159 2021563 1995669 2041171 + +2d point sum + op/s 98% 96% 75% med avg min max +addpt2 231 4341951 4336353 4329379 4325162 4325468 4315289 4367826 +addpt2_sse 114 8758561 8745859 8719007 8701080 8699660 8633920 8759040 +addpt2_avx 114 8748343 8740161 8718897 8705890 8705307 8669034 8748852 + +3d point sum + op/s 98% 96% 75% med avg min max +addpt3 190 5260970 5259025 5254235 5252459 5251446 5238679 5262717 +addpt3_avx 108 9261707 9256249 9229686 9195514 9200410 9131717 9280945 + +``` + +## Intel Core i5-10300H + +> provided by **uramekus** + +``` +min + op/s 98% 96% 75% med avg min max +fmin 283 4568225 4565163 4563893 3543909 3527765 2488743 4571387 +fmin_sse 414 2424605 2420142 2417015 2414723 2415306 2409636 2426538 + +2d dot product + op/s 98% 96% 75% med avg min max +dotvec2 267 3744010 3742209 3737007 3735923 3736518 3731135 3767934 +dotvec2_sse 73 13698430 13695892 13693351 13692214 13692714 13690210 13698510 +dotvec2_sse4 75 13283629 13280936 13278169 13277493 13277783 
13275146 13286098 +dotvec2_avx 75 13283761 13279452 13278088 13277284 13277534 13275028 13287380 +dotvec2_sse_a 373 2881463 2669431 2667139 2665984 2679081 2627220 3743409 +dotvec2_sse4_a 393 2569252 2568403 2563474 2531893 2540101 2488852 3467011 +dotvec2_avx_a 342 2910797 2906958 2905773 2905043 2915722 2903536 3964388 + +3d dot product + op/s 98% 96% 75% med avg min max +dotvec3 206 5304768 5210421 4784381 4760076 4833173 4756544 5677615 +dotvec3_sse4 65 15355526 15353759 15352531 15351795 15351913 15349422 15357083 +dotvec3_avx 65 15354566 15354048 15352757 15351779 15351973 15349416 15357971 +dotvec3_sse4_a 337 2959474 2957670 2956259 2955527 2963219 2926466 3756403 + +3d cross product + op/s 98% 96% 75% med avg min max +crossvec3 91 10986157 10983141 10971494 10968571 10969827 10962082 10989235 +crossvec3_sse 143 7001828 7000719 6998587 6996428 6974455 6863747 7199104 + +Pt2 + op/s 98% 96% 75% med avg min max +Pt2 227 4415172 4408012 4398711 4397851 4399747 4381089 4559482 +Pt2b 277 3608261 3607181 3604815 3603066 3603403 3599702 3609269 + +multiply + add + op/s 98% 96% 75% med avg min max +madd 347 2878112 2877760 2876937 2876112 2876230 2874718 2883503 +fma_avx 341 2938089 2937966 2936871 2935967 2926577 2802398 2938710 + +2d point sum + op/s 98% 96% 75% med avg min max +addpt2 120 8314705 8311729 8310052 8308082 8308210 8285693 8353482 +addpt2_sse 89 11204824 11203825 11203201 11202634 11202605 11201048 11205531 +addpt2_avx 73 13696975 13696348 13695183 13694241 13694325 13691704 13697637 + +3d point sum + op/s 98% 96% 75% med avg min max +addpt3 94 10560908 10537671 10530696 10525254 10528137 10520659 10579957 +addpt3_avx 77 12873349 12868526 12867472 12866408 12868654 12863977 13081995 +``` |