This test was run at 17/10/2024 16:09:45
Results for a cyclic layout with add C
benchmark | layout | add C | M | N | K | plots | cycles | ideal | utilization |
---|---|---|---|---|---|---|---|---|---|
dense_gemm_cyclic_32x32x32 | cyclic | yes | 32 | 32 | 32 | yes | 109 | 80 | 0.7339449541284404 |
dense_gemm_cyclic_32x32x48 | cyclic | yes | 32 | 32 | 48 | yes | 154 | 112 | 0.7272727272727273 |
dense_gemm_cyclic_32x32x64 | cyclic | yes | 32 | 32 | 64 | no | 171 | 144 | 0.8421052631578947 |
dense_gemm_cyclic_32x48x32 | cyclic | yes | 32 | 48 | 32 | yes | 156 | 120 | 0.7692307692307693 |
dense_gemm_cyclic_32x48x48 | cyclic | yes | 32 | 48 | 48 | yes | 226 | 168 | 0.7433628318584071 |
dense_gemm_cyclic_32x48x64 | cyclic | yes | 32 | 48 | 64 | no | 251 | 216 | 0.8605577689243028 |
dense_gemm_cyclic_32x64x32 | cyclic | yes | 32 | 64 | 32 | no | 204 | 160 | 0.7843137254901961 |
dense_gemm_cyclic_32x64x48 | cyclic | yes | 32 | 64 | 48 | no | 297 | 224 | 0.7542087542087542 |
dense_gemm_cyclic_32x64x64 | cyclic | yes | 32 | 64 | 64 | no | 331 | 288 | 0.8700906344410876 |
dense_gemm_cyclic_48x32x32 | cyclic | yes | 48 | 32 | 32 | yes | 156 | 120 | 0.7692307692307693 |
dense_gemm_cyclic_48x32x48 | cyclic | yes | 48 | 32 | 48 | yes | 229 | 168 | 0.7336244541484717 |
dense_gemm_cyclic_48x32x64 | cyclic | yes | 48 | 32 | 64 | no | 251 | 216 | 0.8605577689243028 |
dense_gemm_cyclic_48x48x32 | cyclic | yes | 48 | 48 | 32 | yes | 229 | 180 | 0.7860262008733624 |
dense_gemm_cyclic_48x48x48 | cyclic | yes | 48 | 48 | 48 | yes | 337 | 252 | 0.7477744807121661 |
dense_gemm_cyclic_48x48x64 | cyclic | yes | 48 | 48 | 64 | no | 371 | 324 | 0.8733153638814016 |
dense_gemm_cyclic_48x64x32 | cyclic | yes | 48 | 64 | 32 | no | 301 | 240 | 0.7973421926910299 |
dense_gemm_cyclic_48x64x48 | cyclic | yes | 48 | 64 | 48 | no | 444 | 336 | 0.7567567567567568 |
dense_gemm_cyclic_48x64x64 | cyclic | yes | 48 | 64 | 64 | no | 491 | 432 | 0.879837067209776 |
dense_gemm_cyclic_64x32x32 | cyclic | yes | 64 | 32 | 32 | no | 204 | 160 | 0.7843137254901961 |
dense_gemm_cyclic_64x32x48 | cyclic | yes | 64 | 32 | 48 | no | 304 | 224 | 0.7368421052631579 |
dense_gemm_cyclic_64x32x64 | cyclic | yes | 64 | 32 | 64 | no | 331 | 288 | 0.8700906344410876 |
dense_gemm_cyclic_64x48x32 | cyclic | yes | 64 | 48 | 32 | no | 301 | 240 | 0.7973421926910299 |
dense_gemm_cyclic_64x48x48 | cyclic | yes | 64 | 48 | 48 | no | 448 | 336 | 0.75 |
dense_gemm_cyclic_64x48x64 | cyclic | yes | 64 | 48 | 64 | no | 491 | 432 | 0.879837067209776 |
dense_gemm_cyclic_64x64x32 | cyclic | yes | 64 | 64 | 32 | no | 396 | 320 | 0.8080808080808081 |
dense_gemm_cyclic_64x64x48 | cyclic | yes | 64 | 64 | 48 | no | 591 | 448 | 0.7580372250423012 |
dense_gemm_cyclic_64x64x64 | cyclic | yes | 64 | 64 | 64 | no | 651 | 576 | 0.8847926267281107 |
dense_gemm_cyclic_16x32x512 | cyclic | yes | 16 | 32 | 512 | no | 539 | 520 | 0.9647495361781077 |
dense_gemm_cyclic_448x32x32 | cyclic | yes | 448 | 32 | 32 | no | 1356 | 1120 | 0.8259587020648967 |
dense_gemm_cyclic_8x192x32 | cyclic | yes | 8 | 192 | 32 | no | 156 | 120 | 0.7692307692307693 |
dense_gemm_cyclic_8x16x16 | cyclic | yes | 8 | 16 | 16 | yes | 19 | 6 | 0.3157894736842105 |
dense_gemm_cyclic_224x16x192 | cyclic | yes | 224 | 16 | 192 | no | 1467 | 1400 | 0.9543285616905249 |
dense_gemm_cyclic_8x96x16 | cyclic | yes | 8 | 96 | 16 | no | 60 | 36 | 0.6 |
dense_gemm_cyclic_64x24x96 | cyclic | yes | 64 | 24 | 96 | no | 347 | 312 | 0.899135446685879 |
dense_gemm_cyclic_8x48x24 | cyclic | yes | 8 | 48 | 24 | yes | 43 | 24 | 0.5581395348837209 |
dense_gemm_cyclic_56x48x16 | cyclic | yes | 56 | 48 | 16 | no | 180 | 126 | 0.7 |
dense_gemm_cyclic_8x32x144 | cyclic | yes | 8 | 32 | 144 | no | 94 | 76 | 0.8085106382978723 |
dense_gemm_cyclic_56x32x32 | cyclic | yes | 56 | 32 | 32 | no | 181 | 140 | 0.7734806629834254 |
dense_gemm_cyclic_200x48x16 | cyclic | yes | 200 | 48 | 16 | no | 612 | 450 | 0.7352941176470589 |
dense_gemm_cyclic_200x32x64 | cyclic | yes | 200 | 32 | 64 | no | 1011 | 900 | 0.8902077151335311 |
dense_gemm_cyclic_200x96x16 | cyclic | yes | 200 | 96 | 16 | no | 1212 | 900 | 0.7425742574257426 |
dense_gemm_cyclic_200x8x384 | cyclic | yes | 200 | 8 | 384 | no | 1261 | 1225 | 0.9714512291831879 |
dense_gemm_cyclic_200x8x96 | cyclic | yes | 200 | 8 | 96 | no | 361 | 325 | 0.9002770083102493 |
dense_gemm_cyclic_56x576x16 | cyclic | yes | 56 | 576 | 16 | no | 2028 | 1512 | 0.7455621301775148 |
dense_gemm_cyclic_8x160x576 | cyclic | yes | 8 | 160 | 576 | no | 1491 | 1460 | 0.9792085848423877 |
dense_gemm_cyclic_56x48x160 | cyclic | yes | 56 | 48 | 160 | no | 935 | 882 | 0.9433155080213904 |
dense_gemm_cyclic_8x960x16 | cyclic | yes | 8 | 960 | 16 | no | 492 | 360 | 0.7317073170731707 |
dense_gemm_cyclic_56x64x960 | cyclic | yes | 56 | 64 | 960 | no | 6843 | 6776 | 0.9902089726728043 |
dense_gemm_cyclic_56x64x320 | cyclic | yes | 56 | 64 | 320 | no | 2363 | 2296 | 0.9716462124418113 |
dense_gemm_cyclic_8x40x1280 | cyclic | yes | 8 | 40 | 1280 | no | 821 | 805 | 0.9805115712545676 |
dense_gemm_cyclic_8x32x152 | cyclic | yes | 8 | 32 | 152 | no | 95 | 80 | 0.8421052631578947 |
dense_gemm_cyclic_8x64x576 | cyclic | yes | 8 | 64 | 576 | no | 603 | 584 | 0.9684908789386402 |
dense_gemm_cyclic_8x128x576 | cyclic | yes | 8 | 128 | 576 | no | 1195 | 1168 | 0.9774058577405857 |
dense_gemm_cyclic_112x128x128 | cyclic | yes | 112 | 128 | 128 | no | 4043 | 3808 | 0.9418748454118229 |
dense_gemm_cyclic_56x32x64 | cyclic | yes | 56 | 32 | 64 | no | 291 | 252 | 0.865979381443299 |
dense_gemm_cyclic_40x64x1152 | cyclic | yes | 40 | 64 | 1152 | no | 5851 | 5800 | 0.9912835412749957 |
dense_gemm_cyclic_200x64x192 | cyclic | yes | 200 | 64 | 192 | no | 5211 | 5000 | 0.9595087315294569 |
dense_gemm_cyclic_200x32x128 | cyclic | yes | 200 | 32 | 128 | no | 1811 | 1700 | 0.9387078961899503 |
dense_gemm_cyclic_56x8x576 | cyclic | yes | 56 | 8 | 576 | no | 529 | 511 | 0.9659735349716446 |
dense_gemm_cyclic_56x8x512 | cyclic | yes | 56 | 8 | 512 | no | 473 | 455 | 0.9619450317124736 |
dense_gemm_cyclic_56x128x256 | cyclic | yes | 56 | 128 | 256 | no | 3819 | 3696 | 0.9677926158680282 |
dense_gemm_cyclic_8x200x512 | cyclic | yes | 8 | 200 | 512 | no | 1661 | 1625 | 0.9783263094521373 |
dense_gemm_cyclic_40x96x768 | cyclic | yes | 40 | 96 | 768 | no | 5891 | 5820 | 0.9879477168562214 |
dense_gemm_cyclic_40x200x64 | cyclic | yes | 40 | 200 | 64 | no | 1261 | 1125 | 0.8921490880253767 |
dense_gemm_cyclic_200x64x200 | cyclic | yes | 200 | 64 | 200 | no | 5750 | 5200 | 0.9043478260869565 |
dense_gemm_cyclic_40x8x768 | cyclic | yes | 40 | 8 | 768 | no | 501 | 485 | 0.9680638722554891 |
dense_gemm_cyclic_8x128x192 | cyclic | yes | 8 | 128 | 192 | no | 427 | 400 | 0.936768149882904 |
dense_gemm_cyclic_8x40x768 | cyclic | yes | 8 | 40 | 768 | no | 501 | 485 | 0.9680638722554891 |
dense_gemm_cyclic_32x64x768 | cyclic | yes | 32 | 64 | 768 | no | 3147 | 3104 | 0.9863361931998729 |
dense_gemm_cyclic_8x512x64 | cyclic | yes | 8 | 512 | 64 | no | 651 | 576 | 0.8847926267281107 |
dense_gemm_cyclic_32x64x512 | cyclic | yes | 32 | 64 | 512 | no | 2123 | 2080 | 0.9797456429580782 |
dense_gemm_cyclic_128x8x768 | cyclic | yes | 128 | 8 | 768 | no | 1579 | 1552 | 0.9829005699810006 |
dense_gemm_cyclic_128x8x792 | cyclic | yes | 128 | 8 | 792 | no | 1642 | 1600 | 0.97442143727162 |
dense_gemm_cyclic_128x88x192 | cyclic | yes | 128 | 88 | 192 | no | 4587 | 4400 | 0.9592326139088729 |
average | 0.8526262204739302 |
Results for a cyclic layout
benchmark | layout | add C | M | N | K | plots | cycles | ideal | utilization |
---|---|---|---|---|---|---|---|---|---|
dense_matmul_cyclic_32x32x32 | cyclic | no | 32 | 32 | 32 | yes | 91 | 80 | 0.8791208791208791 |
dense_matmul_cyclic_32x32x48 | cyclic | no | 32 | 32 | 48 | yes | 134 | 112 | 0.835820895522388 |
dense_matmul_cyclic_32x32x64 | cyclic | no | 32 | 32 | 64 | no | 155 | 144 | 0.9290322580645162 |
dense_matmul_cyclic_32x48x32 | cyclic | no | 32 | 48 | 32 | yes | 131 | 120 | 0.916030534351145 |
dense_matmul_cyclic_32x48x48 | cyclic | no | 32 | 48 | 48 | yes | 198 | 168 | 0.8484848484848485 |
dense_matmul_cyclic_32x48x64 | cyclic | no | 32 | 48 | 64 | no | 227 | 216 | 0.9515418502202643 |
dense_matmul_cyclic_32x64x32 | cyclic | no | 32 | 64 | 32 | no | 171 | 160 | 0.935672514619883 |
dense_matmul_cyclic_32x64x48 | cyclic | no | 32 | 64 | 48 | no | 262 | 224 | 0.8549618320610687 |
dense_matmul_cyclic_32x64x64 | cyclic | no | 32 | 64 | 64 | no | 299 | 288 | 0.9632107023411371 |
dense_matmul_cyclic_48x32x32 | cyclic | no | 48 | 32 | 32 | yes | 131 | 120 | 0.916030534351145 |
dense_matmul_cyclic_48x32x48 | cyclic | no | 48 | 32 | 48 | yes | 196 | 168 | 0.8571428571428571 |
dense_matmul_cyclic_48x32x64 | cyclic | no | 48 | 32 | 64 | no | 227 | 216 | 0.9515418502202643 |
dense_matmul_cyclic_48x48x32 | cyclic | no | 48 | 48 | 32 | yes | 191 | 180 | 0.9424083769633508 |
dense_matmul_cyclic_48x48x48 | cyclic | no | 48 | 48 | 48 | yes | 292 | 252 | 0.863013698630137 |
dense_matmul_cyclic_48x48x64 | cyclic | no | 48 | 48 | 64 | no | 335 | 324 | 0.9671641791044776 |
dense_matmul_cyclic_48x64x32 | cyclic | no | 48 | 64 | 32 | no | 251 | 240 | 0.9561752988047809 |
dense_matmul_cyclic_48x64x48 | cyclic | no | 48 | 64 | 48 | no | 388 | 336 | 0.865979381443299 |
dense_matmul_cyclic_48x64x64 | cyclic | no | 48 | 64 | 64 | no | 443 | 432 | 0.9751693002257337 |
dense_matmul_cyclic_64x32x32 | cyclic | no | 64 | 32 | 32 | no | 171 | 160 | 0.935672514619883 |
dense_matmul_cyclic_64x32x48 | cyclic | no | 64 | 32 | 48 | no | 258 | 224 | 0.8682170542635659 |
dense_matmul_cyclic_64x32x64 | cyclic | no | 64 | 32 | 64 | no | 299 | 288 | 0.9632107023411371 |
dense_matmul_cyclic_64x48x32 | cyclic | no | 64 | 48 | 32 | no | 251 | 240 | 0.9561752988047809 |
dense_matmul_cyclic_64x48x48 | cyclic | no | 64 | 48 | 48 | no | 386 | 336 | 0.8704663212435233 |
dense_matmul_cyclic_64x48x64 | cyclic | no | 64 | 48 | 64 | no | 443 | 432 | 0.9751693002257337 |
dense_matmul_cyclic_64x64x32 | cyclic | no | 64 | 64 | 32 | no | 331 | 320 | 0.9667673716012085 |
dense_matmul_cyclic_64x64x48 | cyclic | no | 64 | 64 | 48 | no | 514 | 448 | 0.8715953307392996 |
dense_matmul_cyclic_64x64x64 | cyclic | no | 64 | 64 | 64 | no | 587 | 576 | 0.9812606473594548 |
dense_matmul_cyclic_16x32x512 | cyclic | no | 16 | 32 | 512 | no | 531 | 520 | 0.9792843691148776 |
dense_matmul_cyclic_448x32x32 | cyclic | no | 448 | 32 | 32 | no | 1131 | 1120 | 0.9902740937223696 |
dense_matmul_cyclic_8x192x32 | cyclic | no | 8 | 192 | 32 | no | 131 | 120 | 0.916030534351145 |
dense_matmul_cyclic_8x16x16 | cyclic | no | 8 | 16 | 16 | yes | 16 | 6 | 0.375 |
dense_matmul_cyclic_224x16x192 | cyclic | no | 224 | 16 | 192 | no | 1411 | 1400 | 0.9922041105598866 |
dense_matmul_cyclic_8x96x16 | cyclic | no | 8 | 96 | 16 | no | 46 | 36 | 0.782608695652174 |
dense_matmul_cyclic_64x24x96 | cyclic | no | 64 | 24 | 96 | no | 323 | 312 | 0.9659442724458205 |
dense_matmul_cyclic_8x48x24 | cyclic | no | 8 | 48 | 24 | yes | 37 | 24 | 0.6486486486486487 |
dense_matmul_cyclic_56x48x16 | cyclic | no | 56 | 48 | 16 | no | 136 | 126 | 0.9264705882352942 |
dense_matmul_cyclic_8x32x144 | cyclic | no | 8 | 32 | 144 | no | 88 | 76 | 0.8636363636363636 |
dense_matmul_cyclic_56x32x32 | cyclic | no | 56 | 32 | 32 | no | 151 | 140 | 0.9271523178807947 |
dense_matmul_cyclic_200x48x16 | cyclic | no | 200 | 48 | 16 | no | 460 | 450 | 0.9782608695652174 |
dense_matmul_cyclic_200x32x64 | cyclic | no | 200 | 32 | 64 | no | 911 | 900 | 0.9879253567508233 |
dense_matmul_cyclic_200x96x16 | cyclic | no | 200 | 96 | 16 | no | 910 | 900 | 0.989010989010989 |
dense_matmul_cyclic_200x8x384 | cyclic | no | 200 | 8 | 384 | no | 1236 | 1225 | 0.9911003236245954 |
dense_matmul_cyclic_200x8x96 | cyclic | no | 200 | 8 | 96 | no | 336 | 325 | 0.9672619047619048 |
dense_matmul_cyclic_56x576x16 | cyclic | no | 56 | 576 | 16 | no | 1522 | 1512 | 0.9934296977660972 |
dense_matmul_cyclic_8x160x576 | cyclic | no | 8 | 160 | 576 | no | 1471 | 1460 | 0.9925220938137321 |
dense_matmul_cyclic_56x48x160 | cyclic | no | 56 | 48 | 160 | no | 893 | 882 | 0.9876819708846585 |
dense_matmul_cyclic_8x960x16 | cyclic | no | 8 | 960 | 16 | no | 370 | 360 | 0.972972972972973 |
dense_matmul_cyclic_56x64x960 | cyclic | no | 56 | 64 | 960 | no | 6787 | 6776 | 0.9983792544570502 |
dense_matmul_cyclic_56x64x320 | cyclic | no | 56 | 64 | 320 | no | 2307 | 2296 | 0.9952319029042046 |
dense_matmul_cyclic_8x40x1280 | cyclic | no | 8 | 40 | 1280 | no | 816 | 805 | 0.9865196078431373 |
dense_matmul_cyclic_8x32x152 | cyclic | no | 8 | 32 | 152 | no | 92 | 80 | 0.8695652173913043 |
dense_matmul_cyclic_8x64x576 | cyclic | no | 8 | 64 | 576 | no | 595 | 584 | 0.9815126050420168 |
dense_matmul_cyclic_8x128x576 | cyclic | no | 8 | 128 | 576 | no | 1179 | 1168 | 0.9906700593723494 |
dense_matmul_cyclic_112x128x128 | cyclic | no | 112 | 128 | 128 | no | 3819 | 3808 | 0.9971196648337262 |
dense_matmul_cyclic_56x32x64 | cyclic | no | 56 | 32 | 64 | no | 263 | 252 | 0.9581749049429658 |
dense_matmul_cyclic_40x64x1152 | cyclic | no | 40 | 64 | 1152 | no | 5811 | 5800 | 0.9981070383754947 |
dense_matmul_cyclic_200x64x192 | cyclic | no | 200 | 64 | 192 | no | 5011 | 5000 | 0.9978048293753742 |
dense_matmul_cyclic_200x32x128 | cyclic | no | 200 | 32 | 128 | no | 1711 | 1700 | 0.9935710111046172 |
dense_matmul_cyclic_56x8x576 | cyclic | no | 56 | 8 | 576 | no | 522 | 511 | 0.9789272030651341 |
dense_matmul_cyclic_56x8x512 | cyclic | no | 56 | 8 | 512 | no | 466 | 455 | 0.9763948497854077 |
dense_matmul_cyclic_56x128x256 | cyclic | no | 56 | 128 | 256 | no | 3707 | 3696 | 0.9970326409495549 |
dense_matmul_cyclic_8x200x512 | cyclic | no | 8 | 200 | 512 | no | 1636 | 1625 | 0.9932762836185819 |
dense_matmul_cyclic_40x96x768 | cyclic | no | 40 | 96 | 768 | no | 5831 | 5820 | 0.9981135311267364 |
dense_matmul_cyclic_40x200x64 | cyclic | no | 40 | 200 | 64 | no | 1136 | 1125 | 0.9903169014084507 |
dense_matmul_cyclic_200x64x200 | cyclic | no | 200 | 64 | 200 | no | 5396 | 5200 | 0.9636767976278725 |
dense_matmul_cyclic_40x8x768 | cyclic | no | 40 | 8 | 768 | no | 496 | 485 | 0.9778225806451613 |
dense_matmul_cyclic_8x128x192 | cyclic | no | 8 | 128 | 192 | no | 411 | 400 | 0.9732360097323601 |
dense_matmul_cyclic_8x40x768 | cyclic | no | 8 | 40 | 768 | no | 496 | 485 | 0.9778225806451613 |
dense_matmul_cyclic_32x64x768 | cyclic | no | 32 | 64 | 768 | no | 3115 | 3104 | 0.9964686998394864 |
dense_matmul_cyclic_8x512x64 | cyclic | no | 8 | 512 | 64 | no | 587 | 576 | 0.9812606473594548 |
dense_matmul_cyclic_32x64x512 | cyclic | no | 32 | 64 | 512 | no | 2091 | 2080 | 0.9947393591582975 |
dense_matmul_cyclic_128x8x768 | cyclic | no | 128 | 8 | 768 | no | 1563 | 1552 | 0.9929622520793346 |
dense_matmul_cyclic_128x8x792 | cyclic | no | 128 | 8 | 792 | no | 1624 | 1600 | 0.9852216748768473 |
dense_matmul_cyclic_128x88x192 | cyclic | no | 128 | 88 | 192 | no | 4411 | 4400 | 0.9975062344139651 |
average | 0.9400796060573402 |
Results for a banked layout with add C
benchmark | layout | add C | M | N | K | plots | cycles | ideal | utilization |
---|---|---|---|---|---|---|---|---|---|
dense_gemm_banked_32x32x32 | banked | yes | 32 | 32 | 32 | yes | 110 | 80 | 0.7272727272727273 |
dense_gemm_banked_32x32x48 | banked | yes | 32 | 32 | 48 | yes | 138 | 112 | 0.8115942028985508 |
dense_gemm_banked_32x32x64 | banked | yes | 32 | 32 | 64 | no | 170 | 144 | 0.8470588235294118 |
dense_gemm_banked_32x48x32 | banked | yes | 32 | 48 | 32 | yes | 158 | 120 | 0.759493670886076 |
dense_gemm_banked_32x48x48 | banked | yes | 32 | 48 | 48 | yes | 202 | 168 | 0.8316831683168316 |
dense_gemm_banked_32x48x64 | banked | yes | 32 | 48 | 64 | no | 250 | 216 | 0.864 |
dense_gemm_banked_32x64x32 | banked | yes | 32 | 64 | 32 | no | 206 | 160 | 0.7766990291262136 |
dense_gemm_banked_32x64x48 | banked | yes | 32 | 64 | 48 | no | 266 | 224 | 0.8421052631578947 |
dense_gemm_banked_32x64x64 | banked | yes | 32 | 64 | 64 | no | 330 | 288 | 0.8727272727272727 |
dense_gemm_banked_48x32x32 | banked | yes | 48 | 32 | 32 | yes | 158 | 120 | 0.759493670886076 |
dense_gemm_banked_48x32x48 | banked | yes | 48 | 32 | 48 | yes | 202 | 168 | 0.8316831683168316 |
dense_gemm_banked_48x32x64 | banked | yes | 48 | 32 | 64 | no | 250 | 216 | 0.864 |
dense_gemm_banked_48x48x32 | banked | yes | 48 | 48 | 32 | yes | 230 | 180 | 0.782608695652174 |
dense_gemm_banked_48x48x48 | banked | yes | 48 | 48 | 48 | yes | 298 | 252 | 0.8456375838926175 |
dense_gemm_banked_48x48x64 | banked | yes | 48 | 48 | 64 | no | 370 | 324 | 0.8756756756756757 |
dense_gemm_banked_48x64x32 | banked | yes | 48 | 64 | 32 | no | 302 | 240 | 0.7947019867549668 |
dense_gemm_banked_48x64x48 | banked | yes | 48 | 64 | 48 | no | 394 | 336 | 0.8527918781725888 |
dense_gemm_banked_48x64x64 | banked | yes | 48 | 64 | 64 | no | 490 | 432 | 0.8816326530612245 |
dense_gemm_banked_64x32x32 | banked | yes | 64 | 32 | 32 | no | 206 | 160 | 0.7766990291262136 |
dense_gemm_banked_64x32x48 | banked | yes | 64 | 32 | 48 | no | 266 | 224 | 0.8421052631578947 |
dense_gemm_banked_64x32x64 | banked | yes | 64 | 32 | 64 | no | 330 | 288 | 0.8727272727272727 |
dense_gemm_banked_64x48x32 | banked | yes | 64 | 48 | 32 | no | 302 | 240 | 0.7947019867549668 |
dense_gemm_banked_64x48x48 | banked | yes | 64 | 48 | 48 | no | 394 | 336 | 0.8527918781725888 |
dense_gemm_banked_64x48x64 | banked | yes | 64 | 48 | 64 | no | 490 | 432 | 0.8816326530612245 |
dense_gemm_banked_64x64x32 | banked | yes | 64 | 64 | 32 | no | 398 | 320 | 0.8040201005025126 |
dense_gemm_banked_64x64x48 | banked | yes | 64 | 64 | 48 | no | 522 | 448 | 0.8582375478927203 |
dense_gemm_banked_64x64x64 | banked | yes | 64 | 64 | 64 | no | 650 | 576 | 0.8861538461538462 |
dense_gemm_banked_16x32x512 | banked | yes | 16 | 32 | 512 | no | 538 | 520 | 0.966542750929368 |
dense_gemm_banked_448x32x32 | banked | yes | 448 | 32 | 32 | no | 1358 | 1120 | 0.8247422680412371 |
dense_gemm_banked_8x192x32 | banked | yes | 8 | 192 | 32 | no | 158 | 120 | 0.759493670886076 |
dense_gemm_banked_8x16x16 | banked | yes | 8 | 16 | 16 | yes | 18 | 6 | 0.3333333333333333 |
dense_gemm_banked_224x16x192 | banked | yes | 224 | 16 | 192 | no | 1466 | 1400 | 0.9549795361527967 |
dense_gemm_banked_8x96x16 | banked | yes | 8 | 96 | 16 | no | 62 | 36 | 0.5806451612903226 |
dense_gemm_banked_64x24x96 | banked | yes | 64 | 24 | 96 | no | 346 | 312 | 0.9017341040462428 |
dense_gemm_banked_8x48x24 | banked | yes | 8 | 48 | 24 | yes | 42 | 24 | 0.5714285714285714 |
dense_gemm_banked_56x48x16 | banked | yes | 56 | 48 | 16 | no | 212 | 126 | 0.5943396226415094 |
dense_gemm_banked_8x32x144 | banked | yes | 8 | 32 | 144 | no | 90 | 76 | 0.8444444444444444 |
dense_gemm_banked_56x32x32 | banked | yes | 56 | 32 | 32 | no | 182 | 140 | 0.7692307692307693 |
dense_gemm_banked_200x48x16 | banked | yes | 200 | 48 | 16 | no | 752 | 450 | 0.598404255319149 |
dense_gemm_banked_200x32x64 | banked | yes | 200 | 32 | 64 | no | 1010 | 900 | 0.8910891089108911 |
dense_gemm_banked_200x96x16 | banked | yes | 200 | 96 | 16 | no | 1502 | 900 | 0.5992010652463382 |
dense_gemm_banked_200x8x384 | banked | yes | 200 | 8 | 384 | no | 1260 | 1225 | 0.9722222222222222 |
dense_gemm_banked_200x8x96 | banked | yes | 200 | 8 | 96 | no | 360 | 325 | 0.9027777777777778 |
dense_gemm_banked_56x576x16 | banked | yes | 56 | 576 | 16 | no | 2522 | 1512 | 0.5995241871530531 |
dense_gemm_banked_8x160x576 | banked | yes | 8 | 160 | 576 | no | 1490 | 1460 | 0.9798657718120806 |
dense_gemm_banked_56x48x160 | banked | yes | 56 | 48 | 160 | no | 934 | 882 | 0.9443254817987152 |
dense_gemm_banked_8x960x16 | banked | yes | 8 | 960 | 16 | no | 602 | 360 | 0.5980066445182725 |
dense_gemm_banked_56x64x960 | banked | yes | 56 | 64 | 960 | no | 6842 | 6776 | 0.9903536977491961 |
dense_gemm_banked_56x64x320 | banked | yes | 56 | 64 | 320 | no | 2362 | 2296 | 0.9720575783234547 |
dense_gemm_banked_8x40x1280 | banked | yes | 8 | 40 | 1280 | no | 820 | 805 | 0.9817073170731707 |
dense_gemm_banked_8x32x152 | banked | yes | 8 | 32 | 152 | no | 94 | 80 | 0.851063829787234 |
dense_gemm_banked_8x64x576 | banked | yes | 8 | 64 | 576 | no | 602 | 584 | 0.9700996677740864 |
dense_gemm_banked_8x128x576 | banked | yes | 8 | 128 | 576 | no | 1194 | 1168 | 0.9782244556113903 |
dense_gemm_banked_112x128x128 | banked | yes | 112 | 128 | 128 | no | 4042 | 3808 | 0.9421078673923801 |
dense_gemm_banked_56x32x64 | banked | yes | 56 | 32 | 64 | no | 290 | 252 | 0.8689655172413793 |
dense_gemm_banked_40x64x1152 | banked | yes | 40 | 64 | 1152 | no | 5850 | 5800 | 0.9914529914529915 |
dense_gemm_banked_200x64x192 | banked | yes | 200 | 64 | 192 | no | 5210 | 5000 | 0.9596928982725528 |
dense_gemm_banked_200x32x128 | banked | yes | 200 | 32 | 128 | no | 1810 | 1700 | 0.9392265193370166 |
dense_gemm_banked_56x8x576 | banked | yes | 56 | 8 | 576 | no | 528 | 511 | 0.9678030303030303 |
dense_gemm_banked_56x8x512 | banked | yes | 56 | 8 | 512 | no | 472 | 455 | 0.9639830508474576 |
dense_gemm_banked_56x128x256 | banked | yes | 56 | 128 | 256 | no | 3818 | 3696 | 0.9680460974332111 |
dense_gemm_banked_8x200x512 | banked | yes | 8 | 200 | 512 | no | 1660 | 1625 | 0.9789156626506024 |
dense_gemm_banked_40x96x768 | banked | yes | 40 | 96 | 768 | no | 5890 | 5820 | 0.9881154499151104 |
dense_gemm_banked_40x200x64 | banked | yes | 40 | 200 | 64 | no | 1260 | 1125 | 0.8928571428571429 |
dense_gemm_banked_200x64x200 | banked | yes | 200 | 64 | 200 | no | 5410 | 5200 | 0.9611829944547134 |
dense_gemm_banked_40x8x768 | banked | yes | 40 | 8 | 768 | no | 500 | 485 | 0.97 |
dense_gemm_banked_8x128x192 | banked | yes | 8 | 128 | 192 | no | 426 | 400 | 0.9389671361502347 |
dense_gemm_banked_8x40x768 | banked | yes | 8 | 40 | 768 | no | 500 | 485 | 0.97 |
dense_gemm_banked_32x64x768 | banked | yes | 32 | 64 | 768 | no | 3146 | 3104 | 0.9866497139224412 |
dense_gemm_banked_8x512x64 | banked | yes | 8 | 512 | 64 | no | 650 | 576 | 0.8861538461538462 |
dense_gemm_banked_32x64x512 | banked | yes | 32 | 64 | 512 | no | 2122 | 2080 | 0.9802073515551367 |
dense_gemm_banked_128x8x768 | banked | yes | 128 | 8 | 768 | no | 1578 | 1552 | 0.9835234474017744 |
dense_gemm_banked_128x8x792 | banked | yes | 128 | 8 | 792 | no | 1626 | 1600 | 0.984009840098401 |
dense_gemm_banked_128x88x192 | banked | yes | 128 | 88 | 192 | no | 4586 | 4400 | 0.9594417793283908 |
average | 0.8567711983533229 |
Results for a banked layout
benchmark | layout | add C | M | N | K | plots | cycles | ideal | utilization |
---|---|---|---|---|---|---|---|---|---|
dense_matmul_banked_32x32x32 | banked | no | 32 | 32 | 32 | yes | 90 | 80 | 0.8888888888888888 |
dense_matmul_banked_32x32x48 | banked | no | 32 | 32 | 48 | yes | 122 | 112 | 0.9180327868852459 |
dense_matmul_banked_32x32x64 | banked | no | 32 | 32 | 64 | no | 154 | 144 | 0.935064935064935 |
dense_matmul_banked_32x48x32 | banked | no | 32 | 48 | 32 | yes | 130 | 120 | 0.9230769230769231 |
dense_matmul_banked_32x48x48 | banked | no | 32 | 48 | 48 | yes | 178 | 168 | 0.9438202247191011 |
dense_matmul_banked_32x48x64 | banked | no | 32 | 48 | 64 | no | 226 | 216 | 0.9557522123893806 |
dense_matmul_banked_32x64x32 | banked | no | 32 | 64 | 32 | no | 170 | 160 | 0.9411764705882353 |
dense_matmul_banked_32x64x48 | banked | no | 32 | 64 | 48 | no | 234 | 224 | 0.9572649572649573 |
dense_matmul_banked_32x64x64 | banked | no | 32 | 64 | 64 | no | 298 | 288 | 0.9664429530201343 |
dense_matmul_banked_48x32x32 | banked | no | 48 | 32 | 32 | yes | 130 | 120 | 0.9230769230769231 |
dense_matmul_banked_48x32x48 | banked | no | 48 | 32 | 48 | yes | 178 | 168 | 0.9438202247191011 |
dense_matmul_banked_48x32x64 | banked | no | 48 | 32 | 64 | no | 226 | 216 | 0.9557522123893806 |
dense_matmul_banked_48x48x32 | banked | no | 48 | 48 | 32 | yes | 190 | 180 | 0.9473684210526315 |
dense_matmul_banked_48x48x48 | banked | no | 48 | 48 | 48 | yes | 262 | 252 | 0.9618320610687023 |
dense_matmul_banked_48x48x64 | banked | no | 48 | 48 | 64 | no | 334 | 324 | 0.9700598802395209 |
dense_matmul_banked_48x64x32 | banked | no | 48 | 64 | 32 | no | 250 | 240 | 0.96 |
dense_matmul_banked_48x64x48 | banked | no | 48 | 64 | 48 | no | 346 | 336 | 0.9710982658959537 |
dense_matmul_banked_48x64x64 | banked | no | 48 | 64 | 64 | no | 442 | 432 | 0.9773755656108597 |
dense_matmul_banked_64x32x32 | banked | no | 64 | 32 | 32 | no | 170 | 160 | 0.9411764705882353 |
dense_matmul_banked_64x32x48 | banked | no | 64 | 32 | 48 | no | 234 | 224 | 0.9572649572649573 |
dense_matmul_banked_64x32x64 | banked | no | 64 | 32 | 64 | no | 298 | 288 | 0.9664429530201343 |
dense_matmul_banked_64x48x32 | banked | no | 64 | 48 | 32 | no | 250 | 240 | 0.96 |
dense_matmul_banked_64x48x48 | banked | no | 64 | 48 | 48 | no | 346 | 336 | 0.9710982658959537 |
dense_matmul_banked_64x48x64 | banked | no | 64 | 48 | 64 | no | 442 | 432 | 0.9773755656108597 |
dense_matmul_banked_64x64x32 | banked | no | 64 | 64 | 32 | no | 330 | 320 | 0.9696969696969697 |
dense_matmul_banked_64x64x48 | banked | no | 64 | 64 | 48 | no | 458 | 448 | 0.9781659388646288 |
dense_matmul_banked_64x64x64 | banked | no | 64 | 64 | 64 | no | 586 | 576 | 0.9829351535836177 |
dense_matmul_banked_16x32x512 | banked | no | 16 | 32 | 512 | no | 530 | 520 | 0.9811320754716981 |
dense_matmul_banked_448x32x32 | banked | no | 448 | 32 | 32 | no | 1130 | 1120 | 0.9911504424778761 |
dense_matmul_banked_8x192x32 | banked | no | 8 | 192 | 32 | no | 130 | 120 | 0.9230769230769231 |
dense_matmul_banked_8x16x16 | banked | no | 8 | 16 | 16 | yes | 16 | 6 | 0.375 |
dense_matmul_banked_224x16x192 | banked | no | 224 | 16 | 192 | no | 1410 | 1400 | 0.9929078014184397 |
dense_matmul_banked_8x96x16 | banked | no | 8 | 96 | 16 | no | 46 | 36 | 0.782608695652174 |
dense_matmul_banked_64x24x96 | banked | no | 64 | 24 | 96 | no | 322 | 312 | 0.968944099378882 |
dense_matmul_banked_8x48x24 | banked | no | 8 | 48 | 24 | yes | 34 | 24 | 0.7058823529411765 |
dense_matmul_banked_56x48x16 | banked | no | 56 | 48 | 16 | no | 136 | 126 | 0.9264705882352942 |
dense_matmul_banked_8x32x144 | banked | no | 8 | 32 | 144 | no | 86 | 76 | 0.8837209302325582 |
dense_matmul_banked_56x32x32 | banked | no | 56 | 32 | 32 | no | 150 | 140 | 0.9333333333333333 |
dense_matmul_banked_200x48x16 | banked | no | 200 | 48 | 16 | no | 460 | 450 | 0.9782608695652174 |
dense_matmul_banked_200x32x64 | banked | no | 200 | 32 | 64 | no | 910 | 900 | 0.989010989010989 |
dense_matmul_banked_200x96x16 | banked | no | 200 | 96 | 16 | no | 910 | 900 | 0.989010989010989 |
dense_matmul_banked_200x8x384 | banked | no | 200 | 8 | 384 | no | 1235 | 1225 | 0.9919028340080972 |
dense_matmul_banked_200x8x96 | banked | no | 200 | 8 | 96 | no | 335 | 325 | 0.9701492537313433 |
dense_matmul_banked_56x576x16 | banked | no | 56 | 576 | 16 | no | 1522 | 1512 | 0.9934296977660972 |
dense_matmul_banked_8x160x576 | banked | no | 8 | 160 | 576 | no | 1470 | 1460 | 0.9931972789115646 |
dense_matmul_banked_56x48x160 | banked | no | 56 | 48 | 160 | no | 892 | 882 | 0.9887892376681614 |
dense_matmul_banked_8x960x16 | banked | no | 8 | 960 | 16 | no | 370 | 360 | 0.972972972972973 |
dense_matmul_banked_56x64x960 | banked | no | 56 | 64 | 960 | no | 6786 | 6776 | 0.9985263778367227 |
dense_matmul_banked_56x64x320 | banked | no | 56 | 64 | 320 | no | 2306 | 2296 | 0.9956634865568084 |
dense_matmul_banked_8x40x1280 | banked | no | 8 | 40 | 1280 | no | 815 | 805 | 0.9877300613496932 |
dense_matmul_banked_8x32x152 | banked | no | 8 | 32 | 152 | no | 90 | 80 | 0.8888888888888888 |
dense_matmul_banked_8x64x576 | banked | no | 8 | 64 | 576 | no | 594 | 584 | 0.9831649831649831 |
dense_matmul_banked_8x128x576 | banked | no | 8 | 128 | 576 | no | 1178 | 1168 | 0.9915110356536503 |
dense_matmul_banked_112x128x128 | banked | no | 112 | 128 | 128 | no | 3818 | 3808 | 0.9973808276584599 |
dense_matmul_banked_56x32x64 | banked | no | 56 | 32 | 64 | no | 262 | 252 | 0.9618320610687023 |
dense_matmul_banked_40x64x1152 | banked | no | 40 | 64 | 1152 | no | 5810 | 5800 | 0.9982788296041308 |
dense_matmul_banked_200x64x192 | banked | no | 200 | 64 | 192 | no | 5010 | 5000 | 0.998003992015968 |
dense_matmul_banked_200x32x128 | banked | no | 200 | 32 | 128 | no | 1710 | 1700 | 0.9941520467836257 |
dense_matmul_banked_56x8x576 | banked | no | 56 | 8 | 576 | no | 521 | 511 | 0.980806142034549 |
dense_matmul_banked_56x8x512 | banked | no | 56 | 8 | 512 | no | 465 | 455 | 0.978494623655914 |
dense_matmul_banked_56x128x256 | banked | no | 56 | 128 | 256 | no | 3706 | 3696 | 0.9973016729627631 |
dense_matmul_banked_8x200x512 | banked | no | 8 | 200 | 512 | no | 1635 | 1625 | 0.9938837920489296 |
dense_matmul_banked_40x96x768 | banked | no | 40 | 96 | 768 | no | 5830 | 5820 | 0.9982847341337907 |
dense_matmul_banked_40x200x64 | banked | no | 40 | 200 | 64 | no | 1135 | 1125 | 0.9911894273127754 |
dense_matmul_banked_200x64x200 | banked | no | 200 | 64 | 200 | no | 5210 | 5200 | 0.9980806142034548 |
dense_matmul_banked_40x8x768 | banked | no | 40 | 8 | 768 | no | 495 | 485 | 0.9797979797979798 |
dense_matmul_banked_8x128x192 | banked | no | 8 | 128 | 192 | no | 410 | 400 | 0.975609756097561 |
dense_matmul_banked_8x40x768 | banked | no | 8 | 40 | 768 | no | 495 | 485 | 0.9797979797979798 |
dense_matmul_banked_32x64x768 | banked | no | 32 | 64 | 768 | no | 3114 | 3104 | 0.9967886962106616 |
dense_matmul_banked_8x512x64 | banked | no | 8 | 512 | 64 | no | 586 | 576 | 0.9829351535836177 |
dense_matmul_banked_32x64x512 | banked | no | 32 | 64 | 512 | no | 2090 | 2080 | 0.9952153110047847 |
dense_matmul_banked_128x8x768 | banked | no | 128 | 8 | 768 | no | 1562 | 1552 | 0.9935979513444302 |
dense_matmul_banked_128x8x792 | banked | no | 128 | 8 | 792 | no | 1610 | 1600 | 0.9937888198757764 |
dense_matmul_banked_128x88x192 | banked | no | 128 | 88 | 192 | no | 4410 | 4400 | 0.9977324263038548 |
average | 0.9554520164363577 |