"""

.. _l-example-mul-timeit:

Compares mul implementations with timeit
========================================

:epkg:`numpy` has a very fast implementation of
matrix multiplication. There are many ways to be slower.
The following uses :epkg:`timeit` to compare implementations.

.. contents::
    :local:

Preparation
+++++++++++
"""
import timeit
import numpy

from td3a_cpp.tutorial.td_mul_cython import (
    multiply_matrix, c_multiply_matrix,
    c_multiply_matrix_parallel,
    c_multiply_matrix_parallel_transposed as cmulparamtr)


# Repetition counts handed to timeit. The pure python implementation is
# run fewer times than the others, so every total below must be divided
# by its own count before two implementations are compared.
N_RUNS = 100
N_RUNS_PYTHON = 10

va = numpy.random.randn(150, 100).astype(numpy.float64)
vb = numpy.random.randn(100, 100).astype(numpy.float64)

# Namespace in which timeit evaluates the timed statements.
ctx = {
    'va': va, 'vb': vb, 'c_multiply_matrix': c_multiply_matrix,
    'multiply_matrix': multiply_matrix,
    'c_multiply_matrix_parallel': c_multiply_matrix_parallel,
    'c_multiply_matrix_parallel_transposed': cmulparamtr}

##########################################
# Measures
# ++++++++
#
# numpy
res0 = timeit.timeit('va @ vb', number=N_RUNS, globals=ctx)
print("numpy time", res0)

###########################
# python implementation

res1 = timeit.timeit(
    'multiply_matrix(va, vb)', number=N_RUNS_PYTHON, globals=ctx)
print('python implementation', res1)


###########################
# cython implementation

res2 = timeit.timeit(
    'c_multiply_matrix(va, vb)', number=N_RUNS, globals=ctx)
print('cython implementation', res2)


###########################
# cython implementation parallelized

res3 = timeit.timeit(
    'c_multiply_matrix_parallel(va, vb)', number=N_RUNS, globals=ctx)
print('cython implementation parallelized', res3)


###########################
# cython implementation parallelized, AVX + transposed

res4 = timeit.timeit(
    'c_multiply_matrix_parallel_transposed(va, vb)',
    number=N_RUNS, globals=ctx)
print('cython implementation parallelized avx', res4)


############################
# Speed up...
#
# timeit returns the total time spent over all repetitions, and the
# repetition count is not the same for every measure. The ratios are
# therefore computed on the average time of a single call.

avg0 = res0 / N_RUNS
avg1 = res1 / N_RUNS_PYTHON
avg2 = res2 / N_RUNS
avg3 = res3 / N_RUNS
avg4 = res4 / N_RUNS

print("numpy is %f faster than pure python." % (avg1 / avg0))
print("numpy is %f faster than cython." % (avg2 / avg0))
print("numpy is %f faster than parallelized cython." % (avg3 / avg0))
print("numpy is %f faster than avx parallelized cython." % (avg4 / avg0))
0 commit comments