diff --git a/uebung_03/exercise_04/exercise_04.py b/uebung_03/exercise_04/exercise_04.py
new file mode 100644
index 0000000..e52b14f
--- /dev/null
+++ b/uebung_03/exercise_04/exercise_04.py
@@ -0,0 +1,41 @@
+from mpi4py import MPI
+import numpy as np
+import math
+import sys
+
+def dot_product(a, x):  # -> sum of a[k] * x[k] over every index k of a
+    total = 0  # also the result for an empty a
+    for k, coeff in enumerate(a):
+        total += coeff * x[k]  # x is indexed by a's positions, so len(x) >= len(a) is needed
+    return total
+
+def row(i):
+    """Return row i of the matrix as the list [i / 1, i / 2, ..., i / n].
+
+    n is read from module scope, so it must be assigned before row() is called."""
+    return [i / col for col in range(1, n + 1)]
+
+comm = MPI.COMM_WORLD
+size = comm.Get_size()  # number of processes in COMM_WORLD
+rank = comm.Get_rank()  # rank of this process within COMM_WORLD
+
+n = int(sys.argv[1])  # problem size: first command-line argument
+x = list(range(1, n+1))  # the vector [1, ..., n]
+
+# Every rank should compute (almost) the same number of matrix-row * x products.
+# The row numbers [1, ..., n] are therefore split into `size` consecutive chunks, one per rank.
+# If n is a multiple of size all chunks are equally long; otherwise the first (n % size) chunks get one extra row.
+chunks = np.array_split(x, size)
+chunk = chunks[rank]
+
+# every rank computes its delegated rows times x
+sub_b = [dot_product(row(i), x) for i in chunk]
+
+comm.barrier()
+
+b = comm.gather(sub_b)
+
+if rank == 0:
+    # b is a list of lists here; np.concatenate flattens it into one
+    # np.ndarray and tolist() turns that into a plain Python list
+    b = np.concatenate(b).tolist()
diff --git a/uebung_03/exercise_04/exercise_04_visualization.py b/uebung_03/exercise_04/exercise_04_visualization.py
new file mode 100644
index 0000000..cbe6e1f
--- /dev/null
+++ b/uebung_03/exercise_04/exercise_04_visualization.py
@@ -0,0 +1,30 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import timeit
+
+nthreads = [1, 2, 4, 8, 16]  # process counts handed to `mpirun -n`
+sizes = [100, 400, 1600, 6400, 12800, 256000, 512000]  # sizes passed to exercise_04.py; NOTE(review): the jump 12800 -> 256000 looks unintended (25600?) - confirm
+thread_timings = []  # thread_timings[i][j]: time in ms of one run with nthreads[i] processes at sizes[j]
+for nprocs in nthreads:
+    print(f"Measure timing for {nprocs} thread(s)")
+    per_size_ms = []
+    for dim in sizes:
+        print(f"--- Measure timing for size {dim}")
+        stmt = f"subprocess.run(\"mpirun --use-hwthread-cpus -n {nprocs} python3 exercise_04.py {dim}\", shell = True)"
+        elapsed_s = timeit.timeit(stmt, setup="import subprocess", number=1)  # timeit reports seconds
+        per_size_ms.append(elapsed_s * 1000)
+    thread_timings.append(per_size_ms)
+
+# One line per process count: x = problem size, y = measured time in ms.
+colors = ["green", "blue", "purple", "red", "orange"]
+for k, (count, timings_ms) in enumerate(zip(nthreads, thread_timings)):
+    # Cycle through the palette so adding entries to nthreads cannot raise an IndexError.
+    plt.plot(sizes, timings_ms, colors[k % len(colors)], label=f"{count}")
+
+plt.title("Matrix-Vector-Multiplication time/threads comparison")
+plt.xlabel("Sizes")
+plt.ylabel("Time (ms)")
+plt.loglog()  # switch both axes to logarithmic scale
+plt.legend(title = "# of Threads")
+
+plt.show()