diff --git a/.gitignore b/.gitignore index a5c5045..a13bb6a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +cs257 cs257.pdf CS257.cfg cachegrind.out.* diff --git a/coursework.c b/coursework.c index a345e66..5e1f137 100755 --- a/coursework.c +++ b/coursework.c @@ -8,14 +8,30 @@ __m128 _mm_hsum_ps(__m128 a) { return _mm_add_ps(b, _mm_shuffle_ps(b, b, 1)); } +// - prior to merging loops l0 had the highest LLC miss ratio accounting for +// 40% +- 4% of the cache misses +// - most likely due to extremely memory-bound operation +// - after merging loops 1+2 and 3+4, new test performed +// - l1+2 now relatively stall-free and cache misses are not that common +// - l3+4 accounts for 80% of the cache misses + +// http://www.agner.org/optimize/instruction_tables.pdf +// Intel Intrinsics guide +// Some reference for square root precisions +// Some reference for order of operations precision +// Some other reference for something else + +// after removing l0 and further optimising l1, l3+4 had 80% of the cache +// misses, mostly stemming from comparisons +// additionally the 3+4 + void compute() { - // Preponderation + // Preponderationing float factor = dmp * dt; int V = (N/4)*4; - int B = N/4; - int i, j, k, l; + int i, j; - // packaged floats + // Packed preponderationing __m128 dmp_ = _mm_load1_ps(&dmp); __m128 dt_ = _mm_load1_ps(&dt); __m128 eps_ = _mm_load1_ps(&eps); @@ -26,115 +42,83 @@ void compute() { __m128 negone_ = _mm_set1_ps(-1.0f); __m128 half_ = _mm_set1_ps(0.5f); __m128 three_ = _mm_set1_ps(3.0f); - __m128 fsign_ = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); - + // Timers double t0, t1; - // Loop 0. 
- t0 = wtime(); - /* - for (i = 0; i < V; i += 4) { - _mm_store_ps(ax+i, zero_); - } - for (i = 0; i < V; i += 4) { - _mm_store_ps(ay+i, zero_); - } - for (i = 0; i < V; i += 4) { - _mm_store_ps(az+i, zero_); - } - for (; i < N; i++) { - _mm_store_ss(ax+i, zero_); - _mm_store_ss(ay+i, zero_); - _mm_store_ss(az+i, zero_); - } - */ - t1 = wtime(); - l0 += (t1 - t0); - - // Loop 1. + // Acceleration and velocity loop t0 = wtime(); #pragma omp parallel for for (int i = 0; i < N; i++) { + // prep x __m128 xi_ = _mm_load1_ps(x+i); - __m128 yi_ = _mm_load1_ps(y+i); - __m128 zi_ = _mm_load1_ps(z+i); __m128 sx_ = zero_; + // prep y + __m128 yi_ = _mm_load1_ps(y+i); __m128 sy_ = zero_; + // prep z + __m128 zi_ = _mm_load1_ps(z+i); __m128 sz_ = zero_; - for (j = 0; j < V; j += 4) { - // Square x diff - __m128 xj_ = _mm_load_ps(x+j); - __m128 rx_ = _mm_sub_ps(xj_, xi_); + // the nasty part + for (int j = 0; j < V; j += 4) { + // rx/x2 + __m128 rx_ = _mm_load_ps(x+j); + rx_ = _mm_sub_ps(rx_, xi_); __m128 r2_ = _mm_mul_ps(rx_, rx_); - // Start adding r2 + // r2 start r2_ = _mm_add_ps(r2_, eps_); - // Square y diff - __m128 yj_ = _mm_load_ps(y+j); - __m128 ry_ = _mm_sub_ps(yj_, yi_); + // ry/y2 + __m128 ry_ = _mm_load_ps(y+j); + ry_ = _mm_sub_ps(ry_, yi_); __m128 y2_ = _mm_mul_ps(ry_, ry_); - // Continue adding r2 + // r2 con r2_ = _mm_add_ps(y2_, r2_); - // Square z diff - __m128 zj_ = _mm_load_ps(z+j); - __m128 rz_ = _mm_sub_ps(zj_, zi_); + //rz/z2 + __m128 rz_ = _mm_load_ps(z+j); + rz_ = _mm_sub_ps(rz_, zi_); __m128 z2_ = _mm_mul_ps(rz_, rz_); - // Finish adding r2 + // r2 fin r2_ = _mm_add_ps(z2_, r2_); - // Load mj - __m128 mj_ = _mm_load_ps(m+j); - // Fast inverse + // s start + __m128 s_ = _mm_load_ps(m+j); + // Fast inverse - source of error __m128 r2inv_ = _mm_rsqrt_ps(r2_); - // Newton-Phelps + fast inverse - //__m128 inv_ = _mm_rsqrt_ps(r2_); - //__m128 top_ = _mm_mul_ps(_mm_mul_ps(r2_, inv_), inv_); - //__m128 r2inv_ = _mm_mul_ps(_mm_mul_ps(half_, inv_), 
_mm_sub_ps(three_, top_)); + // Newton-Raphson step - source of error + //__m128 top_ = _mm_mul_ps(_mm_mul_ps(r2_, r2inv_), r2inv_); + //__m128 r2inv_ = _mm_mul_ps(_mm_mul_ps(half_, r2inv_), _mm_sub_ps(three_, top_)); // Accurate inverse square root - //__m128 r2inv_ = _mm_div_ps(one_, _mm_sqrt_ps(r2_)) - __m128 r2inv2_ = _mm_mul_ps(r2inv_, r2inv_); - __m128 r6inv_ = _mm_mul_ps(r2inv2_, r2inv_); - __m128 s_ = _mm_mul_ps(mj_, r6inv_); - // IF WE WANT TO SKIP LOOP 2: - // More accurate - // s_ = _mm_mul_ps(dt_, s_); - // s_ = _mm_mul_ps(dmp_, s_); - // More approximate - // s_ = _mm_mul_ps(factor_, s_); - // ENDIF + //__m128 r2inv_ = _mm_div_ps(one_, _mm_sqrt_ps(r2_)); + // r6inv + __m128 r6inv_ = _mm_mul_ps(_mm_mul_ps(r2inv_, r2inv_), r2inv_); + // s fin + s_ = _mm_mul_ps(s_, r6inv_); + // Directly calculate velocity - source of error + // s_ = _mm_mul_ps(factor_, s_); + // Calculate results __m128 mx_ = _mm_mul_ps(s_, rx_); sx_ = _mm_add_ps(mx_, sx_); __m128 my_ = _mm_mul_ps(s_, ry_); sy_ = _mm_add_ps(my_, sy_); __m128 mz_ = _mm_mul_ps(s_, rz_); sz_ = _mm_add_ps(mz_, sz_); - } + // Horizontal sum - source of error + //__m128 vx_ = _mm_load1_ps(vx+i); + // sx_ = _mm_add_ps(sx_, vx_); __m128 sx1_ = _mm_add_ps(sx_, _mm_movehl_ps(sx_, sx_)); __m128 sx2_ = _mm_add_ps(sx1_, _mm_shuffle_ps(sx1_, sx1_, 1)); + _mm_store_ss(ax+i, sx2_); + //__m128 vy_ = _mm_load1_ps(vy+i); + // sy_ = _mm_add_ps(sy_, vy_); __m128 sy1_ = _mm_add_ps(sy_, _mm_movehl_ps(sy_, sy_)); __m128 sy2_ = _mm_add_ps(sy1_, _mm_shuffle_ps(sy1_, sy1_, 1)); + _mm_store_ss(ay+i, sy2_); + //__m128 vz_ = _mm_load1_ps(vz+i); + // sz_ = _mm_add_ps(sz_, vz_); __m128 sz1_ = _mm_add_ps(sz_, _mm_movehl_ps(sz_, sz_)); __m128 sz2_ = _mm_add_ps(sz1_, _mm_shuffle_ps(sz1_, sz1_, 1)); - _mm_store_ss(ax+i, sx2_); - _mm_store_ss(ay+i, sy2_); _mm_store_ss(az+i, sz2_); - //*/ - /* - for (; j < N; j++) { - float rx = x[j] - x[i]; - float ry = y[j] - y[i]; - float rz = z[j] - z[i]; - float r2 = rx*rx + ry*ry + rz*rz + eps; - float 
r2inv = 1.0f / sqrt(r2); - float r6inv = r2inv * r2inv * r2inv; - float s = m[j] * r6inv; - ax[i] += s * rx; - ay[i] += s * ry; - az[i] += s * rz; - } - */ } - t1 = wtime(); l1 += (t1 - t0); @@ -144,72 +128,54 @@ void compute() { for (i = 0; i < V; i += 4) { __m128 ax_ = _mm_load_ps(ax+i); __m128 vx_ = _mm_load_ps(vx+i); - __m128 mult_ = _mm_mul_ps(dmp_, _mm_mul_ps(dt_, ax_)); - vx_ = _mm_add_ps(vx_, mult_); + __m128 m1_ = _mm_mul_ps(dmp_, _mm_mul_ps(dt_, ax_)); + vx_ = _mm_add_ps(vx_, m1_); - __m128 x_ = _mm_load_ps(x+i); - - mult_ = _mm_mul_ps(dt_, vx_); - x_ = _mm_add_ps(x_, mult_); + __m128 x_ = _mm_load_ps(x+i); + __m128 m_ = _mm_mul_ps(dt_, vx_); + x_ = _mm_add_ps(x_, m_); _mm_store_ps(x+i, x_); + x_ = _mm_andnot_ps(negzero_, x_); __m128 ge_ = _mm_cmpge_ps(x_, one_); - __m128 le_ = _mm_cmple_ps(x_, negone_); - __m128 cond_ = _mm_or_ps(ge_, le_); - __m128 and_ = _mm_and_ps(cond_, negzero_); - __m128 invx_ = _mm_xor_ps(vx_, and_); - _mm_store_ps(vx+i, invx_); + __m128 mx_ = _mm_and_ps(ge_, negzero_); + vx_ = _mm_xor_ps(vx_, mx_); + _mm_store_ps(vx+i, vx_); } for (i = 0; i < V; i += 4) { __m128 ax_ = _mm_load_ps(ay+i); __m128 vx_ = _mm_load_ps(vy+i); - __m128 mult_ = _mm_mul_ps(dmp_, _mm_mul_ps(dt_, ax_)); - vx_ = _mm_add_ps(vx_, mult_); - - __m128 x_ = _mm_load_ps(y+i); + __m128 m1_ = _mm_mul_ps(dmp_, _mm_mul_ps(dt_, ax_)); + vx_ = _mm_add_ps(vx_, m1_); - mult_ = _mm_mul_ps(dt_, vx_); - x_ = _mm_add_ps(x_, mult_); + __m128 x_ = _mm_load_ps(y+i); + __m128 m_ = _mm_mul_ps(dt_, vx_); + x_ = _mm_add_ps(x_, m_); _mm_store_ps(y+i, x_); + x_ = _mm_andnot_ps(negzero_, x_); __m128 ge_ = _mm_cmpge_ps(x_, one_); - __m128 le_ = _mm_cmple_ps(x_, negone_); - __m128 cond_ = _mm_or_ps(ge_, le_); - __m128 and_ = _mm_and_ps(cond_, negzero_); - __m128 invx_ = _mm_xor_ps(vx_, and_); - _mm_store_ps(vy+i, invx_); + __m128 mx_ = _mm_and_ps(ge_, negzero_); + vx_ = _mm_xor_ps(vx_, mx_); + _mm_store_ps(vy+i, vx_); } for (i = 0; i < V; i += 4) { __m128 ax_ = _mm_load_ps(az+i); __m128 vx_ 
= _mm_load_ps(vz+i); - __m128 mult_ = _mm_mul_ps(dmp_, _mm_mul_ps(dt_, ax_)); - vx_ = _mm_add_ps(vx_, mult_); + __m128 m1_ = _mm_mul_ps(dmp_, _mm_mul_ps(dt_, ax_)); + vx_ = _mm_add_ps(vx_, m1_); - __m128 x_ = _mm_load_ps(z+i); - - mult_ = _mm_mul_ps(dt_, vx_); - x_ = _mm_add_ps(x_, mult_); + __m128 x_ = _mm_load_ps(z+i); + __m128 m_ = _mm_mul_ps(dt_, vx_); + x_ = _mm_add_ps(x_, m_); _mm_store_ps(z+i, x_); + x_ = _mm_andnot_ps(negzero_, x_); __m128 ge_ = _mm_cmpge_ps(x_, one_); - __m128 le_ = _mm_cmple_ps(x_, negone_); - __m128 cond_ = _mm_or_ps(ge_, le_); - __m128 and_ = _mm_and_ps(cond_, negzero_); - __m128 invx_ = _mm_xor_ps(vx_, and_); - _mm_store_ps(vz+i, invx_); - } - for (; i < N; i++) { - vx[i] += dmp * (dt * ax[i]); - vy[i] += dmp * (dt * ay[i]); - vz[i] += dmp * (dt * az[i]); - x[i] += dt * vx[i]; - y[i] += dt * vy[i]; - z[i] += dt * vz[i]; - if (x[i] >= 1.0f || x[i] <= -1.0f) vx[i] *= -1.0f; - if (y[i] >= 1.0f || y[i] <= -1.0f) vy[i] *= -1.0f; - if (z[i] >= 1.0f || z[i] <= -1.0f) vz[i] *= -1.0f; + __m128 mx_ = _mm_and_ps(ge_, negzero_); + vx_ = _mm_xor_ps(vx_, mx_); + _mm_store_ps(vz+i, vx_); } t1 = wtime(); l3 += (t1 - t0); - } diff --git a/coursework_12_34.c b/coursework_12_34.c new file mode 100644 index 0000000..5e1f137 --- /dev/null +++ b/coursework_12_34.c @@ -0,0 +1,181 @@ +#include + +#define min(a,b) ((a) < (b) ? (a) : (b)) +#define max(a,b) ((a) > (b) ? 
(a) : (b)) + +__m128 _mm_hsum_ps(__m128 a) { + __m128 b = _mm_add_ps(a, _mm_movehl_ps(a, a)); + return _mm_add_ps(b, _mm_shuffle_ps(b, b, 1)); +} + +// - prior to merging loops l0 had the highest LLC miss ratio accounting for +// 40% +- 4% of the cache misses +// - most likely due to extremely memory-bound operation +// - after merging loops 1+2 and 3+4, new test performed +// - l1+2 now relatively stall-free and cache misses are not that common +// - l3+4 accounts for 80% of the cache misses + +// http://www.agner.org/optimize/instruction_tables.pdf +// Intel Intrinsics guide +// Some reference for square root precisions +// Some reference for order of operations precision +// Some other reference for something else + +// after removing l0 and further optimising l1, l3+4 had 80% of the cache +// misses, mostly stemming from comparisons +// additionally the 3+4 + +void compute() { + // Preponderationing + float factor = dmp * dt; + int V = (N/4)*4; + int i, j; + + // Packed preponderationing + __m128 dmp_ = _mm_load1_ps(&dmp); + __m128 dt_ = _mm_load1_ps(&dt); + __m128 eps_ = _mm_load1_ps(&eps); + __m128 factor_ = _mm_mul_ps(dmp_, dt_); + __m128 zero_ = _mm_setzero_ps(); + __m128 negzero_ = _mm_set1_ps(-0.0f); + __m128 one_ = _mm_set1_ps(1.0f); + __m128 negone_ = _mm_set1_ps(-1.0f); + __m128 half_ = _mm_set1_ps(0.5f); + __m128 three_ = _mm_set1_ps(3.0f); + + // Timers + double t0, t1; + + // Acceleration and velocity loop + t0 = wtime(); + #pragma omp parallel for + for (int i = 0; i < N; i++) { + // prep x + __m128 xi_ = _mm_load1_ps(x+i); + __m128 sx_ = zero_; + // prep y + __m128 yi_ = _mm_load1_ps(y+i); + __m128 sy_ = zero_; + // prep z + __m128 zi_ = _mm_load1_ps(z+i); + __m128 sz_ = zero_; + // the nasty part + for (int j = 0; j < V; j += 4) { + // rx/x2 + __m128 rx_ = _mm_load_ps(x+j); + rx_ = _mm_sub_ps(rx_, xi_); + __m128 r2_ = _mm_mul_ps(rx_, rx_); + // r2 start + r2_ = _mm_add_ps(r2_, eps_); + // ry/y2 + __m128 ry_ = _mm_load_ps(y+j); + ry_ = 
_mm_sub_ps(ry_, yi_); + __m128 y2_ = _mm_mul_ps(ry_, ry_); + // r2 con + r2_ = _mm_add_ps(y2_, r2_); + //rz/z2 + __m128 rz_ = _mm_load_ps(z+j); + rz_ = _mm_sub_ps(rz_, zi_); + __m128 z2_ = _mm_mul_ps(rz_, rz_); + // r2 fin + r2_ = _mm_add_ps(z2_, r2_); + // s start + __m128 s_ = _mm_load_ps(m+j); + // Fast inverse - source of error + __m128 r2inv_ = _mm_rsqrt_ps(r2_); + // Newton-Raphson step - source of error + //__m128 top_ = _mm_mul_ps(_mm_mul_ps(r2_, r2inv_), r2inv_); + //__m128 r2inv_ = _mm_mul_ps(_mm_mul_ps(half_, r2inv_), _mm_sub_ps(three_, top_)); + // Accurate inverse square root + //__m128 r2inv_ = _mm_div_ps(one_, _mm_sqrt_ps(r2_)); + // r6inv + __m128 r6inv_ = _mm_mul_ps(_mm_mul_ps(r2inv_, r2inv_), r2inv_); + // s fin + s_ = _mm_mul_ps(s_, r6inv_); + // Directly calculate velocity - source of error + // s_ = _mm_mul_ps(factor_, s_); + // Calculate results + __m128 mx_ = _mm_mul_ps(s_, rx_); + sx_ = _mm_add_ps(mx_, sx_); + __m128 my_ = _mm_mul_ps(s_, ry_); + sy_ = _mm_add_ps(my_, sy_); + __m128 mz_ = _mm_mul_ps(s_, rz_); + sz_ = _mm_add_ps(mz_, sz_); + } + // Horizontal sum - source of error + //__m128 vx_ = _mm_load1_ps(vx+i); + // sx_ = _mm_add_ps(sx_, vx_); + __m128 sx1_ = _mm_add_ps(sx_, _mm_movehl_ps(sx_, sx_)); + __m128 sx2_ = _mm_add_ps(sx1_, _mm_shuffle_ps(sx1_, sx1_, 1)); + _mm_store_ss(ax+i, sx2_); + //__m128 vy_ = _mm_load1_ps(vy+i); + // sy_ = _mm_add_ps(sy_, vy_); + __m128 sy1_ = _mm_add_ps(sy_, _mm_movehl_ps(sy_, sy_)); + __m128 sy2_ = _mm_add_ps(sy1_, _mm_shuffle_ps(sy1_, sy1_, 1)); + _mm_store_ss(ay+i, sy2_); + //__m128 vz_ = _mm_load1_ps(vz+i); + // sz_ = _mm_add_ps(sz_, vz_); + __m128 sz1_ = _mm_add_ps(sz_, _mm_movehl_ps(sz_, sz_)); + __m128 sz2_ = _mm_add_ps(sz1_, _mm_shuffle_ps(sz1_, sz1_, 1)); + _mm_store_ss(az+i, sz2_); + } + t1 = wtime(); + l1 += (t1 - t0); + + + // Loop 3. 
+ t0 = wtime(); + for (i = 0; i < V; i += 4) { + __m128 ax_ = _mm_load_ps(ax+i); + __m128 vx_ = _mm_load_ps(vx+i); + __m128 m1_ = _mm_mul_ps(dmp_, _mm_mul_ps(dt_, ax_)); + vx_ = _mm_add_ps(vx_, m1_); + + __m128 x_ = _mm_load_ps(x+i); + __m128 m_ = _mm_mul_ps(dt_, vx_); + x_ = _mm_add_ps(x_, m_); + _mm_store_ps(x+i, x_); + + x_ = _mm_andnot_ps(negzero_, x_); + __m128 ge_ = _mm_cmpge_ps(x_, one_); + __m128 mx_ = _mm_and_ps(ge_, negzero_); + vx_ = _mm_xor_ps(vx_, mx_); + _mm_store_ps(vx+i, vx_); + } + for (i = 0; i < V; i += 4) { + __m128 ax_ = _mm_load_ps(ay+i); + __m128 vx_ = _mm_load_ps(vy+i); + __m128 m1_ = _mm_mul_ps(dmp_, _mm_mul_ps(dt_, ax_)); + vx_ = _mm_add_ps(vx_, m1_); + + __m128 x_ = _mm_load_ps(y+i); + __m128 m_ = _mm_mul_ps(dt_, vx_); + x_ = _mm_add_ps(x_, m_); + _mm_store_ps(y+i, x_); + + x_ = _mm_andnot_ps(negzero_, x_); + __m128 ge_ = _mm_cmpge_ps(x_, one_); + __m128 mx_ = _mm_and_ps(ge_, negzero_); + vx_ = _mm_xor_ps(vx_, mx_); + _mm_store_ps(vy+i, vx_); + } + for (i = 0; i < V; i += 4) { + __m128 ax_ = _mm_load_ps(az+i); + __m128 vx_ = _mm_load_ps(vz+i); + __m128 m1_ = _mm_mul_ps(dmp_, _mm_mul_ps(dt_, ax_)); + vx_ = _mm_add_ps(vx_, m1_); + + __m128 x_ = _mm_load_ps(z+i); + __m128 m_ = _mm_mul_ps(dt_, vx_); + x_ = _mm_add_ps(x_, m_); + _mm_store_ps(z+i, x_); + + x_ = _mm_andnot_ps(negzero_, x_); + __m128 ge_ = _mm_cmpge_ps(x_, one_); + __m128 mx_ = _mm_and_ps(ge_, negzero_); + vx_ = _mm_xor_ps(vx_, mx_); + _mm_store_ps(vz+i, vx_); + } + t1 = wtime(); + l3 += (t1 - t0); +} diff --git a/cs257 b/cs257 index 03add85..8da7f6b 100755 Binary files a/cs257 and b/cs257 differ diff --git a/cs257_12_34 b/cs257_12_34 new file mode 100755 index 0000000..47b5a63 Binary files /dev/null and b/cs257_12_34 differ diff --git a/cs257_reference b/cs257_reference deleted file mode 100755 index bdcc1c1..0000000 Binary files a/cs257_reference and /dev/null differ diff --git a/profiling/initial_cache1.zoom b/profiling/initial_cache1.zoom new file mode 100644 index 
0000000..efaa2f5 Binary files /dev/null and b/profiling/initial_cache1.zoom differ diff --git a/profiling/initial_cache2.zoom b/profiling/initial_cache2.zoom new file mode 100644 index 0000000..81c9970 Binary files /dev/null and b/profiling/initial_cache2.zoom differ diff --git a/profiling/initial_cache3.zoom b/profiling/initial_cache3.zoom new file mode 100644 index 0000000..5001822 Binary files /dev/null and b/profiling/initial_cache3.zoom differ diff --git a/profiling/initial_time1.zoom b/profiling/initial_time1.zoom new file mode 100644 index 0000000..b9a2a84 Binary files /dev/null and b/profiling/initial_time1.zoom differ diff --git a/profiling/initial_time2.zoom b/profiling/initial_time2.zoom new file mode 100644 index 0000000..d82aaab Binary files /dev/null and b/profiling/initial_time2.zoom differ diff --git a/profiling/initial_time3.zoom b/profiling/initial_time3.zoom new file mode 100644 index 0000000..86a8332 Binary files /dev/null and b/profiling/initial_time3.zoom differ diff --git a/reference/coursework.c b/reference/coursework.c new file mode 100644 index 0000000..6366145 --- /dev/null +++ b/reference/coursework.c @@ -0,0 +1,64 @@ +/** + * The function to optimise as part of the coursework. + * + * l0, l1, l2 and l3 record the amount of time spent in each loop + * and should not be optimised out. :) + */ +void compute() { + + double t0, t1; + + // Loop 0. + t0 = wtime(); + for (int i = 0; i < N; i++) { + ax[i] = 0.0f; + ay[i] = 0.0f; + az[i] = 0.0f; + } + t1 = wtime(); + l0 += (t1 - t0); + + // Loop 1. + t0 = wtime(); + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + float rx = x[j] - x[i]; + float ry = y[j] - y[i]; + float rz = z[j] - z[i]; + float r2 = rx*rx + ry*ry + rz*rz + eps; + float r2inv = 1.0f / sqrt(r2); + float r6inv = r2inv * r2inv * r2inv; + float s = m[j] * r6inv; + ax[i] += s * rx; + ay[i] += s * ry; + az[i] += s * rz; + } + } + t1 = wtime(); + l1 += (t1 - t0); + + // Loop 2. 
+ t0 = wtime(); + for (int i = 0; i < N; i++) { + vx[i] += dmp * (dt * ax[i]); + vy[i] += dmp * (dt * ay[i]); + vz[i] += dmp * (dt * az[i]); + } + t1 = wtime(); + l2 += (t1 - t0); + + // Loop 3. + t0 = wtime(); + for (int i = 0; i < N; i++) { + x[i] += dt * vx[i]; + y[i] += dt * vy[i]; + z[i] += dt * vz[i]; + if (x[i] >= 1.0f || x[i] <= -1.0f) vx[i] *= -1.0f; + if (y[i] >= 1.0f || y[i] <= -1.0f) vy[i] *= -1.0f; + if (z[i] >= 1.0f || z[i] <= -1.0f) vz[i] *= -1.0f; + + } + t1 = wtime(); + l3 += (t1 - t0); + +} diff --git a/report/Coursework1.aux b/report/Coursework1.aux new file mode 100644 index 0000000..3acbd29 --- /dev/null +++ b/report/Coursework1.aux @@ -0,0 +1,5 @@ +\relax +\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}} +\@writefile{toc}{\contentsline {section}{\numberline {2}Structural Optimisations}{1}} +\@writefile{toc}{\contentsline {section}{\numberline {3}Threading}{1}} +\@writefile{toc}{\contentsline {section}{\numberline {4}Comparison of Different Versions}{1}} diff --git a/report/Coursework1.log b/report/Coursework1.log new file mode 100644 index 0000000..6f06d71 --- /dev/null +++ b/report/Coursework1.log @@ -0,0 +1,727 @@ +This is pdfTeX, Version 3.1415926-2.5-1.40.14 (TeX Live 2013/Debian) (format=pdflatex 2014.10.8) 9 MAR 2015 16:01 +entering extended mode + restricted \write18 enabled. + %&-line parsing enabled. +**Coursework1.tex +(./Coursework1.tex +LaTeX2e <2011/06/27> +Babel <3.9h> and hyphenation patterns for 78 languages loaded. 
+(/usr/share/texlive/texmf-dist/tex/latex/base/article.cls +Document Class: article 2007/10/19 v1.4h Standard LaTeX document class +(/usr/share/texlive/texmf-dist/tex/latex/base/size11.clo +File: size11.clo 2007/10/19 v1.4h Standard LaTeX file (size option) +) +\c@part=\count79 +\c@section=\count80 +\c@subsection=\count81 +\c@subsubsection=\count82 +\c@paragraph=\count83 +\c@subparagraph=\count84 +\c@figure=\count85 +\c@table=\count86 +\abovecaptionskip=\skip41 +\belowcaptionskip=\skip42 +\bibindent=\dimen102 +) +(/usr/share/texlive/texmf-dist/tex/latex/base/inputenc.sty +Package: inputenc 2008/03/30 v1.1d Input encoding file +\inpenc@prehook=\toks14 +\inpenc@posthook=\toks15 + +(/usr/share/texlive/texmf-dist/tex/latex/base/utf8.def +File: utf8.def 2008/04/05 v1.1m UTF-8 support for inputenc +Now handling font encoding OML ... +... no UTF-8 mapping file for font encoding OML +Now handling font encoding T1 ... +... processing UTF-8 mapping file for font encoding T1 + +(/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.dfu +File: t1enc.dfu 2008/04/05 v1.1m UTF-8 support for inputenc + defining Unicode char U+00A1 (decimal 161) + defining Unicode char U+00A3 (decimal 163) + defining Unicode char U+00AB (decimal 171) + defining Unicode char U+00BB (decimal 187) + defining Unicode char U+00BF (decimal 191) + defining Unicode char U+00C0 (decimal 192) + defining Unicode char U+00C1 (decimal 193) + defining Unicode char U+00C2 (decimal 194) + defining Unicode char U+00C3 (decimal 195) + defining Unicode char U+00C4 (decimal 196) + defining Unicode char U+00C5 (decimal 197) + defining Unicode char U+00C6 (decimal 198) + defining Unicode char U+00C7 (decimal 199) + defining Unicode char U+00C8 (decimal 200) + defining Unicode char U+00C9 (decimal 201) + defining Unicode char U+00CA (decimal 202) + defining Unicode char U+00CB (decimal 203) + defining Unicode char U+00CC (decimal 204) + defining Unicode char U+00CD (decimal 205) + defining Unicode char U+00CE (decimal 206) 
+ defining Unicode char U+00CF (decimal 207) + defining Unicode char U+00D0 (decimal 208) + defining Unicode char U+00D1 (decimal 209) + defining Unicode char U+00D2 (decimal 210) + defining Unicode char U+00D3 (decimal 211) + defining Unicode char U+00D4 (decimal 212) + defining Unicode char U+00D5 (decimal 213) + defining Unicode char U+00D6 (decimal 214) + defining Unicode char U+00D8 (decimal 216) + defining Unicode char U+00D9 (decimal 217) + defining Unicode char U+00DA (decimal 218) + defining Unicode char U+00DB (decimal 219) + defining Unicode char U+00DC (decimal 220) + defining Unicode char U+00DD (decimal 221) + defining Unicode char U+00DE (decimal 222) + defining Unicode char U+00DF (decimal 223) + defining Unicode char U+00E0 (decimal 224) + defining Unicode char U+00E1 (decimal 225) + defining Unicode char U+00E2 (decimal 226) + defining Unicode char U+00E3 (decimal 227) + defining Unicode char U+00E4 (decimal 228) + defining Unicode char U+00E5 (decimal 229) + defining Unicode char U+00E6 (decimal 230) + defining Unicode char U+00E7 (decimal 231) + defining Unicode char U+00E8 (decimal 232) + defining Unicode char U+00E9 (decimal 233) + defining Unicode char U+00EA (decimal 234) + defining Unicode char U+00EB (decimal 235) + defining Unicode char U+00EC (decimal 236) + defining Unicode char U+00ED (decimal 237) + defining Unicode char U+00EE (decimal 238) + defining Unicode char U+00EF (decimal 239) + defining Unicode char U+00F0 (decimal 240) + defining Unicode char U+00F1 (decimal 241) + defining Unicode char U+00F2 (decimal 242) + defining Unicode char U+00F3 (decimal 243) + defining Unicode char U+00F4 (decimal 244) + defining Unicode char U+00F5 (decimal 245) + defining Unicode char U+00F6 (decimal 246) + defining Unicode char U+00F8 (decimal 248) + defining Unicode char U+00F9 (decimal 249) + defining Unicode char U+00FA (decimal 250) + defining Unicode char U+00FB (decimal 251) + defining Unicode char U+00FC (decimal 252) + defining Unicode 
char U+00FD (decimal 253) + defining Unicode char U+00FE (decimal 254) + defining Unicode char U+00FF (decimal 255) + defining Unicode char U+0102 (decimal 258) + defining Unicode char U+0103 (decimal 259) + defining Unicode char U+0104 (decimal 260) + defining Unicode char U+0105 (decimal 261) + defining Unicode char U+0106 (decimal 262) + defining Unicode char U+0107 (decimal 263) + defining Unicode char U+010C (decimal 268) + defining Unicode char U+010D (decimal 269) + defining Unicode char U+010E (decimal 270) + defining Unicode char U+010F (decimal 271) + defining Unicode char U+0110 (decimal 272) + defining Unicode char U+0111 (decimal 273) + defining Unicode char U+0118 (decimal 280) + defining Unicode char U+0119 (decimal 281) + defining Unicode char U+011A (decimal 282) + defining Unicode char U+011B (decimal 283) + defining Unicode char U+011E (decimal 286) + defining Unicode char U+011F (decimal 287) + defining Unicode char U+0130 (decimal 304) + defining Unicode char U+0131 (decimal 305) + defining Unicode char U+0132 (decimal 306) + defining Unicode char U+0133 (decimal 307) + defining Unicode char U+0139 (decimal 313) + defining Unicode char U+013A (decimal 314) + defining Unicode char U+013D (decimal 317) + defining Unicode char U+013E (decimal 318) + defining Unicode char U+0141 (decimal 321) + defining Unicode char U+0142 (decimal 322) + defining Unicode char U+0143 (decimal 323) + defining Unicode char U+0144 (decimal 324) + defining Unicode char U+0147 (decimal 327) + defining Unicode char U+0148 (decimal 328) + defining Unicode char U+014A (decimal 330) + defining Unicode char U+014B (decimal 331) + defining Unicode char U+0150 (decimal 336) + defining Unicode char U+0151 (decimal 337) + defining Unicode char U+0152 (decimal 338) + defining Unicode char U+0153 (decimal 339) + defining Unicode char U+0154 (decimal 340) + defining Unicode char U+0155 (decimal 341) + defining Unicode char U+0158 (decimal 344) + defining Unicode char U+0159 
(decimal 345) + defining Unicode char U+015A (decimal 346) + defining Unicode char U+015B (decimal 347) + defining Unicode char U+015E (decimal 350) + defining Unicode char U+015F (decimal 351) + defining Unicode char U+0160 (decimal 352) + defining Unicode char U+0161 (decimal 353) + defining Unicode char U+0162 (decimal 354) + defining Unicode char U+0163 (decimal 355) + defining Unicode char U+0164 (decimal 356) + defining Unicode char U+0165 (decimal 357) + defining Unicode char U+016E (decimal 366) + defining Unicode char U+016F (decimal 367) + defining Unicode char U+0170 (decimal 368) + defining Unicode char U+0171 (decimal 369) + defining Unicode char U+0178 (decimal 376) + defining Unicode char U+0179 (decimal 377) + defining Unicode char U+017A (decimal 378) + defining Unicode char U+017B (decimal 379) + defining Unicode char U+017C (decimal 380) + defining Unicode char U+017D (decimal 381) + defining Unicode char U+017E (decimal 382) + defining Unicode char U+200C (decimal 8204) + defining Unicode char U+2013 (decimal 8211) + defining Unicode char U+2014 (decimal 8212) + defining Unicode char U+2018 (decimal 8216) + defining Unicode char U+2019 (decimal 8217) + defining Unicode char U+201A (decimal 8218) + defining Unicode char U+201C (decimal 8220) + defining Unicode char U+201D (decimal 8221) + defining Unicode char U+201E (decimal 8222) + defining Unicode char U+2030 (decimal 8240) + defining Unicode char U+2031 (decimal 8241) + defining Unicode char U+2039 (decimal 8249) + defining Unicode char U+203A (decimal 8250) + defining Unicode char U+2423 (decimal 9251) +) +Now handling font encoding OT1 ... +... 
processing UTF-8 mapping file for font encoding OT1 + +(/usr/share/texlive/texmf-dist/tex/latex/base/ot1enc.dfu +File: ot1enc.dfu 2008/04/05 v1.1m UTF-8 support for inputenc + defining Unicode char U+00A1 (decimal 161) + defining Unicode char U+00A3 (decimal 163) + defining Unicode char U+00B8 (decimal 184) + defining Unicode char U+00BF (decimal 191) + defining Unicode char U+00C5 (decimal 197) + defining Unicode char U+00C6 (decimal 198) + defining Unicode char U+00D8 (decimal 216) + defining Unicode char U+00DF (decimal 223) + defining Unicode char U+00E6 (decimal 230) + defining Unicode char U+00EC (decimal 236) + defining Unicode char U+00ED (decimal 237) + defining Unicode char U+00EE (decimal 238) + defining Unicode char U+00EF (decimal 239) + defining Unicode char U+00F8 (decimal 248) + defining Unicode char U+0131 (decimal 305) + defining Unicode char U+0141 (decimal 321) + defining Unicode char U+0142 (decimal 322) + defining Unicode char U+0152 (decimal 338) + defining Unicode char U+0153 (decimal 339) + defining Unicode char U+2013 (decimal 8211) + defining Unicode char U+2014 (decimal 8212) + defining Unicode char U+2018 (decimal 8216) + defining Unicode char U+2019 (decimal 8217) + defining Unicode char U+201C (decimal 8220) + defining Unicode char U+201D (decimal 8221) +) +Now handling font encoding OMS ... +... processing UTF-8 mapping file for font encoding OMS + +(/usr/share/texlive/texmf-dist/tex/latex/base/omsenc.dfu +File: omsenc.dfu 2008/04/05 v1.1m UTF-8 support for inputenc + defining Unicode char U+00A7 (decimal 167) + defining Unicode char U+00B6 (decimal 182) + defining Unicode char U+00B7 (decimal 183) + defining Unicode char U+2020 (decimal 8224) + defining Unicode char U+2021 (decimal 8225) + defining Unicode char U+2022 (decimal 8226) +) +Now handling font encoding OMX ... +... no UTF-8 mapping file for font encoding OMX +Now handling font encoding U ... +... 
no UTF-8 mapping file for font encoding U + defining Unicode char U+00A9 (decimal 169) + defining Unicode char U+00AA (decimal 170) + defining Unicode char U+00AE (decimal 174) + defining Unicode char U+00BA (decimal 186) + defining Unicode char U+02C6 (decimal 710) + defining Unicode char U+02DC (decimal 732) + defining Unicode char U+200C (decimal 8204) + defining Unicode char U+2026 (decimal 8230) + defining Unicode char U+2122 (decimal 8482) + defining Unicode char U+2423 (decimal 9251) +)) +(/usr/share/texlive/texmf-dist/tex/latex/geometry/geometry.sty +Package: geometry 2010/09/12 v5.6 Page Geometry + +(/usr/share/texlive/texmf-dist/tex/latex/graphics/keyval.sty +Package: keyval 1999/03/16 v1.13 key=value parser (DPC) +\KV@toks@=\toks16 +) +(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/ifpdf.sty +Package: ifpdf 2011/01/30 v2.3 Provides the ifpdf switch (HO) +Package ifpdf Info: pdfTeX in PDF mode is detected. +) +(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/ifvtex.sty +Package: ifvtex 2010/03/01 v1.5 Detect VTeX and its facilities (HO) +Package ifvtex Info: VTeX not detected. 
+) +(/usr/share/texlive/texmf-dist/tex/generic/ifxetex/ifxetex.sty +Package: ifxetex 2010/09/12 v0.6 Provides ifxetex conditional +) +\Gm@cnth=\count87 +\Gm@cntv=\count88 +\c@Gm@tempcnt=\count89 +\Gm@bindingoffset=\dimen103 +\Gm@wd@mp=\dimen104 +\Gm@odd@mp=\dimen105 +\Gm@even@mp=\dimen106 +\Gm@layoutwidth=\dimen107 +\Gm@layoutheight=\dimen108 +\Gm@layouthoffset=\dimen109 +\Gm@layoutvoffset=\dimen110 +\Gm@dimlist=\toks17 +) +(/usr/share/texlive/texmf-dist/tex/latex/graphics/graphicx.sty +Package: graphicx 1999/02/16 v1.0f Enhanced LaTeX Graphics (DPC,SPQR) + +(/usr/share/texlive/texmf-dist/tex/latex/graphics/graphics.sty +Package: graphics 2009/02/05 v1.0o Standard LaTeX Graphics (DPC,SPQR) + +(/usr/share/texlive/texmf-dist/tex/latex/graphics/trig.sty +Package: trig 1999/03/16 v1.09 sin cos tan (DPC) +) +(/usr/share/texlive/texmf-dist/tex/latex/latexconfig/graphics.cfg +File: graphics.cfg 2010/04/23 v1.9 graphics configuration of TeX Live +) +Package graphics Info: Driver file: pdftex.def on input line 91. + +(/usr/share/texlive/texmf-dist/tex/latex/pdftex-def/pdftex.def +File: pdftex.def 2011/05/27 v0.06d Graphics/color for pdfTeX + +(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/infwarerr.sty +Package: infwarerr 2010/04/08 v1.3 Providing info/warning/error messages (HO) +) +(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/ltxcmds.sty +Package: ltxcmds 2011/11/09 v1.22 LaTeX kernel commands for general use (HO) +) +\Gread@gobject=\count90 +)) +\Gin@req@height=\dimen111 +\Gin@req@width=\dimen112 +) +(/usr/share/texlive/texmf-dist/tex/latex/preprint/fullpage.sty +Package: fullpage 1999/02/23 1.1 (PWD) +\FP@margin=\skip43 +) +(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsmath.sty +Package: amsmath 2013/01/14 v2.14 AMS math features +\@mathmargin=\skip44 + +For additional information on amsmath, use the `?' option. 
+(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amstext.sty +Package: amstext 2000/06/29 v2.01 + +(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsgen.sty +File: amsgen.sty 1999/11/30 v2.0 +\@emptytoks=\toks18 +\ex@=\dimen113 +)) +(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsbsy.sty +Package: amsbsy 1999/11/29 v1.2d +\pmbraise@=\dimen114 +) +(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsopn.sty +Package: amsopn 1999/12/14 v2.01 operator names +) +\inf@bad=\count91 +LaTeX Info: Redefining \frac on input line 210. +\uproot@=\count92 +\leftroot@=\count93 +LaTeX Info: Redefining \overline on input line 306. +\classnum@=\count94 +\DOTSCASE@=\count95 +LaTeX Info: Redefining \ldots on input line 378. +LaTeX Info: Redefining \dots on input line 381. +LaTeX Info: Redefining \cdots on input line 466. +\Mathstrutbox@=\box26 +\strutbox@=\box27 +\big@size=\dimen115 +LaTeX Font Info: Redeclaring font encoding OML on input line 566. +LaTeX Font Info: Redeclaring font encoding OMS on input line 567. +\macc@depth=\count96 +\c@MaxMatrixCols=\count97 +\dotsspace@=\muskip10 +\c@parentequation=\count98 +\dspbrk@lvl=\count99 +\tag@help=\toks19 +\row@=\count100 +\column@=\count101 +\maxfields@=\count102 +\andhelp@=\toks20 +\eqnshift@=\dimen116 +\alignsep@=\dimen117 +\tagshift@=\dimen118 +\tagwidth@=\dimen119 +\totwidth@=\dimen120 +\lineht@=\dimen121 +\@envbody=\toks21 +\multlinegap=\skip45 +\multlinetaggap=\skip46 +\mathdisplay@stack=\toks22 +LaTeX Info: Redefining \[ on input line 2665. +LaTeX Info: Redefining \] on input line 2666. +) +(/usr/share/texlive/texmf-dist/tex/latex/amsfonts/amsfonts.sty +Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support +\symAMSa=\mathgroup4 +\symAMSb=\mathgroup5 +LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold' +(Font) U/euf/m/n --> U/euf/b/n on input line 106. 
+) +(/usr/share/texlive/texmf-dist/tex/latex/oberdiek/centernot.sty +Package: centernot 2011/07/11 v1.3 Centers the not symbol horizontally (HO) +) +(/usr/share/texlive/texmf-dist/tex/latex/listings/listings.sty +\lst@mode=\count103 +\lst@gtempboxa=\box28 +\lst@token=\toks23 +\lst@length=\count104 +\lst@currlwidth=\dimen122 +\lst@column=\count105 +\lst@pos=\count106 +\lst@lostspace=\dimen123 +\lst@width=\dimen124 +\lst@newlines=\count107 +\lst@lineno=\count108 +\lst@maxwidth=\dimen125 + +(/usr/share/texlive/texmf-dist/tex/latex/listings/lstmisc.sty +File: lstmisc.sty 2013/08/26 1.5b (Carsten Heinz) +\c@lstnumber=\count109 +\lst@skipnumbers=\count110 +\lst@framebox=\box29 +) +(/usr/share/texlive/texmf-dist/tex/latex/listings/listings.cfg +File: listings.cfg 2013/08/26 1.5b listings configuration +)) +Package: listings 2013/08/26 1.5b (Carsten Heinz) + +(/usr/share/texlive/texmf-dist/tex/latex/graphics/color.sty +Package: color 2005/11/14 v1.0j Standard LaTeX Color (DPC) + +(/usr/share/texlive/texmf-dist/tex/latex/latexconfig/color.cfg +File: color.cfg 2007/01/18 v1.5 color configuration of teTeX/TeXLive +) +Package color Info: Driver file: pdftex.def on input line 130. +) +(/usr/share/texmf/tex/latex/xcolor/xcolor.sty +Package: xcolor 2007/01/21 v2.11 LaTeX color extensions (UK) + +(/usr/share/texlive/texmf-dist/tex/latex/latexconfig/color.cfg +File: color.cfg 2007/01/18 v1.5 color configuration of teTeX/TeXLive +) +Package xcolor Info: Driver file: pdftex.def on input line 225. +LaTeX Info: Redefining \color on input line 702. +Package xcolor Info: Model `cmy' substituted by `cmy0' on input line 1337. +Package xcolor Info: Model `hsb' substituted by `rgb' on input line 1341. +Package xcolor Info: Model `RGB' extended on input line 1353. +Package xcolor Info: Model `HTML' substituted by `rgb' on input line 1355. +Package xcolor Info: Model `Hsb' substituted by `hsb' on input line 1356. +Package xcolor Info: Model `tHsb' substituted by `hsb' on input line 1357. 
+Package xcolor Info: Model `HSB' substituted by `hsb' on input line 1358. +Package xcolor Info: Model `Gray' substituted by `gray' on input line 1359. +Package xcolor Info: Model `wave' substituted by `hsb' on input line 1360. +) +(/usr/share/texlive/texmf-dist/tex/latex/courier-scaled/couriers.sty +Package: couriers 2004/07/10 Scaled Courier (HS) +) +(/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang1.sty +File: lstlang1.sty 2013/08/26 1.5b listings language file +) +(/usr/share/texlive/texmf-dist/tex/latex/etoolbox/etoolbox.sty +Package: etoolbox 2011/01/03 v2.1 e-TeX tools for LaTeX + +(/usr/share/texlive/texmf-dist/tex/latex/etex-pkg/etex.sty +Package: etex 1998/03/26 v2.0 eTeX basic definition package (PEB) +\et@xins=\count111 +) +\etb@tempcnta=\count112 +) +(/usr/share/texlive/texmf-dist/tex/latex/cleveref/cleveref.sty +Package: cleveref 2013/12/28 v0.19 Intelligent cross-referencing +Package cleveref Info: `listings' support loaded on input line 2950. +) +(/usr/share/texlive/texmf-dist/tex/latex/booktabs/booktabs.sty +Package: booktabs 2005/04/14 v1.61803 publication quality tables +\heavyrulewidth=\dimen126 +\lightrulewidth=\dimen127 +\cmidrulewidth=\dimen128 +\belowrulesep=\dimen129 +\belowbottomsep=\dimen130 +\aboverulesep=\dimen131 +\abovetopsep=\dimen132 +\cmidrulesep=\dimen133 +\cmidrulekern=\dimen134 +\defaultaddspace=\dimen135 +\@cmidla=\count113 +\@cmidlb=\count114 +\@aboverulesep=\dimen136 +\@belowrulesep=\dimen137 +\@thisruleclass=\count115 +\@lastruleclass=\count116 +\@thisrulewidth=\dimen138 +) +(/usr/share/texlive/texmf-dist/tex/latex/tools/array.sty +Package: array 2008/09/09 v2.4c Tabular extension package (FMi) +\col@sep=\dimen139 +\extrarowheight=\dimen140 +\NC@list=\toks24 +\extratabsurround=\skip47 +\backup@length=\skip48 +) +(/usr/share/texlive/texmf-dist/tex/latex/paralist/paralist.sty +Package: paralist 2013/06/09 v2.4 Extended list environments +\pltopsep=\skip49 +\plpartopsep=\skip50 +\plitemsep=\skip51 +\plparsep=\skip52 
+\pl@lab=\toks25 +) +(/usr/share/texlive/texmf-dist/tex/latex/tools/verbatim.sty +Package: verbatim 2003/08/22 v1.5q LaTeX2e package for verbatim enhancements +\every@verbatim=\toks26 +\verbatim@line=\toks27 +\verbatim@in@stream=\read1 +) +(/usr/share/texlive/texmf-dist/tex/latex/subfig/subfig.sty +Package: subfig 2005/06/28 ver: 1.3 subfig package + +(/usr/share/texlive/texmf-dist/tex/latex/caption/caption.sty +Package: caption 2013/05/02 v3.3-89 Customizing captions (AR) + +(/usr/share/texlive/texmf-dist/tex/latex/caption/caption3.sty +Package: caption3 2013/05/02 v1.6-88 caption3 kernel (AR) +Package caption3 Info: TeX engine: e-TeX on input line 57. +\captionmargin=\dimen141 +\captionmargin@=\dimen142 +\captionwidth=\dimen143 +\caption@tempdima=\dimen144 +\caption@indent=\dimen145 +\caption@parindent=\dimen146 +\caption@hangindent=\dimen147 +) +\c@ContinuedFloat=\count117 +Package caption Info: listings package is loaded. +) +\c@KVtest=\count118 +\sf@farskip=\skip53 +\sf@captopadj=\dimen148 +\sf@capskip=\skip54 +\sf@nearskip=\skip55 +\c@subfigure=\count119 +\c@subfigure@save=\count120 +\c@lofdepth=\count121 +\c@subtable=\count122 +\c@subtable@save=\count123 +\c@lotdepth=\count124 +\sf@top=\skip56 +\sf@bottom=\skip57 +) +(/usr/share/texlive/texmf-dist/tex/latex/fancyhdr/fancyhdr.sty +\fancy@headwidth=\skip58 +\f@ncyO@elh=\skip59 +\f@ncyO@erh=\skip60 +\f@ncyO@olh=\skip61 +\f@ncyO@orh=\skip62 +\f@ncyO@elf=\skip63 +\f@ncyO@erf=\skip64 +\f@ncyO@olf=\skip65 +\f@ncyO@orf=\skip66 +) +(/usr/share/texlive/texmf-dist/tex/latex/sectsty/sectsty.sty +Package: sectsty 2002/02/25 v2.0.2 Commands to change all sectional heading sty +les +) +(/usr/share/texlive/texmf-dist/tex/latex/tocbibind/tocbibind.sty +Package: tocbibind 2010/10/13 v1.5k extra ToC listings +Package tocbibind Info: The document has section divisions on input line 50. + + +Package tocbibind Note: Using section or other style headings. 
+ +) (/usr/share/texlive/texmf-dist/tex/latex/tocloft/tocloft.sty +Package: tocloft 2013/05/02 v2.3f parameterised ToC, etc., typesetting +Package tocloft Info: The document has section divisions on input line 44. +\cftparskip=\skip67 +\cftbeforetoctitleskip=\skip68 +\cftaftertoctitleskip=\skip69 +\cftbeforepartskip=\skip70 +\cftpartnumwidth=\skip71 +\cftpartindent=\skip72 +\cftbeforesecskip=\skip73 +\cftsecindent=\skip74 +\cftsecnumwidth=\skip75 +\cftbeforesubsecskip=\skip76 +\cftsubsecindent=\skip77 +\cftsubsecnumwidth=\skip78 +\cftbeforesubsubsecskip=\skip79 +\cftsubsubsecindent=\skip80 +\cftsubsubsecnumwidth=\skip81 +\cftbeforeparaskip=\skip82 +\cftparaindent=\skip83 +\cftparanumwidth=\skip84 +\cftbeforesubparaskip=\skip85 +\cftsubparaindent=\skip86 +\cftsubparanumwidth=\skip87 +\cftbeforeloftitleskip=\skip88 +\cftafterloftitleskip=\skip89 +\cftbeforefigskip=\skip90 +\cftfigindent=\skip91 +\cftfignumwidth=\skip92 +\cftbeforelottitleskip=\skip93 +\cftafterlottitleskip=\skip94 +\cftbeforetabskip=\skip95 +\cfttabindent=\skip96 +\cfttabnumwidth=\skip97 +\cftbeforesubfigskip=\skip98 +\cftsubfigindent=\skip99 +\cftsubfignumwidth=\skip100 +\cftbeforesubtabskip=\skip101 +\cftsubtabindent=\skip102 +\cftsubtabnumwidth=\skip103 +) +(./Coursework1.aux) +\openout1 = `Coursework1.aux'. + +LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 120. +LaTeX Font Info: ... okay on input line 120. +LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 120. +LaTeX Font Info: ... okay on input line 120. +LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 120. +LaTeX Font Info: ... okay on input line 120. +LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 120. +LaTeX Font Info: ... okay on input line 120. +LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 120. +LaTeX Font Info: ... okay on input line 120. +LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 120. +LaTeX Font Info: ... 
okay on input line 120. + +*geometry* driver: auto-detecting +*geometry* detected driver: pdftex +*geometry* verbose mode - [ preamble ] result: +* driver: pdftex +* paper: a4paper +* layout: +* layoutoffset:(h,v)=(0.0pt,0.0pt) +* modes: +* h-part:(L,W,R)=(89.62709pt, 418.25368pt, 89.6271pt) +* v-part:(T,H,B)=(101.40665pt, 591.5302pt, 152.11pt) +* \paperwidth=597.50787pt +* \paperheight=845.04684pt +* \textwidth=452.9679pt +* \textheight=670.50687pt +* \oddsidemargin=0.0pt +* \evensidemargin=0.0pt +* \topmargin=0.0pt +* \headheight=0.0pt +* \headsep=0.0pt +* \topskip=11.0pt +* \footskip=30.0pt +* \marginparwidth=59.0pt +* \marginparsep=10.0pt +* \columnsep=10.0pt +* \skip\footins=10.0pt plus 4.0pt minus 2.0pt +* \hoffset=0.0pt +* \voffset=0.0pt +* \mag=1000 +* \@twocolumnfalse +* \@twosidefalse +* \@mparswitchfalse +* \@reversemarginfalse +* (1in=72.27pt=25.4mm, 1cm=28.453pt) + +(/usr/share/texlive/texmf-dist/tex/context/base/supp-pdf.mkii +[Loading MPS to PDF converter (version 2006.09.02).] +\scratchcounter=\count125 +\scratchdimen=\dimen149 +\scratchbox=\box30 +\nofMPsegments=\count126 +\nofMParguments=\count127 +\everyMPshowfont=\toks28 +\MPscratchCnt=\count128 +\MPscratchDim=\dimen150 +\MPnumerator=\count129 +\makeMPintoPDFobject=\count130 +\everyMPtoPDFconversion=\toks29 +) (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/pdftexcmds.sty +Package: pdftexcmds 2011/11/29 v0.20 Utility functions of pdfTeX for LuaTeX (HO +) + +(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/ifluatex.sty +Package: ifluatex 2010/03/01 v1.3 Provides the ifluatex switch (HO) +Package ifluatex Info: LuaTeX not detected. +) +Package pdftexcmds Info: LuaTeX not detected. +Package pdftexcmds Info: \pdf@primitive is available. +Package pdftexcmds Info: \pdf@ifprimitive is available. +Package pdftexcmds Info: \pdfdraftmode found. 
+) +(/usr/share/texlive/texmf-dist/tex/latex/oberdiek/epstopdf-base.sty +Package: epstopdf-base 2010/02/09 v2.5 Base part for package epstopdf + +(/usr/share/texlive/texmf-dist/tex/latex/oberdiek/grfext.sty +Package: grfext 2010/08/19 v1.1 Manage graphics extensions (HO) + +(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/kvdefinekeys.sty +Package: kvdefinekeys 2011/04/07 v1.3 Define keys (HO) +)) +(/usr/share/texlive/texmf-dist/tex/latex/oberdiek/kvoptions.sty +Package: kvoptions 2011/06/30 v3.11 Key value format for package options (HO) + +(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/kvsetkeys.sty +Package: kvsetkeys 2012/04/25 v1.16 Key value parser (HO) + +(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/etexcmds.sty +Package: etexcmds 2011/02/16 v1.5 Avoid name clashes with e-TeX commands (HO) +Package etexcmds Info: Could not find \expanded. +(etexcmds) That can mean that you are not using pdfTeX 1.50 or +(etexcmds) that some package has redefined \expanded. +(etexcmds) In the latter case, load this package earlier. +))) +Package grfext Info: Graphics extension search list: +(grfext) [.png,.pdf,.jpg,.mps,.jpeg,.jbig2,.jb2,.PNG,.PDF,.JPG,.JPE +G,.JBIG2,.JB2,.eps] +(grfext) \AppendGraphicsExtensions on input line 452. + +(/usr/share/texlive/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg +File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv +e +)) +\c@lstlisting=\count131 +Package caption Info: Begin \AtBeginDocument code. +Package caption Info: subfig package v1.3 is loaded. +Package caption Info: End \AtBeginDocument code. +LaTeX Font Info: Try loading font information for U+msa on input line 121. + +(/usr/share/texlive/texmf-dist/tex/latex/amsfonts/umsa.fd +File: umsa.fd 2013/01/14 v3.01 AMS symbols A +) +LaTeX Font Info: Try loading font information for U+msb on input line 121. 
+ +(/usr/share/texlive/texmf-dist/tex/latex/amsfonts/umsb.fd +File: umsb.fd 2013/01/14 v3.01 AMS symbols B +) [1 + +{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}] (./Coursework1.aux) ) +Here is how much of TeX's memory you used: + 8563 strings out of 493304 + 133162 string characters out of 6139871 + 232115 words of memory out of 5000000 + 11888 multiletter control sequences out of 15000+600000 + 12993 words of font info for 50 fonts, out of 8000000 for 9000 + 957 hyphenation exceptions out of 8191 + 44i,6n,43p,797b,219s stack positions out of 5000i,500n,10000p,200000b,80000s + +Output written on Coursework1.pdf (1 page, 65162 bytes). +PDF statistics: + 32 PDF objects out of 1000 (max. 8388607) + 22 compressed objects within 1 object stream + 0 named destinations out of 1000 (max. 500000) + 1 words of extra memory for PDF output out of 10000 (max. 10000000) + diff --git a/report/Coursework1.pdf b/report/Coursework1.pdf new file mode 100644 index 0000000..8f6bbe2 Binary files /dev/null and b/report/Coursework1.pdf differ diff --git a/report/Coursework1.synctex.gz b/report/Coursework1.synctex.gz new file mode 100644 index 0000000..7eebe45 Binary files /dev/null and b/report/Coursework1.synctex.gz differ diff --git a/report/Coursework1.tex b/report/Coursework1.tex new file mode 100644 index 0000000..113bc1f --- /dev/null +++ b/report/Coursework1.tex @@ -0,0 +1,139 @@ +% !TEX TS-program = pdflatex +% !TEX encoding = UTF-8 Unicode + +% This is a simple template for a LaTeX document using the "article" class. +% See "book", "report", "letter" for other types of document. + +\documentclass[11pt]{article} % use larger type; default would be 10pt + +\usepackage[utf8]{inputenc} % set input encoding (not needed with XeLaTeX) + +%%% Examples of Article customizations +% These packages are optional, depending whether you want the features they provide. +% See the LaTeX Companion or other references for full information. 
+ +%%% PAGE DIMENSIONS +\usepackage{geometry} % to change the page dimensions +\geometry{a4paper} % or letterpaper (US) or a5paper or.... +% \geometry{margin=2in} % for example, change the margins to 2 inches all round +% \geometry{landscape} % set up the page for landscape +% read geometry.pdf for detailed page layout information + +\usepackage{graphicx} % support the \includegraphics command and options + +% \usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent + +%%% PACKAGES +\usepackage{fullpage} +\usepackage{amsmath} % maths environments and macros +\usepackage{amsfonts} % extra maths fonts (blackboard bold for number sets, fraktur) +\usepackage{centernot} % easier negations +%source code +\usepackage{listings} +\usepackage{color} +\usepackage{xcolor} +\definecolor{darkRed}{rgb}{0.6,0,0} +\definecolor{darkGreen}{rgb}{0,0.6,0} +\definecolor{darkBlue}{rgb}{0,0,0.6} +\definecolor{grayFifty}{rgb}{0.5,0.5,0.5} +\definecolor{graySixty}{rgb}{0.6,0.6,0.6} +\definecolor{grayc}{rgb}{0.5,0.5,0.5} +\definecolor{brownIsh}{rgb}{0.5,0.25,0} +\definecolor{purplec}{rgb}{0.58,0,0.82} +\definecolor{definec}{rgb}{0.5,0.25,0} + +% general stuff +\usepackage[scaled=1.04]{couriers} +\lstset{ + backgroundcolor=\color{white}, % background colour + basicstyle=\ttfamily\footnotesize, % font style + breakatwhitespace=false, % automatic breaks only at whitespace + breaklines=true, % sets automatic line breaking + captionpos=t, % sets the caption-position to top + commentstyle=\color{darkGreen}, % comment style + escapeinside={\%*}{*)}, % LaTeX in code + extendedchars=true, % allow 8-bit non-ASCII characters (does not work with UTF-8) + frame=single, % adds a frame around the code + keepspaces=true, % keeps spaces in text + keywordstyle=\bfseries\color{darkBlue}, % keyword style + numbers=left, % where to put the line-numbers; possible values are (none, left, right) + numbersep=12pt, % how far the line-numbers are from the code + numberstyle=\tiny\color{gray}, % the style that is used for the line-numbers +
rulecolor=\color{black}, % if not set, the frame-color may be changed on line-breaks within not-black text + showspaces=false, % show spaces everywhere adding particular underscores; it overrides 'showstringspaces' + showstringspaces=false, % underline spaces within strings only + showtabs=false, % show tabs within strings adding particular underscores + stepnumber=1, % the step between two line-numbers. If it's 1, each line will be numbered + stringstyle=\color{blue}, % string literal style + tabsize=4, % sets default tabsize to 4 spaces + title=\lstname % show the filename of files included with \lstinputlisting; also try caption instead of title +} +% language stuff +\lstset{ + language=Octave, % the language of the code + deletekeywords={...}, % delete key words from language + morekeywords={*,...}, % if you want to add more keywords to the set +} +% hacks +\usepackage{etoolbox} +\usepackage{cleveref} +\usepackage{booktabs} % for much better looking tables +\usepackage{array} % for better arrays (eg matrices) in maths +\usepackage{paralist} % very flexible & customisable lists (eg. enumerate/itemize, etc.) +\usepackage{verbatim} % adds environment for commenting out blocks of text & for better verbatim +\usepackage{subfig} % make it possible to include more than one captioned figure/table in a single float +% These packages are all incorporated in the memoir class to one degree or another... + +%%% HEADERS & FOOTERS +\usepackage{fancyhdr} % This should be set AFTER setting up the page geometry +\pagestyle{fancy} % options: empty , plain , fancy +\renewcommand{\headrulewidth}{0pt} % customise the layout...
+\lhead{}\chead{}\rhead{} +\lfoot{}\cfoot{\thepage}\rfoot{} + +%%% SECTION TITLE APPEARANCE +\usepackage{sectsty} +\allsectionsfont{\sffamily\mdseries\upshape} % (See the fntguide.pdf for font help) +% (This matches ConTeXt defaults) + +%%% ToC (table of contents) APPEARANCE +\usepackage[nottoc,notlof,notlot]{tocbibind} % Put the bibliography in the ToC +\usepackage[titles,subfigure]{tocloft} % Alter the style of the Table of Contents +\renewcommand{\cftsecfont}{\rmfamily\mdseries\upshape} +\renewcommand{\cftsecpagefont}{\rmfamily\mdseries\upshape} % No bold! + +%%% END Article customizations + +%%% The "real" document content comes below... + +\title{CS257 Report: Optimising the simulation of $N$-bodies behaving under the influence of gravity in a bounded cubical area} +\author{Alen Buhanec} +\date{} + +\makeatletter +\renewcommand*\env@matrix[1][*\c@MaxMatrixCols c]{% + \hskip -\arraycolsep + \let\@ifnextchar\new@ifnextchar + \array{#1}} +\makeatother + +\begin{document} +\maketitle +\section{Introduction} + +The aim of the coursework was to optimise a simulation of $N$ stars behaving under the influence of gravity in a bounded cubical area. + +The original code of the simulation had a four-loop structure (named loops 0--3), with each successive loop depending on the previous one. Loop 0 cleared the acceleration array; loop 1 then populated the acceleration array based on the positions of the stars. Following that, loop 2 updated the velocity array, and finally loop 3 calculated the new positions of the stars: if a star reached the edge of the cubical simulation area, it was ``bounced back'' into the area by inverting the appropriate components of its velocity.
+ + + +\section{Structural Optimisations} + + +\section{Threading} + + +\section{Comparison of Different Versions} + + +\end{document} diff --git a/report/cs132.c b/report/cs132.c new file mode 100644 index 0000000..f144441 --- /dev/null +++ b/report/cs132.c @@ -0,0 +1,51 @@ +#include <stdio.h> + +void drawPascalsRow(int n); +int fibonacci(int n); + +// Prints rows 1-10 of Pascal's triangle, then the first ten Fibonacci numbers. +int main(void) +{ + int i; + // a) Pascal's triangle, one row per output line + printf("Pascal's Triangle:\r\n"); + for (i = 1; i < 11; i++) + { + drawPascalsRow(i); + printf("\r\n"); + } + // b) Fibonacci numbers F(1)..F(10) on a single line + printf("Fibonacci numbers:\r\n"); + for (i = 1; i < 11; i++) + printf("%d ", fibonacci(i)); + return 0; +} + +// Returns the binomial coefficient C(n, r); 0 when r > n or either argument is negative. +// Recurrence: C(n, r) = C(n, r-1) * (n+1-r) / r; the product is always divisible by r. +int binomialCoefficient(int n, int r) +{ + if (n < r || n < 0 || r < 0) + return 0; + if (n == r || r == 0) + return 1; + return binomialCoefficient(n, r-1)*(n+1-r)/r; +} + +// Prints row n (1-based) of Pascal's triangle: C(n-1, 0) .. C(n-1, n-1), space separated. +void drawPascalsRow(int n) +{ + int r; + for (r = 0; r < n; r++) + printf("%d ", binomialCoefficient(n-1, r)); +} + +// Returns the n-th Fibonacci number (F(1) = F(2) = 1) as the sum of C(n-r-1, r) for r = 0 .. (n-1)/2. +int fibonacci(int n) +{ + int r; + int f = 0; + for (r = 0; r <= (n-1)/2; r++) + f = f + binomialCoefficient(n-r-1, r); + return f; +} diff --git a/report/cs132a.asm b/report/cs132a.asm new file mode 100644 index 0000000..836e4f4 --- /dev/null +++ b/report/cs132a.asm @@ -0,0 +1,27 @@ +| The dividend and divisor +dividend: dc #999 +divisor: dc #77 + +| Allocate memory for results +quotient: ds 1 +remainder: ds 1 + +| Allocate data registers in the following manner: +| D0 - divisor +| D1 - quotient +| D2 - remainder +move divisor, D0 +move #0, D1 +move dividend, D2 + +| Perform the logic loop +div: inc D1 +sub D0, D2 +bgt div +beq fin +add D0, D2 +dec D1 + +| We store the results +fin: move D1, quotient +move D2, remainder \ No newline at end of file diff --git a/report/cs132b.asm b/report/cs132b.asm new file mode 100644 index 0000000..5d19074 --- /dev/null +++ b/report/cs132b.asm @@ -0,0 +1,28 @@ +| The number to check +num: dc #23 + +| Allocate memory for the result +result: ds 1 + +| Allocate data
registers in the following manner: +| D0 - dividend/remainder +| D1 - divisor +move num, D0 +move #2, D1 + +| Perform the logic loop +div: sub D1, D0 +bgt div +beq fin +inc D1 +move num, D0 +sub D1, D0 +beq prime +move num, D0 +jmp div + +| Set Prime +prime: move #1, D1 + +| We store the result +fin: move D1, result