From 7d227efcd4daae9fdaca20c4d7da276c12c6de5a Mon Sep 17 00:00:00 2001 From: Modem514 Date: Sat, 14 Jan 2023 20:14:18 +0800 Subject: [PATCH] W7D1 & W10D2 & W14D1: Yang Jincheng's Arch Notes --- W10D2/menu.md | 1 + W10D2/w10d2-yjc/Arch-Note-W10D2.md | 69 +++++++++++++++ W10D2/w10d2-yjc/daisy.svg | 4 + W10D2/w10d2-yjc/interruption.svg | 4 + W10D2/w10d2-yjc/modern.svg | 4 + W14D1/menu.md | 1 + W14D1/w14d1-yjc/Arch-Note-W14D1.md | 98 +++++++++++++++++++++ W7D1/menu.md | 1 + W7D1/w7d1-yjc/Arch-Note-W7D1.md | 132 +++++++++++++++++++++++++++++ W7D1/w7d1-yjc/prefetching.svg | 4 + 10 files changed, 318 insertions(+) create mode 100644 W10D2/w10d2-yjc/Arch-Note-W10D2.md create mode 100644 W10D2/w10d2-yjc/daisy.svg create mode 100644 W10D2/w10d2-yjc/interruption.svg create mode 100644 W10D2/w10d2-yjc/modern.svg create mode 100644 W14D1/w14d1-yjc/Arch-Note-W14D1.md create mode 100644 W7D1/w7d1-yjc/Arch-Note-W7D1.md create mode 100644 W7D1/w7d1-yjc/prefetching.svg diff --git a/W10D2/menu.md b/W10D2/menu.md index e69de29..5a95a57 100644 --- a/W10D2/menu.md +++ b/W10D2/menu.md @@ -0,0 +1 @@ +[Yang Jincheng's Arch Note](w10d2-yjc/Arch-Note-W10D2.md) diff --git a/W10D2/w10d2-yjc/Arch-Note-W10D2.md b/W10D2/w10d2-yjc/Arch-Note-W10D2.md new file mode 100644 index 0000000..3de2c1b --- /dev/null +++ b/W10D2/w10d2-yjc/Arch-Note-W10D2.md @@ -0,0 +1,69 @@ +# Arch Note W10 D2 + +> written by Yang Jin-cheng + +## Async v.s. 
Sync + +> async technique +> +> H/W done by interrupts +> +> S/W done by call + ++ Interrupt-driven I/O + ++ Internet + + + ATM (asynchronous transfer mode) / ISDN model + + ATM is widely used in the US + + ISDN is mainly used in Europe + +### Intel's Solution for H/W Interrupts + +![](interruption.svg) + +8086: central controller + +8259: interrupt controller + +INT: an external device tells the core to stall and switch to the I/O port (a unilateral signal) + +IC design: handles INT signals, can be multi-stage + +### Modern Solution for H/W Interrupts + +#### APIC + +![](modern.svg) + +APIC: advanced programmable interrupt controller + +signal path: I/O device -> bus -> APIC -> core + +#### Daisy Chain + +![](daisy.svg) + +relies on the other I/O devices to pass the INT signal along + +relatively slow but highly scalable + +#### Internet Async Solution + ++ MIN: multi-stage interchange network (faster) + + O(n log n) time complexity in data interchange + ++ SDN: software-defined network (slower) + + better robustness and scalability + ++ ISDN: integrated services digital network + + VC (virtual connection) technique + + uses the telephone network (sync) for internet transmission (async) + ++ TCP/IP: divides data into packets (better robustness) diff --git a/W10D2/w10d2-yjc/daisy.svg b/W10D2/w10d2-yjc/daisy.svg new file mode 100644 index 0000000..b51853c --- /dev/null +++ b/W10D2/w10d2-yjc/daisy.svg @@ -0,0 +1,4 @@ + + + +
core
core
I/O
I/O
I/O
I/O
I/O
I/O
Text is not SVG - cannot display
\ No newline at end of file diff --git a/W10D2/w10d2-yjc/interruption.svg b/W10D2/w10d2-yjc/interruption.svg new file mode 100644 index 0000000..5d02a0c --- /dev/null +++ b/W10D2/w10d2-yjc/interruption.svg @@ -0,0 +1,4 @@ + + + +
core
core
INT
INT
IC
8259
IC...
IC
8259
IC...
BUS
BUS
I/O
I/O
I/O
I/O
Text is not SVG - cannot display
\ No newline at end of file diff --git a/W10D2/w10d2-yjc/modern.svg b/W10D2/w10d2-yjc/modern.svg new file mode 100644 index 0000000..0f334ca --- /dev/null +++ b/W10D2/w10d2-yjc/modern.svg @@ -0,0 +1,4 @@ + + + +
core
core
APIC
APIC
BUS
BUS
I/O
I/O
Text is not SVG - cannot display
\ No newline at end of file diff --git a/W14D1/menu.md b/W14D1/menu.md index e69de29..3daae7f 100644 --- a/W14D1/menu.md +++ b/W14D1/menu.md @@ -0,0 +1 @@ +[Yang Jincheng's Arch Note](w14d1-yjc/Arch-Note-W14D1.md) diff --git a/W14D1/w14d1-yjc/Arch-Note-W14D1.md b/W14D1/w14d1-yjc/Arch-Note-W14D1.md new file mode 100644 index 0000000..62ba6fe --- /dev/null +++ b/W14D1/w14d1-yjc/Arch-Note-W14D1.md @@ -0,0 +1,98 @@ +# Arch Note W14D1 + +> written by Yang Jin-cheng + +## Assessment of Architecture + +```mermaid +graph LR; + +arch{Arch} +func[Function] +perf[Performance] +idea[Principle] + +arch --- func +arch --- perf +perf --- idea + + +idea --- A[small] +idea --- B[simple] +idea --- C[tradeoff/Compromise] +idea --- D[Amdahl's Law] + + +perf --- loc[Locality] +perf --- para[Parallelism] +``` + +## Function + +based on Von Neumann's architecture + +### Performance + +metric: CPI (clock cycles per instruction) + +perf is about reducing latency + +#### Principles + ++ Small: the smaller, the faster + ++ Simple: easy to process, like RISC + ++ Tradeoff / Compromise + ++ Amdahl's Law: pay attention to the performance of the most common part + + $S_p = \dfrac{1}{(1-\eta) + \eta/s}$ + +### Parallelism + +#### Pipeline (ILP) + ++ **basic principle** + + balance among stages; ideal speedup = N (the number of stages) + ++ **Hazard** + + stalls & distance + + + **Structural Hazard: FU conflict** + + for example, memory conflict + + solution: I/D-cache + + + **Data Hazard: data dependency** + + true dependency (RAW) + + solution: forwarding (small distance); out of order / move code (large distance) + + pseudo dependency + + + **Control Hazard: Branch & Jump** + + solution: early branch prediction; calculation delay (BTB); kill branch; delay slot filling + +#### Super Scalar + +### Locality + +introduce Cache + +$$ +\mathrm{AMAT = HitTime + MissRate} \times \mathrm{MissPenalty} +$$ + +Design of Cache + ++ Direct Mapped + ++ Fully Associative + ++ Set Associative diff --git a/W7D1/menu.md b/W7D1/menu.md index 
033ea6f..23f9d20 100644 --- a/W7D1/menu.md +++ b/W7D1/menu.md @@ -1 +1,2 @@ - [钟逸超](Note_W7D1_Yichao_Zhong.md) ++ [Yang Jincheng's Arch Note](w7d1-yjc/Arch-Note-W7D1.md) diff --git a/W7D1/w7d1-yjc/Arch-Note-W7D1.md b/W7D1/w7d1-yjc/Arch-Note-W7D1.md new file mode 100644 index 0000000..ee521a4 --- /dev/null +++ b/W7D1/w7d1-yjc/Arch-Note-W7D1.md @@ -0,0 +1,132 @@ +# Arch Note W7D1 + +> written by Yang Jin-cheng + +## Review + +reduce Average Memory Access Time + +$\mathrm{AMAT = HitTime + MissRate} \times \mathrm{MissPenalty}$ + +## H/W Prefetching + +![](prefetching.svg) + +write buffer can be implemented based on: a) victim cache; b) write-through design + +## S/W Prefetching + +1. Binding Prefetch: Loading data from MEM to register + + + uses LD instructions + + + takes up 1 register + +2. Non-Binding Prefetch: Loading data from MEM to cache + + + uses specific instructions, TOUCH for example + + + doesn't take up any register + +> related concept: MSHR (miss status holding register, tracks outstanding memory accesses) + +## Compiler Optimization + +### Merging Arrays + +Before: + +```cpp +// BEFORE +int val[SIZE]; +int key[SIZE]; + +// AFTER +struct merge { + int val; + int key; +}; +``` + +After merging, the `val` and `key` of the same element are adjacent in memory, so the expected access sequence is close to the location sequence of data. Thus spatial locality is improved. 
+ +### Loop Interchange + +```cpp +// BEFORE +for (k = 0; k < 100; k = k + 1) + for (j = 0; j < 100; j = j + 1) + for (i = 0; i < 5000; i = i + 1) + x[i][j] = 2 * x[i][j]; + +// AFTER +for (k = 0; k < 100; k = k + 1) + for (i = 0; i < 5000; i = i + 1) + for (j = 0; j < 100; j = j + 1) + x[i][j] = 2 * x[i][j]; +``` + +adjusts the data access order to match the row-major memory layout, so spatial locality is improved + +### Loop Fusion + +```cpp +// BEFORE +for (i = 0; i < N; i = i + 1) + for (j = 0; j < N; j = j + 1) + a[i][j] = 1/b[i][j] + c[i][j]; +for (i = 0; i < N; i = i + 1) + for (j = 0; j < N; j = j + 1) + d[i][j] = a[i][j] + c[i][j]; + +// AFTER +for (i = 0; i < N; i = i + 1) + for (j = 0; j < N; j = j + 1) { + a[i][j] = 1/b[i][j] + c[i][j]; + d[i][j] = a[i][j] + c[i][j]; + } +``` + +completes related calculations in a single loop + +temporal locality is improved (`a[i][j]` and `c[i][j]` are reused while still in the cache), increasing the chance of a cache hit + +### Blocking + +```cpp +// BEFORE +for (i = 0; i < N; i = i + 1) + for (j = 0; j < N; j = j + 1) { + r = 0; + for (k = 0; k < N; k = k + 1) + r = r + y[i][k] * z[k][j]; + x[i][j] = r; + } + +// AFTER +for (jj = 0; jj < N; jj = jj + B) + for (kk = 0; kk < N; kk = kk + B) + for (i = 0; i < N; i = i + 1) + for (j = jj; j < min(jj + B - 1, N); j = j + 1) { + r = 0; + for (k = kk; k < min(kk + B - 1, N); k = k + 1) + r = r + y[i][k] * z[k][j]; + x[i][j] = r; + } +``` + +reduces the chance of cache conflicts + +the performance of this optimization depends on the value of `B` + +## Development of MEM + ++ SRAM (R-S latch based) vs DRAM (capacitor based) + ++ EDO vs FP + ++ Rambus (to support outstanding memory accesses) + ++ SDRAM (sync cpu & mem) + ++ DDR (double data rate) diff --git a/W7D1/w7d1-yjc/prefetching.svg b/W7D1/w7d1-yjc/prefetching.svg new file mode 100644 index 0000000..e0b5936 --- /dev/null +++ b/W7D1/w7d1-yjc/prefetching.svg @@ -0,0 +1,4 @@ + + + +
core
core
va
va
data
data
pa
pa
cache
cache
MEM
MEM
stream buffer
stream buff...
write buffer
write buffer
Text is not SVG - cannot display
\ No newline at end of file