# Die Grafikkarte als Parallelcomputer - Institut für Informatik

Die Grafikkarte als Parallelcomputer - Institut für Informatik

1024×1024

1024×1024

S

|S| p

SpeedUp = O(S)/O(S/p) = O(p)

S

S

S

$(x_1, y_1, z, x_2, y_2, z) \in \mathbb{N}^6$, $y_1 \le y_2$, $x_1 \le x_2$

$(y_2 - y_1) \cdot (x_2 - x_1)$

$(x, y) \in \mathbb{N}^2$, $x_1 \le x \le x_2$, $y_1 \le y \le y_2$

S

$A \in \mathbb{R}^{n \times n}$

float

{(0, 0), (0, n − 1), (n − 1, 0)}

S

|S| p

SpeedUp = O(S)/O(S/p) = O(p)

S

S

S

$(x_1, y_1, z, x_2, y_2, z) \in \mathbb{N}^6$, $y_1 \le y_2$, $x_1 \le x_2$

$(y_2 - y_1) \cdot (x_2 - x_1)$

$(x, y) \in \mathbb{N}^2$, $x_1 \le x \le x_2$, $y_1 \le y \le y_2$

S

$A \in \mathbb{R}^{n \times n}$

float

{(0, 0), (0, n − 1), (n − 1, 0)}

S

|S| p

SpeedUp = O(S)/O(S/p) = O(p)

S

S

S

$(x_1, y_1, z, x_2, y_2, z) \in \mathbb{N}^6$, $y_1 \le y_2$, $x_1 \le x_2$

$(y_2 - y_1) \cdot (x_2 - x_1)$

$(x, y) \in \mathbb{N}^2$, $x_1 \le x \le x_2$, $y_1 \le y \le y_2$

S

$A \in \mathbb{R}^{n \times n}$

float

{(0, 0), (0, n − 1), (n − 1, 0)}

T n

T = n i=1 Ti Ti Si

Ti

Ti

T n

T = n i=1 Ti Ti Si

Ti

Ti

T n

T = n i=1 Ti Ti Si

Ti

Ti

m

m

n ⌈n/32⌉ 32| n

X

X

$128 \cdot 1.75 \cdot 10^{9} \cdot 2 = 448 \cdot 10^{9}$

P

Si ⊆ P

P

P

Si Si = P

P

P

P P ′ ⊂ P

P ′

P ′ Si

S0, . . . , S15 |Si| = 8

Si

Si

Si

[3×2] [2×2]

(x, y, z)

(x, y, z)

(x, y, z)

x, y, z

P

Si

Si

P

Si

Si

P

Si

Si

I K O

P = (I, K, O)

Σ

P = (I, K, O) t(P ) = t(I) + t(K) + t(O)

K t(I) ≈ t(O)

t(K) ≫ t(I) + t(O)

t(K)

t(K) ≪ t(I) + t(O)

t(K) ≈ t(I) + t(O)

Pi = (Ii, Ki, Oi)

t(Ki) ≈ t(Ii) + t(Oi) t(Ii) ≈ t(Oi)

Pi = (Ii, Ki, Oi)

Ki

Ki

Ki

P p = |P | t

M Mem(M)

p ∈ P

Mem(L)

$P = \bigcup_{i=1}^{h} G_i$, $h$, $g = |G_i|$

Gi Mem(G)

Gi

Tlok Tglob Tlok ≪ Tglob

Gi

Gi

h ≤ hsyncmax hsyncmax ∈ N

Gi

32

32

csimd 1 ≤ csimd ≤ 32

wcShR = 5 wcShW = 16

cconf 1 ≤ cconf ≤ max{wcShR, wcShW } = 16

$g_{max} = 512 = 2^{9}$

$h_{max} = 65536^{3} = 2^{48}$

$p_{max} = b_{max} \cdot g_{max} = 2^{57}$

m = 16

k = 8

K = m · k = 128

tH = 1600MHz

Msize = 1.5GB

Ssize = 16kB

$Reg_{time} \approx 1/t_H$

Memtime ≈ 400/tH

P

p 1 ≤ p ≤ pmax

g 1 ≤ g ≤ gmax

h 1 ≤ h ≤ hmax

hsyncmax hsyncmax = m

t t ≈ tH · 1/⌈h/K⌉ · 1/(cconf · csimd)

Mem(M) Mem(M) = Msize

Mem(G) 0 ≤ Mem(G) < Ssize − g · Mem(L)

Mem(L) 0 ≤ Mem(L) < (Ssize − Mem(G))/g

Tlok Tlok ≈ cconf · Regtime

Tglob Tglob ≈ Memtime < K · Memtime

Dq

$q$ $Q$ $D_q = IC_Q \cdot CPI \cdot T$

ICQ Q

CP I

T T Dq = Θ(ICQ) ICq = Θ(Dq)

Dexe

$p$ $D_{exe} = O\left(\sum_{q \in P} D_q\right) = O\left(p \cdot \max_{q \in P} D_q\right)$

h ≤ hsyncmax

$D_{exe} = O\left(\max_i \sum_{q \in G_i} D_q\right)$

Gi

Dexe = Θ(maxq∈P Dq)

ICsimQ = Θ(ICQ) ICsimQ

q CP Isim

CP I

$D^{sim}_q = IC^{sim}_Q \cdot CPI^{sim} \cdot T^{sim}$, $T^{sim} := 1/t$

t ≈ tH · 1/⌈h/K⌉ · 1/(cconf · csimd)

tH 1/(cconf · csimd) 1/⌈h/K⌉

h h ≤ hmax p

p h h = Θ(p) Tsim = Θ(p)

Dsimq = Θ(ICQ) · Θ(p) = Θ(Dq) · Θ(p)

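$D^{sim}_{exe} = O\left(p \cdot \max_{q \in P} D^{sim}_q\right) = O\left(p \cdot \max_{q \in P} \Theta(D_q) \cdot \Theta(p)\right) \overset{\text{transitiv}}{=} O\left(p \cdot \max_{q \in P} D_q\right) \cdot O(p)$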

SIM := Dsimexe/Dexe = O(p)

O(p)

• n Dexe = O(f(n))

Dsimexe = O(f(n))

ICQ

ICQ = O(f(n)) Dexe Dsimexe

p ≤ K

p > K

p ≤ K

p ′ p ′ ≤ K

p ≤ K

p > K

p ≤ K

p ′ p ′ ≤ K

=1

=1

=1

[0, 1] [−1, 1]

[0, 1] nDim

− −−

∗∗

− −

∗∗

− −−

∗∗

− −

∗∗

tB 192 ≤ tB = k · 64 k ∈ N

tB ≥ 192 tB = k· 64

tB 192 ≤ tB = k · 64 k ∈ N

tB ≥ 192 tB = k· 64

pB B 16 > pB > 1

B pB ≥ 1

B pB = 16

wcShW = 16

wcShR

wcShR ≤ max{pBi | i = 0...15} wcShR ≤ |{i | pBi ≥ 1}|

(pB0 , pB1 , . . . , pB15 ) = (1, 2, 3, 4, 5, 1, 0, . . . , 0)

B5

wcShR = 5

K K ∈ {4, 8, 16}

Adresse = j· K j ∈ N0

i K

$O(n^{2.376\ldots})$

$O(n^{2.807\ldots})$

$A, B \notin K^{2^k \times 2^k}$, $k \in \mathbb{N}_0$

2

3 n2

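$O(n^3)$ $O(m \cdot n \cdot p)$

$A \in K^{m \times n}$, $B \in K^{n \times p}$, $m, n, p \in \mathbb{N}$, $K \in \{\mathbb{R}_{float32}, \mathbb{Z}_{int32}\}$

$A \cdot B =: C \in K^{m \times p}$

$A =: (a_{i,j})_{i=1 \ldots m,\; j=1 \ldots n}$ $B$ $C$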

B

$r = 1 \ldots m$, $s = 1 \ldots p$, $m \cdot p$

$c_{r,s} = \sum_{i=1}^{n} a_{r,i} \cdot b_{i,s}$

cr,s

C

n A B 1

C 2 · m · n · p m · p

m · p O(n)

cr,s

cr,s

A B

w × w Cr,s C

$C_{r,s} = \sum_{i=1}^{n/w} A_{r,i} \cdot B_{i,s}$

A B

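$$
\begin{pmatrix}
\times & \times & \times \\
A_{r,1} & \cdots & A_{r,\frac{n}{w}} \\
\times & \times & \times
\end{pmatrix}
\cdot
\begin{pmatrix}
\times & B_{1,s} & \times \\
\times & \vdots & \times \\
\times & B_{\frac{n}{w},s} & \times
\end{pmatrix}
=
\begin{pmatrix}
\times & \times & \times \\
\times & C_{r,s} & \times \\
\times & \times & \times
\end{pmatrix}
$$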

w × w w | m, n, p

C m p

w × w

$w = 16$, $16^2 = 256$

$192 \le 256 = 4 \cdot 64$

$512$, $22^2 < 512$

$w \in \{17, \ldots, 22\}$

$64 \mid w^2$

w = 16 16×16

A

B

Ar,i Bi,s

A

B

(x, y) 0 ≤ x, y < w

Ar,i Bi,s

2 · w 2 · 4 2048

w = 16

Ar,i[y][x] Bi,s[y][x]

A B

Ar,i Br,i

y Ar,i

x Bi,s w

w

Br,i

Ai,s w = 16

(x, y) Cr,s[y][x]

C

C

Ar,i Bi,s

w

Ar,i · Bi,s

2048

w = 32 8192

$32^2 > 512$

Ar,i · Bi,s w = 32 Ar,i · Bi,s+k· w

k = 0...3 w = 16

Ar,i Bi,s

Cr,s

w × w

Ar,i Bi,s

16 · 2

Ar,i

w

w = 16

w = 4...22

w = 16

w = 4...22 w = 16

16 × 16

w = 8 w = 22 w = 8 + 1

w = 16 + 1

w

w = 24 w = 32

w = 16

w = 16

N = M = P

N

$2 \cdot N^3$

$4 \cdot \left(\frac{2}{w} N^3 + N^2\right)$

w = 16

N = M = P

N

$2 \cdot N^3$

$4 \cdot \left(\frac{2}{w} N^3 + N^2\right)$

(nr × nc)

ni no

{0, 1}

nn

$F \subseteq \{b \mid b : \{0,1\}^{n_n} \to \{0,1\}\}$, $n_f = |F| \le 2^{2^{n_n}}$

l

G

P = (G, ni, no, nn, F, nf, nr, nc, l)

(G, 4, 3, 2, {And, Or, Xor}, 3, 5, 5, 2)

25

2×2

$(in_3\, in_2)_2 + (in_1\, in_0)_2 = (out_2\, out_1\, out_0)_2$ $out_2$

$1110$ $101$

$(11)_2 + (10)_2 = (101)_2$, $(3)_{10} + (2)_{10} = (5)_{10}$

[0, 1] 1

2 ni

[0, 1] 1

G

[0, 1]

hCumul := 0

inV al ∈ {0, 1} ni

curCol ∈ {0, ..., nc − 1}

node curCol

node

outi i ∈ {0, ..., no − 1}

outi

outi IstOut(inV al) := (out0, ..., outno−1)

SollOut(inV al) inV al

hCumul hamming(SollOut(inV al), IstOut(inV al))

$fitness := 1 - hCumul / (2^{n_i} \cdot n_o) \in [0, 1]$

$inVal \in \{0, 1\}^{n_i}$

$\{0, \ldots, 2^{n_i} - 1\} \subset \mathbb{N}_0$

nr

nr × nc

l

s > 1

i

0 ≤ i < s inV al + i

s

s nr

nr· s

K

i inV al + K · i + k k = 0...K −1

K · s

K = 32

nr K

s nr

0...ni − 1

s · 32

32 · K hCumul

Si hCumuli

hCumuli

K

$h_k$ $hCumul_i \mathrel{+}= \sum_{k=0}^{K-1} h_k$

nr

K

$\{0, \ldots, 2^{n_i}\} = \bigcup_i K_i$ $K_i$

hCumuli Ki

nn = 2 |F | ≤ 16

{0, . . . , maxItemId} maxItemId := ni + nr · nc − 1

0 ni−1

$maxItemId \le 2^{14}$ $3 \ldots 0$

$b : \{0, 1\}^2 \to \{0, 1\}$ $c_b \in \{0, \ldots, 16\}$ $b$

$b((in_1, in_0)) = (c_b \gg (in_1\, in_0)_2) \mathbin{\&} 1$

nc · nr

(col, row)

32 × 16

K = 32

[j][k] k = 0...ni − 1 k

j k ≥ ni

k

nr

c

32

nr < 16

w = ⌈16/nr⌉

w

w

hCumuli i

nr

∗ ∗

hCumuli

myRow ≥ P T

$hCumul = \sum_{i=0}^{s-1} hCumul_i$

maxItemId

128

curCol l

l < nc

l

nr ≤ 512 nc

nr l

K = k · 32 k > 1

k K = 64

(nr, nc) = (32, 16)

(32 · 16 + 32) · 4 = 2176

ni ni

K = 32

ni

(G, ni, 4, 2, F, 9, 32, 16, 8) F

ni = 4, 6, 8, ..., 28

ni

ni

ni

ni

ni

$10^8$

$n_i \ge 6$ $1.4 \cdot 10^9$ $n_i \ge 20$

$1.55 \cdot 10^9$

$0.6 \cdot 10^8$ $n_i = 6$ $n_i$

$0.4 \cdot 10^8$ $n_i = 28$

$2^{n_i}$

ni nr · nc

ni

ni ≤ 28

$n_i \ge 6$ $1.4 \cdot 10^9$ $n_i \ge 20$

$1.55 \cdot 10^9$

$0.6 \cdot 10^8$ $n_i = 6$ $n_i$

$0.4 \cdot 10^8$ $n_i = 28$

$2^{n_i}$

ni nr · nc

ni

ni ≤ 28

$n_i \ge 6$ $1.4 \cdot 10^9$ $n_i \ge 20$

$1.55 \cdot 10^9$

$0.6 \cdot 10^8$ $n_i = 6$ $n_i$

$0.4 \cdot 10^8$ $n_i = 28$

$2^{n_i}$

ni nr · nc

ni

ni ≤ 28

$n_i \ge 6$ $1.4 \cdot 10^9$ $n_i \ge 20$

$1.55 \cdot 10^9$

$0.6 \cdot 10^8$ $n_i = 6$ $n_i$

$0.4 \cdot 10^8$ $n_i = 28$

$2^{n_i}$

ni nr · nc

ni

ni ≤ 28

$n_i \ge 6$ $1.4 \cdot 10^9$ $n_i \ge 20$

$1.55 \cdot 10^9$

$0.6 \cdot 10^8$ $n_i = 6$ $n_i$

$0.4 \cdot 10^8$ $n_i = 28$

$2^{n_i}$

ni nr · nc

ni

ni ≤ 28

More magazines by this user
Similar magazines