Cleaned up notebook
Removed ! and added %%bash to all cells that should be interpreted by the command-line shell. This makes copy paste of the relevant lines easier.
This commit is contained in:
parent
cce7f726c0
commit
53ea1d338c
@ -20,7 +20,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": true
|
||||
@ -30,33 +30,33 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Wed Jun 7 13:36:24 2017 \r\n",
|
||||
"+-----------------------------------------------------------------------------+\r\n",
|
||||
"| NVIDIA-SMI 375.66 Driver Version: 375.66 |\r\n",
|
||||
"|-------------------------------+----------------------+----------------------+\r\n",
|
||||
"| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\r\n",
|
||||
"| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\r\n",
|
||||
"|===============================+======================+======================|\r\n",
|
||||
"| 0 GeForce GTX 950 Off | 0000:01:00.0 On | N/A |\r\n",
|
||||
"| 1% 54C P5 11W / 99W | 932MiB / 1996MiB | 0% Default |\r\n",
|
||||
"+-------------------------------+----------------------+----------------------+\r\n",
|
||||
" \r\n",
|
||||
"+-----------------------------------------------------------------------------+\r\n",
|
||||
"| Processes: GPU Memory |\r\n",
|
||||
"| GPU PID Type Process name Usage |\r\n",
|
||||
"|=============================================================================|\r\n",
|
||||
"| 0 1974 G /usr/lib/xorg/Xorg 624MiB |\r\n",
|
||||
"| 0 3776 G ...s-passed-by-fd --v8-snapshot-passed-by-fd 32MiB |\r\n",
|
||||
"| 0 3875 G compiz 106MiB |\r\n",
|
||||
"| 0 4275 G ...el-token=884290AA53D676228DE3F70F025B1D21 133MiB |\r\n",
|
||||
"| 0 4324 G ...s-passed-by-fd --v8-snapshot-passed-by-fd 32MiB |\r\n",
|
||||
"| 0 28457 G /usr/lib/firefox/firefox 1MiB |\r\n",
|
||||
"+-----------------------------------------------------------------------------+\r\n"
|
||||
"Tue Jun 20 13:10:41 2017 \n",
|
||||
"+-----------------------------------------------------------------------------+\n",
|
||||
"| NVIDIA-SMI 375.66 Driver Version: 375.66 |\n",
|
||||
"|-------------------------------+----------------------+----------------------+\n",
|
||||
"| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
|
||||
"| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
|
||||
"|===============================+======================+======================|\n",
|
||||
"| 0 GeForce GTX 950 Off | 0000:01:00.0 On | N/A |\n",
|
||||
"| 23% 59C P0 27W / 99W | 690MiB / 1996MiB | 1% Default |\n",
|
||||
"+-------------------------------+----------------------+----------------------+\n",
|
||||
" \n",
|
||||
"+-----------------------------------------------------------------------------+\n",
|
||||
"| Processes: GPU Memory |\n",
|
||||
"| GPU PID Type Process name Usage |\n",
|
||||
"|=============================================================================|\n",
|
||||
"| 0 1982 G /usr/lib/xorg/Xorg 357MiB |\n",
|
||||
"| 0 2997 G compiz 166MiB |\n",
|
||||
"| 0 3233 G /usr/lib/firefox/firefox 1MiB |\n",
|
||||
"| 0 3449 G ...s-passed-by-fd --v8-snapshot-passed-by-fd 25MiB |\n",
|
||||
"| 0 11015 G ...el-token=53D41F0E8A4B8A669C123908959A0849 137MiB |\n",
|
||||
"+-----------------------------------------------------------------------------+\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!nvidia-smi"
|
||||
"%%bash\n",
|
||||
"nvidia-smi"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -222,19 +222,20 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Compiled Successfully!\r\n"
|
||||
"Compiled Successfully!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%bash\n",
|
||||
"# To be sure we see some output from the compiler, we'll echo out \"Compiled Successfully!\" \n",
|
||||
"#(if the compile does not return an error)\n",
|
||||
"!pgcc -fast -o task1_pre_out task1/task1.c && echo \"Compiled Successfully!\""
|
||||
"pgcc -fast -o task1_pre_out task1/task1.c && echo \"Compiled Successfully!\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": true
|
||||
@ -255,14 +256,15 @@
|
||||
" 700, 0.000345\n",
|
||||
" 800, 0.000302\n",
|
||||
" 900, 0.000269\n",
|
||||
" total: 2.884395 s\n"
|
||||
" total: 2.815460 s\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%bash\n",
|
||||
"# Execute our single-thread CPU-only Jacobi Iteration to get timing information. Make sure you compiled successfully in the \n",
|
||||
"# above command first.\n",
|
||||
"!./task1_pre_out"
|
||||
"./task1_pre_out"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -301,7 +303,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": true
|
||||
@ -347,13 +349,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pgprof"
|
||||
"%%bash\n",
|
||||
"pgprof"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -441,49 +444,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"GetTimer:\n",
|
||||
" 3, include \"timer.h\"\n",
|
||||
" 62, FMA (fused multiply-add) instruction(s) generated\n",
|
||||
"main:\n",
|
||||
" 23, Loop not fused: function call before adjacent loop\n",
|
||||
" Loop not vectorized: may not be beneficial\n",
|
||||
" Unrolled inner loop 8 times\n",
|
||||
" Generated 7 prefetches in scalar loop\n",
|
||||
" 34, Loop not vectorized/parallelized: potential early exits\n",
|
||||
" 38, Generating implicit copyout(Anew[1:1022][1:1022])\n",
|
||||
" Generating implicit copyin(A[:][:])\n",
|
||||
" Generating implicit copyout(A[1:1022][1:1022])\n",
|
||||
" 41, Loop is parallelizable\n",
|
||||
" 43, Loop is parallelizable\n",
|
||||
" Accelerator kernel generated\n",
|
||||
" Generating Tesla code\n",
|
||||
" 41, #pragma acc loop gang, vector(4) /* blockIdx.y threadIdx.y */\n",
|
||||
" 43, #pragma acc loop gang, vector(32) /* blockIdx.x threadIdx.x */\n",
|
||||
" 47, Generating implicit reduction(max:error)\n",
|
||||
" 52, Loop is parallelizable\n",
|
||||
" 54, Loop is parallelizable\n",
|
||||
" Accelerator kernel generated\n",
|
||||
" Generating Tesla code\n",
|
||||
" 52, #pragma acc loop gang, vector(4) /* blockIdx.y threadIdx.y */\n",
|
||||
" 54, #pragma acc loop gang, vector(32) /* blockIdx.x threadIdx.x */\n",
|
||||
"Compiled Successfully\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%bash\n",
|
||||
"# Compile the task2.c file with the pgcc compiler\n",
|
||||
"# -acc tells the compiler to process the source recognizing #pragma acc directives\n",
|
||||
"# -Minfo tells the compiler to share information about the compilation process\n",
|
||||
"!pgcc -acc -Minfo -fast -ta=tesla -o task2_out task2/task2.c && echo \"Compiled Successfully\""
|
||||
"pgcc -acc -Minfo -fast -ta=tesla -o task2_out task2/task2.c && echo \"Compiled Successfully\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -541,32 +512,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Jacobi relaxation Calculation: 1024 x 1024 mesh\n",
|
||||
" 0, 0.250000\n",
|
||||
" 100, 0.002397\n",
|
||||
" 200, 0.001204\n",
|
||||
" 300, 0.000804\n",
|
||||
" 400, 0.000603\n",
|
||||
" 500, 0.000483\n",
|
||||
" 600, 0.000403\n",
|
||||
" 700, 0.000345\n",
|
||||
" 800, 0.000302\n",
|
||||
" 900, 0.000269\n",
|
||||
" total: 3.403485 s\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!./task2_out"
|
||||
"%%bash\n",
|
||||
"./task2_out"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -649,37 +602,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"main:\n",
|
||||
" 34, Generating create(Anew[:][:])\n",
|
||||
" Generating copy(A[:][:])\n",
|
||||
" 42, Loop is parallelizable\n",
|
||||
" 44, Loop is parallelizable\n",
|
||||
" Accelerator kernel generated\n",
|
||||
" Generating Tesla code\n",
|
||||
" 42, #pragma acc loop gang, vector(4) /* blockIdx.y threadIdx.y */\n",
|
||||
" 44, #pragma acc loop gang, vector(32) /* blockIdx.x threadIdx.x */\n",
|
||||
" 48, Generating implicit reduction(max:error)\n",
|
||||
" 53, Loop is parallelizable\n",
|
||||
" 55, Loop is parallelizable\n",
|
||||
" Accelerator kernel generated\n",
|
||||
" Generating Tesla code\n",
|
||||
" 53, #pragma acc loop gang, vector(4) /* blockIdx.y threadIdx.y */\n",
|
||||
" 55, #pragma acc loop gang, vector(32) /* blockIdx.x threadIdx.x */\n",
|
||||
"Compiled Successfully\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pgcc -fast -acc -Minfo=accel -ta=tesla -o task3_out task3/task3.c && echo \"Compiled Successfully\""
|
||||
"%%bash\n",
|
||||
"pgcc -fast -acc -Minfo=accel -ta=tesla -o task3_out task3/task3.c && echo \"Compiled Successfully\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -691,32 +621,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Jacobi relaxation Calculation: 1024 x 1024 mesh\n",
|
||||
" 0, 0.250000\n",
|
||||
" 100, 0.002397\n",
|
||||
" 200, 0.001204\n",
|
||||
" 300, 0.000804\n",
|
||||
" 400, 0.000603\n",
|
||||
" 500, 0.000483\n",
|
||||
" 600, 0.000403\n",
|
||||
" 700, 0.000345\n",
|
||||
" 800, 0.000302\n",
|
||||
" 900, 0.000269\n",
|
||||
" total: 0.601428 s\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!./task3_out"
|
||||
"%%bash\n",
|
||||
"./task3_out"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -756,79 +668,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Jacobi relaxation Calculation: 1024 x 1024 mesh\n",
|
||||
" 0, 0.250000\n",
|
||||
" 100, 0.002397\n",
|
||||
" 200, 0.001204\n",
|
||||
" 300, 0.000804\n",
|
||||
" 400, 0.000603\n",
|
||||
" 500, 0.000483\n",
|
||||
" 600, 0.000403\n",
|
||||
" 700, 0.000345\n",
|
||||
" 800, 0.000302\n",
|
||||
" 900, 0.000269\n",
|
||||
" total: 0.581272 s\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"main:\n",
|
||||
" 34, Generating create(Anew[:][:])\n",
|
||||
" Generating copy(A[:][:])\n",
|
||||
" 42, Loop is parallelizable\n",
|
||||
" 44, Loop is parallelizable\n",
|
||||
" Accelerator kernel generated\n",
|
||||
" Generating Tesla code\n",
|
||||
" 42, #pragma acc loop gang, vector(4) /* blockIdx.y threadIdx.y */\n",
|
||||
" 44, #pragma acc loop gang, vector(32) /* blockIdx.x threadIdx.x */\n",
|
||||
" 48, Generating implicit reduction(max:error)\n",
|
||||
" 53, Loop is parallelizable\n",
|
||||
" 55, Loop is parallelizable\n",
|
||||
" Accelerator kernel generated\n",
|
||||
" Generating Tesla code\n",
|
||||
" 53, #pragma acc loop gang, vector(4) /* blockIdx.y threadIdx.y */\n",
|
||||
" 55, #pragma acc loop gang, vector(32) /* blockIdx.x threadIdx.x */\n",
|
||||
"\n",
|
||||
"Accelerator Kernel Timing data\n",
|
||||
"/home/fokke/OpenACC/labs/lab1/notebook/C/task3/task3.c\n",
|
||||
" main NVIDIA devicenum=0\n",
|
||||
" time(us): 425,590\n",
|
||||
" 34: data region reached 2 times\n",
|
||||
" 34: data copyin transfers: 1\n",
|
||||
" device time(us): total=352 max=352 min=352 avg=352\n",
|
||||
" 68: data copyout transfers: 1\n",
|
||||
" device time(us): total=336 max=336 min=336 avg=336\n",
|
||||
" 37: compute region reached 1000 times\n",
|
||||
" 37: data copyin transfers: 1000\n",
|
||||
" device time(us): total=2,452 max=13 min=2 avg=2\n",
|
||||
" 44: kernel launched 1000 times\n",
|
||||
" grid: [32x256] block: [32x4]\n",
|
||||
" device time(us): total=307,190 max=311 min=305 avg=307\n",
|
||||
" elapsed time(us): total=318,460 max=341 min=316 avg=318\n",
|
||||
" 44: reduction kernel launched 1000 times\n",
|
||||
" grid: [1] block: [256]\n",
|
||||
" device time(us): total=13,053 max=19 min=13 avg=13\n",
|
||||
" elapsed time(us): total=24,253 max=47 min=23 avg=24\n",
|
||||
" 44: data copyout transfers: 1000\n",
|
||||
" device time(us): total=7,380 max=20 min=7 avg=7\n",
|
||||
" 55: kernel launched 1000 times\n",
|
||||
" grid: [32x256] block: [32x4]\n",
|
||||
" device time(us): total=94,827 max=118 min=92 avg=94\n",
|
||||
" elapsed time(us): total=108,023 max=1,019 min=104 avg=108\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%bash\n",
|
||||
"export PGI_ACC_TIME=1\n",
|
||||
@ -928,22 +772,24 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pgcc -acc -Minfo=accel -fast -ta=tesla -o task4_out task4/task4.c && echo \"Compiled Successfully\""
|
||||
"%%bash\n",
|
||||
"pgcc -acc -Minfo=accel -fast -ta=tesla -o task4_out task4/task4.c && echo \"Compiled Successfully\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!./task4_out"
|
||||
"%%bash\n",
|
||||
"./task4_out"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -957,7 +803,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -985,11 +831,12 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pgcc -acc -fast -ta=tesla -Minfo=accel -o task4_out task4/task4.c && echo \"Compiled Successfully\""
|
||||
"%%bash\n",
|
||||
"pgcc -acc -fast -ta=tesla -Minfo=accel -o task4_out task4/task4.c && echo \"Compiled Successfully\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -1003,11 +850,12 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!./task4_out"
|
||||
"%%bash\n",
|
||||
"./task4_out"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -1038,7 +886,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -1059,11 +907,12 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!OMP_NUM_THREADS=8 ./task4_4096_omp"
|
||||
"%%bash\n",
|
||||
"OMP_NUM_THREADS=8 ./task4_4096_omp"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -1077,22 +926,24 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pgcc -acc -fast -ta=tesla -Minfo=accel -o task4_4096_out task4/task4_4096_solution.c && echo \"Compiled Successfully\""
|
||||
"%%bash\n",
|
||||
"pgcc -acc -fast -ta=tesla -Minfo=accel -o task4_4096_out task4/task4_4096_solution.c && echo \"Compiled Successfully\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!./task4_4096_out"
|
||||
"%%bash\n",
|
||||
"./task4_4096_out"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user