Original files from NVIDIA for the OpenACC course:
OpenACC - 2X in 4 Steps in C/C++
This commit is contained in:
68
lab1/C/task4/task1_omp.c
Normal file
68
lab1/C/task4/task1_omp.c
Normal file
@ -0,0 +1,68 @@
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "timer.h"
|
||||
|
||||
/* Grid dimensions: NN sizes the first array index, NM the second. */
#define NN 1024
|
||||
#define NM 1024
|
||||
|
||||
/* Current grid values; main() reads each cell's neighbours from here. */
float A[NN][NM];
|
||||
/* Receives the updated values of a sweep before main() copies them back into A. */
float Anew[NN][NM];
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
const int n = NN;
|
||||
const int m = NM;
|
||||
const int iter_max = 1000;
|
||||
|
||||
const double tol = 1.0e-6;
|
||||
double error = 1.0;
|
||||
|
||||
memset(A, 0, n * m * sizeof(float));
|
||||
memset(Anew, 0, n * m * sizeof(float));
|
||||
|
||||
for (int j = 0; j < n; j++)
|
||||
{
|
||||
A[j][0] = 1.0;
|
||||
Anew[j][0] = 1.0;
|
||||
}
|
||||
|
||||
printf("Jacobi relaxation Calculation: %d x %d mesh\n", n, m);
|
||||
|
||||
StartTimer();
|
||||
int iter = 0;
|
||||
|
||||
while ( error > tol && iter < iter_max )
|
||||
{
|
||||
error = 0.0;
|
||||
|
||||
#pragma omp parallel for shared(m, n, Anew, A)
|
||||
for( int j = 1; j < n-1; j++)
|
||||
{
|
||||
for( int i = 1; i < m-1; i++ )
|
||||
{
|
||||
Anew[j][i] = 0.25 * ( A[j][i+1] + A[j][i-1]
|
||||
+ A[j-1][i] + A[j+1][i]);
|
||||
error = fmax( error, fabs(Anew[j][i] - A[j][i]));
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel for shared(m, n, Anew, A)
|
||||
for( int j = 1; j < n-1; j++)
|
||||
{
|
||||
for( int i = 1; i < m-1; i++ )
|
||||
{
|
||||
A[j][i] = Anew[j][i];
|
||||
}
|
||||
}
|
||||
|
||||
if(iter % 100 == 0) printf("%5d, %0.6f\n", iter, error);
|
||||
|
||||
iter++;
|
||||
}
|
||||
|
||||
double runtime = GetTimer();
|
||||
|
||||
printf(" total: %f s\n", runtime / 1000);
|
||||
|
||||
return 0;
|
||||
}
|
72
lab1/C/task4/task4.c
Normal file
72
lab1/C/task4/task4.c
Normal file
@ -0,0 +1,72 @@
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "timer.h"
|
||||
|
||||
/* Grid dimensions: NN sizes the first array index, NM the second. */
#define NN 1024
|
||||
#define NM 1024
|
||||
|
||||
/* Current grid values; main() reads each cell's neighbours from here. */
float A[NN][NM];
|
||||
/* Receives the updated values of a sweep before main() copies them back into A. */
float Anew[NN][NM];
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
const int n = NN;
|
||||
const int m = NM;
|
||||
const int iter_max = 1000;
|
||||
|
||||
const double tol = 1.0e-6;
|
||||
double error = 1.0;
|
||||
|
||||
memset(A, 0, n * m * sizeof(float));
|
||||
memset(Anew, 0, n * m * sizeof(float));
|
||||
|
||||
for (int j = 0; j < n; j++)
|
||||
{
|
||||
A[j][0] = 1.0;
|
||||
Anew[j][0] = 1.0;
|
||||
}
|
||||
|
||||
printf("Jacobi relaxation Calculation: %d x %d mesh\n", n, m);
|
||||
|
||||
StartTimer();
|
||||
int iter = 0;
|
||||
|
||||
#pragma acc data copy(A), create(Anew)
|
||||
while ( error > tol && iter < iter_max )
|
||||
{
|
||||
error = 0.0;
|
||||
|
||||
#pragma omp parallel for shared(m, n, Anew, A)
|
||||
#pragma acc kernels
|
||||
for( int j = 1; j < n-1; j++)
|
||||
{
|
||||
for( int i = 1; i < m-1; i++ )
|
||||
{
|
||||
Anew[j][i] = 0.25 * ( A[j][i+1] + A[j][i-1]
|
||||
+ A[j-1][i] + A[j+1][i]);
|
||||
error = fmax( error, fabs(Anew[j][i] - A[j][i]));
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel for shared(m, n, Anew, A)
|
||||
#pragma acc kernels
|
||||
for( int j = 1; j < n-1; j++)
|
||||
{
|
||||
for( int i = 1; i < m-1; i++ )
|
||||
{
|
||||
A[j][i] = Anew[j][i];
|
||||
}
|
||||
}
|
||||
|
||||
if(iter % 100 == 0) printf("%5d, %0.6f\n", iter, error);
|
||||
|
||||
iter++;
|
||||
|
||||
}
|
||||
|
||||
double runtime = GetTimer();
|
||||
|
||||
printf(" total: %f s\n", runtime / 1000);
|
||||
|
||||
return 0;
|
||||
}
|
68
lab1/C/task4/task4_4096_omp.c
Normal file
68
lab1/C/task4/task4_4096_omp.c
Normal file
@ -0,0 +1,68 @@
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "timer.h"
|
||||
|
||||
/* Grid dimensions: NN sizes the first array index, NM the second. */
#define NN 4096
|
||||
#define NM 4096
|
||||
|
||||
/* Current grid values; main() reads each cell's neighbours from here. */
float A[NN][NM];
|
||||
/* Receives the updated values of a sweep before main() copies them back into A. */
float Anew[NN][NM];
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
const int n = NN;
|
||||
const int m = NM;
|
||||
const int iter_max = 1000;
|
||||
|
||||
const double tol = 1.0e-6;
|
||||
double error = 1.0;
|
||||
|
||||
memset(A, 0, n * m * sizeof(float));
|
||||
memset(Anew, 0, n * m * sizeof(float));
|
||||
|
||||
for (int j = 0; j < n; j++)
|
||||
{
|
||||
A[j][0] = 1.0;
|
||||
Anew[j][0] = 1.0;
|
||||
}
|
||||
|
||||
printf("Jacobi relaxation Calculation: %d x %d mesh\n", n, m);
|
||||
|
||||
StartTimer();
|
||||
int iter = 0;
|
||||
|
||||
while ( error > tol && iter < iter_max )
|
||||
{
|
||||
error = 0.0;
|
||||
|
||||
#pragma omp parallel for shared(m, n, Anew, A)
|
||||
for( int j = 1; j < n-1; j++)
|
||||
{
|
||||
for( int i = 1; i < m-1; i++ )
|
||||
{
|
||||
Anew[j][i] = 0.25 * ( A[j][i+1] + A[j][i-1]
|
||||
+ A[j-1][i] + A[j+1][i]);
|
||||
error = fmax( error, fabs(Anew[j][i] - A[j][i]));
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel for shared(m, n, Anew, A)
|
||||
for( int j = 1; j < n-1; j++)
|
||||
{
|
||||
for( int i = 1; i < m-1; i++ )
|
||||
{
|
||||
A[j][i] = Anew[j][i];
|
||||
}
|
||||
}
|
||||
|
||||
if(iter % 100 == 0) printf("%5d, %0.6f\n", iter, error);
|
||||
|
||||
iter++;
|
||||
}
|
||||
|
||||
double runtime = GetTimer();
|
||||
|
||||
printf(" total: %f s\n", runtime / 1000);
|
||||
|
||||
return 0;
|
||||
}
|
75
lab1/C/task4/task4_4096_solution.c
Normal file
75
lab1/C/task4/task4_4096_solution.c
Normal file
@ -0,0 +1,75 @@
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "timer.h"
|
||||
|
||||
/* Grid dimensions: NN sizes the first array index, NM the second. */
#define NN 4096
|
||||
#define NM 4096
|
||||
|
||||
/* Current grid values; main() reads each cell's neighbours from here. */
float A[NN][NM];
|
||||
/* Receives the updated values of a sweep before main() copies them back into A. */
float Anew[NN][NM];
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
const int n = NN;
|
||||
const int m = NM;
|
||||
const int iter_max = 1000;
|
||||
|
||||
const double tol = 1.0e-6;
|
||||
double error = 1.0;
|
||||
|
||||
memset(A, 0, n * m * sizeof(float));
|
||||
memset(Anew, 0, n * m * sizeof(float));
|
||||
|
||||
for (int j = 0; j < n; j++)
|
||||
{
|
||||
A[j][0] = 1.0;
|
||||
Anew[j][0] = 1.0;
|
||||
}
|
||||
|
||||
printf("Jacobi relaxation Calculation: %d x %d mesh\n", n, m);
|
||||
|
||||
StartTimer();
|
||||
int iter = 0;
|
||||
|
||||
#pragma acc data copy(A), create(Anew)
|
||||
while ( error > tol && iter < iter_max )
|
||||
{
|
||||
#pragma acc kernels
|
||||
{
|
||||
error = 0.0;
|
||||
|
||||
#pragma omp parallel for shared(m, n, Anew, A)
|
||||
for( int j = 1; j < n-1; j++)
|
||||
{
|
||||
#pragma acc loop device_type(nvidia) gang(8) vector(32)
|
||||
for( int i = 1; i < m-1; i++ )
|
||||
{
|
||||
Anew[j][i] = 0.25 * ( A[j][i+1] + A[j][i-1]
|
||||
+ A[j-1][i] + A[j+1][i]);
|
||||
error = fmax( error, fabs(Anew[j][i] - A[j][i]));
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel for shared(m, n, Anew, A)
|
||||
for( int j = 1; j < n-1; j++)
|
||||
{
|
||||
#pragma acc loop device_type(nvidia) gang(8) vector(32)
|
||||
for( int i = 1; i < m-1; i++ )
|
||||
{
|
||||
A[j][i] = Anew[j][i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(iter % 100 == 0) printf("%5d, %0.6f\n", iter, error);
|
||||
|
||||
iter++;
|
||||
|
||||
}
|
||||
|
||||
double runtime = GetTimer();
|
||||
|
||||
printf(" total: %f s\n", runtime / 1000);
|
||||
|
||||
return 0;
|
||||
}
|
75
lab1/C/task4/task4_solution.c
Normal file
75
lab1/C/task4/task4_solution.c
Normal file
@ -0,0 +1,75 @@
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "timer.h"
|
||||
|
||||
/* Grid dimensions: NN sizes the first array index, NM the second. */
#define NN 1024
|
||||
#define NM 1024
|
||||
|
||||
/* Current grid values; main() reads each cell's neighbours from here. */
float A[NN][NM];
|
||||
/* Receives the updated values of a sweep before main() copies them back into A. */
float Anew[NN][NM];
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
const int n = NN;
|
||||
const int m = NM;
|
||||
const int iter_max = 1000;
|
||||
|
||||
const double tol = 1.0e-6;
|
||||
double error = 1.0;
|
||||
|
||||
memset(A, 0, n * m * sizeof(float));
|
||||
memset(Anew, 0, n * m * sizeof(float));
|
||||
|
||||
for (int j = 0; j < n; j++)
|
||||
{
|
||||
A[j][0] = 1.0;
|
||||
Anew[j][0] = 1.0;
|
||||
}
|
||||
|
||||
printf("Jacobi relaxation Calculation: %d x %d mesh\n", n, m);
|
||||
|
||||
StartTimer();
|
||||
int iter = 0;
|
||||
|
||||
#pragma acc data copy(A), create(Anew)
|
||||
while ( error > tol && iter < iter_max )
|
||||
{
|
||||
#pragma acc kernels
|
||||
{
|
||||
error = 0.0;
|
||||
|
||||
#pragma omp parallel for shared(m, n, Anew, A)
|
||||
for( int j = 1; j < n-1; j++)
|
||||
{
|
||||
#pragma acc loop device_type(nvidia) gang(8) vector(32)
|
||||
for( int i = 1; i < m-1; i++ )
|
||||
{
|
||||
Anew[j][i] = 0.25 * ( A[j][i+1] + A[j][i-1]
|
||||
+ A[j-1][i] + A[j+1][i]);
|
||||
error = fmax( error, fabs(Anew[j][i] - A[j][i]));
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel for shared(m, n, Anew, A)
|
||||
for( int j = 1; j < n-1; j++)
|
||||
{
|
||||
#pragma acc loop device_type(nvidia) gang(8) vector(32)
|
||||
for( int i = 1; i < m-1; i++ )
|
||||
{
|
||||
A[j][i] = Anew[j][i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(iter % 100 == 0) printf("%5d, %0.6f\n", iter, error);
|
||||
|
||||
iter++;
|
||||
|
||||
}
|
||||
|
||||
double runtime = GetTimer();
|
||||
|
||||
printf(" total: %f s\n", runtime / 1000);
|
||||
|
||||
return 0;
|
||||
}
|
67
lab1/C/task4/timer.h
Normal file
67
lab1/C/task4/timer.h
Normal file
@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Copyright 2012 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef TIMER_H
|
||||
#define TIMER_H
|
||||
|
||||
#include <stdio.h>
#include <stdlib.h>
|
||||
|
||||
#ifdef WIN32
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
/*
 * Timer state shared by StartTimer() and GetTimer().
 * NOTE(review): these are definitions, not extern declarations, so including
 * this header from more than one translation unit of the same program would
 * define them more than once.
 */
#ifdef WIN32
|
||||
/* Performance-counter frequency divided by 1000; written by StartTimer(). */
double PCFreq = 0.0;
|
||||
/* Performance-counter reading captured by StartTimer(). */
__int64 timerStart = 0;
|
||||
#else
|
||||
/* gettimeofday() reading captured by StartTimer(). */
struct timeval timerStart;
|
||||
#endif
|
||||
|
||||
void StartTimer()
|
||||
{
|
||||
#ifdef WIN32
|
||||
LARGE_INTEGER li;
|
||||
if(!QueryPerformanceFrequency(&li))
|
||||
printf("QueryPerformanceFrequency failed!\n");
|
||||
|
||||
PCFreq = (double)li.QuadPart/1000.0;
|
||||
|
||||
QueryPerformanceCounter(&li);
|
||||
timerStart = li.QuadPart;
|
||||
#else
|
||||
gettimeofday(&timerStart, NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
// time elapsed in ms
|
||||
double GetTimer()
|
||||
{
|
||||
#ifdef WIN32
|
||||
LARGE_INTEGER li;
|
||||
QueryPerformanceCounter(&li);
|
||||
return (double)(li.QuadPart-timerStart)/PCFreq;
|
||||
#else
|
||||
struct timeval timerStop, timerElapsed;
|
||||
gettimeofday(&timerStop, NULL);
|
||||
timersub(&timerStop, &timerStart, &timerElapsed);
|
||||
return timerElapsed.tv_sec*1000.0+timerElapsed.tv_usec/1000.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // TIMER_H
|
Reference in New Issue
Block a user