|
|
--- libgomp/config/linux/wait.h.jj 2013-01-31 20:29:10.091548989 +0100 |
|
|
+++ libgomp/config/linux/wait.h 2016-07-13 16:57:18.902355979 +0200 |
|
|
@@ -34,13 +34,13 @@ |
|
|
|
|
|
#define FUTEX_WAIT 0 |
|
|
#define FUTEX_WAKE 1 |
|
|
-#define FUTEX_PRIVATE_FLAG 128L |
|
|
+#define FUTEX_PRIVATE_FLAG 128 |
|
|
|
|
|
#ifdef HAVE_ATTRIBUTE_VISIBILITY |
|
|
# pragma GCC visibility push(hidden) |
|
|
#endif |
|
|
|
|
|
-extern long int gomp_futex_wait, gomp_futex_wake; |
|
|
+extern int gomp_futex_wait, gomp_futex_wake; |
|
|
|
|
|
#include <futex.h> |
|
|
|
|
|
@@ -48,7 +48,9 @@ static inline int do_spin (int *addr, in |
|
|
{ |
|
|
unsigned long long i, count = gomp_spin_count_var; |
|
|
|
|
|
- if (__builtin_expect (gomp_managed_threads > gomp_available_cpus, 0)) |
|
|
+ if (__builtin_expect (__atomic_load_n (&gomp_managed_threads, |
|
|
+ MEMMODEL_RELAXED) |
|
|
+ > gomp_available_cpus, 0)) |
|
|
count = gomp_throttled_spin_count_var; |
|
|
for (i = 0; i < count; i++) |
|
|
if (__builtin_expect (__atomic_load_n (addr, MEMMODEL_RELAXED) != val, 0)) |
|
|
--- libgomp/config/linux/affinity.c.jj 2014-05-15 10:56:37.499502573 +0200 |
|
|
+++ libgomp/config/linux/affinity.c 2016-07-13 16:57:18.902355979 +0200 |
|
|
@@ -352,6 +352,45 @@ gomp_affinity_print_place (void *p) |
|
|
fprintf (stderr, ":%lu", len); |
|
|
} |
|
|
|
|
|
+int |
|
|
+omp_get_place_num_procs (int place_num) |
|
|
+{ |
|
|
+ if (place_num < 0 || place_num >= gomp_places_list_len) |
|
|
+ return 0; |
|
|
+ |
|
|
+ cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num]; |
|
|
+ return gomp_cpuset_popcount (gomp_cpuset_size, cpusetp); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+omp_get_place_proc_ids (int place_num, int *ids) |
|
|
+{ |
|
|
+ if (place_num < 0 || place_num >= gomp_places_list_len) |
|
|
+ return; |
|
|
+ |
|
|
+ cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num]; |
|
|
+ unsigned long i, max = 8 * gomp_cpuset_size; |
|
|
+ for (i = 0; i < max; i++) |
|
|
+ if (CPU_ISSET_S (i, gomp_cpuset_size, cpusetp)) |
|
|
+ *ids++ = i; |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+gomp_get_place_proc_ids_8 (int place_num, int64_t *ids) |
|
|
+{ |
|
|
+ if (place_num < 0 || place_num >= gomp_places_list_len) |
|
|
+ return; |
|
|
+ |
|
|
+ cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num]; |
|
|
+ unsigned long i, max = 8 * gomp_cpuset_size; |
|
|
+ for (i = 0; i < max; i++) |
|
|
+ if (CPU_ISSET_S (i, gomp_cpuset_size, cpusetp)) |
|
|
+ *ids++ = i; |
|
|
+} |
|
|
+ |
|
|
+ialias(omp_get_place_num_procs) |
|
|
+ialias(omp_get_place_proc_ids) |
|
|
+ |
|
|
#else |
|
|
|
|
|
#include "../posix/affinity.c" |
|
|
--- libgomp/config/linux/mutex.c.jj 2013-01-21 16:00:38.220917670 +0100 |
|
|
+++ libgomp/config/linux/mutex.c 2016-07-13 16:57:18.870356375 +0200 |
|
|
@@ -28,8 +28,8 @@ |
|
|
|
|
|
#include "wait.h" |
|
|
|
|
|
-long int gomp_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG; |
|
|
-long int gomp_futex_wait = FUTEX_WAIT | FUTEX_PRIVATE_FLAG; |
|
|
+int gomp_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG; |
|
|
+int gomp_futex_wait = FUTEX_WAIT | FUTEX_PRIVATE_FLAG; |
|
|
|
|
|
void |
|
|
gomp_mutex_lock_slow (gomp_mutex_t *mutex, int oldval) |
|
|
--- libgomp/config/posix/affinity.c.jj 2014-05-15 10:56:37.987498844 +0200 |
|
|
+++ libgomp/config/posix/affinity.c 2016-07-15 12:08:28.410015743 +0200 |
|
|
@@ -113,3 +113,27 @@ gomp_affinity_print_place (void *p) |
|
|
{ |
|
|
(void) p; |
|
|
} |
|
|
+ |
|
|
+int |
|
|
+omp_get_place_num_procs (int place_num) |
|
|
+{ |
|
|
+ (void) place_num; |
|
|
+ return 0; |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+omp_get_place_proc_ids (int place_num, int *ids) |
|
|
+{ |
|
|
+ (void) place_num; |
|
|
+ (void) ids; |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+gomp_get_place_proc_ids_8 (int place_num, int64_t *ids) |
|
|
+{ |
|
|
+ (void) place_num; |
|
|
+ (void) ids; |
|
|
+} |
|
|
+ |
|
|
+ialias(omp_get_place_num_procs) |
|
|
+ialias(omp_get_place_proc_ids) |
|
|
--- libgomp/loop_ull.c.jj 2013-01-21 16:00:46.477871806 +0100 |
|
|
+++ libgomp/loop_ull.c 2016-07-13 16:57:18.918355780 +0200 |
|
|
@@ -174,15 +174,15 @@ GOMP_loop_ull_runtime_start (bool up, go |
|
|
{ |
|
|
case GFS_STATIC: |
|
|
return gomp_loop_ull_static_start (up, start, end, incr, |
|
|
- icv->run_sched_modifier, |
|
|
+ icv->run_sched_chunk_size, |
|
|
istart, iend); |
|
|
case GFS_DYNAMIC: |
|
|
return gomp_loop_ull_dynamic_start (up, start, end, incr, |
|
|
- icv->run_sched_modifier, |
|
|
+ icv->run_sched_chunk_size, |
|
|
istart, iend); |
|
|
case GFS_GUIDED: |
|
|
return gomp_loop_ull_guided_start (up, start, end, incr, |
|
|
- icv->run_sched_modifier, |
|
|
+ icv->run_sched_chunk_size, |
|
|
istart, iend); |
|
|
case GFS_AUTO: |
|
|
/* For now map to schedule(static), later on we could play with feedback |
|
|
@@ -278,15 +278,15 @@ GOMP_loop_ull_ordered_runtime_start (boo |
|
|
{ |
|
|
case GFS_STATIC: |
|
|
return gomp_loop_ull_ordered_static_start (up, start, end, incr, |
|
|
- icv->run_sched_modifier, |
|
|
+ icv->run_sched_chunk_size, |
|
|
istart, iend); |
|
|
case GFS_DYNAMIC: |
|
|
return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, |
|
|
- icv->run_sched_modifier, |
|
|
+ icv->run_sched_chunk_size, |
|
|
istart, iend); |
|
|
case GFS_GUIDED: |
|
|
return gomp_loop_ull_ordered_guided_start (up, start, end, incr, |
|
|
- icv->run_sched_modifier, |
|
|
+ icv->run_sched_chunk_size, |
|
|
istart, iend); |
|
|
case GFS_AUTO: |
|
|
/* For now map to schedule(static), later on we could play with feedback |
|
|
@@ -298,6 +298,114 @@ GOMP_loop_ull_ordered_runtime_start (boo |
|
|
} |
|
|
} |
|
|
|
|
|
+/* The *_doacross_*_start routines are similar. The only difference is that |
|
|
+ this work-share construct is initialized to expect an ORDERED(N) - DOACROSS |
|
|
+ section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1 |
|
|
+ and other COUNTS array elements tell the library number of iterations |
|
|
+ in the ordered inner loops. */ |
|
|
+ |
|
|
+static bool |
|
|
+gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts, |
|
|
+ gomp_ull chunk_size, gomp_ull *istart, |
|
|
+ gomp_ull *iend) |
|
|
+{ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ |
|
|
+ thr->ts.static_trip = 0; |
|
|
+ if (gomp_work_share_start (false)) |
|
|
+ { |
|
|
+ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, |
|
|
+ GFS_STATIC, chunk_size); |
|
|
+ gomp_doacross_ull_init (ncounts, counts, chunk_size); |
|
|
+ gomp_work_share_init_done (); |
|
|
+ } |
|
|
+ |
|
|
+ return !gomp_iter_ull_static_next (istart, iend); |
|
|
+} |
|
|
+ |
|
|
+static bool |
|
|
+gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts, |
|
|
+ gomp_ull chunk_size, gomp_ull *istart, |
|
|
+ gomp_ull *iend) |
|
|
+{ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ bool ret; |
|
|
+ |
|
|
+ if (gomp_work_share_start (false)) |
|
|
+ { |
|
|
+ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, |
|
|
+ GFS_DYNAMIC, chunk_size); |
|
|
+ gomp_doacross_ull_init (ncounts, counts, chunk_size); |
|
|
+ gomp_work_share_init_done (); |
|
|
+ } |
|
|
+ |
|
|
+#if defined HAVE_SYNC_BUILTINS && defined __LP64__ |
|
|
+ ret = gomp_iter_ull_dynamic_next (istart, iend); |
|
|
+#else |
|
|
+ gomp_mutex_lock (&thr->ts.work_share->lock); |
|
|
+ ret = gomp_iter_ull_dynamic_next_locked (istart, iend); |
|
|
+ gomp_mutex_unlock (&thr->ts.work_share->lock); |
|
|
+#endif |
|
|
+ |
|
|
+ return ret; |
|
|
+} |
|
|
+ |
|
|
+static bool |
|
|
+gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts, |
|
|
+ gomp_ull chunk_size, gomp_ull *istart, |
|
|
+ gomp_ull *iend) |
|
|
+{ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ bool ret; |
|
|
+ |
|
|
+ if (gomp_work_share_start (false)) |
|
|
+ { |
|
|
+ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, |
|
|
+ GFS_GUIDED, chunk_size); |
|
|
+ gomp_doacross_ull_init (ncounts, counts, chunk_size); |
|
|
+ gomp_work_share_init_done (); |
|
|
+ } |
|
|
+ |
|
|
+#if defined HAVE_SYNC_BUILTINS && defined __LP64__ |
|
|
+ ret = gomp_iter_ull_guided_next (istart, iend); |
|
|
+#else |
|
|
+ gomp_mutex_lock (&thr->ts.work_share->lock); |
|
|
+ ret = gomp_iter_ull_guided_next_locked (istart, iend); |
|
|
+ gomp_mutex_unlock (&thr->ts.work_share->lock); |
|
|
+#endif |
|
|
+ |
|
|
+ return ret; |
|
|
+} |
|
|
+ |
|
|
+bool |
|
|
+GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts, |
|
|
+ gomp_ull *istart, gomp_ull *iend) |
|
|
+{ |
|
|
+ struct gomp_task_icv *icv = gomp_icv (false); |
|
|
+ switch (icv->run_sched_var) |
|
|
+ { |
|
|
+ case GFS_STATIC: |
|
|
+ return gomp_loop_ull_doacross_static_start (ncounts, counts, |
|
|
+ icv->run_sched_chunk_size, |
|
|
+ istart, iend); |
|
|
+ case GFS_DYNAMIC: |
|
|
+ return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, |
|
|
+ icv->run_sched_chunk_size, |
|
|
+ istart, iend); |
|
|
+ case GFS_GUIDED: |
|
|
+ return gomp_loop_ull_doacross_guided_start (ncounts, counts, |
|
|
+ icv->run_sched_chunk_size, |
|
|
+ istart, iend); |
|
|
+ case GFS_AUTO: |
|
|
+ /* For now map to schedule(static), later on we could play with feedback |
|
|
+ driven choice. */ |
|
|
+ return gomp_loop_ull_doacross_static_start (ncounts, counts, |
|
|
+ 0, istart, iend); |
|
|
+ default: |
|
|
+ abort (); |
|
|
+ } |
|
|
+} |
|
|
+ |
|
|
/* The *_next routines are called when the thread completes processing of |
|
|
the iteration block currently assigned to it. If the work-share |
|
|
construct is bound directly to a parallel construct, then the iteration |
|
|
@@ -457,6 +565,10 @@ extern __typeof(gomp_loop_ull_dynamic_st |
|
|
__attribute__((alias ("gomp_loop_ull_dynamic_start"))); |
|
|
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start |
|
|
__attribute__((alias ("gomp_loop_ull_guided_start"))); |
|
|
+extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_nonmonotonic_dynamic_start |
|
|
+ __attribute__((alias ("gomp_loop_ull_dynamic_start"))); |
|
|
+extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start |
|
|
+ __attribute__((alias ("gomp_loop_ull_guided_start"))); |
|
|
|
|
|
extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start |
|
|
__attribute__((alias ("gomp_loop_ull_ordered_static_start"))); |
|
|
@@ -465,12 +577,23 @@ extern __typeof(gomp_loop_ull_ordered_dy |
|
|
extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start |
|
|
__attribute__((alias ("gomp_loop_ull_ordered_guided_start"))); |
|
|
|
|
|
+extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start |
|
|
+ __attribute__((alias ("gomp_loop_ull_doacross_static_start"))); |
|
|
+extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start |
|
|
+ __attribute__((alias ("gomp_loop_ull_doacross_dynamic_start"))); |
|
|
+extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start |
|
|
+ __attribute__((alias ("gomp_loop_ull_doacross_guided_start"))); |
|
|
+ |
|
|
extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next |
|
|
__attribute__((alias ("gomp_loop_ull_static_next"))); |
|
|
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next |
|
|
__attribute__((alias ("gomp_loop_ull_dynamic_next"))); |
|
|
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next |
|
|
__attribute__((alias ("gomp_loop_ull_guided_next"))); |
|
|
+extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_nonmonotonic_dynamic_next |
|
|
+ __attribute__((alias ("gomp_loop_ull_dynamic_next"))); |
|
|
+extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next |
|
|
+ __attribute__((alias ("gomp_loop_ull_guided_next"))); |
|
|
|
|
|
extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next |
|
|
__attribute__((alias ("gomp_loop_ull_ordered_static_next"))); |
|
|
@@ -507,6 +630,25 @@ GOMP_loop_ull_guided_start (bool up, gom |
|
|
} |
|
|
|
|
|
bool |
|
|
+GOMP_loop_ull_nonmonotonic_dynamic_start (bool up, gomp_ull start, |
|
|
+ gomp_ull end, gomp_ull incr, |
|
|
+ gomp_ull chunk_size, |
|
|
+ gomp_ull *istart, gomp_ull *iend) |
|
|
+{ |
|
|
+ return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart, |
|
|
+ iend); |
|
|
+} |
|
|
+ |
|
|
+bool |
|
|
+GOMP_loop_ull_nonmonotonic_guided_start (bool up, gomp_ull start, gomp_ull end, |
|
|
+ gomp_ull incr, gomp_ull chunk_size, |
|
|
+ gomp_ull *istart, gomp_ull *iend) |
|
|
+{ |
|
|
+ return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart, |
|
|
+ iend); |
|
|
+} |
|
|
+ |
|
|
+bool |
|
|
GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end, |
|
|
gomp_ull incr, gomp_ull chunk_size, |
|
|
gomp_ull *istart, gomp_ull *iend) |
|
|
@@ -534,6 +676,33 @@ GOMP_loop_ull_ordered_guided_start (bool |
|
|
} |
|
|
|
|
|
bool |
|
|
+GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts, |
|
|
+ gomp_ull chunk_size, gomp_ull *istart, |
|
|
+ gomp_ull *iend) |
|
|
+{ |
|
|
+ return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk_size, |
|
|
+ istart, iend); |
|
|
+} |
|
|
+ |
|
|
+bool |
|
|
+GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts, |
|
|
+ gomp_ull chunk_size, gomp_ull *istart, |
|
|
+ gomp_ull *iend) |
|
|
+{ |
|
|
+ return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk_size, |
|
|
+ istart, iend); |
|
|
+} |
|
|
+ |
|
|
+bool |
|
|
+GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts, |
|
|
+ gomp_ull chunk_size, gomp_ull *istart, |
|
|
+ gomp_ull *iend) |
|
|
+{ |
|
|
+ return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk_size, |
|
|
+ istart, iend); |
|
|
+} |
|
|
+ |
|
|
+bool |
|
|
GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend) |
|
|
{ |
|
|
return gomp_loop_ull_static_next (istart, iend); |
|
|
@@ -550,6 +719,18 @@ GOMP_loop_ull_guided_next (gomp_ull *ist |
|
|
{ |
|
|
return gomp_loop_ull_guided_next (istart, iend); |
|
|
} |
|
|
+ |
|
|
+bool |
|
|
+GOMP_loop_ull_nonmonotonic_dynamic_next (gomp_ull *istart, gomp_ull *iend) |
|
|
+{ |
|
|
+ return gomp_loop_ull_dynamic_next (istart, iend); |
|
|
+} |
|
|
+ |
|
|
+bool |
|
|
+GOMP_loop_ull_nonmonotonic_guided_next (gomp_ull *istart, gomp_ull *iend) |
|
|
+{ |
|
|
+ return gomp_loop_ull_guided_next (istart, iend); |
|
|
+} |
|
|
|
|
|
bool |
|
|
GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend) |
|
|
--- libgomp/team.c.jj 2014-05-15 10:56:32.092524669 +0200 |
|
|
+++ libgomp/team.c 2016-07-13 17:58:01.907291111 +0200 |
|
|
@@ -133,6 +133,25 @@ gomp_thread_start (void *xdata) |
|
|
return NULL; |
|
|
} |
|
|
|
|
|
+static inline struct gomp_team * |
|
|
+get_last_team (unsigned nthreads) |
|
|
+{ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ if (thr->ts.team == NULL) |
|
|
+ { |
|
|
+ struct gomp_thread_pool *pool = thr->thread_pool; |
|
|
+ if (pool != NULL) |
|
|
+ { |
|
|
+ struct gomp_team *last_team = pool->last_team; |
|
|
+ if (last_team != NULL && last_team->nthreads == nthreads) |
|
|
+ { |
|
|
+ pool->last_team = NULL; |
|
|
+ return last_team; |
|
|
+ } |
|
|
+ } |
|
|
+ } |
|
|
+ return NULL; |
|
|
+} |
|
|
|
|
|
/* Create a new team data structure. */ |
|
|
|
|
|
@@ -140,18 +159,27 @@ struct gomp_team * |
|
|
gomp_new_team (unsigned nthreads) |
|
|
{ |
|
|
struct gomp_team *team; |
|
|
- size_t size; |
|
|
int i; |
|
|
|
|
|
- size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0]) |
|
|
- + sizeof (team->implicit_task[0])); |
|
|
- team = gomp_malloc (size); |
|
|
+ team = get_last_team (nthreads); |
|
|
+ if (team == NULL) |
|
|
+ { |
|
|
+ size_t extra = sizeof (team->ordered_release[0]) |
|
|
+ + sizeof (team->implicit_task[0]); |
|
|
+ team = gomp_malloc (sizeof (*team) + nthreads * extra); |
|
|
+ |
|
|
+#ifndef HAVE_SYNC_BUILTINS |
|
|
+ gomp_mutex_init (&team->work_share_list_free_lock); |
|
|
+#endif |
|
|
+ gomp_barrier_init (&team->barrier, nthreads); |
|
|
+ gomp_mutex_init (&team->task_lock); |
|
|
+ |
|
|
+ team->nthreads = nthreads; |
|
|
+ } |
|
|
|
|
|
team->work_share_chunk = 8; |
|
|
#ifdef HAVE_SYNC_BUILTINS |
|
|
team->single_count = 0; |
|
|
-#else |
|
|
- gomp_mutex_init (&team->work_share_list_free_lock); |
|
|
#endif |
|
|
team->work_shares_to_free = &team->work_shares[0]; |
|
|
gomp_init_work_share (&team->work_shares[0], false, nthreads); |
|
|
@@ -162,15 +190,11 @@ gomp_new_team (unsigned nthreads) |
|
|
team->work_shares[i].next_free = &team->work_shares[i + 1]; |
|
|
team->work_shares[i].next_free = NULL; |
|
|
|
|
|
- team->nthreads = nthreads; |
|
|
- gomp_barrier_init (&team->barrier, nthreads); |
|
|
- |
|
|
gomp_sem_init (&team->master_release, 0); |
|
|
team->ordered_release = (void *) &team->implicit_task[nthreads]; |
|
|
team->ordered_release[0] = &team->master_release; |
|
|
|
|
|
- gomp_mutex_init (&team->task_lock); |
|
|
- team->task_queue = NULL; |
|
|
+ priority_queue_init (&team->task_queue); |
|
|
team->task_count = 0; |
|
|
team->task_queued_count = 0; |
|
|
team->task_running_count = 0; |
|
|
@@ -186,8 +210,12 @@ gomp_new_team (unsigned nthreads) |
|
|
static void |
|
|
free_team (struct gomp_team *team) |
|
|
{ |
|
|
+#ifndef HAVE_SYNC_BUILTINS |
|
|
+ gomp_mutex_destroy (&team->work_share_list_free_lock); |
|
|
+#endif |
|
|
gomp_barrier_destroy (&team->barrier); |
|
|
gomp_mutex_destroy (&team->task_lock); |
|
|
+ priority_queue_free (&team->task_queue); |
|
|
free (team); |
|
|
} |
|
|
|
|
|
@@ -258,6 +286,8 @@ gomp_free_thread (void *arg __attribute_ |
|
|
free (pool); |
|
|
thr->thread_pool = NULL; |
|
|
} |
|
|
+ if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0)) |
|
|
+ gomp_team_end (); |
|
|
if (thr->task != NULL) |
|
|
{ |
|
|
struct gomp_task *task = thr->task; |
|
|
@@ -287,7 +317,7 @@ gomp_team_start (void (*fn) (void *), vo |
|
|
struct gomp_thread **affinity_thr = NULL; |
|
|
|
|
|
thr = gomp_thread (); |
|
|
- nested = thr->ts.team != NULL; |
|
|
+ nested = thr->ts.level; |
|
|
if (__builtin_expect (thr->thread_pool == NULL, 0)) |
|
|
{ |
|
|
thr->thread_pool = gomp_new_thread_pool (); |
|
|
@@ -894,9 +924,6 @@ gomp_team_end (void) |
|
|
while (ws != NULL); |
|
|
} |
|
|
gomp_sem_destroy (&team->master_release); |
|
|
-#ifndef HAVE_SYNC_BUILTINS |
|
|
- gomp_mutex_destroy (&team->work_share_list_free_lock); |
|
|
-#endif |
|
|
|
|
|
if (__builtin_expect (thr->ts.team != NULL, 0) |
|
|
|| __builtin_expect (team->nthreads == 1, 0)) |
|
|
--- libgomp/target.c.jj 2014-05-15 10:56:38.313498020 +0200 |
|
|
+++ libgomp/target.c 2016-07-15 16:58:29.249328861 +0200 |
|
|
@@ -22,14 +22,22 @@ |
|
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
<http://www.gnu.org/licenses/>. */ |
|
|
|
|
|
-/* This file handles the maintainence of threads in response to team |
|
|
- creation and termination. */ |
|
|
+/* This file contains the support of offloading. */ |
|
|
|
|
|
+#include "config.h" |
|
|
#include "libgomp.h" |
|
|
+#include "oacc-plugin.h" |
|
|
+#include "oacc-int.h" |
|
|
+#include "gomp-constants.h" |
|
|
#include <limits.h> |
|
|
#include <stdbool.h> |
|
|
#include <stdlib.h> |
|
|
+#ifdef HAVE_INTTYPES_H |
|
|
+# include <inttypes.h> /* For PRIu64. */ |
|
|
+#endif |
|
|
#include <string.h> |
|
|
+#include <assert.h> |
|
|
+#include <errno.h> |
|
|
|
|
|
attribute_hidden int |
|
|
gomp_get_num_devices (void) |
|
|
@@ -37,22 +45,87 @@ gomp_get_num_devices (void) |
|
|
return 0; |
|
|
} |
|
|
|
|
|
-/* Called when encountering a target directive. If DEVICE |
|
|
- is -1, it means use device-var ICV. If it is -2 (or any other value |
|
|
- larger than last available hw device, use host fallback. |
|
|
- FN is address of host code, OPENMP_TARGET contains value of the |
|
|
- __OPENMP_TARGET__ symbol in the shared library or binary that invokes |
|
|
- GOMP_target. HOSTADDRS, SIZES and KINDS are arrays |
|
|
- with MAPNUM entries, with addresses of the host objects, |
|
|
- sizes of the host objects (resp. for pointer kind pointer bias |
|
|
- and assumed sizeof (void *) size) and kinds. */ |
|
|
+/* This function should be called from every offload image while loading. |
|
|
+ It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of |
|
|
+ the target, and TARGET_DATA needed by target plugin. */ |
|
|
|
|
|
void |
|
|
-GOMP_target (int device, void (*fn) (void *), const void *openmp_target, |
|
|
- size_t mapnum, void **hostaddrs, size_t *sizes, |
|
|
- unsigned char *kinds) |
|
|
+GOMP_offload_register_ver (unsigned version, const void *host_table, |
|
|
+ int target_type, const void *target_data) |
|
|
+{ |
|
|
+ (void) version; |
|
|
+ (void) host_table; |
|
|
+ (void) target_type; |
|
|
+ (void) target_data; |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+GOMP_offload_register (const void *host_table, int target_type, |
|
|
+ const void *target_data) |
|
|
+{ |
|
|
+ (void) host_table; |
|
|
+ (void) target_type; |
|
|
+ (void) target_data; |
|
|
+} |
|
|
+ |
|
|
+/* This function should be called from every offload image while unloading. |
|
|
+ It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of |
|
|
+ the target, and TARGET_DATA needed by target plugin. */ |
|
|
+ |
|
|
+void |
|
|
+GOMP_offload_unregister_ver (unsigned version, const void *host_table, |
|
|
+ int target_type, const void *target_data) |
|
|
+{ |
|
|
+ (void) version; |
|
|
+ (void) host_table; |
|
|
+ (void) target_type; |
|
|
+ (void) target_data; |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+GOMP_offload_unregister (const void *host_table, int target_type, |
|
|
+ const void *target_data) |
|
|
+{ |
|
|
+ (void) host_table; |
|
|
+ (void) target_type; |
|
|
+ (void) target_data; |
|
|
+} |
|
|
+ |
|
|
+/* This function initializes the target device, specified by DEVICEP. DEVICEP |
|
|
+ must be locked on entry, and remains locked on return. */ |
|
|
+ |
|
|
+attribute_hidden void |
|
|
+gomp_init_device (struct gomp_device_descr *devicep) |
|
|
+{ |
|
|
+ devicep->state = GOMP_DEVICE_INITIALIZED; |
|
|
+} |
|
|
+ |
|
|
+attribute_hidden void |
|
|
+gomp_unload_device (struct gomp_device_descr *devicep) |
|
|
+{ |
|
|
+} |
|
|
+ |
|
|
+/* Free address mapping tables. MM must be locked on entry, and remains locked |
|
|
+ on return. */ |
|
|
+ |
|
|
+attribute_hidden void |
|
|
+gomp_free_memmap (struct splay_tree_s *mem_map) |
|
|
+{ |
|
|
+ while (mem_map->root) |
|
|
+ { |
|
|
+ struct target_mem_desc *tgt = mem_map->root->key.tgt; |
|
|
+ |
|
|
+ splay_tree_remove (mem_map, &mem_map->root->key); |
|
|
+ free (tgt->array); |
|
|
+ free (tgt); |
|
|
+ } |
|
|
+} |
|
|
+ |
|
|
+/* Host fallback for GOMP_target{,_ext} routines. */ |
|
|
+ |
|
|
+static void |
|
|
+gomp_target_fallback (void (*fn) (void *), void **hostaddrs) |
|
|
{ |
|
|
- /* Host fallback. */ |
|
|
struct gomp_thread old_thr, *thr = gomp_thread (); |
|
|
old_thr = *thr; |
|
|
memset (thr, '\0', sizeof (*thr)); |
|
|
@@ -66,10 +139,167 @@ GOMP_target (int device, void (*fn) (voi |
|
|
*thr = old_thr; |
|
|
} |
|
|
|
|
|
+/* Calculate alignment and size requirements of a private copy of data shared |
|
|
+ as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */ |
|
|
+ |
|
|
+static inline void |
|
|
+calculate_firstprivate_requirements (size_t mapnum, size_t *sizes, |
|
|
+ unsigned short *kinds, size_t *tgt_align, |
|
|
+ size_t *tgt_size) |
|
|
+{ |
|
|
+ size_t i; |
|
|
+ for (i = 0; i < mapnum; i++) |
|
|
+ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) |
|
|
+ { |
|
|
+ size_t align = (size_t) 1 << (kinds[i] >> 8); |
|
|
+ if (*tgt_align < align) |
|
|
+ *tgt_align = align; |
|
|
+ *tgt_size = (*tgt_size + align - 1) & ~(align - 1); |
|
|
+ *tgt_size += sizes[i]; |
|
|
+ } |
|
|
+} |
|
|
+ |
|
|
+/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT. */ |
|
|
+ |
|
|
+static inline void |
|
|
+copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs, |
|
|
+ size_t *sizes, unsigned short *kinds, size_t tgt_align, |
|
|
+ size_t tgt_size) |
|
|
+{ |
|
|
+ uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); |
|
|
+ if (al) |
|
|
+ tgt += tgt_align - al; |
|
|
+ tgt_size = 0; |
|
|
+ size_t i; |
|
|
+ for (i = 0; i < mapnum; i++) |
|
|
+ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) |
|
|
+ { |
|
|
+ size_t align = (size_t) 1 << (kinds[i] >> 8); |
|
|
+ tgt_size = (tgt_size + align - 1) & ~(align - 1); |
|
|
+ memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); |
|
|
+ hostaddrs[i] = tgt + tgt_size; |
|
|
+ tgt_size = tgt_size + sizes[i]; |
|
|
+ } |
|
|
+} |
|
|
+ |
|
|
+/* Called when encountering a target directive. If DEVICE |
|
|
+ is GOMP_DEVICE_ICV, it means use device-var ICV. If it is |
|
|
+ GOMP_DEVICE_HOST_FALLBACK (or any value |
|
|
+ larger than last available hw device), use host fallback. |
|
|
+ FN is address of host code, UNUSED is part of the current ABI, but |
|
|
+ we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays |
|
|
+ with MAPNUM entries, with addresses of the host objects, |
|
|
+ sizes of the host objects (resp. for pointer kind pointer bias |
|
|
+ and assumed sizeof (void *) size) and kinds. */ |
|
|
+ |
|
|
+void |
|
|
+GOMP_target (int device, void (*fn) (void *), const void *unused, |
|
|
+ size_t mapnum, void **hostaddrs, size_t *sizes, |
|
|
+ unsigned char *kinds) |
|
|
+{ |
|
|
+ return gomp_target_fallback (fn, hostaddrs); |
|
|
+} |
|
|
+ |
|
|
+/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present, |
|
|
+ and several arguments have been added: |
|
|
+ FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h. |
|
|
+ DEPEND is array of dependencies, see GOMP_task for details. |
|
|
+ |
|
|
+ ARGS is a pointer to an array consisting of a variable number of both |
|
|
+ device-independent and device-specific arguments, which can take one or two |
|
|
+ elements where the first specifies for which device it is intended, the type |
|
|
+ and optionally also the value. If the value is not present in the first |
|
|
+ one, the whole second element is the actual value. The last element of the |
|
|
+ array is a single NULL. Among the device independent can be for example |
|
|
+ NUM_TEAMS and THREAD_LIMIT. |
|
|
+ |
|
|
+ NUM_TEAMS is positive if GOMP_teams will be called in the body with |
|
|
+ that value, or 1 if teams construct is not present, or 0, if |
|
|
+ teams construct does not have num_teams clause and so the choice is |
|
|
+ implementation defined, and -1 if it can't be determined on the host |
|
|
+ what value will GOMP_teams have on the device. |
|
|
+ THREAD_LIMIT similarly is positive if GOMP_teams will be called in the |
|
|
+ body with that value, or 0, if teams construct does not have thread_limit |
|
|
+ clause or the teams construct is not present, or -1 if it can't be |
|
|
+ determined on the host what value will GOMP_teams have on the device. */ |
|
|
+ |
|
|
+void |
|
|
+GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, |
|
|
+ void **hostaddrs, size_t *sizes, unsigned short *kinds, |
|
|
+ unsigned int flags, void **depend, void **args) |
|
|
+{ |
|
|
+ size_t tgt_align = 0, tgt_size = 0; |
|
|
+ bool fpc_done = false; |
|
|
+ |
|
|
+ if (flags & GOMP_TARGET_FLAG_NOWAIT) |
|
|
+ { |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ if (thr->ts.team |
|
|
+ && !thr->task->final_task) |
|
|
+ { |
|
|
+ gomp_create_target_task (NULL, fn, mapnum, hostaddrs, |
|
|
+ sizes, kinds, flags, depend, args, |
|
|
+ GOMP_TARGET_TASK_BEFORE_MAP); |
|
|
+ return; |
|
|
+ } |
|
|
+ } |
|
|
+ |
|
|
+ /* If there are depend clauses, but nowait is not present |
|
|
+ (or we are in a final task), block the parent task until the |
|
|
+ dependencies are resolved and then just continue with the rest |
|
|
+ of the function as if it is a merged task. */ |
|
|
+ if (depend != NULL) |
|
|
+ { |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ if (thr->task && thr->task->depend_hash) |
|
|
+ { |
|
|
+ /* If we might need to wait, copy firstprivate now. */ |
|
|
+ calculate_firstprivate_requirements (mapnum, sizes, kinds, |
|
|
+ &tgt_align, &tgt_size); |
|
|
+ if (tgt_align) |
|
|
+ { |
|
|
+ char *tgt = gomp_alloca (tgt_size + tgt_align - 1); |
|
|
+ copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, |
|
|
+ tgt_align, tgt_size); |
|
|
+ } |
|
|
+ fpc_done = true; |
|
|
+ gomp_task_maybe_wait_for_dependencies (depend); |
|
|
+ } |
|
|
+ } |
|
|
+ |
|
|
+ if (!fpc_done) |
|
|
+ { |
|
|
+ calculate_firstprivate_requirements (mapnum, sizes, kinds, |
|
|
+ &tgt_align, &tgt_size); |
|
|
+ if (tgt_align) |
|
|
+ { |
|
|
+ char *tgt = gomp_alloca (tgt_size + tgt_align - 1); |
|
|
+ copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, |
|
|
+ tgt_align, tgt_size); |
|
|
+ } |
|
|
+ } |
|
|
+ gomp_target_fallback (fn, hostaddrs); |
|
|
+} |
|
|
+ |
|
|
+/* Host fallback for GOMP_target_data{,_ext} routines. */ |
|
|
+ |
|
|
+static void |
|
|
+gomp_target_data_fallback (void) |
|
|
+{ |
|
|
+} |
|
|
+ |
|
|
void |
|
|
-GOMP_target_data (int device, const void *openmp_target, size_t mapnum, |
|
|
+GOMP_target_data (int device, const void *unused, size_t mapnum, |
|
|
void **hostaddrs, size_t *sizes, unsigned char *kinds) |
|
|
{ |
|
|
+ return gomp_target_data_fallback (); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs, |
|
|
+ size_t *sizes, unsigned short *kinds) |
|
|
+{ |
|
|
+ return gomp_target_data_fallback (); |
|
|
} |
|
|
|
|
|
void |
|
|
@@ -78,12 +308,112 @@ GOMP_target_end_data (void) |
|
|
} |
|
|
|
|
|
void |
|
|
-GOMP_target_update (int device, const void *openmp_target, size_t mapnum, |
|
|
+GOMP_target_update (int device, const void *unused, size_t mapnum, |
|
|
void **hostaddrs, size_t *sizes, unsigned char *kinds) |
|
|
{ |
|
|
} |
|
|
|
|
|
void |
|
|
+GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, |
|
|
+ size_t *sizes, unsigned short *kinds, |
|
|
+ unsigned int flags, void **depend) |
|
|
+{ |
|
|
+ /* If there are depend clauses, but nowait is not present, |
|
|
+ block the parent task until the dependencies are resolved |
|
|
+ and then just continue with the rest of the function as if it |
|
|
+ is a merged task. Until we are able to schedule task during |
|
|
+ variable mapping or unmapping, ignore nowait if depend clauses |
|
|
+ are not present. */ |
|
|
+ if (depend != NULL) |
|
|
+ { |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ if (thr->task && thr->task->depend_hash) |
|
|
+ { |
|
|
+ if ((flags & GOMP_TARGET_FLAG_NOWAIT) |
|
|
+ && thr->ts.team |
|
|
+ && !thr->task->final_task) |
|
|
+ { |
|
|
+ if (gomp_create_target_task (NULL, (void (*) (void *)) NULL, |
|
|
+ mapnum, hostaddrs, sizes, kinds, |
|
|
+ flags | GOMP_TARGET_FLAG_UPDATE, |
|
|
+ depend, NULL, GOMP_TARGET_TASK_DATA)) |
|
|
+ return; |
|
|
+ } |
|
|
+ else |
|
|
+ { |
|
|
+ struct gomp_team *team = thr->ts.team; |
|
|
+ /* If parallel or taskgroup has been cancelled, don't start new |
|
|
+ tasks. */ |
|
|
+ if (team |
|
|
+ && (gomp_team_barrier_cancelled (&team->barrier) |
|
|
+ || (thr->task->taskgroup |
|
|
+ && thr->task->taskgroup->cancelled))) |
|
|
+ return; |
|
|
+ |
|
|
+ gomp_task_maybe_wait_for_dependencies (depend); |
|
|
+ } |
|
|
+ } |
|
|
+ } |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, |
|
|
+ size_t *sizes, unsigned short *kinds, |
|
|
+ unsigned int flags, void **depend) |
|
|
+{ |
|
|
+ /* If there are depend clauses, but nowait is not present, |
|
|
+ block the parent task until the dependencies are resolved |
|
|
+ and then just continue with the rest of the function as if it |
|
|
+ is a merged task. Until we are able to schedule task during |
|
|
+ variable mapping or unmapping, ignore nowait if depend clauses |
|
|
+ are not present. */ |
|
|
+ if (depend != NULL) |
|
|
+ { |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ if (thr->task && thr->task->depend_hash) |
|
|
+ { |
|
|
+ if ((flags & GOMP_TARGET_FLAG_NOWAIT) |
|
|
+ && thr->ts.team |
|
|
+ && !thr->task->final_task) |
|
|
+ { |
|
|
+ if (gomp_create_target_task (NULL, (void (*) (void *)) NULL, |
|
|
+ mapnum, hostaddrs, sizes, kinds, |
|
|
+ flags, depend, NULL, |
|
|
+ GOMP_TARGET_TASK_DATA)) |
|
|
+ return; |
|
|
+ } |
|
|
+ else |
|
|
+ { |
|
|
+ struct gomp_team *team = thr->ts.team; |
|
|
+ /* If parallel or taskgroup has been cancelled, don't start new |
|
|
+ tasks. */ |
|
|
+ if (team |
|
|
+ && (gomp_team_barrier_cancelled (&team->barrier) |
|
|
+ || (thr->task->taskgroup |
|
|
+ && thr->task->taskgroup->cancelled))) |
|
|
+ return; |
|
|
+ |
|
|
+ gomp_task_maybe_wait_for_dependencies (depend); |
|
|
+ } |
|
|
+ } |
|
|
+ } |
|
|
+} |
|
|
+ |
|
|
+bool |
|
|
+gomp_target_task_fn (void *data) |
|
|
+{ |
|
|
+ struct gomp_target_task *ttask = (struct gomp_target_task *) data; |
|
|
+ |
|
|
+ if (ttask->fn != NULL) |
|
|
+ { |
|
|
+ ttask->state = GOMP_TARGET_TASK_FALLBACK; |
|
|
+ gomp_target_fallback (ttask->fn, ttask->hostaddrs); |
|
|
+ return false; |
|
|
+ } |
|
|
+ return false; |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
GOMP_teams (unsigned int num_teams, unsigned int thread_limit) |
|
|
{ |
|
|
if (thread_limit) |
|
|
@@ -94,3 +424,153 @@ GOMP_teams (unsigned int num_teams, unsi |
|
|
} |
|
|
(void) num_teams; |
|
|
} |
|
|
+ |
|
|
+void * |
|
|
+omp_target_alloc (size_t size, int device_num) |
|
|
+{ |
|
|
+ if (device_num == GOMP_DEVICE_HOST_FALLBACK) |
|
|
+ return malloc (size); |
|
|
+ |
|
|
+ return NULL; |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+omp_target_free (void *device_ptr, int device_num) |
|
|
+{ |
|
|
+ if (device_ptr == NULL) |
|
|
+ return; |
|
|
+ |
|
|
+ if (device_num == GOMP_DEVICE_HOST_FALLBACK) |
|
|
+ { |
|
|
+ free (device_ptr); |
|
|
+ return; |
|
|
+ } |
|
|
+} |
|
|
+ |
|
|
+int |
|
|
+omp_target_is_present (void *ptr, int device_num) |
|
|
+{ |
|
|
+ if (ptr == NULL) |
|
|
+ return 1; |
|
|
+ |
|
|
+ if (device_num == GOMP_DEVICE_HOST_FALLBACK) |
|
|
+ return 1; |
|
|
+ |
|
|
+ return 0; |
|
|
+} |
|
|
+ |
|
|
+int |
|
|
+omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset, |
|
|
+ size_t src_offset, int dst_device_num, int src_device_num) |
|
|
+{ |
|
|
+ if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) |
|
|
+ return EINVAL; |
|
|
+ if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) |
|
|
+ return EINVAL; |
|
|
+ memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length); |
|
|
+ return 0; |
|
|
+} |
|
|
+ |
|
|
/* One shifted left by half the number of bits of size_t (assuming 8-bit
   bytes).  Two operands that are both below this value cannot wrap when
   multiplied.  */
#define HALF_SIZE_T (((size_t) 1) << (8 * sizeof (size_t) / 2))

/* size_t-only replacement for the __builtin_mul_overflow builtin: stores
   the (possibly wrapped) product of X and Y in *Z and evaluates to true
   iff the multiplication wrapped.  The division check is only performed
   when at least one operand reaches HALF_SIZE_T.  */
#define __builtin_mul_overflow(x, y, z) \
  ({ size_t mo_lhs = (x);                                               \
     size_t mo_rhs = (y);                                               \
     size_t mo_prod = mo_lhs * mo_rhs;                                  \
     bool mo_wrapped = false;                                           \
     if (__builtin_expect ((mo_lhs | mo_rhs) >= HALF_SIZE_T, 0)         \
         && mo_lhs                                                      \
         && mo_prod / mo_lhs != mo_rhs)                                 \
       mo_wrapped = true;                                               \
     *(z) = mo_prod;                                                    \
     mo_wrapped; })
|
|
+ |
|
|
/* Copy a rectangular sub-volume from SRC to DST with memcpy.

   Both arrays have NUM_DIMS dimensions and elements of ELEMENT_SIZE
   bytes; DST_DIMENSIONS and SRC_DIMENSIONS hold their extents in
   elements.  VOLUME is the number of elements to copy along each
   dimension, DST_OFFSETS and SRC_OFFSETS the element offset of the
   sub-volume along each dimension.  All arrays must have NUM_DIMS
   entries.

   Returns 0 on success and EINVAL when NUM_DIMS is smaller than 1 or
   when a byte length or offset computation overflows size_t.  */

static int
omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
                               int num_dims, const size_t *volume,
                               const size_t *dst_offsets,
                               const size_t *src_offsets,
                               const size_t *dst_dimensions,
                               const size_t *src_dimensions)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  /* A non-positive dimension count would otherwise read element 0 of the
     arrays and recurse with an ever decreasing NUM_DIMS.  */
  if (num_dims < 1)
    return EINVAL;

  if (num_dims == 1)
    {
      /* Innermost dimension: one contiguous run of elements.  */
      if (__builtin_mul_overflow (element_size, volume[0], &length)
          || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
          || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
        return EINVAL;
      memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
      return 0;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  /* Byte size of one slice along the outermost dimension: the element
     size multiplied by the extents of all inner dimensions.  */
  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
        || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;

  /* Copy one slice at a time, handling the inner dimensions
     recursively.  */
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
                                           (char *) src + src_off,
                                           element_size, num_dims - 1,
                                           volume + 1, dst_offsets + 1,
                                           src_offsets + 1, dst_dimensions + 1,
                                           src_dimensions + 1);
      if (ret)
        return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
|
|
+ |
|
|
+int |
|
|
+omp_target_memcpy_rect (void *dst, void *src, size_t element_size, |
|
|
+ int num_dims, const size_t *volume, |
|
|
+ const size_t *dst_offsets, |
|
|
+ const size_t *src_offsets, |
|
|
+ const size_t *dst_dimensions, |
|
|
+ const size_t *src_dimensions, |
|
|
+ int dst_device_num, int src_device_num) |
|
|
+{ |
|
|
+ if (!dst && !src) |
|
|
+ return INT_MAX; |
|
|
+ |
|
|
+ if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) |
|
|
+ return EINVAL; |
|
|
+ if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) |
|
|
+ return EINVAL; |
|
|
+ |
|
|
+ int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims, |
|
|
+ volume, dst_offsets, src_offsets, |
|
|
+ dst_dimensions, src_dimensions); |
|
|
+ return ret; |
|
|
+} |
|
|
+ |
|
|
/* Associating a host pointer with device memory is not supported here:
   every argument is ignored and EINVAL is returned.  */

int
omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
                          size_t device_offset, int device_num)
{
  (void) host_ptr;
  (void) device_ptr;
  (void) size;
  (void) device_offset;
  (void) device_num;
  return EINVAL;
}
|
|
+ |
|
|
/* Counterpart of omp_target_associate_ptr; likewise unsupported here:
   both arguments are ignored and EINVAL is returned.  */

int
omp_target_disassociate_ptr (void *ptr, int device_num)
{
  (void) ptr;
  (void) device_num;
  return EINVAL;
}
|
|
--- libgomp/fortran.c.jj 2014-05-15 10:56:31.593531223 +0200 |
|
|
+++ libgomp/fortran.c 2016-07-13 16:57:04.432535397 +0200 |
|
|
@@ -67,12 +67,20 @@ ialias_redirect (omp_get_active_level) |
|
|
ialias_redirect (omp_in_final) |
|
|
ialias_redirect (omp_get_cancellation) |
|
|
ialias_redirect (omp_get_proc_bind) |
|
|
+ialias_redirect (omp_get_num_places) |
|
|
+ialias_redirect (omp_get_place_num_procs) |
|
|
+ialias_redirect (omp_get_place_proc_ids) |
|
|
+ialias_redirect (omp_get_place_num) |
|
|
+ialias_redirect (omp_get_partition_num_places) |
|
|
+ialias_redirect (omp_get_partition_place_nums) |
|
|
ialias_redirect (omp_set_default_device) |
|
|
ialias_redirect (omp_get_default_device) |
|
|
ialias_redirect (omp_get_num_devices) |
|
|
ialias_redirect (omp_get_num_teams) |
|
|
ialias_redirect (omp_get_team_num) |
|
|
ialias_redirect (omp_is_initial_device) |
|
|
+ialias_redirect (omp_get_initial_device) |
|
|
+ialias_redirect (omp_get_max_task_priority) |
|
|
#endif |
|
|
|
|
|
#ifndef LIBGOMP_GNU_SYMBOL_VERSIONING |
|
|
@@ -342,35 +350,35 @@ omp_get_wtime_ (void) |
|
|
} |
|
|
|
|
|
void |
|
|
-omp_set_schedule_ (const int32_t *kind, const int32_t *modifier) |
|
|
+omp_set_schedule_ (const int32_t *kind, const int32_t *chunk_size) |
|
|
{ |
|
|
- omp_set_schedule (*kind, *modifier); |
|
|
+ omp_set_schedule (*kind, *chunk_size); |
|
|
} |
|
|
|
|
|
void |
|
|
-omp_set_schedule_8_ (const int32_t *kind, const int64_t *modifier) |
|
|
+omp_set_schedule_8_ (const int32_t *kind, const int64_t *chunk_size) |
|
|
{ |
|
|
- omp_set_schedule (*kind, TO_INT (*modifier)); |
|
|
+ omp_set_schedule (*kind, TO_INT (*chunk_size)); |
|
|
} |
|
|
|
|
|
void |
|
|
-omp_get_schedule_ (int32_t *kind, int32_t *modifier) |
|
|
+omp_get_schedule_ (int32_t *kind, int32_t *chunk_size) |
|
|
{ |
|
|
omp_sched_t k; |
|
|
- int m; |
|
|
- omp_get_schedule (&k, &m); |
|
|
+ int cs; |
|
|
+ omp_get_schedule (&k, &cs); |
|
|
*kind = k; |
|
|
- *modifier = m; |
|
|
+ *chunk_size = cs; |
|
|
} |
|
|
|
|
|
void |
|
|
-omp_get_schedule_8_ (int32_t *kind, int64_t *modifier) |
|
|
+omp_get_schedule_8_ (int32_t *kind, int64_t *chunk_size) |
|
|
{ |
|
|
omp_sched_t k; |
|
|
- int m; |
|
|
- omp_get_schedule (&k, &m); |
|
|
+ int cs; |
|
|
+ omp_get_schedule (&k, &cs); |
|
|
*kind = k; |
|
|
- *modifier = m; |
|
|
+ *chunk_size = cs; |
|
|
} |
|
|
|
|
|
int32_t |
|
|
@@ -451,6 +459,69 @@ omp_get_proc_bind_ (void) |
|
|
return omp_get_proc_bind (); |
|
|
} |
|
|
|
|
|
/* Fortran entry point forwarding to omp_get_num_places.  */

int32_t
omp_get_num_places_ (void)
{
  int32_t num_places = omp_get_num_places ();
  return num_places;
}
|
|
+ |
|
|
/* Fortran entry point forwarding to omp_get_place_num_procs; PLACE_NUM
   is passed by reference as a 32-bit integer.  */

int32_t
omp_get_place_num_procs_ (const int32_t *place_num)
{
  int32_t num_procs = omp_get_place_num_procs (*place_num);
  return num_procs;
}
|
|
+ |
|
|
/* Variant of omp_get_place_num_procs_ taking a 64-bit PLACE_NUM, which
   is converted with TO_INT before the call.  */

int32_t
omp_get_place_num_procs_8_ (const int64_t *place_num)
{
  int32_t num_procs = omp_get_place_num_procs (TO_INT (*place_num));
  return num_procs;
}
|
|
+ |
|
|
/* Fortran entry point forwarding to omp_get_place_proc_ids; IDS is
   handed on as an int array.  */

void
omp_get_place_proc_ids_ (const int32_t *place_num, int32_t *ids)
{
  int *out = (int *) ids;
  omp_get_place_proc_ids (*place_num, out);
}
|
|
+ |
|
|
/* Variant of omp_get_place_proc_ids_ with 64-bit arguments.  PLACE_NUM
   is converted with TO_INT and the work is done by
   gomp_get_place_proc_ids_8, which receives the 64-bit IDS array.  */

void
omp_get_place_proc_ids_8_ (const int64_t *place_num, int64_t *ids)
{
  int place = TO_INT (*place_num);
  gomp_get_place_proc_ids_8 (place, ids);
}
|
|
+ |
|
|
/* Fortran entry point forwarding to omp_get_place_num.  */

int32_t
omp_get_place_num_ (void)
{
  int32_t place = omp_get_place_num ();
  return place;
}
|
|
+ |
|
|
/* Fortran entry point forwarding to omp_get_partition_num_places.  */

int32_t
omp_get_partition_num_places_ (void)
{
  int32_t num_places = omp_get_partition_num_places ();
  return num_places;
}
|
|
+ |
|
|
/* Fortran entry point forwarding to omp_get_partition_place_nums;
   PLACE_NUMS is handed on as an int array.  */

void
omp_get_partition_place_nums_ (int32_t *place_nums)
{
  int *out = (int *) place_nums;
  omp_get_partition_place_nums (out);
}
|
|
+ |
|
|
+void |
|
|
+omp_get_partition_place_nums_8_ (int64_t *place_nums) |
|
|
+{ |
|
|
+ if (gomp_places_list == NULL) |
|
|
+ return; |
|
|
+ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ if (thr->place == 0) |
|
|
+ gomp_init_affinity (); |
|
|
+ |
|
|
+ unsigned int i; |
|
|
+ for (i = 0; i < thr->ts.place_partition_len; i++) |
|
|
+ *place_nums++ = (int64_t) thr->ts.place_partition_off + i; |
|
|
+} |
|
|
+ |
|
|
void |
|
|
omp_set_default_device_ (const int32_t *device_num) |
|
|
{ |
|
|
@@ -492,3 +563,15 @@ omp_is_initial_device_ (void) |
|
|
{ |
|
|
return omp_is_initial_device (); |
|
|
} |
|
|
+ |
|
|
/* Fortran entry point forwarding to omp_get_initial_device.  */

int32_t
omp_get_initial_device_ (void)
{
  int32_t device = omp_get_initial_device ();
  return device;
}
|
|
+ |
|
|
/* Fortran entry point forwarding to omp_get_max_task_priority.  */

int32_t
omp_get_max_task_priority_ (void)
{
  int32_t priority = omp_get_max_task_priority ();
  return priority;
}
|
|
--- libgomp/libgomp.map.jj 2014-05-15 10:56:31.927533549 +0200 |
|
|
+++ libgomp/libgomp.map 2016-07-13 16:57:04.434535373 +0200 |
|
|
@@ -134,6 +134,36 @@ OMP_4.0 { |
|
|
omp_is_initial_device_; |
|
|
} OMP_3.1; |
|
|
|
|
|
+OMP_4.5 { |
|
|
+ global: |
|
|
+ omp_get_max_task_priority; |
|
|
+ omp_get_max_task_priority_; |
|
|
+ omp_get_num_places; |
|
|
+ omp_get_num_places_; |
|
|
+ omp_get_place_num_procs; |
|
|
+ omp_get_place_num_procs_; |
|
|
+ omp_get_place_num_procs_8_; |
|
|
+ omp_get_place_proc_ids; |
|
|
+ omp_get_place_proc_ids_; |
|
|
+ omp_get_place_proc_ids_8_; |
|
|
+ omp_get_place_num; |
|
|
+ omp_get_place_num_; |
|
|
+ omp_get_partition_num_places; |
|
|
+ omp_get_partition_num_places_; |
|
|
+ omp_get_partition_place_nums; |
|
|
+ omp_get_partition_place_nums_; |
|
|
+ omp_get_partition_place_nums_8_; |
|
|
+ omp_get_initial_device; |
|
|
+ omp_get_initial_device_; |
|
|
+ omp_target_alloc; |
|
|
+ omp_target_free; |
|
|
+ omp_target_is_present; |
|
|
+ omp_target_memcpy; |
|
|
+ omp_target_memcpy_rect; |
|
|
+ omp_target_associate_ptr; |
|
|
+ omp_target_disassociate_ptr; |
|
|
+} OMP_4.0; |
|
|
+ |
|
|
GOMP_1.0 { |
|
|
global: |
|
|
GOMP_atomic_end; |
|
|
@@ -227,3 +257,158 @@ GOMP_4.0 { |
|
|
GOMP_target_update; |
|
|
GOMP_teams; |
|
|
} GOMP_3.0; |
|
|
+ |
|
|
+GOMP_4.0.1 { |
|
|
+ global: |
|
|
+ GOMP_offload_register; |
|
|
+ GOMP_offload_unregister; |
|
|
+} GOMP_4.0; |
|
|
+ |
|
|
+GOMP_4.5 { |
|
|
+ global: |
|
|
+ GOMP_target_ext; |
|
|
+ GOMP_target_data_ext; |
|
|
+ GOMP_target_update_ext; |
|
|
+ GOMP_target_enter_exit_data; |
|
|
+ GOMP_taskloop; |
|
|
+ GOMP_taskloop_ull; |
|
|
+ GOMP_offload_register_ver; |
|
|
+ GOMP_offload_unregister_ver; |
|
|
+ GOMP_loop_doacross_dynamic_start; |
|
|
+ GOMP_loop_doacross_guided_start; |
|
|
+ GOMP_loop_doacross_runtime_start; |
|
|
+ GOMP_loop_doacross_static_start; |
|
|
+ GOMP_doacross_post; |
|
|
+ GOMP_doacross_wait; |
|
|
+ GOMP_loop_ull_doacross_dynamic_start; |
|
|
+ GOMP_loop_ull_doacross_guided_start; |
|
|
+ GOMP_loop_ull_doacross_runtime_start; |
|
|
+ GOMP_loop_ull_doacross_static_start; |
|
|
+ GOMP_doacross_ull_post; |
|
|
+ GOMP_doacross_ull_wait; |
|
|
+ GOMP_loop_nonmonotonic_dynamic_next; |
|
|
+ GOMP_loop_nonmonotonic_dynamic_start; |
|
|
+ GOMP_loop_nonmonotonic_guided_next; |
|
|
+ GOMP_loop_nonmonotonic_guided_start; |
|
|
+ GOMP_loop_ull_nonmonotonic_dynamic_next; |
|
|
+ GOMP_loop_ull_nonmonotonic_dynamic_start; |
|
|
+ GOMP_loop_ull_nonmonotonic_guided_next; |
|
|
+ GOMP_loop_ull_nonmonotonic_guided_start; |
|
|
+ GOMP_parallel_loop_nonmonotonic_dynamic; |
|
|
+ GOMP_parallel_loop_nonmonotonic_guided; |
|
|
+} GOMP_4.0.1; |
|
|
+ |
|
|
+OACC_2.0 { |
|
|
+ global: |
|
|
+ acc_get_num_devices; |
|
|
+ acc_get_num_devices_h_; |
|
|
+ acc_set_device_type; |
|
|
+ acc_set_device_type_h_; |
|
|
+ acc_get_device_type; |
|
|
+ acc_get_device_type_h_; |
|
|
+ acc_set_device_num; |
|
|
+ acc_set_device_num_h_; |
|
|
+ acc_get_device_num; |
|
|
+ acc_get_device_num_h_; |
|
|
+ acc_async_test; |
|
|
+ acc_async_test_h_; |
|
|
+ acc_async_test_all; |
|
|
+ acc_async_test_all_h_; |
|
|
+ acc_wait; |
|
|
+ acc_wait_h_; |
|
|
+ acc_wait_async; |
|
|
+ acc_wait_async_h_; |
|
|
+ acc_wait_all; |
|
|
+ acc_wait_all_h_; |
|
|
+ acc_wait_all_async; |
|
|
+ acc_wait_all_async_h_; |
|
|
+ acc_init; |
|
|
+ acc_init_h_; |
|
|
+ acc_shutdown; |
|
|
+ acc_shutdown_h_; |
|
|
+ acc_on_device; |
|
|
+ acc_on_device_h_; |
|
|
+ acc_malloc; |
|
|
+ acc_free; |
|
|
+ acc_copyin; |
|
|
+ acc_copyin_32_h_; |
|
|
+ acc_copyin_64_h_; |
|
|
+ acc_copyin_array_h_; |
|
|
+ acc_present_or_copyin; |
|
|
+ acc_present_or_copyin_32_h_; |
|
|
+ acc_present_or_copyin_64_h_; |
|
|
+ acc_present_or_copyin_array_h_; |
|
|
+ acc_create; |
|
|
+ acc_create_32_h_; |
|
|
+ acc_create_64_h_; |
|
|
+ acc_create_array_h_; |
|
|
+ acc_present_or_create; |
|
|
+ acc_present_or_create_32_h_; |
|
|
+ acc_present_or_create_64_h_; |
|
|
+ acc_present_or_create_array_h_; |
|
|
+ acc_copyout; |
|
|
+ acc_copyout_32_h_; |
|
|
+ acc_copyout_64_h_; |
|
|
+ acc_copyout_array_h_; |
|
|
+ acc_delete; |
|
|
+ acc_delete_32_h_; |
|
|
+ acc_delete_64_h_; |
|
|
+ acc_delete_array_h_; |
|
|
+ acc_update_device; |
|
|
+ acc_update_device_32_h_; |
|
|
+ acc_update_device_64_h_; |
|
|
+ acc_update_device_array_h_; |
|
|
+ acc_update_self; |
|
|
+ acc_update_self_32_h_; |
|
|
+ acc_update_self_64_h_; |
|
|
+ acc_update_self_array_h_; |
|
|
+ acc_map_data; |
|
|
+ acc_unmap_data; |
|
|
+ acc_deviceptr; |
|
|
+ acc_hostptr; |
|
|
+ acc_is_present; |
|
|
+ acc_is_present_32_h_; |
|
|
+ acc_is_present_64_h_; |
|
|
+ acc_is_present_array_h_; |
|
|
+ acc_memcpy_to_device; |
|
|
+ acc_memcpy_from_device; |
|
|
+ acc_get_current_cuda_device; |
|
|
+ acc_get_current_cuda_context; |
|
|
+ acc_get_cuda_stream; |
|
|
+ acc_set_cuda_stream; |
|
|
+}; |
|
|
+ |
|
|
+GOACC_2.0 { |
|
|
+ global: |
|
|
+ GOACC_data_end; |
|
|
+ GOACC_data_start; |
|
|
+ GOACC_enter_exit_data; |
|
|
+ GOACC_parallel; |
|
|
+ GOACC_update; |
|
|
+ GOACC_wait; |
|
|
+ GOACC_get_thread_num; |
|
|
+ GOACC_get_num_threads; |
|
|
+}; |
|
|
+ |
|
|
+GOACC_2.0.1 { |
|
|
+ global: |
|
|
+ GOACC_declare; |
|
|
+ GOACC_parallel_keyed; |
|
|
+} GOACC_2.0; |
|
|
+ |
|
|
+GOMP_PLUGIN_1.0 { |
|
|
+ global: |
|
|
+ GOMP_PLUGIN_malloc; |
|
|
+ GOMP_PLUGIN_malloc_cleared; |
|
|
+ GOMP_PLUGIN_realloc; |
|
|
+ GOMP_PLUGIN_debug; |
|
|
+ GOMP_PLUGIN_error; |
|
|
+ GOMP_PLUGIN_fatal; |
|
|
+ GOMP_PLUGIN_async_unmap_vars; |
|
|
+ GOMP_PLUGIN_acc_thread; |
|
|
+}; |
|
|
+ |
|
|
+GOMP_PLUGIN_1.1 { |
|
|
+ global: |
|
|
+ GOMP_PLUGIN_target_task_completion; |
|
|
+} GOMP_PLUGIN_1.0; |
|
|
--- libgomp/ordered.c.jj 2013-01-21 16:00:46.137873657 +0100 |
|
|
+++ libgomp/ordered.c 2016-07-13 16:57:18.918355780 +0200 |
|
|
@@ -25,6 +25,9 @@ |
|
|
/* This file handles the ORDERED construct. */ |
|
|
|
|
|
#include "libgomp.h" |
|
|
+#include <stdarg.h> |
|
|
+#include <string.h> |
|
|
+#include "doacross.h" |
|
|
|
|
|
|
|
|
/* This function is called when first allocating an iteration block. That |
|
|
@@ -249,3 +252,533 @@ void |
|
|
GOMP_ordered_end (void) |
|
|
{ |
|
|
} |
|
|
+ |
|
|
+/* DOACROSS initialization. */ |
|
|
+ |
|
|
+#define MAX_COLLAPSED_BITS (__SIZEOF_LONG__ * __CHAR_BIT__) |
|
|
+ |
|
|
+void |
|
|
+gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size) |
|
|
+{ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ struct gomp_team *team = thr->ts.team; |
|
|
+ struct gomp_work_share *ws = thr->ts.work_share; |
|
|
+ unsigned int i, bits[MAX_COLLAPSED_BITS], num_bits = 0; |
|
|
+ unsigned long ent, num_ents, elt_sz, shift_sz; |
|
|
+ struct gomp_doacross_work_share *doacross; |
|
|
+ |
|
|
+ if (team == NULL || team->nthreads == 1) |
|
|
+ return; |
|
|
+ |
|
|
+ for (i = 0; i < ncounts; i++) |
|
|
+ { |
|
|
+ /* If any count is 0, GOMP_doacross_{post,wait} can't be called. */ |
|
|
+ if (counts[i] == 0) |
|
|
+ return; |
|
|
+ |
|
|
+ if (num_bits <= MAX_COLLAPSED_BITS) |
|
|
+ { |
|
|
+ unsigned int this_bits; |
|
|
+ if (counts[i] == 1) |
|
|
+ this_bits = 1; |
|
|
+ else |
|
|
+ this_bits = __SIZEOF_LONG__ * __CHAR_BIT__ |
|
|
+ - __builtin_clzl (counts[i] - 1); |
|
|
+ if (num_bits + this_bits <= MAX_COLLAPSED_BITS) |
|
|
+ { |
|
|
+ bits[i] = this_bits; |
|
|
+ num_bits += this_bits; |
|
|
+ } |
|
|
+ else |
|
|
+ num_bits = MAX_COLLAPSED_BITS + 1; |
|
|
+ } |
|
|
+ } |
|
|
+ |
|
|
+ if (ws->sched == GFS_STATIC) |
|
|
+ num_ents = team->nthreads; |
|
|
+ else if (ws->sched == GFS_GUIDED) |
|
|
+ num_ents = counts[0]; |
|
|
+ else |
|
|
+ num_ents = (counts[0] - 1) / chunk_size + 1; |
|
|
+ if (num_bits <= MAX_COLLAPSED_BITS) |
|
|
+ { |
|
|
+ elt_sz = sizeof (unsigned long); |
|
|
+ shift_sz = ncounts * sizeof (unsigned int); |
|
|
+ } |
|
|
+ else |
|
|
+ { |
|
|
+ elt_sz = sizeof (unsigned long) * ncounts; |
|
|
+ shift_sz = 0; |
|
|
+ } |
|
|
+ elt_sz = (elt_sz + 63) & ~63UL; |
|
|
+ |
|
|
+ doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz |
|
|
+ + shift_sz); |
|
|
+ doacross->chunk_size = chunk_size; |
|
|
+ doacross->elt_sz = elt_sz; |
|
|
+ doacross->ncounts = ncounts; |
|
|
+ doacross->flattened = false; |
|
|
+ doacross->array = (unsigned char *) |
|
|
+ ((((uintptr_t) (doacross + 1)) + 63 + shift_sz) |
|
|
+ & ~(uintptr_t) 63); |
|
|
+ if (num_bits <= MAX_COLLAPSED_BITS) |
|
|
+ { |
|
|
+ unsigned int shift_count = 0; |
|
|
+ doacross->flattened = true; |
|
|
+ for (i = ncounts; i > 0; i--) |
|
|
+ { |
|
|
+ doacross->shift_counts[i - 1] = shift_count; |
|
|
+ shift_count += bits[i - 1]; |
|
|
+ } |
|
|
+ for (ent = 0; ent < num_ents; ent++) |
|
|
+ *(unsigned long *) (doacross->array + ent * elt_sz) = 0; |
|
|
+ } |
|
|
+ else |
|
|
+ for (ent = 0; ent < num_ents; ent++) |
|
|
+ memset (doacross->array + ent * elt_sz, '\0', |
|
|
+ sizeof (unsigned long) * ncounts); |
|
|
+ if (ws->sched == GFS_STATIC && chunk_size == 0) |
|
|
+ { |
|
|
+ unsigned long q = counts[0] / num_ents; |
|
|
+ unsigned long t = counts[0] % num_ents; |
|
|
+ doacross->boundary = t * (q + 1); |
|
|
+ doacross->q = q; |
|
|
+ doacross->t = t; |
|
|
+ } |
|
|
+ ws->doacross = doacross; |
|
|
+} |
|
|
+ |
|
|
+/* DOACROSS POST operation. */ |
|
|
+ |
|
|
+void |
|
|
+GOMP_doacross_post (long *counts) |
|
|
+{ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ struct gomp_work_share *ws = thr->ts.work_share; |
|
|
+ struct gomp_doacross_work_share *doacross = ws->doacross; |
|
|
+ unsigned long ent; |
|
|
+ unsigned int i; |
|
|
+ |
|
|
+ if (__builtin_expect (doacross == NULL, 0)) |
|
|
+ { |
|
|
+ __sync_synchronize (); |
|
|
+ return; |
|
|
+ } |
|
|
+ |
|
|
+ if (__builtin_expect (ws->sched == GFS_STATIC, 1)) |
|
|
+ ent = thr->ts.team_id; |
|
|
+ else if (ws->sched == GFS_GUIDED) |
|
|
+ ent = counts[0]; |
|
|
+ else |
|
|
+ ent = counts[0] / doacross->chunk_size; |
|
|
+ unsigned long *array = (unsigned long *) (doacross->array |
|
|
+ + ent * doacross->elt_sz); |
|
|
+ |
|
|
+ if (__builtin_expect (doacross->flattened, 1)) |
|
|
+ { |
|
|
+ unsigned long flattened |
|
|
+ = (unsigned long) counts[0] << doacross->shift_counts[0]; |
|
|
+ |
|
|
+ for (i = 1; i < doacross->ncounts; i++) |
|
|
+ flattened |= (unsigned long) counts[i] |
|
|
+ << doacross->shift_counts[i]; |
|
|
+ flattened++; |
|
|
+ if (flattened == __atomic_load_n (array, MEMMODEL_ACQUIRE)) |
|
|
+ __atomic_thread_fence (MEMMODEL_RELEASE); |
|
|
+ else |
|
|
+ __atomic_store_n (array, flattened, MEMMODEL_RELEASE); |
|
|
+ return; |
|
|
+ } |
|
|
+ |
|
|
+ __atomic_thread_fence (MEMMODEL_ACQUIRE); |
|
|
+ for (i = doacross->ncounts; i-- > 0; ) |
|
|
+ { |
|
|
+ if (counts[i] + 1UL != __atomic_load_n (&array[i], MEMMODEL_RELAXED)) |
|
|
+ __atomic_store_n (&array[i], counts[i] + 1UL, MEMMODEL_RELEASE); |
|
|
+ } |
|
|
+} |
|
|
+ |
|
|
+/* DOACROSS WAIT operation. */ |
|
|
+ |
|
|
+void |
|
|
+GOMP_doacross_wait (long first, ...) |
|
|
+{ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ struct gomp_work_share *ws = thr->ts.work_share; |
|
|
+ struct gomp_doacross_work_share *doacross = ws->doacross; |
|
|
+ va_list ap; |
|
|
+ unsigned long ent; |
|
|
+ unsigned int i; |
|
|
+ |
|
|
+ if (__builtin_expect (doacross == NULL, 0)) |
|
|
+ { |
|
|
+ __sync_synchronize (); |
|
|
+ return; |
|
|
+ } |
|
|
+ |
|
|
+ if (__builtin_expect (ws->sched == GFS_STATIC, 1)) |
|
|
+ { |
|
|
+ if (ws->chunk_size == 0) |
|
|
+ { |
|
|
+ if (first < doacross->boundary) |
|
|
+ ent = first / (doacross->q + 1); |
|
|
+ else |
|
|
+ ent = (first - doacross->boundary) / doacross->q |
|
|
+ + doacross->t; |
|
|
+ } |
|
|
+ else |
|
|
+ ent = first / ws->chunk_size % thr->ts.team->nthreads; |
|
|
+ } |
|
|
+ else if (ws->sched == GFS_GUIDED) |
|
|
+ ent = first; |
|
|
+ else |
|
|
+ ent = first / doacross->chunk_size; |
|
|
+ unsigned long *array = (unsigned long *) (doacross->array |
|
|
+ + ent * doacross->elt_sz); |
|
|
+ |
|
|
+ if (__builtin_expect (doacross->flattened, 1)) |
|
|
+ { |
|
|
+ unsigned long flattened |
|
|
+ = (unsigned long) first << doacross->shift_counts[0]; |
|
|
+ unsigned long cur; |
|
|
+ |
|
|
+ va_start (ap, first); |
|
|
+ for (i = 1; i < doacross->ncounts; i++) |
|
|
+ flattened |= (unsigned long) va_arg (ap, long) |
|
|
+ << doacross->shift_counts[i]; |
|
|
+ cur = __atomic_load_n (array, MEMMODEL_ACQUIRE); |
|
|
+ if (flattened < cur) |
|
|
+ { |
|
|
+ __atomic_thread_fence (MEMMODEL_RELEASE); |
|
|
+ va_end (ap); |
|
|
+ return; |
|
|
+ } |
|
|
+ doacross_spin (array, flattened, cur); |
|
|
+ __atomic_thread_fence (MEMMODEL_RELEASE); |
|
|
+ va_end (ap); |
|
|
+ return; |
|
|
+ } |
|
|
+ |
|
|
+ do |
|
|
+ { |
|
|
+ va_start (ap, first); |
|
|
+ for (i = 0; i < doacross->ncounts; i++) |
|
|
+ { |
|
|
+ unsigned long thisv |
|
|
+ = (unsigned long) (i ? va_arg (ap, long) : first) + 1; |
|
|
+ unsigned long cur = __atomic_load_n (&array[i], MEMMODEL_RELAXED); |
|
|
+ if (thisv < cur) |
|
|
+ { |
|
|
+ i = doacross->ncounts; |
|
|
+ break; |
|
|
+ } |
|
|
+ if (thisv > cur) |
|
|
+ break; |
|
|
+ } |
|
|
+ va_end (ap); |
|
|
+ if (i == doacross->ncounts) |
|
|
+ break; |
|
|
+ cpu_relax (); |
|
|
+ } |
|
|
+ while (1); |
|
|
+ __sync_synchronize (); |
|
|
+} |
|
|
+ |
|
|
+typedef unsigned long long gomp_ull; |
|
|
+ |
|
|
+void |
|
|
+gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size) |
|
|
+{ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ struct gomp_team *team = thr->ts.team; |
|
|
+ struct gomp_work_share *ws = thr->ts.work_share; |
|
|
+ unsigned int i, bits[MAX_COLLAPSED_BITS], num_bits = 0; |
|
|
+ unsigned long ent, num_ents, elt_sz, shift_sz; |
|
|
+ struct gomp_doacross_work_share *doacross; |
|
|
+ |
|
|
+ if (team == NULL || team->nthreads == 1) |
|
|
+ return; |
|
|
+ |
|
|
+ for (i = 0; i < ncounts; i++) |
|
|
+ { |
|
|
+ /* If any count is 0, GOMP_doacross_{post,wait} can't be called. */ |
|
|
+ if (counts[i] == 0) |
|
|
+ return; |
|
|
+ |
|
|
+ if (num_bits <= MAX_COLLAPSED_BITS) |
|
|
+ { |
|
|
+ unsigned int this_bits; |
|
|
+ if (counts[i] == 1) |
|
|
+ this_bits = 1; |
|
|
+ else |
|
|
+ this_bits = __SIZEOF_LONG_LONG__ * __CHAR_BIT__ |
|
|
+ - __builtin_clzll (counts[i] - 1); |
|
|
+ if (num_bits + this_bits <= MAX_COLLAPSED_BITS) |
|
|
+ { |
|
|
+ bits[i] = this_bits; |
|
|
+ num_bits += this_bits; |
|
|
+ } |
|
|
+ else |
|
|
+ num_bits = MAX_COLLAPSED_BITS + 1; |
|
|
+ } |
|
|
+ } |
|
|
+ |
|
|
+ if (ws->sched == GFS_STATIC) |
|
|
+ num_ents = team->nthreads; |
|
|
+ else if (ws->sched == GFS_GUIDED) |
|
|
+ num_ents = counts[0]; |
|
|
+ else |
|
|
+ num_ents = (counts[0] - 1) / chunk_size + 1; |
|
|
+ if (num_bits <= MAX_COLLAPSED_BITS) |
|
|
+ { |
|
|
+ elt_sz = sizeof (unsigned long); |
|
|
+ shift_sz = ncounts * sizeof (unsigned int); |
|
|
+ } |
|
|
+ else |
|
|
+ { |
|
|
+ if (sizeof (gomp_ull) == sizeof (unsigned long)) |
|
|
+ elt_sz = sizeof (gomp_ull) * ncounts; |
|
|
+ else if (sizeof (gomp_ull) == 2 * sizeof (unsigned long)) |
|
|
+ elt_sz = sizeof (unsigned long) * 2 * ncounts; |
|
|
+ else |
|
|
+ abort (); |
|
|
+ shift_sz = 0; |
|
|
+ } |
|
|
+ elt_sz = (elt_sz + 63) & ~63UL; |
|
|
+ |
|
|
+ doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz |
|
|
+ + shift_sz); |
|
|
+ doacross->chunk_size_ull = chunk_size; |
|
|
+ doacross->elt_sz = elt_sz; |
|
|
+ doacross->ncounts = ncounts; |
|
|
+ doacross->flattened = false; |
|
|
+ doacross->boundary = 0; |
|
|
+ doacross->array = (unsigned char *) |
|
|
+ ((((uintptr_t) (doacross + 1)) + 63 + shift_sz) |
|
|
+ & ~(uintptr_t) 63); |
|
|
+ if (num_bits <= MAX_COLLAPSED_BITS) |
|
|
+ { |
|
|
+ unsigned int shift_count = 0; |
|
|
+ doacross->flattened = true; |
|
|
+ for (i = ncounts; i > 0; i--) |
|
|
+ { |
|
|
+ doacross->shift_counts[i - 1] = shift_count; |
|
|
+ shift_count += bits[i - 1]; |
|
|
+ } |
|
|
+ for (ent = 0; ent < num_ents; ent++) |
|
|
+ *(unsigned long *) (doacross->array + ent * elt_sz) = 0; |
|
|
+ } |
|
|
+ else |
|
|
+ for (ent = 0; ent < num_ents; ent++) |
|
|
+ memset (doacross->array + ent * elt_sz, '\0', |
|
|
+ sizeof (unsigned long) * ncounts); |
|
|
+ if (ws->sched == GFS_STATIC && chunk_size == 0) |
|
|
+ { |
|
|
+ gomp_ull q = counts[0] / num_ents; |
|
|
+ gomp_ull t = counts[0] % num_ents; |
|
|
+ doacross->boundary_ull = t * (q + 1); |
|
|
+ doacross->q_ull = q; |
|
|
+ doacross->t = t; |
|
|
+ } |
|
|
+ ws->doacross = doacross; |
|
|
+} |
|
|
+ |
|
|
+/* DOACROSS POST operation. */ |
|
|
+ |
|
|
+void |
|
|
+GOMP_doacross_ull_post (gomp_ull *counts) |
|
|
+{ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ struct gomp_work_share *ws = thr->ts.work_share; |
|
|
+ struct gomp_doacross_work_share *doacross = ws->doacross; |
|
|
+ unsigned long ent; |
|
|
+ unsigned int i; |
|
|
+ |
|
|
+ if (__builtin_expect (doacross == NULL, 0)) |
|
|
+ { |
|
|
+ __sync_synchronize (); |
|
|
+ return; |
|
|
+ } |
|
|
+ |
|
|
+ if (__builtin_expect (ws->sched == GFS_STATIC, 1)) |
|
|
+ ent = thr->ts.team_id; |
|
|
+ else if (ws->sched == GFS_GUIDED) |
|
|
+ ent = counts[0]; |
|
|
+ else |
|
|
+ ent = counts[0] / doacross->chunk_size_ull; |
|
|
+ |
|
|
+ if (__builtin_expect (doacross->flattened, 1)) |
|
|
+ { |
|
|
+ unsigned long *array = (unsigned long *) (doacross->array |
|
|
+ + ent * doacross->elt_sz); |
|
|
+ gomp_ull flattened |
|
|
+ = counts[0] << doacross->shift_counts[0]; |
|
|
+ |
|
|
+ for (i = 1; i < doacross->ncounts; i++) |
|
|
+ flattened |= counts[i] << doacross->shift_counts[i]; |
|
|
+ flattened++; |
|
|
+ if (flattened == __atomic_load_n (array, MEMMODEL_ACQUIRE)) |
|
|
+ __atomic_thread_fence (MEMMODEL_RELEASE); |
|
|
+ else |
|
|
+ __atomic_store_n (array, flattened, MEMMODEL_RELEASE); |
|
|
+ return; |
|
|
+ } |
|
|
+ |
|
|
+ __atomic_thread_fence (MEMMODEL_ACQUIRE); |
|
|
+ if (sizeof (gomp_ull) == sizeof (unsigned long)) |
|
|
+ { |
|
|
+ gomp_ull *array = (gomp_ull *) (doacross->array |
|
|
+ + ent * doacross->elt_sz); |
|
|
+ |
|
|
+ for (i = doacross->ncounts; i-- > 0; ) |
|
|
+ { |
|
|
+ if (counts[i] + 1UL != __atomic_load_n (&array[i], MEMMODEL_RELAXED)) |
|
|
+ __atomic_store_n (&array[i], counts[i] + 1UL, MEMMODEL_RELEASE); |
|
|
+ } |
|
|
+ } |
|
|
+ else |
|
|
+ { |
|
|
+ unsigned long *array = (unsigned long *) (doacross->array |
|
|
+ + ent * doacross->elt_sz); |
|
|
+ |
|
|
+ for (i = doacross->ncounts; i-- > 0; ) |
|
|
+ { |
|
|
+ gomp_ull cull = counts[i] + 1UL; |
|
|
+ unsigned long c = (unsigned long) cull; |
|
|
+ if (c != __atomic_load_n (&array[2 * i + 1], MEMMODEL_RELAXED)) |
|
|
+ __atomic_store_n (&array[2 * i + 1], c, MEMMODEL_RELEASE); |
|
|
+ c = cull >> (__SIZEOF_LONG_LONG__ * __CHAR_BIT__ / 2); |
|
|
+ if (c != __atomic_load_n (&array[2 * i], MEMMODEL_RELAXED)) |
|
|
+ __atomic_store_n (&array[2 * i], c, MEMMODEL_RELEASE); |
|
|
+ } |
|
|
+ } |
|
|
+} |
|
|
+ |
|
|
+/* DOACROSS WAIT operation. */ |
|
|
+ |
|
|
+void |
|
|
+GOMP_doacross_ull_wait (gomp_ull first, ...) |
|
|
+{ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ struct gomp_work_share *ws = thr->ts.work_share; |
|
|
+ struct gomp_doacross_work_share *doacross = ws->doacross; |
|
|
+ va_list ap; |
|
|
+ unsigned long ent; |
|
|
+ unsigned int i; |
|
|
+ |
|
|
+ if (__builtin_expect (doacross == NULL, 0)) |
|
|
+ { |
|
|
+ __sync_synchronize (); |
|
|
+ return; |
|
|
+ } |
|
|
+ |
|
|
+ if (__builtin_expect (ws->sched == GFS_STATIC, 1)) |
|
|
+ { |
|
|
+ if (ws->chunk_size_ull == 0) |
|
|
+ { |
|
|
+ if (first < doacross->boundary_ull) |
|
|
+ ent = first / (doacross->q_ull + 1); |
|
|
+ else |
|
|
+ ent = (first - doacross->boundary_ull) / doacross->q_ull |
|
|
+ + doacross->t; |
|
|
+ } |
|
|
+ else |
|
|
+ ent = first / ws->chunk_size_ull % thr->ts.team->nthreads; |
|
|
+ } |
|
|
+ else if (ws->sched == GFS_GUIDED) |
|
|
+ ent = first; |
|
|
+ else |
|
|
+ ent = first / doacross->chunk_size_ull; |
|
|
+ |
|
|
+ if (__builtin_expect (doacross->flattened, 1)) |
|
|
+ { |
|
|
+ unsigned long *array = (unsigned long *) (doacross->array |
|
|
+ + ent * doacross->elt_sz); |
|
|
+ gomp_ull flattened = first << doacross->shift_counts[0]; |
|
|
+ unsigned long cur; |
|
|
+ |
|
|
+ va_start (ap, first); |
|
|
+ for (i = 1; i < doacross->ncounts; i++) |
|
|
+ flattened |= va_arg (ap, gomp_ull) |
|
|
+ << doacross->shift_counts[i]; |
|
|
+ cur = __atomic_load_n (array, MEMMODEL_ACQUIRE); |
|
|
+ if (flattened < cur) |
|
|
+ { |
|
|
+ __atomic_thread_fence (MEMMODEL_RELEASE); |
|
|
+ va_end (ap); |
|
|
+ return; |
|
|
+ } |
|
|
+ doacross_spin (array, flattened, cur); |
|
|
+ __atomic_thread_fence (MEMMODEL_RELEASE); |
|
|
+ va_end (ap); |
|
|
+ return; |
|
|
+ } |
|
|
+ |
|
|
+ if (sizeof (gomp_ull) == sizeof (unsigned long)) |
|
|
+ { |
|
|
+ gomp_ull *array = (gomp_ull *) (doacross->array |
|
|
+ + ent * doacross->elt_sz); |
|
|
+ do |
|
|
+ { |
|
|
+ va_start (ap, first); |
|
|
+ for (i = 0; i < doacross->ncounts; i++) |
|
|
+ { |
|
|
+ gomp_ull thisv |
|
|
+ = (i ? va_arg (ap, gomp_ull) : first) + 1; |
|
|
+ gomp_ull cur = __atomic_load_n (&array[i], MEMMODEL_RELAXED); |
|
|
+ if (thisv < cur) |
|
|
+ { |
|
|
+ i = doacross->ncounts; |
|
|
+ break; |
|
|
+ } |
|
|
+ if (thisv > cur) |
|
|
+ break; |
|
|
+ } |
|
|
+ va_end (ap); |
|
|
+ if (i == doacross->ncounts) |
|
|
+ break; |
|
|
+ cpu_relax (); |
|
|
+ } |
|
|
+ while (1); |
|
|
+ } |
|
|
+ else |
|
|
+ { |
|
|
+ unsigned long *array = (unsigned long *) (doacross->array |
|
|
+ + ent * doacross->elt_sz); |
|
|
+ do |
|
|
+ { |
|
|
+ va_start (ap, first); |
|
|
+ for (i = 0; i < doacross->ncounts; i++) |
|
|
+ { |
|
|
+ gomp_ull thisv |
|
|
+ = (i ? va_arg (ap, gomp_ull) : first) + 1; |
|
|
+ unsigned long t |
|
|
+ = thisv >> (__SIZEOF_LONG_LONG__ * __CHAR_BIT__ / 2); |
|
|
+ unsigned long cur |
|
|
+ = __atomic_load_n (&array[2 * i], MEMMODEL_RELAXED); |
|
|
+ if (t < cur) |
|
|
+ { |
|
|
+ i = doacross->ncounts; |
|
|
+ break; |
|
|
+ } |
|
|
+ if (t > cur) |
|
|
+ break; |
|
|
+ t = thisv; |
|
|
+ cur = __atomic_load_n (&array[2 * i + 1], MEMMODEL_RELAXED); |
|
|
+ if (t < cur) |
|
|
+ { |
|
|
+ i = doacross->ncounts; |
|
|
+ break; |
|
|
+ } |
|
|
+ if (t > cur) |
|
|
+ break; |
|
|
+ } |
|
|
+ va_end (ap); |
|
|
+ if (i == doacross->ncounts) |
|
|
+ break; |
|
|
+ cpu_relax (); |
|
|
+ } |
|
|
+ while (1); |
|
|
+ } |
|
|
+ __sync_synchronize (); |
|
|
+} |
|
|
--- libgomp/loop.c.jj 2014-05-15 10:56:36.487505570 +0200 |
|
|
+++ libgomp/loop.c 2016-07-13 16:57:13.488423109 +0200 |
|
|
@@ -110,6 +110,11 @@ gomp_loop_static_start (long start, long |
|
|
return !gomp_iter_static_next (istart, iend); |
|
|
} |
|
|
|
|
|
+/* The current dynamic implementation is always monotonic. The |
|
|
+ entrypoints without nonmonotonic in them have to be always monotonic, |
|
|
+ but the nonmonotonic ones could be changed to use work-stealing for |
|
|
+ improved scalability. */ |
|
|
+ |
|
|
static bool |
|
|
gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size, |
|
|
long *istart, long *iend) |
|
|
@@ -135,6 +140,9 @@ gomp_loop_dynamic_start (long start, lon |
|
|
return ret; |
|
|
} |
|
|
|
|
|
+/* Similarly as for dynamic, though the question is how the chunk sizes can |
|
|
+ be decreased without central locking or atomics. */ |
|
|
+ |
|
|
static bool |
|
|
gomp_loop_guided_start (long start, long end, long incr, long chunk_size, |
|
|
long *istart, long *iend) |
|
|
@@ -168,13 +176,16 @@ GOMP_loop_runtime_start (long start, lon |
|
|
switch (icv->run_sched_var) |
|
|
{ |
|
|
case GFS_STATIC: |
|
|
- return gomp_loop_static_start (start, end, incr, icv->run_sched_modifier, |
|
|
+ return gomp_loop_static_start (start, end, incr, |
|
|
+ icv->run_sched_chunk_size, |
|
|
istart, iend); |
|
|
case GFS_DYNAMIC: |
|
|
- return gomp_loop_dynamic_start (start, end, incr, icv->run_sched_modifier, |
|
|
+ return gomp_loop_dynamic_start (start, end, incr, |
|
|
+ icv->run_sched_chunk_size, |
|
|
istart, iend); |
|
|
case GFS_GUIDED: |
|
|
- return gomp_loop_guided_start (start, end, incr, icv->run_sched_modifier, |
|
|
+ return gomp_loop_guided_start (start, end, incr, |
|
|
+ icv->run_sched_chunk_size, |
|
|
istart, iend); |
|
|
case GFS_AUTO: |
|
|
/* For now map to schedule(static), later on we could play with feedback |
|
|
@@ -265,15 +276,15 @@ GOMP_loop_ordered_runtime_start (long st |
|
|
{ |
|
|
case GFS_STATIC: |
|
|
return gomp_loop_ordered_static_start (start, end, incr, |
|
|
- icv->run_sched_modifier, |
|
|
+ icv->run_sched_chunk_size, |
|
|
istart, iend); |
|
|
case GFS_DYNAMIC: |
|
|
return gomp_loop_ordered_dynamic_start (start, end, incr, |
|
|
- icv->run_sched_modifier, |
|
|
+ icv->run_sched_chunk_size, |
|
|
istart, iend); |
|
|
case GFS_GUIDED: |
|
|
return gomp_loop_ordered_guided_start (start, end, incr, |
|
|
- icv->run_sched_modifier, |
|
|
+ icv->run_sched_chunk_size, |
|
|
istart, iend); |
|
|
case GFS_AUTO: |
|
|
/* For now map to schedule(static), later on we could play with feedback |
|
|
@@ -285,6 +296,111 @@ GOMP_loop_ordered_runtime_start (long st |
|
|
} |
|
|
} |
|
|
|
|
|
+/* The *_doacross_*_start routines are similar. The only difference is that |
|
|
+ this work-share construct is initialized to expect an ORDERED(N) - DOACROSS |
|
|
+ section, and the worksharing loop always iterates from 0 to COUNTS[0] - 1 |
|
|
+ and other COUNTS array elements tell the library the number of iterations |
|
|
+ in the ordered inner loops. */ |
|
|
+ |
|
|
+static bool |
|
|
+gomp_loop_doacross_static_start (unsigned ncounts, long *counts, |
|
|
+ long chunk_size, long *istart, long *iend) |
|
|
+{ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ |
|
|
+ thr->ts.static_trip = 0; |
|
|
+ if (gomp_work_share_start (false)) |
|
|
+ { |
|
|
+ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, |
|
|
+ GFS_STATIC, chunk_size); |
|
|
+ gomp_doacross_init (ncounts, counts, chunk_size); |
|
|
+ gomp_work_share_init_done (); |
|
|
+ } |
|
|
+ |
|
|
+ return !gomp_iter_static_next (istart, iend); |
|
|
+} |
|
|
+ |
|
|
+static bool |
|
|
+gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts, |
|
|
+ long chunk_size, long *istart, long *iend) |
|
|
+{ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ bool ret; |
|
|
+ |
|
|
+ if (gomp_work_share_start (false)) |
|
|
+ { |
|
|
+ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, |
|
|
+ GFS_DYNAMIC, chunk_size); |
|
|
+ gomp_doacross_init (ncounts, counts, chunk_size); |
|
|
+ gomp_work_share_init_done (); |
|
|
+ } |
|
|
+ |
|
|
+#ifdef HAVE_SYNC_BUILTINS |
|
|
+ ret = gomp_iter_dynamic_next (istart, iend); |
|
|
+#else |
|
|
+ gomp_mutex_lock (&thr->ts.work_share->lock); |
|
|
+ ret = gomp_iter_dynamic_next_locked (istart, iend); |
|
|
+ gomp_mutex_unlock (&thr->ts.work_share->lock); |
|
|
+#endif |
|
|
+ |
|
|
+ return ret; |
|
|
+} |
|
|
+ |
|
|
+static bool |
|
|
+gomp_loop_doacross_guided_start (unsigned ncounts, long *counts, |
|
|
+ long chunk_size, long *istart, long *iend) |
|
|
+{ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ bool ret; |
|
|
+ |
|
|
+ if (gomp_work_share_start (false)) |
|
|
+ { |
|
|
+ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, |
|
|
+ GFS_GUIDED, chunk_size); |
|
|
+ gomp_doacross_init (ncounts, counts, chunk_size); |
|
|
+ gomp_work_share_init_done (); |
|
|
+ } |
|
|
+ |
|
|
+#ifdef HAVE_SYNC_BUILTINS |
|
|
+ ret = gomp_iter_guided_next (istart, iend); |
|
|
+#else |
|
|
+ gomp_mutex_lock (&thr->ts.work_share->lock); |
|
|
+ ret = gomp_iter_guided_next_locked (istart, iend); |
|
|
+ gomp_mutex_unlock (&thr->ts.work_share->lock); |
|
|
+#endif |
|
|
+ |
|
|
+ return ret; |
|
|
+} |
|
|
+ |
|
|
+bool |
|
|
+GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts, |
|
|
+ long *istart, long *iend) |
|
|
+{ |
|
|
+ struct gomp_task_icv *icv = gomp_icv (false); |
|
|
+ switch (icv->run_sched_var) |
|
|
+ { |
|
|
+ case GFS_STATIC: |
|
|
+ return gomp_loop_doacross_static_start (ncounts, counts, |
|
|
+ icv->run_sched_chunk_size, |
|
|
+ istart, iend); |
|
|
+ case GFS_DYNAMIC: |
|
|
+ return gomp_loop_doacross_dynamic_start (ncounts, counts, |
|
|
+ icv->run_sched_chunk_size, |
|
|
+ istart, iend); |
|
|
+ case GFS_GUIDED: |
|
|
+ return gomp_loop_doacross_guided_start (ncounts, counts, |
|
|
+ icv->run_sched_chunk_size, |
|
|
+ istart, iend); |
|
|
+ case GFS_AUTO: |
|
|
+ /* For now map to schedule(static), later on we could play with feedback |
|
|
+ driven choice. */ |
|
|
+ return gomp_loop_doacross_static_start (ncounts, counts, |
|
|
+ 0, istart, iend); |
|
|
+ default: |
|
|
+ abort (); |
|
|
+ } |
|
|
+} |
|
|
+ |
|
|
/* The *_next routines are called when the thread completes processing of |
|
|
the iteration block currently assigned to it. If the work-share |
|
|
construct is bound directly to a parallel construct, then the iteration |
|
|
@@ -483,7 +599,7 @@ GOMP_parallel_loop_runtime_start (void ( |
|
|
{ |
|
|
struct gomp_task_icv *icv = gomp_icv (false); |
|
|
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, |
|
|
- icv->run_sched_var, icv->run_sched_modifier, 0); |
|
|
+ icv->run_sched_var, icv->run_sched_chunk_size, 0); |
|
|
} |
|
|
|
|
|
ialias_redirect (GOMP_parallel_end) |
|
|
@@ -521,6 +637,37 @@ GOMP_parallel_loop_guided (void (*fn) (v |
|
|
GOMP_parallel_end (); |
|
|
} |
|
|
|
|
|
+#ifdef HAVE_ATTRIBUTE_ALIAS |
|
|
+extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic |
|
|
+ __attribute__((alias ("GOMP_parallel_loop_dynamic"))); |
|
|
+extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided |
|
|
+ __attribute__((alias ("GOMP_parallel_loop_guided"))); |
|
|
+#else |
|
|
+void |
|
|
+GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data, |
|
|
+ unsigned num_threads, long start, |
|
|
+ long end, long incr, long chunk_size, |
|
|
+ unsigned flags) |
|
|
+{ |
|
|
+ gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, |
|
|
+ GFS_DYNAMIC, chunk_size, flags); |
|
|
+ fn (data); |
|
|
+ GOMP_parallel_end (); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data, |
|
|
+ unsigned num_threads, long start, |
|
|
+ long end, long incr, long chunk_size, |
|
|
+ unsigned flags) |
|
|
+{ |
|
|
+ gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, |
|
|
+ GFS_GUIDED, chunk_size, flags); |
|
|
+ fn (data); |
|
|
+ GOMP_parallel_end (); |
|
|
+} |
|
|
+#endif |
|
|
+ |
|
|
void |
|
|
GOMP_parallel_loop_runtime (void (*fn) (void *), void *data, |
|
|
unsigned num_threads, long start, long end, |
|
|
@@ -528,7 +675,7 @@ GOMP_parallel_loop_runtime (void (*fn) ( |
|
|
{ |
|
|
struct gomp_task_icv *icv = gomp_icv (false); |
|
|
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, |
|
|
- icv->run_sched_var, icv->run_sched_modifier, |
|
|
+ icv->run_sched_var, icv->run_sched_chunk_size, |
|
|
flags); |
|
|
fn (data); |
|
|
GOMP_parallel_end (); |
|
|
@@ -569,6 +716,10 @@ extern __typeof(gomp_loop_dynamic_start) |
|
|
__attribute__((alias ("gomp_loop_dynamic_start"))); |
|
|
extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start |
|
|
__attribute__((alias ("gomp_loop_guided_start"))); |
|
|
+extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start |
|
|
+ __attribute__((alias ("gomp_loop_dynamic_start"))); |
|
|
+extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start |
|
|
+ __attribute__((alias ("gomp_loop_guided_start"))); |
|
|
|
|
|
extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start |
|
|
__attribute__((alias ("gomp_loop_ordered_static_start"))); |
|
|
@@ -577,12 +728,23 @@ extern __typeof(gomp_loop_ordered_dynami |
|
|
extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start |
|
|
__attribute__((alias ("gomp_loop_ordered_guided_start"))); |
|
|
|
|
|
+extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start |
|
|
+ __attribute__((alias ("gomp_loop_doacross_static_start"))); |
|
|
+extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start |
|
|
+ __attribute__((alias ("gomp_loop_doacross_dynamic_start"))); |
|
|
+extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start |
|
|
+ __attribute__((alias ("gomp_loop_doacross_guided_start"))); |
|
|
+ |
|
|
extern __typeof(gomp_loop_static_next) GOMP_loop_static_next |
|
|
__attribute__((alias ("gomp_loop_static_next"))); |
|
|
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next |
|
|
__attribute__((alias ("gomp_loop_dynamic_next"))); |
|
|
extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next |
|
|
__attribute__((alias ("gomp_loop_guided_next"))); |
|
|
+extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next |
|
|
+ __attribute__((alias ("gomp_loop_dynamic_next"))); |
|
|
+extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next |
|
|
+ __attribute__((alias ("gomp_loop_guided_next"))); |
|
|
|
|
|
extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next |
|
|
__attribute__((alias ("gomp_loop_ordered_static_next"))); |
|
|
@@ -613,6 +775,21 @@ GOMP_loop_guided_start (long start, long |
|
|
} |
|
|
|
|
|
bool |
|
|
+GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr, |
|
|
+ long chunk_size, long *istart, |
|
|
+ long *iend) |
|
|
+{ |
|
|
+ return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend); |
|
|
+} |
|
|
+ |
|
|
+bool |
|
|
+GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr, |
|
|
+ long chunk_size, long *istart, long *iend) |
|
|
+{ |
|
|
+ return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend); |
|
|
+} |
|
|
+ |
|
|
+bool |
|
|
GOMP_loop_ordered_static_start (long start, long end, long incr, |
|
|
long chunk_size, long *istart, long *iend) |
|
|
{ |
|
|
@@ -637,6 +814,30 @@ GOMP_loop_ordered_guided_start (long sta |
|
|
} |
|
|
|
|
|
bool |
|
|
+GOMP_loop_doacross_static_start (unsigned ncounts, long *counts, |
|
|
+ long chunk_size, long *istart, long *iend) |
|
|
+{ |
|
|
+ return gomp_loop_doacross_static_start (ncounts, counts, chunk_size, |
|
|
+ istart, iend); |
|
|
+} |
|
|
+ |
|
|
+bool |
|
|
+GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts, |
|
|
+ long chunk_size, long *istart, long *iend) |
|
|
+{ |
|
|
+ return gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size, |
|
|
+ istart, iend); |
|
|
+} |
|
|
+ |
|
|
+bool |
|
|
+GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts, |
|
|
+ long chunk_size, long *istart, long *iend) |
|
|
+{ |
|
|
+ return gomp_loop_doacross_guided_start (ncounts, counts, chunk_size, |
|
|
+ istart, iend); |
|
|
+} |
|
|
+ |
|
|
+bool |
|
|
GOMP_loop_static_next (long *istart, long *iend) |
|
|
{ |
|
|
return gomp_loop_static_next (istart, iend); |
|
|
@@ -653,6 +854,18 @@ GOMP_loop_guided_next (long *istart, lon |
|
|
{ |
|
|
return gomp_loop_guided_next (istart, iend); |
|
|
} |
|
|
+ |
|
|
+bool |
|
|
+GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend) |
|
|
+{ |
|
|
+ return gomp_loop_dynamic_next (istart, iend); |
|
|
+} |
|
|
+ |
|
|
+bool |
|
|
+GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend) |
|
|
+{ |
|
|
+ return gomp_loop_guided_next (istart, iend); |
|
|
+} |
|
|
|
|
|
bool |
|
|
GOMP_loop_ordered_static_next (long *istart, long *iend) |
|
|
--- libgomp/error.c.jj 2013-01-21 16:00:31.834953566 +0100 |
|
|
+++ libgomp/error.c 2016-07-13 16:57:04.437535335 +0200 |
|
|
@@ -35,7 +35,26 @@ |
|
|
#include <stdlib.h> |
|
|
|
|
|
|
|
|
-static void |
|
|
+#undef gomp_vdebug |
|
|
+void |
|
|
+gomp_vdebug (int kind __attribute__ ((unused)), const char *msg, va_list list) |
|
|
+{ |
|
|
+ if (gomp_debug_var) |
|
|
+ vfprintf (stderr, msg, list); |
|
|
+} |
|
|
+ |
|
|
+#undef gomp_debug |
|
|
+void |
|
|
+gomp_debug (int kind, const char *msg, ...) |
|
|
+{ |
|
|
+ va_list list; |
|
|
+ |
|
|
+ va_start (list, msg); |
|
|
+ gomp_vdebug (kind, msg, list); |
|
|
+ va_end (list); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
gomp_verror (const char *fmt, va_list list) |
|
|
{ |
|
|
fputs ("\nlibgomp: ", stderr); |
|
|
@@ -54,13 +73,18 @@ gomp_error (const char *fmt, ...) |
|
|
} |
|
|
|
|
|
void |
|
|
+gomp_vfatal (const char *fmt, va_list list) |
|
|
+{ |
|
|
+ gomp_verror (fmt, list); |
|
|
+ exit (EXIT_FAILURE); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
gomp_fatal (const char *fmt, ...) |
|
|
{ |
|
|
va_list list; |
|
|
|
|
|
va_start (list, fmt); |
|
|
- gomp_verror (fmt, list); |
|
|
+ gomp_vfatal (fmt, list); |
|
|
va_end (list); |
|
|
- |
|
|
- exit (EXIT_FAILURE); |
|
|
} |
|
|
--- libgomp/Makefile.am.jj 2014-05-15 11:12:10.000000000 +0200 |
|
|
+++ libgomp/Makefile.am 2016-07-14 16:10:51.968202878 +0200 |
|
|
@@ -60,7 +60,13 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_L |
|
|
libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ |
|
|
iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \ |
|
|
task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \ |
|
|
- time.c fortran.c affinity.c target.c |
|
|
+ time.c fortran.c affinity.c target.c splay-tree.c libgomp-plugin.c \ |
|
|
+ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c oacc-async.c \ |
|
|
+ oacc-plugin.c oacc-cuda.c priority_queue.c |
|
|
+ |
|
|
+if USE_FORTRAN |
|
|
+libgomp_la_SOURCES += openacc.f90 |
|
|
+endif |
|
|
|
|
|
nodist_noinst_HEADERS = libgomp_f.h |
|
|
nodist_libsubinclude_HEADERS = omp.h |
|
|
--- libgomp/Makefile.in.jj 2014-05-15 11:12:10.000000000 +0200 |
|
|
+++ libgomp/Makefile.in 2016-07-14 16:11:10.981954087 +0200 |
|
|
@@ -36,6 +36,7 @@ POST_UNINSTALL = : |
|
|
build_triplet = @build@ |
|
|
host_triplet = @host@ |
|
|
target_triplet = @target@ |
|
|
+@USE_FORTRAN_TRUE@am__append_1 = openacc.f90 |
|
|
subdir = . |
|
|
DIST_COMMON = ChangeLog $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ |
|
|
$(top_srcdir)/configure $(am__configure_deps) \ |
|
|
@@ -92,11 +93,15 @@ am__installdirs = "$(DESTDIR)$(toolexecl |
|
|
"$(DESTDIR)$(toolexeclibdir)" |
|
|
LTLIBRARIES = $(toolexeclib_LTLIBRARIES) |
|
|
libgomp_la_LIBADD = |
|
|
+@USE_FORTRAN_TRUE@am__objects_1 = openacc.lo |
|
|
am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \ |
|
|
error.lo iter.lo iter_ull.lo loop.lo loop_ull.lo ordered.lo \ |
|
|
parallel.lo sections.lo single.lo task.lo team.lo work.lo \ |
|
|
lock.lo mutex.lo proc.lo sem.lo bar.lo ptrlock.lo time.lo \ |
|
|
- fortran.lo affinity.lo target.lo |
|
|
+ fortran.lo affinity.lo target.lo splay-tree.lo \ |
|
|
+ libgomp-plugin.lo oacc-parallel.lo oacc-host.lo oacc-init.lo \ |
|
|
+ oacc-mem.lo oacc-async.lo oacc-plugin.lo oacc-cuda.lo \ |
|
|
+ priority_queue.lo $(am__objects_1) |
|
|
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) |
|
|
DEFAULT_INCLUDES = -I.@am__isrc@ |
|
|
depcomp = $(SHELL) $(top_srcdir)/../depcomp |
|
|
@@ -108,6 +113,13 @@ LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIB |
|
|
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ |
|
|
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) |
|
|
CCLD = $(CC) |
|
|
+FCCOMPILE = $(FC) $(AM_FCFLAGS) $(FCFLAGS) |
|
|
+LTFCCOMPILE = $(LIBTOOL) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ |
|
|
+ --mode=compile $(FC) $(AM_FCFLAGS) $(FCFLAGS) |
|
|
+FCLD = $(FC) |
|
|
+FCLINK = $(LIBTOOL) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ |
|
|
+ --mode=link $(FCLD) $(AM_FCFLAGS) $(FCFLAGS) $(AM_LDFLAGS) \ |
|
|
+ $(LDFLAGS) -o $@ |
|
|
SOURCES = $(libgomp_la_SOURCES) |
|
|
MULTISRCTOP = |
|
|
MULTIBUILDTOP = |
|
|
@@ -315,10 +327,12 @@ libgomp_la_LDFLAGS = $(libgomp_version_i |
|
|
libgomp_la_DEPENDENCIES = $(libgomp_version_dep) |
|
|
libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS) |
|
|
libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ |
|
|
- iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \ |
|
|
- task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \ |
|
|
- time.c fortran.c affinity.c target.c |
|
|
- |
|
|
+ iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c \ |
|
|
+ single.c task.c team.c work.c lock.c mutex.c proc.c sem.c \ |
|
|
+ bar.c ptrlock.c time.c fortran.c affinity.c target.c \ |
|
|
+ splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \ |
|
|
+ oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \ |
|
|
+ priority_queue.c $(am__append_1) |
|
|
nodist_noinst_HEADERS = libgomp_f.h |
|
|
nodist_libsubinclude_HEADERS = omp.h |
|
|
@USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod |
|
|
@@ -351,7 +365,7 @@ all: config.h |
|
|
$(MAKE) $(AM_MAKEFLAGS) all-recursive |
|
|
|
|
|
.SUFFIXES: |
|
|
-.SUFFIXES: .c .dvi .lo .o .obj .ps |
|
|
+.SUFFIXES: .c .dvi .f90 .lo .o .obj .ps |
|
|
am--refresh: |
|
|
@: |
|
|
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) |
|
|
@@ -463,17 +477,27 @@ distclean-compile: |
|
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fortran.Plo@am__quote@ |
|
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter.Plo@am__quote@ |
|
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter_ull.Plo@am__quote@ |
|
|
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp-plugin.Plo@am__quote@ |
|
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@ |
|
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop.Plo@am__quote@ |
|
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop_ull.Plo@am__quote@ |
|
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mutex.Plo@am__quote@ |
|
|
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-async.Plo@am__quote@ |
|
|
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-cuda.Plo@am__quote@ |
|
|
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-host.Plo@am__quote@ |
|
|
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-init.Plo@am__quote@ |
|
|
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-mem.Plo@am__quote@ |
|
|
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@ |
|
|
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@ |
|
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@ |
|
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@ |
|
|
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@ |
|
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc.Plo@am__quote@ |
|
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ptrlock.Plo@am__quote@ |
|
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@ |
|
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@ |
|
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ |
|
|
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@ |
|
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@ |
|
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@ |
|
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@ |
|
|
@@ -501,6 +525,15 @@ distclean-compile: |
|
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
|
|
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< |
|
|
|
|
|
+.f90.o: |
|
|
+ $(FCCOMPILE) -c -o $@ $< |
|
|
+ |
|
|
+.f90.obj: |
|
|
+ $(FCCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` |
|
|
+ |
|
|
+.f90.lo: |
|
|
+ $(LTFCCOMPILE) -c -o $@ $< |
|
|
+ |
|
|
mostlyclean-libtool: |
|
|
-rm -f *.lo |
|
|
|
|
|
--- libgomp/task.c.jj 2014-08-06 16:25:16.575091658 +0200 |
|
|
+++ libgomp/task.c 2016-07-13 17:47:58.722758497 +0200 |
|
|
@@ -28,6 +28,7 @@ |
|
|
#include "libgomp.h" |
|
|
#include <stdlib.h> |
|
|
#include <string.h> |
|
|
+#include "gomp-constants.h" |
|
|
|
|
|
typedef struct gomp_task_depend_entry *hash_entry_type; |
|
|
|
|
|
@@ -63,6 +64,14 @@ void |
|
|
gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task, |
|
|
struct gomp_task_icv *prev_icv) |
|
|
{ |
|
|
+ /* It would seem that using memset here would be a win, but it turns |
|
|
+ out that partially filling gomp_task allows us to keep the |
|
|
+ overhead of task creation low. In the nqueens-1.c test, for a |
|
|
+ sufficiently large N, we drop the overhead from 5-6% to 1%. |
|
|
+ |
|
|
+ Note, the nqueens-1.c test in serial mode is a good test to |
|
|
+ benchmark the overhead of creating tasks as there are millions of |
|
|
+ tiny tasks created that all run undeferred. */ |
|
|
task->parent = parent_task; |
|
|
task->icv = *prev_icv; |
|
|
task->kind = GOMP_TASK_IMPLICIT; |
|
|
@@ -71,7 +80,7 @@ gomp_init_task (struct gomp_task *task, |
|
|
task->final_task = false; |
|
|
task->copy_ctors_done = false; |
|
|
task->parent_depends_on = false; |
|
|
- task->children = NULL; |
|
|
+ priority_queue_init (&task->children_queue); |
|
|
task->taskgroup = NULL; |
|
|
task->dependers = NULL; |
|
|
task->depend_hash = NULL; |
|
|
@@ -90,30 +99,194 @@ gomp_end_task (void) |
|
|
thr->task = task->parent; |
|
|
} |
|
|
|
|
|
+/* Clear the parent field of every task in LIST. */ |
|
|
+ |
|
|
static inline void |
|
|
-gomp_clear_parent (struct gomp_task *children) |
|
|
+gomp_clear_parent_in_list (struct priority_list *list) |
|
|
{ |
|
|
- struct gomp_task *task = children; |
|
|
- |
|
|
- if (task) |
|
|
+ struct priority_node *p = list->tasks; |
|
|
+ if (p) |
|
|
do |
|
|
{ |
|
|
- task->parent = NULL; |
|
|
- task = task->next_child; |
|
|
+ priority_node_to_task (PQ_CHILDREN, p)->parent = NULL; |
|
|
+ p = p->next; |
|
|
} |
|
|
- while (task != children); |
|
|
+ while (p != list->tasks); |
|
|
+} |
|
|
+ |
|
|
+/* Splay tree version of gomp_clear_parent_in_list. |
|
|
+ |
|
|
+ Clear the parent field of every task in NODE within SP, and free |
|
|
+ the node when done. */ |
|
|
+ |
|
|
+static void |
|
|
+gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node) |
|
|
+{ |
|
|
+ if (!node) |
|
|
+ return; |
|
|
+ prio_splay_tree_node left = node->left, right = node->right; |
|
|
+ gomp_clear_parent_in_list (&node->key.l); |
|
|
+#if _LIBGOMP_CHECKING_ |
|
|
+ memset (node, 0xaf, sizeof (*node)); |
|
|
+#endif |
|
|
+ /* No need to remove the node from the tree. We're nuking |
|
|
+ everything, so just free the nodes and our caller can clear the |
|
|
+ entire splay tree. */ |
|
|
+ free (node); |
|
|
+ gomp_clear_parent_in_tree (sp, left); |
|
|
+ gomp_clear_parent_in_tree (sp, right); |
|
|
+} |
|
|
+ |
|
|
+/* Clear the parent field of every task in Q and remove every task |
|
|
+ from Q. */ |
|
|
+ |
|
|
+static inline void |
|
|
+gomp_clear_parent (struct priority_queue *q) |
|
|
+{ |
|
|
+ if (priority_queue_multi_p (q)) |
|
|
+ { |
|
|
+ gomp_clear_parent_in_tree (&q->t, q->t.root); |
|
|
+ /* All the nodes have been cleared in gomp_clear_parent_in_tree. |
|
|
+ No need to remove anything. We can just nuke everything. */ |
|
|
+ q->t.root = NULL; |
|
|
+ } |
|
|
+ else |
|
|
+ gomp_clear_parent_in_list (&q->l); |
|
|
} |
|
|
|
|
|
-static void gomp_task_maybe_wait_for_dependencies (void **depend); |
|
|
+/* Helper function for GOMP_task and gomp_create_target_task. |
|
|
+ |
|
|
+ For a TASK with in/out dependencies, fill in the various dependency |
|
|
+ queues. PARENT is the parent of said task. DEPEND is as in |
|
|
+ GOMP_task. */ |
|
|
+ |
|
|
+static void |
|
|
+gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent, |
|
|
+ void **depend) |
|
|
+{ |
|
|
+ size_t ndepend = (uintptr_t) depend[0]; |
|
|
+ size_t nout = (uintptr_t) depend[1]; |
|
|
+ size_t i; |
|
|
+ hash_entry_type ent; |
|
|
+ |
|
|
+ task->depend_count = ndepend; |
|
|
+ task->num_dependees = 0; |
|
|
+ if (parent->depend_hash == NULL) |
|
|
+ parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12); |
|
|
+ for (i = 0; i < ndepend; i++) |
|
|
+ { |
|
|
+ task->depend[i].addr = depend[2 + i]; |
|
|
+ task->depend[i].next = NULL; |
|
|
+ task->depend[i].prev = NULL; |
|
|
+ task->depend[i].task = task; |
|
|
+ task->depend[i].is_in = i >= nout; |
|
|
+ task->depend[i].redundant = false; |
|
|
+ task->depend[i].redundant_out = false; |
|
|
+ |
|
|
+ hash_entry_type *slot = htab_find_slot (&parent->depend_hash, |
|
|
+ &task->depend[i], INSERT); |
|
|
+ hash_entry_type out = NULL, last = NULL; |
|
|
+ if (*slot) |
|
|
+ { |
|
|
+ /* If multiple depends on the same task are the same, all but the |
|
|
+ first one are redundant. As inout/out come first, if any of them |
|
|
+ is inout/out, it will win, which is the right semantics. */ |
|
|
+ if ((*slot)->task == task) |
|
|
+ { |
|
|
+ task->depend[i].redundant = true; |
|
|
+ continue; |
|
|
+ } |
|
|
+ for (ent = *slot; ent; ent = ent->next) |
|
|
+ { |
|
|
+ if (ent->redundant_out) |
|
|
+ break; |
|
|
+ |
|
|
+ last = ent; |
|
|
+ |
|
|
+ /* depend(in:...) doesn't depend on earlier depend(in:...). */ |
|
|
+ if (i >= nout && ent->is_in) |
|
|
+ continue; |
|
|
+ |
|
|
+ if (!ent->is_in) |
|
|
+ out = ent; |
|
|
+ |
|
|
+ struct gomp_task *tsk = ent->task; |
|
|
+ if (tsk->dependers == NULL) |
|
|
+ { |
|
|
+ tsk->dependers |
|
|
+ = gomp_malloc (sizeof (struct gomp_dependers_vec) |
|
|
+ + 6 * sizeof (struct gomp_task *)); |
|
|
+ tsk->dependers->n_elem = 1; |
|
|
+ tsk->dependers->allocated = 6; |
|
|
+ tsk->dependers->elem[0] = task; |
|
|
+ task->num_dependees++; |
|
|
+ continue; |
|
|
+ } |
|
|
+ /* We already have some other dependency on tsk from earlier |
|
|
+ depend clause. */ |
|
|
+ else if (tsk->dependers->n_elem |
|
|
+ && (tsk->dependers->elem[tsk->dependers->n_elem - 1] |
|
|
+ == task)) |
|
|
+ continue; |
|
|
+ else if (tsk->dependers->n_elem == tsk->dependers->allocated) |
|
|
+ { |
|
|
+ tsk->dependers->allocated |
|
|
+ = tsk->dependers->allocated * 2 + 2; |
|
|
+ tsk->dependers |
|
|
+ = gomp_realloc (tsk->dependers, |
|
|
+ sizeof (struct gomp_dependers_vec) |
|
|
+ + (tsk->dependers->allocated |
|
|
+ * sizeof (struct gomp_task *))); |
|
|
+ } |
|
|
+ tsk->dependers->elem[tsk->dependers->n_elem++] = task; |
|
|
+ task->num_dependees++; |
|
|
+ } |
|
|
+ task->depend[i].next = *slot; |
|
|
+ (*slot)->prev = &task->depend[i]; |
|
|
+ } |
|
|
+ *slot = &task->depend[i]; |
|
|
+ |
|
|
+ /* There is no need to store more than one depend({,in}out:) task per |
|
|
+ address in the hash table chain for the purpose of creation of |
|
|
+ deferred tasks, because each out depends on all earlier outs, thus it |
|
|
+ is enough to record just the last depend({,in}out:). For depend(in:), |
|
|
+ we need to keep all of the previous ones not terminated yet, because |
|
|
+ a later depend({,in}out:) might need to depend on all of them. So, if |
|
|
+ the new task's clause is depend({,in}out:), we know there is at most |
|
|
+ one other depend({,in}out:) clause in the list (out). For |
|
|
+ non-deferred tasks we want to see all outs, so they are moved to the |
|
|
+ end of the chain, after first redundant_out entry all following |
|
|
+ entries should be redundant_out. */ |
|
|
+ if (!task->depend[i].is_in && out) |
|
|
+ { |
|
|
+ if (out != last) |
|
|
+ { |
|
|
+ out->next->prev = out->prev; |
|
|
+ out->prev->next = out->next; |
|
|
+ out->next = last->next; |
|
|
+ out->prev = last; |
|
|
+ last->next = out; |
|
|
+ if (out->next) |
|
|
+ out->next->prev = out; |
|
|
+ } |
|
|
+ out->redundant_out = true; |
|
|
+ } |
|
|
+ } |
|
|
+} |
|
|
|
|
|
/* Called when encountering an explicit task directive. If IF_CLAUSE is |
|
|
false, then we must not delay in executing the task. If UNTIED is true, |
|
|
- then the task may be executed by any member of the team. */ |
|
|
+ then the task may be executed by any member of the team. |
|
|
+ |
|
|
+ DEPEND is an array containing: |
|
|
+ depend[0]: number of depend elements. |
|
|
+ depend[1]: number of depend elements of type "out". |
|
|
+ depend[2..N+1]: address of [1..N]th depend element. */ |
|
|
|
|
|
void |
|
|
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), |
|
|
long arg_size, long arg_align, bool if_clause, unsigned flags, |
|
|
- void **depend) |
|
|
+ void **depend, int priority) |
|
|
{ |
|
|
struct gomp_thread *thr = gomp_thread (); |
|
|
struct gomp_team *team = thr->ts.team; |
|
|
@@ -125,8 +298,7 @@ GOMP_task (void (*fn) (void *), void *da |
|
|
might be running on different thread than FN. */ |
|
|
if (cpyfn) |
|
|
if_clause = false; |
|
|
- if (flags & 1) |
|
|
- flags &= ~1; |
|
|
+ flags &= ~GOMP_TASK_FLAG_UNTIED; |
|
|
#endif |
|
|
|
|
|
/* If parallel or taskgroup has been cancelled, don't start new tasks. */ |
|
|
@@ -135,6 +307,11 @@ GOMP_task (void (*fn) (void *), void *da |
|
|
|| (thr->task->taskgroup && thr->task->taskgroup->cancelled))) |
|
|
return; |
|
|
|
|
|
+ if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0) |
|
|
+ priority = 0; |
|
|
+ else if (priority > gomp_max_task_priority_var) |
|
|
+ priority = gomp_max_task_priority_var; |
|
|
+ |
|
|
if (!if_clause || team == NULL |
|
|
|| (thr->task && thr->task->final_task) |
|
|
|| team->task_count > 64 * team->nthreads) |
|
|
@@ -147,12 +324,15 @@ GOMP_task (void (*fn) (void *), void *da |
|
|
depend clauses for non-deferred tasks other than this, because |
|
|
the parent task is suspended until the child task finishes and thus |
|
|
it can't start further child tasks. */ |
|
|
- if ((flags & 8) && thr->task && thr->task->depend_hash) |
|
|
+ if ((flags & GOMP_TASK_FLAG_DEPEND) |
|
|
+ && thr->task && thr->task->depend_hash) |
|
|
gomp_task_maybe_wait_for_dependencies (depend); |
|
|
|
|
|
gomp_init_task (&task, thr->task, gomp_icv (false)); |
|
|
- task.kind = GOMP_TASK_IFFALSE; |
|
|
- task.final_task = (thr->task && thr->task->final_task) || (flags & 2); |
|
|
+ task.kind = GOMP_TASK_UNDEFERRED; |
|
|
+ task.final_task = (thr->task && thr->task->final_task) |
|
|
+ || (flags & GOMP_TASK_FLAG_FINAL); |
|
|
+ task.priority = priority; |
|
|
if (thr->task) |
|
|
{ |
|
|
task.in_tied_task = thr->task->in_tied_task; |
|
|
@@ -178,10 +358,10 @@ GOMP_task (void (*fn) (void *), void *da |
|
|
child thread, but seeing a stale non-NULL value is not a |
|
|
problem. Once past the task_lock acquisition, this thread |
|
|
will see the real value of task.children. */ |
|
|
- if (task.children != NULL) |
|
|
+ if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED)) |
|
|
{ |
|
|
gomp_mutex_lock (&team->task_lock); |
|
|
- gomp_clear_parent (task.children); |
|
|
+ gomp_clear_parent (&task.children_queue); |
|
|
gomp_mutex_unlock (&team->task_lock); |
|
|
} |
|
|
gomp_end_task (); |
|
|
@@ -195,7 +375,7 @@ GOMP_task (void (*fn) (void *), void *da |
|
|
bool do_wake; |
|
|
size_t depend_size = 0; |
|
|
|
|
|
- if (flags & 8) |
|
|
+ if (flags & GOMP_TASK_FLAG_DEPEND) |
|
|
depend_size = ((uintptr_t) depend[0] |
|
|
* sizeof (struct gomp_task_depend_entry)); |
|
|
task = gomp_malloc (sizeof (*task) + depend_size |
|
|
@@ -203,7 +383,8 @@ GOMP_task (void (*fn) (void *), void *da |
|
|
arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1) |
|
|
& ~(uintptr_t) (arg_align - 1)); |
|
|
gomp_init_task (task, parent, gomp_icv (false)); |
|
|
- task->kind = GOMP_TASK_IFFALSE; |
|
|
+ task->priority = priority; |
|
|
+ task->kind = GOMP_TASK_UNDEFERRED; |
|
|
task->in_tied_task = parent->in_tied_task; |
|
|
task->taskgroup = taskgroup; |
|
|
thr->task = task; |
|
|
@@ -218,7 +399,7 @@ GOMP_task (void (*fn) (void *), void *da |
|
|
task->kind = GOMP_TASK_WAITING; |
|
|
task->fn = fn; |
|
|
task->fn_data = arg; |
|
|
- task->final_task = (flags & 2) >> 1; |
|
|
+ task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1; |
|
|
gomp_mutex_lock (&team->task_lock); |
|
|
/* If parallel or taskgroup has been cancelled, don't start new |
|
|
tasks. */ |
|
|
@@ -235,171 +416,39 @@ GOMP_task (void (*fn) (void *), void *da |
|
|
taskgroup->num_children++; |
|
|
if (depend_size) |
|
|
{ |
|
|
- size_t ndepend = (uintptr_t) depend[0]; |
|
|
- size_t nout = (uintptr_t) depend[1]; |
|
|
- size_t i; |
|
|
- hash_entry_type ent; |
|
|
- |
|
|
- task->depend_count = ndepend; |
|
|
- task->num_dependees = 0; |
|
|
- if (parent->depend_hash == NULL) |
|
|
- parent->depend_hash |
|
|
- = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12); |
|
|
- for (i = 0; i < ndepend; i++) |
|
|
- { |
|
|
- task->depend[i].addr = depend[2 + i]; |
|
|
- task->depend[i].next = NULL; |
|
|
- task->depend[i].prev = NULL; |
|
|
- task->depend[i].task = task; |
|
|
- task->depend[i].is_in = i >= nout; |
|
|
- task->depend[i].redundant = false; |
|
|
- task->depend[i].redundant_out = false; |
|
|
- |
|
|
- hash_entry_type *slot |
|
|
- = htab_find_slot (&parent->depend_hash, &task->depend[i], |
|
|
- INSERT); |
|
|
- hash_entry_type out = NULL, last = NULL; |
|
|
- if (*slot) |
|
|
- { |
|
|
- /* If multiple depends on the same task are the |
|
|
- same, all but the first one are redundant. |
|
|
- As inout/out come first, if any of them is |
|
|
- inout/out, it will win, which is the right |
|
|
- semantics. */ |
|
|
- if ((*slot)->task == task) |
|
|
- { |
|
|
- task->depend[i].redundant = true; |
|
|
- continue; |
|
|
- } |
|
|
- for (ent = *slot; ent; ent = ent->next) |
|
|
- { |
|
|
- if (ent->redundant_out) |
|
|
- break; |
|
|
- |
|
|
- last = ent; |
|
|
- |
|
|
- /* depend(in:...) doesn't depend on earlier |
|
|
- depend(in:...). */ |
|
|
- if (i >= nout && ent->is_in) |
|
|
- continue; |
|
|
- |
|
|
- if (!ent->is_in) |
|
|
- out = ent; |
|
|
- |
|
|
- struct gomp_task *tsk = ent->task; |
|
|
- if (tsk->dependers == NULL) |
|
|
- { |
|
|
- tsk->dependers |
|
|
- = gomp_malloc (sizeof (struct gomp_dependers_vec) |
|
|
- + 6 * sizeof (struct gomp_task *)); |
|
|
- tsk->dependers->n_elem = 1; |
|
|
- tsk->dependers->allocated = 6; |
|
|
- tsk->dependers->elem[0] = task; |
|
|
- task->num_dependees++; |
|
|
- continue; |
|
|
- } |
|
|
- /* We already have some other dependency on tsk |
|
|
- from earlier depend clause. */ |
|
|
- else if (tsk->dependers->n_elem |
|
|
- && (tsk->dependers->elem[tsk->dependers->n_elem |
|
|
- - 1] |
|
|
- == task)) |
|
|
- continue; |
|
|
- else if (tsk->dependers->n_elem |
|
|
- == tsk->dependers->allocated) |
|
|
- { |
|
|
- tsk->dependers->allocated |
|
|
- = tsk->dependers->allocated * 2 + 2; |
|
|
- tsk->dependers |
|
|
- = gomp_realloc (tsk->dependers, |
|
|
- sizeof (struct gomp_dependers_vec) |
|
|
- + (tsk->dependers->allocated |
|
|
- * sizeof (struct gomp_task *))); |
|
|
- } |
|
|
- tsk->dependers->elem[tsk->dependers->n_elem++] = task; |
|
|
- task->num_dependees++; |
|
|
- } |
|
|
- task->depend[i].next = *slot; |
|
|
- (*slot)->prev = &task->depend[i]; |
|
|
- } |
|
|
- *slot = &task->depend[i]; |
|
|
- |
|
|
- /* There is no need to store more than one depend({,in}out:) |
|
|
- task per address in the hash table chain for the purpose |
|
|
- of creation of deferred tasks, because each out |
|
|
- depends on all earlier outs, thus it is enough to record |
|
|
- just the last depend({,in}out:). For depend(in:), we need |
|
|
- to keep all of the previous ones not terminated yet, because |
|
|
- a later depend({,in}out:) might need to depend on all of |
|
|
- them. So, if the new task's clause is depend({,in}out:), |
|
|
- we know there is at most one other depend({,in}out:) clause |
|
|
- in the list (out). For non-deferred tasks we want to see |
|
|
- all outs, so they are moved to the end of the chain, |
|
|
- after first redundant_out entry all following entries |
|
|
- should be redundant_out. */ |
|
|
- if (!task->depend[i].is_in && out) |
|
|
- { |
|
|
- if (out != last) |
|
|
- { |
|
|
- out->next->prev = out->prev; |
|
|
- out->prev->next = out->next; |
|
|
- out->next = last->next; |
|
|
- out->prev = last; |
|
|
- last->next = out; |
|
|
- if (out->next) |
|
|
- out->next->prev = out; |
|
|
- } |
|
|
- out->redundant_out = true; |
|
|
- } |
|
|
- } |
|
|
+ gomp_task_handle_depend (task, parent, depend); |
|
|
if (task->num_dependees) |
|
|
{ |
|
|
+ /* Tasks that depend on other tasks are not put into the |
|
|
+ various waiting queues, so we are done for now. Said |
|
|
+ tasks are instead put into the queues via |
|
|
+ gomp_task_run_post_handle_dependers() after their |
|
|
+ dependencies have been satisfied. After that, they |
|
|
+ can be picked up by the various scheduling |
|
|
+ points. */ |
|
|
gomp_mutex_unlock (&team->task_lock); |
|
|
return; |
|
|
} |
|
|
} |
|
|
- if (parent->children) |
|
|
- { |
|
|
- task->next_child = parent->children; |
|
|
- task->prev_child = parent->children->prev_child; |
|
|
- task->next_child->prev_child = task; |
|
|
- task->prev_child->next_child = task; |
|
|
- } |
|
|
- else |
|
|
- { |
|
|
- task->next_child = task; |
|
|
- task->prev_child = task; |
|
|
- } |
|
|
- parent->children = task; |
|
|
+ |
|
|
+ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, |
|
|
+ task, priority, |
|
|
+ PRIORITY_INSERT_BEGIN, |
|
|
+ /*adjust_parent_depends_on=*/false, |
|
|
+ task->parent_depends_on); |
|
|
if (taskgroup) |
|
|
- { |
|
|
- if (taskgroup->children) |
|
|
- { |
|
|
- task->next_taskgroup = taskgroup->children; |
|
|
- task->prev_taskgroup = taskgroup->children->prev_taskgroup; |
|
|
- task->next_taskgroup->prev_taskgroup = task; |
|
|
- task->prev_taskgroup->next_taskgroup = task; |
|
|
- } |
|
|
- else |
|
|
- { |
|
|
- task->next_taskgroup = task; |
|
|
- task->prev_taskgroup = task; |
|
|
- } |
|
|
- taskgroup->children = task; |
|
|
- } |
|
|
- if (team->task_queue) |
|
|
- { |
|
|
- task->next_queue = team->task_queue; |
|
|
- task->prev_queue = team->task_queue->prev_queue; |
|
|
- task->next_queue->prev_queue = task; |
|
|
- task->prev_queue->next_queue = task; |
|
|
- } |
|
|
- else |
|
|
- { |
|
|
- task->next_queue = task; |
|
|
- task->prev_queue = task; |
|
|
- team->task_queue = task; |
|
|
- } |
|
|
+ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, |
|
|
+ task, priority, |
|
|
+ PRIORITY_INSERT_BEGIN, |
|
|
+ /*adjust_parent_depends_on=*/false, |
|
|
+ task->parent_depends_on); |
|
|
+ |
|
|
+ priority_queue_insert (PQ_TEAM, &team->task_queue, |
|
|
+ task, priority, |
|
|
+ PRIORITY_INSERT_END, |
|
|
+ /*adjust_parent_depends_on=*/false, |
|
|
+ task->parent_depends_on); |
|
|
+ |
|
|
++team->task_count; |
|
|
++team->task_queued_count; |
|
|
gomp_team_barrier_set_task_pending (&team->barrier); |
|
|
@@ -411,36 +460,529 @@ GOMP_task (void (*fn) (void *), void *da |
|
|
} |
|
|
} |
|
|
|
|
|
-static inline bool |
|
|
-gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent, |
|
|
- struct gomp_taskgroup *taskgroup, struct gomp_team *team) |
|
|
+ialias (GOMP_taskgroup_start) |
|
|
+ialias (GOMP_taskgroup_end) |
|
|
+ |
|
|
+#define TYPE long |
|
|
+#define UTYPE unsigned long |
|
|
+#define TYPE_is_long 1 |
|
|
+#include "taskloop.c" |
|
|
+#undef TYPE |
|
|
+#undef UTYPE |
|
|
+#undef TYPE_is_long |
|
|
+ |
|
|
+#define TYPE unsigned long long |
|
|
+#define UTYPE TYPE |
|
|
+#define GOMP_taskloop GOMP_taskloop_ull |
|
|
+#include "taskloop.c" |
|
|
+#undef TYPE |
|
|
+#undef UTYPE |
|
|
+#undef GOMP_taskloop |
|
|
+ |
|
|
+static void inline |
|
|
+priority_queue_move_task_first (enum priority_queue_type type, |
|
|
+ struct priority_queue *head, |
|
|
+ struct gomp_task *task) |
|
|
{ |
|
|
+#if _LIBGOMP_CHECKING_ |
|
|
+ if (!priority_queue_task_in_queue_p (type, head, task)) |
|
|
+ gomp_fatal ("Attempt to move first missing task %p", task); |
|
|
+#endif |
|
|
+ struct priority_list *list; |
|
|
+ if (priority_queue_multi_p (head)) |
|
|
+ { |
|
|
+ list = priority_queue_lookup_priority (head, task->priority); |
|
|
+#if _LIBGOMP_CHECKING_ |
|
|
+ if (!list) |
|
|
+ gomp_fatal ("Unable to find priority %d", task->priority); |
|
|
+#endif |
|
|
+ } |
|
|
+ else |
|
|
+ list = &head->l; |
|
|
+ priority_list_remove (list, task_to_priority_node (type, task), 0); |
|
|
+ priority_list_insert (type, list, task, task->priority, |
|
|
+ PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN, |
|
|
+ task->parent_depends_on); |
|
|
+} |
|
|
+ |
|
|
+/* Actual body of GOMP_PLUGIN_target_task_completion that is executed |
|
|
+ with team->task_lock held, or is executed in the thread that called |
|
|
+ gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been |
|
|
+ run before it acquires team->task_lock. */ |
|
|
+ |
|
|
+static void |
|
|
+gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task) |
|
|
+{ |
|
|
+ struct gomp_task *parent = task->parent; |
|
|
if (parent) |
|
|
+ priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue, |
|
|
+ task); |
|
|
+ |
|
|
+ struct gomp_taskgroup *taskgroup = task->taskgroup; |
|
|
+ if (taskgroup) |
|
|
+ priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue, |
|
|
+ task); |
|
|
+ |
|
|
+ priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority, |
|
|
+ PRIORITY_INSERT_BEGIN, false, |
|
|
+ task->parent_depends_on); |
|
|
+ task->kind = GOMP_TASK_WAITING; |
|
|
+ if (parent && parent->taskwait) |
|
|
{ |
|
|
- if (parent->children == child_task) |
|
|
- parent->children = child_task->next_child; |
|
|
- if (__builtin_expect (child_task->parent_depends_on, 0) |
|
|
- && parent->taskwait->last_parent_depends_on == child_task) |
|
|
- { |
|
|
- if (child_task->prev_child->kind == GOMP_TASK_WAITING |
|
|
- && child_task->prev_child->parent_depends_on) |
|
|
- parent->taskwait->last_parent_depends_on = child_task->prev_child; |
|
|
- else |
|
|
- parent->taskwait->last_parent_depends_on = NULL; |
|
|
+ if (parent->taskwait->in_taskwait) |
|
|
+ { |
|
|
+ /* One more task has had its dependencies met. |
|
|
+ Inform any waiters. */ |
|
|
+ parent->taskwait->in_taskwait = false; |
|
|
+ gomp_sem_post (&parent->taskwait->taskwait_sem); |
|
|
} |
|
|
+ else if (parent->taskwait->in_depend_wait) |
|
|
+ { |
|
|
+ /* One more task has had its dependencies met. |
|
|
+ Inform any waiters. */ |
|
|
+ parent->taskwait->in_depend_wait = false; |
|
|
+ gomp_sem_post (&parent->taskwait->taskwait_sem); |
|
|
+ } |
|
|
+ } |
|
|
+ if (taskgroup && taskgroup->in_taskgroup_wait) |
|
|
+ { |
|
|
+ /* One more task has had its dependencies met. |
|
|
+ Inform any waiters. */ |
|
|
+ taskgroup->in_taskgroup_wait = false; |
|
|
+ gomp_sem_post (&taskgroup->taskgroup_sem); |
|
|
} |
|
|
- if (taskgroup && taskgroup->children == child_task) |
|
|
- taskgroup->children = child_task->next_taskgroup; |
|
|
- child_task->prev_queue->next_queue = child_task->next_queue; |
|
|
- child_task->next_queue->prev_queue = child_task->prev_queue; |
|
|
- if (team->task_queue == child_task) |
|
|
+ |
|
|
+ ++team->task_queued_count; |
|
|
+ gomp_team_barrier_set_task_pending (&team->barrier); |
|
|
+ /* I'm afraid this can't be done after releasing team->task_lock, |
|
|
+ as gomp_target_task_completion is run from an unrelated thread and |
|
|
+ therefore in between gomp_mutex_unlock and gomp_team_barrier_wake |
|
|
+ the team could be gone already. */ |
|
|
+ if (team->nthreads > team->task_running_count) |
|
|
+ gomp_team_barrier_wake (&team->barrier, 1); |
|
|
+} |
|
|
+ |
|
|
+/* Signal that a target task TTASK has completed the asynchronously |
|
|
+ running phase and should be requeued as a task to handle the |
|
|
+ variable unmapping. */ |
|
|
+ |
|
|
+void |
|
|
+GOMP_PLUGIN_target_task_completion (void *data) |
|
|
+{ |
|
|
+ struct gomp_target_task *ttask = (struct gomp_target_task *) data; |
|
|
+ struct gomp_task *task = ttask->task; |
|
|
+ struct gomp_team *team = ttask->team; |
|
|
+ |
|
|
+ gomp_mutex_lock (&team->task_lock); |
|
|
+ if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN) |
|
|
{ |
|
|
- if (child_task->next_queue != child_task) |
|
|
- team->task_queue = child_task->next_queue; |
|
|
+ ttask->state = GOMP_TARGET_TASK_FINISHED; |
|
|
+ gomp_mutex_unlock (&team->task_lock); |
|
|
+ return; |
|
|
+ } |
|
|
+ ttask->state = GOMP_TARGET_TASK_FINISHED; |
|
|
+ gomp_target_task_completion (team, task); |
|
|
+ gomp_mutex_unlock (&team->task_lock); |
|
|
+} |
|
|
+ |
|
|
+static void gomp_task_run_post_handle_depend_hash (struct gomp_task *); |
|
|
+ |
|
|
+/* Called for nowait target tasks. */ |
|
|
+ |
|
|
+bool |
|
|
+gomp_create_target_task (struct gomp_device_descr *devicep, |
|
|
+ void (*fn) (void *), size_t mapnum, void **hostaddrs, |
|
|
+ size_t *sizes, unsigned short *kinds, |
|
|
+ unsigned int flags, void **depend, void **args, |
|
|
+ enum gomp_target_task_state state) |
|
|
+{ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ struct gomp_team *team = thr->ts.team; |
|
|
+ |
|
|
+ /* If parallel or taskgroup has been cancelled, don't start new tasks. */ |
|
|
+ if (team |
|
|
+ && (gomp_team_barrier_cancelled (&team->barrier) |
|
|
+ || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) |
|
|
+ return true; |
|
|
+ |
|
|
+ struct gomp_target_task *ttask; |
|
|
+ struct gomp_task *task; |
|
|
+ struct gomp_task *parent = thr->task; |
|
|
+ struct gomp_taskgroup *taskgroup = parent->taskgroup; |
|
|
+ bool do_wake; |
|
|
+ size_t depend_size = 0; |
|
|
+ uintptr_t depend_cnt = 0; |
|
|
+ size_t tgt_align = 0, tgt_size = 0; |
|
|
+ |
|
|
+ if (depend != NULL) |
|
|
+ { |
|
|
+ depend_cnt = (uintptr_t) depend[0]; |
|
|
+ depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry); |
|
|
+ } |
|
|
+ if (fn) |
|
|
+ { |
|
|
+ /* GOMP_MAP_FIRSTPRIVATE entries need to be copied first, as they are |
|
|
+ firstprivate on the target task. */ |
|
|
+ size_t i; |
|
|
+ for (i = 0; i < mapnum; i++) |
|
|
+ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) |
|
|
+ { |
|
|
+ size_t align = (size_t) 1 << (kinds[i] >> 8); |
|
|
+ if (tgt_align < align) |
|
|
+ tgt_align = align; |
|
|
+ tgt_size = (tgt_size + align - 1) & ~(align - 1); |
|
|
+ tgt_size += sizes[i]; |
|
|
+ } |
|
|
+ if (tgt_align) |
|
|
+ tgt_size += tgt_align - 1; |
|
|
else |
|
|
- team->task_queue = NULL; |
|
|
+ tgt_size = 0; |
|
|
} |
|
|
+ |
|
|
+ task = gomp_malloc (sizeof (*task) + depend_size |
|
|
+ + sizeof (*ttask) |
|
|
+ + mapnum * (sizeof (void *) + sizeof (size_t) |
|
|
+ + sizeof (unsigned short)) |
|
|
+ + tgt_size); |
|
|
+ gomp_init_task (task, parent, gomp_icv (false)); |
|
|
+ task->priority = 0; |
|
|
+ task->kind = GOMP_TASK_WAITING; |
|
|
+ task->in_tied_task = parent->in_tied_task; |
|
|
+ task->taskgroup = taskgroup; |
|
|
+ ttask = (struct gomp_target_task *) &task->depend[depend_cnt]; |
|
|
+ ttask->devicep = devicep; |
|
|
+ ttask->fn = fn; |
|
|
+ ttask->mapnum = mapnum; |
|
|
+ ttask->args = args; |
|
|
+ memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *)); |
|
|
+ ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum]; |
|
|
+ memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t)); |
|
|
+ ttask->kinds = (unsigned short *) &ttask->sizes[mapnum]; |
|
|
+ memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short)); |
|
|
+ if (tgt_align) |
|
|
+ { |
|
|
+ char *tgt = (char *) &ttask->kinds[mapnum]; |
|
|
+ size_t i; |
|
|
+ uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); |
|
|
+ if (al) |
|
|
+ tgt += tgt_align - al; |
|
|
+ tgt_size = 0; |
|
|
+ for (i = 0; i < mapnum; i++) |
|
|
+ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) |
|
|
+ { |
|
|
+ size_t align = (size_t) 1 << (kinds[i] >> 8); |
|
|
+ tgt_size = (tgt_size + align - 1) & ~(align - 1); |
|
|
+ memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); |
|
|
+ ttask->hostaddrs[i] = tgt + tgt_size; |
|
|
+ tgt_size = tgt_size + sizes[i]; |
|
|
+ } |
|
|
+ } |
|
|
+ ttask->flags = flags; |
|
|
+ ttask->state = state; |
|
|
+ ttask->task = task; |
|
|
+ ttask->team = team; |
|
|
+ task->fn = NULL; |
|
|
+ task->fn_data = ttask; |
|
|
+ task->final_task = 0; |
|
|
+ gomp_mutex_lock (&team->task_lock); |
|
|
+ /* If parallel or taskgroup has been cancelled, don't start new tasks. */ |
|
|
+ if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier) |
|
|
+ || (taskgroup && taskgroup->cancelled), 0)) |
|
|
+ { |
|
|
+ gomp_mutex_unlock (&team->task_lock); |
|
|
+ gomp_finish_task (task); |
|
|
+ free (task); |
|
|
+ return true; |
|
|
+ } |
|
|
+ if (depend_size) |
|
|
+ { |
|
|
+ gomp_task_handle_depend (task, parent, depend); |
|
|
+ if (task->num_dependees) |
|
|
+ { |
|
|
+ if (taskgroup) |
|
|
+ taskgroup->num_children++; |
|
|
+ gomp_mutex_unlock (&team->task_lock); |
|
|
+ return true; |
|
|
+ } |
|
|
+ } |
|
|
+ if (state == GOMP_TARGET_TASK_DATA) |
|
|
+ { |
|
|
+ gomp_task_run_post_handle_depend_hash (task); |
|
|
+ gomp_mutex_unlock (&team->task_lock); |
|
|
+ gomp_finish_task (task); |
|
|
+ free (task); |
|
|
+ return false; |
|
|
+ } |
|
|
+ if (taskgroup) |
|
|
+ taskgroup->num_children++; |
|
|
+ /* For async offloading, if we don't need to wait for dependencies, |
|
|
+ run the gomp_target_task_fn right away, essentially schedule the |
|
|
+ mapping part of the task in the current thread. */ |
|
|
+ if (devicep != NULL |
|
|
+ && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) |
|
|
+ { |
|
|
+ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0, |
|
|
+ PRIORITY_INSERT_END, |
|
|
+ /*adjust_parent_depends_on=*/false, |
|
|
+ task->parent_depends_on); |
|
|
+ if (taskgroup) |
|
|
+ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, |
|
|
+ task, 0, PRIORITY_INSERT_END, |
|
|
+ /*adjust_parent_depends_on=*/false, |
|
|
+ task->parent_depends_on); |
|
|
+ task->pnode[PQ_TEAM].next = NULL; |
|
|
+ task->pnode[PQ_TEAM].prev = NULL; |
|
|
+ task->kind = GOMP_TASK_TIED; |
|
|
+ ++team->task_count; |
|
|
+ gomp_mutex_unlock (&team->task_lock); |
|
|
+ |
|
|
+ thr->task = task; |
|
|
+ gomp_target_task_fn (task->fn_data); |
|
|
+ thr->task = parent; |
|
|
+ |
|
|
+ gomp_mutex_lock (&team->task_lock); |
|
|
+ task->kind = GOMP_TASK_ASYNC_RUNNING; |
|
|
+ /* If GOMP_PLUGIN_target_task_completion has run already |
|
|
+ in between gomp_target_task_fn and the mutex lock, |
|
|
+ perform the requeuing here. */ |
|
|
+ if (ttask->state == GOMP_TARGET_TASK_FINISHED) |
|
|
+ gomp_target_task_completion (team, task); |
|
|
+ else |
|
|
+ ttask->state = GOMP_TARGET_TASK_RUNNING; |
|
|
+ gomp_mutex_unlock (&team->task_lock); |
|
|
+ return true; |
|
|
+ } |
|
|
+ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0, |
|
|
+ PRIORITY_INSERT_BEGIN, |
|
|
+ /*adjust_parent_depends_on=*/false, |
|
|
+ task->parent_depends_on); |
|
|
+ if (taskgroup) |
|
|
+ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0, |
|
|
+ PRIORITY_INSERT_BEGIN, |
|
|
+ /*adjust_parent_depends_on=*/false, |
|
|
+ task->parent_depends_on); |
|
|
+ priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0, |
|
|
+ PRIORITY_INSERT_END, |
|
|
+ /*adjust_parent_depends_on=*/false, |
|
|
+ task->parent_depends_on); |
|
|
+ ++team->task_count; |
|
|
+ ++team->task_queued_count; |
|
|
+ gomp_team_barrier_set_task_pending (&team->barrier); |
|
|
+ do_wake = team->task_running_count + !parent->in_tied_task |
|
|
+ < team->nthreads; |
|
|
+ gomp_mutex_unlock (&team->task_lock); |
|
|
+ if (do_wake) |
|
|
+ gomp_team_barrier_wake (&team->barrier, 1); |
|
|
+ return true; |
|
|
+} |
|
|
+ |
|
|
+/* Given a parent_depends_on task in LIST, move it to the front of its |
|
|
+ priority so it is run as soon as possible. |
|
|
+ |
|
|
+ Care is taken to update the list's LAST_PARENT_DEPENDS_ON field. |
|
|
+ |
|
|
+ We rearrange the queue such that all parent_depends_on tasks are |
|
|
+ first, and last_parent_depends_on points to the last such task we |
|
|
+ rearranged. For example, given the following tasks in a queue |
|
|
+ where PD[123] are the parent_depends_on tasks: |
|
|
+ |
|
|
+ task->children |
|
|
+ | |
|
|
+ V |
|
|
+ C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4 |
|
|
+ |
|
|
+ We rearrange such that: |
|
|
+ |
|
|
+ task->children |
|
|
+ | +--- last_parent_depends_on |
|
|
+ | | |
|
|
+ V V |
|
|
+ PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. */ |
|
|
+ |
|
|
+static void inline |
|
|
+priority_list_upgrade_task (struct priority_list *list, |
|
|
+ struct priority_node *node) |
|
|
+{ |
|
|
+ struct priority_node *last_parent_depends_on |
|
|
+ = list->last_parent_depends_on; |
|
|
+ if (last_parent_depends_on) |
|
|
+ { |
|
|
+ node->prev->next = node->next; |
|
|
+ node->next->prev = node->prev; |
|
|
+ node->prev = last_parent_depends_on; |
|
|
+ node->next = last_parent_depends_on->next; |
|
|
+ node->prev->next = node; |
|
|
+ node->next->prev = node; |
|
|
+ } |
|
|
+ else if (node != list->tasks) |
|
|
+ { |
|
|
+ node->prev->next = node->next; |
|
|
+ node->next->prev = node->prev; |
|
|
+ node->prev = list->tasks->prev; |
|
|
+ node->next = list->tasks; |
|
|
+ list->tasks = node; |
|
|
+ node->prev->next = node; |
|
|
+ node->next->prev = node; |
|
|
+ } |
|
|
+ list->last_parent_depends_on = node; |
|
|
+} |
|
|
+ |
|
|
+/* Given a parent_depends_on TASK in its parent's children_queue, move |
|
|
+ it to the front of its priority so it is run as soon as possible. |
|
|
+ |
|
|
+ PARENT is passed as an optimization. |
|
|
+ |
|
|
+ (This function could be defined in priority_queue.c, but we want it |
|
|
+ inlined, and putting it in priority_queue.h is not an option, given |
|
|
+ that gomp_task has not been properly defined at that point). */ |
|
|
+ |
|
|
+static void inline |
|
|
+priority_queue_upgrade_task (struct gomp_task *task, |
|
|
+ struct gomp_task *parent) |
|
|
+{ |
|
|
+ struct priority_queue *head = &parent->children_queue; |
|
|
+ struct priority_node *node = &task->pnode[PQ_CHILDREN]; |
|
|
+#if _LIBGOMP_CHECKING_ |
|
|
+ if (!task->parent_depends_on) |
|
|
+ gomp_fatal ("priority_queue_upgrade_task: task must be a " |
|
|
+ "parent_depends_on task"); |
|
|
+ if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task)) |
|
|
+ gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task); |
|
|
+#endif |
|
|
+ if (priority_queue_multi_p (head)) |
|
|
+ { |
|
|
+ struct priority_list *list |
|
|
+ = priority_queue_lookup_priority (head, task->priority); |
|
|
+ priority_list_upgrade_task (list, node); |
|
|
+ } |
|
|
+ else |
|
|
+ priority_list_upgrade_task (&head->l, node); |
|
|
+} |
|
|
+ |
|
|
+/* Given a CHILD_TASK in LIST that is about to be executed, move it out of |
|
|
+ the way in LIST so that other tasks can be considered for |
|
|
+ execution. LIST contains tasks of type TYPE. |
|
|
+ |
|
|
+ Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field |
|
|
+ if applicable. */ |
|
|
+ |
|
|
+static void inline |
|
|
+priority_list_downgrade_task (enum priority_queue_type type, |
|
|
+ struct priority_list *list, |
|
|
+ struct gomp_task *child_task) |
|
|
+{ |
|
|
+ struct priority_node *node = task_to_priority_node (type, child_task); |
|
|
+ if (list->tasks == node) |
|
|
+ list->tasks = node->next; |
|
|
+ else if (node->next != list->tasks) |
|
|
+ { |
|
|
+ /* The task in NODE is about to become TIED and TIED tasks |
|
|
+ cannot come before WAITING tasks. If we're about to |
|
|
+ leave the queue in such an indeterminate state, rewire |
|
|
+ things appropriately. However, a TIED task at the end is |
|
|
+ perfectly fine. */ |
|
|
+ struct gomp_task *next_task = priority_node_to_task (type, node->next); |
|
|
+ if (next_task->kind == GOMP_TASK_WAITING) |
|
|
+ { |
|
|
+ /* Remove from list. */ |
|
|
+ node->prev->next = node->next; |
|
|
+ node->next->prev = node->prev; |
|
|
+ /* Rewire at the end. */ |
|
|
+ node->next = list->tasks; |
|
|
+ node->prev = list->tasks->prev; |
|
|
+ list->tasks->prev->next = node; |
|
|
+ list->tasks->prev = node; |
|
|
+ } |
|
|
+ } |
|
|
+ |
|
|
+ /* If the current task is the last_parent_depends_on for its |
|
|
+ priority, adjust last_parent_depends_on appropriately. */ |
|
|
+ if (__builtin_expect (child_task->parent_depends_on, 0) |
|
|
+ && list->last_parent_depends_on == node) |
|
|
+ { |
|
|
+ struct gomp_task *prev_child = priority_node_to_task (type, node->prev); |
|
|
+ if (node->prev != node |
|
|
+ && prev_child->kind == GOMP_TASK_WAITING |
|
|
+ && prev_child->parent_depends_on) |
|
|
+ list->last_parent_depends_on = node->prev; |
|
|
+ else |
|
|
+ { |
|
|
+ /* There are no more parent_depends_on entries waiting |
|
|
+ to run, so clear last_parent_depends_on. */ |
|
|
+ list->last_parent_depends_on = NULL; |
|
|
+ } |
|
|
+ } |
|
|
+} |
|
|
+ |
|
|
+/* Given a TASK in HEAD that is about to be executed, move it out of |
|
|
+ the way so that other tasks can be considered for execution. HEAD |
|
|
+ contains tasks of type TYPE. |
|
|
+ |
|
|
+ Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field |
|
|
+ if applicable. |
|
|
+ |
|
|
+ (This function could be defined in priority_queue.c, but we want it |
|
|
+ inlined, and putting it in priority_queue.h is not an option, given |
|
|
+ that gomp_task has not been properly defined at that point). */ |
|
|
+ |
|
|
+static void inline |
|
|
+priority_queue_downgrade_task (enum priority_queue_type type, |
|
|
+ struct priority_queue *head, |
|
|
+ struct gomp_task *task) |
|
|
+{ |
|
|
+#if _LIBGOMP_CHECKING_ |
|
|
+ if (!priority_queue_task_in_queue_p (type, head, task)) |
|
|
+ gomp_fatal ("Attempt to downgrade missing task %p", task); |
|
|
+#endif |
|
|
+ if (priority_queue_multi_p (head)) |
|
|
+ { |
|
|
+ struct priority_list *list |
|
|
+ = priority_queue_lookup_priority (head, task->priority); |
|
|
+ priority_list_downgrade_task (type, list, task); |
|
|
+ } |
|
|
+ else |
|
|
+ priority_list_downgrade_task (type, &head->l, task); |
|
|
+} |
|
|
+ |
|
|
+/* Setup CHILD_TASK to execute. This is done by setting the task to |
|
|
+ TIED, and updating all relevant queues so that CHILD_TASK is no |
|
|
+ longer chosen for scheduling. Also, remove CHILD_TASK from the |
|
|
+ overall team task queue entirely. |
|
|
+ |
|
|
+ Return TRUE if task or its containing taskgroup has been |
|
|
+ cancelled. */ |
|
|
+ |
|
|
+static inline bool |
|
|
+gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent, |
|
|
+ struct gomp_team *team) |
|
|
+{ |
|
|
+#if _LIBGOMP_CHECKING_ |
|
|
+ if (child_task->parent) |
|
|
+ priority_queue_verify (PQ_CHILDREN, |
|
|
+ &child_task->parent->children_queue, true); |
|
|
+ if (child_task->taskgroup) |
|
|
+ priority_queue_verify (PQ_TASKGROUP, |
|
|
+ &child_task->taskgroup->taskgroup_queue, false); |
|
|
+ priority_queue_verify (PQ_TEAM, &team->task_queue, false); |
|
|
+#endif |
|
|
+ |
|
|
+ /* Task is about to go tied, move it out of the way. */ |
|
|
+ if (parent) |
|
|
+ priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue, |
|
|
+ child_task); |
|
|
+ |
|
|
+ /* Task is about to go tied, move it out of the way. */ |
|
|
+ struct gomp_taskgroup *taskgroup = child_task->taskgroup; |
|
|
+ if (taskgroup) |
|
|
+ priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue, |
|
|
+ child_task); |
|
|
+ |
|
|
+ priority_queue_remove (PQ_TEAM, &team->task_queue, child_task, |
|
|
+ MEMMODEL_RELAXED); |
|
|
+ child_task->pnode[PQ_TEAM].next = NULL; |
|
|
+ child_task->pnode[PQ_TEAM].prev = NULL; |
|
|
child_task->kind = GOMP_TASK_TIED; |
|
|
+ |
|
|
if (--team->task_queued_count == 0) |
|
|
gomp_team_barrier_clear_task_pending (&team->barrier); |
|
|
if ((gomp_team_barrier_cancelled (&team->barrier) |
|
|
@@ -478,6 +1020,14 @@ gomp_task_run_post_handle_depend_hash (s |
|
|
} |
|
|
} |
|
|
|
|
|
+/* After a CHILD_TASK has been run, adjust the dependency queue for |
|
|
+ each task that depends on CHILD_TASK, to record the fact that there |
|
|
+ is one less dependency to worry about. If a task that depended on |
|
|
+ CHILD_TASK now has no dependencies, place it in the various queues |
|
|
+ so it gets scheduled to run. |
|
|
+ |
|
|
+ TEAM is the team to which CHILD_TASK belongs. */ |
|
|
+ |
|
|
static size_t |
|
|
gomp_task_run_post_handle_dependers (struct gomp_task *child_task, |
|
|
struct gomp_team *team) |
|
|
@@ -487,91 +1037,60 @@ gomp_task_run_post_handle_dependers (str |
|
|
for (i = 0; i < count; i++) |
|
|
{ |
|
|
struct gomp_task *task = child_task->dependers->elem[i]; |
|
|
+ |
|
|
+ /* CHILD_TASK satisfies a dependency for TASK. Keep track of |
|
|
+ TASK's remaining dependencies. Once TASK has no other |
|
|
+ dependencies, put it into the various queues so it will get |
|
|
+ scheduled for execution. */ |
|
|
if (--task->num_dependees != 0) |
|
|
continue; |
|
|
|
|
|
struct gomp_taskgroup *taskgroup = task->taskgroup; |
|
|
if (parent) |
|
|
{ |
|
|
- if (parent->children) |
|
|
- { |
|
|
- /* If parent is in gomp_task_maybe_wait_for_dependencies |
|
|
- and it doesn't need to wait for this task, put it after |
|
|
- all ready to run tasks it needs to wait for. */ |
|
|
- if (parent->taskwait && parent->taskwait->last_parent_depends_on |
|
|
- && !task->parent_depends_on) |
|
|
- { |
|
|
- struct gomp_task *last_parent_depends_on |
|
|
- = parent->taskwait->last_parent_depends_on; |
|
|
- task->next_child = last_parent_depends_on->next_child; |
|
|
- task->prev_child = last_parent_depends_on; |
|
|
- } |
|
|
- else |
|
|
- { |
|
|
- task->next_child = parent->children; |
|
|
- task->prev_child = parent->children->prev_child; |
|
|
- parent->children = task; |
|
|
- } |
|
|
- task->next_child->prev_child = task; |
|
|
- task->prev_child->next_child = task; |
|
|
- } |
|
|
- else |
|
|
- { |
|
|
- task->next_child = task; |
|
|
- task->prev_child = task; |
|
|
- parent->children = task; |
|
|
- } |
|
|
+ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, |
|
|
+ task, task->priority, |
|
|
+ PRIORITY_INSERT_BEGIN, |
|
|
+ /*adjust_parent_depends_on=*/true, |
|
|
+ task->parent_depends_on); |
|
|
if (parent->taskwait) |
|
|
{ |
|
|
if (parent->taskwait->in_taskwait) |
|
|
{ |
|
|
+ /* One more task has had its dependencies met. |
|
|
+ Inform any waiters. */ |
|
|
parent->taskwait->in_taskwait = false; |
|
|
gomp_sem_post (&parent->taskwait->taskwait_sem); |
|
|
} |
|
|
else if (parent->taskwait->in_depend_wait) |
|
|
{ |
|
|
+ /* One more task has had its dependencies met. |
|
|
+ Inform any waiters. */ |
|
|
parent->taskwait->in_depend_wait = false; |
|
|
gomp_sem_post (&parent->taskwait->taskwait_sem); |
|
|
} |
|
|
- if (parent->taskwait->last_parent_depends_on == NULL |
|
|
- && task->parent_depends_on) |
|
|
- parent->taskwait->last_parent_depends_on = task; |
|
|
} |
|
|
} |
|
|
if (taskgroup) |
|
|
{ |
|
|
- if (taskgroup->children) |
|
|
- { |
|
|
- task->next_taskgroup = taskgroup->children; |
|
|
- task->prev_taskgroup = taskgroup->children->prev_taskgroup; |
|
|
- task->next_taskgroup->prev_taskgroup = task; |
|
|
- task->prev_taskgroup->next_taskgroup = task; |
|
|
- } |
|
|
- else |
|
|
- { |
|
|
- task->next_taskgroup = task; |
|
|
- task->prev_taskgroup = task; |
|
|
- } |
|
|
- taskgroup->children = task; |
|
|
+ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, |
|
|
+ task, task->priority, |
|
|
+ PRIORITY_INSERT_BEGIN, |
|
|
+ /*adjust_parent_depends_on=*/false, |
|
|
+ task->parent_depends_on); |
|
|
if (taskgroup->in_taskgroup_wait) |
|
|
{ |
|
|
+ /* One more task has had its dependencies met. |
|
|
+ Inform any waiters. */ |
|
|
taskgroup->in_taskgroup_wait = false; |
|
|
gomp_sem_post (&taskgroup->taskgroup_sem); |
|
|
} |
|
|
} |
|
|
- if (team->task_queue) |
|
|
- { |
|
|
- task->next_queue = team->task_queue; |
|
|
- task->prev_queue = team->task_queue->prev_queue; |
|
|
- task->next_queue->prev_queue = task; |
|
|
- task->prev_queue->next_queue = task; |
|
|
- } |
|
|
- else |
|
|
- { |
|
|
- task->next_queue = task; |
|
|
- task->prev_queue = task; |
|
|
- team->task_queue = task; |
|
|
- } |
|
|
+ priority_queue_insert (PQ_TEAM, &team->task_queue, |
|
|
+ task, task->priority, |
|
|
+ PRIORITY_INSERT_END, |
|
|
+ /*adjust_parent_depends_on=*/false, |
|
|
+ task->parent_depends_on); |
|
|
++team->task_count; |
|
|
++team->task_queued_count; |
|
|
++ret; |
|
|
@@ -601,12 +1120,18 @@ gomp_task_run_post_handle_depend (struct |
|
|
return gomp_task_run_post_handle_dependers (child_task, team); |
|
|
} |
|
|
|
|
|
+/* Remove CHILD_TASK from its parent. */ |
|
|
+ |
|
|
static inline void |
|
|
gomp_task_run_post_remove_parent (struct gomp_task *child_task) |
|
|
{ |
|
|
struct gomp_task *parent = child_task->parent; |
|
|
if (parent == NULL) |
|
|
return; |
|
|
+ |
|
|
+ /* If this was the last task the parent was depending on, |
|
|
+ synchronize with gomp_task_maybe_wait_for_dependencies so it can |
|
|
+ clean up and return. */ |
|
|
if (__builtin_expect (child_task->parent_depends_on, 0) |
|
|
&& --parent->taskwait->n_depend == 0 |
|
|
&& parent->taskwait->in_depend_wait) |
|
|
@@ -614,36 +1139,31 @@ gomp_task_run_post_remove_parent (struct |
|
|
parent->taskwait->in_depend_wait = false; |
|
|
gomp_sem_post (&parent->taskwait->taskwait_sem); |
|
|
} |
|
|
- child_task->prev_child->next_child = child_task->next_child; |
|
|
- child_task->next_child->prev_child = child_task->prev_child; |
|
|
- if (parent->children != child_task) |
|
|
- return; |
|
|
- if (child_task->next_child != child_task) |
|
|
- parent->children = child_task->next_child; |
|
|
- else |
|
|
+ |
|
|
+ if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue, |
|
|
+ child_task, MEMMODEL_RELEASE) |
|
|
+ && parent->taskwait && parent->taskwait->in_taskwait) |
|
|
{ |
|
|
- /* We access task->children in GOMP_taskwait |
|
|
- outside of the task lock mutex region, so |
|
|
- need a release barrier here to ensure memory |
|
|
- written by child_task->fn above is flushed |
|
|
- before the NULL is written. */ |
|
|
- __atomic_store_n (&parent->children, NULL, MEMMODEL_RELEASE); |
|
|
- if (parent->taskwait && parent->taskwait->in_taskwait) |
|
|
- { |
|
|
- parent->taskwait->in_taskwait = false; |
|
|
- gomp_sem_post (&parent->taskwait->taskwait_sem); |
|
|
- } |
|
|
+ parent->taskwait->in_taskwait = false; |
|
|
+ gomp_sem_post (&parent->taskwait->taskwait_sem); |
|
|
} |
|
|
+ child_task->pnode[PQ_CHILDREN].next = NULL; |
|
|
+ child_task->pnode[PQ_CHILDREN].prev = NULL; |
|
|
} |
|
|
|
|
|
+/* Remove CHILD_TASK from its taskgroup. */ |
|
|
+ |
|
|
static inline void |
|
|
gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task) |
|
|
{ |
|
|
struct gomp_taskgroup *taskgroup = child_task->taskgroup; |
|
|
if (taskgroup == NULL) |
|
|
return; |
|
|
- child_task->prev_taskgroup->next_taskgroup = child_task->next_taskgroup; |
|
|
- child_task->next_taskgroup->prev_taskgroup = child_task->prev_taskgroup; |
|
|
+ bool empty = priority_queue_remove (PQ_TASKGROUP, |
|
|
+ &taskgroup->taskgroup_queue, |
|
|
+ child_task, MEMMODEL_RELAXED); |
|
|
+ child_task->pnode[PQ_TASKGROUP].next = NULL; |
|
|
+ child_task->pnode[PQ_TASKGROUP].prev = NULL; |
|
|
if (taskgroup->num_children > 1) |
|
|
--taskgroup->num_children; |
|
|
else |
|
|
@@ -655,18 +1175,10 @@ gomp_task_run_post_remove_taskgroup (str |
|
|
before the NULL is written. */ |
|
|
__atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE); |
|
|
} |
|
|
- if (taskgroup->children != child_task) |
|
|
- return; |
|
|
- if (child_task->next_taskgroup != child_task) |
|
|
- taskgroup->children = child_task->next_taskgroup; |
|
|
- else |
|
|
+ if (empty && taskgroup->in_taskgroup_wait) |
|
|
{ |
|
|
- taskgroup->children = NULL; |
|
|
- if (taskgroup->in_taskgroup_wait) |
|
|
- { |
|
|
- taskgroup->in_taskgroup_wait = false; |
|
|
- gomp_sem_post (&taskgroup->taskgroup_sem); |
|
|
- } |
|
|
+ taskgroup->in_taskgroup_wait = false; |
|
|
+ gomp_sem_post (&taskgroup->taskgroup_sem); |
|
|
} |
|
|
} |
|
|
|
|
|
@@ -696,11 +1208,15 @@ gomp_barrier_handle_tasks (gomp_barrier_ |
|
|
while (1) |
|
|
{ |
|
|
bool cancelled = false; |
|
|
- if (team->task_queue != NULL) |
|
|
+ if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED)) |
|
|
{ |
|
|
- child_task = team->task_queue; |
|
|
+ bool ignored; |
|
|
+ child_task |
|
|
+ = priority_queue_next_task (PQ_TEAM, &team->task_queue, |
|
|
+ PQ_IGNORED, NULL, |
|
|
+ &ignored); |
|
|
cancelled = gomp_task_run_pre (child_task, child_task->parent, |
|
|
- child_task->taskgroup, team); |
|
|
+ team); |
|
|
if (__builtin_expect (cancelled, 0)) |
|
|
{ |
|
|
if (to_free) |
|
|
@@ -729,7 +1245,29 @@ gomp_barrier_handle_tasks (gomp_barrier_ |
|
|
if (child_task) |
|
|
{ |
|
|
thr->task = child_task; |
|
|
- child_task->fn (child_task->fn_data); |
|
|
+ if (__builtin_expect (child_task->fn == NULL, 0)) |
|
|
+ { |
|
|
+ if (gomp_target_task_fn (child_task->fn_data)) |
|
|
+ { |
|
|
+ thr->task = task; |
|
|
+ gomp_mutex_lock (&team->task_lock); |
|
|
+ child_task->kind = GOMP_TASK_ASYNC_RUNNING; |
|
|
+ team->task_running_count--; |
|
|
+ struct gomp_target_task *ttask |
|
|
+ = (struct gomp_target_task *) child_task->fn_data; |
|
|
+ /* If GOMP_PLUGIN_target_task_completion has run already |
|
|
+ in between gomp_target_task_fn and the mutex lock, |
|
|
+ perform the requeuing here. */ |
|
|
+ if (ttask->state == GOMP_TARGET_TASK_FINISHED) |
|
|
+ gomp_target_task_completion (team, child_task); |
|
|
+ else |
|
|
+ ttask->state = GOMP_TARGET_TASK_RUNNING; |
|
|
+ child_task = NULL; |
|
|
+ continue; |
|
|
+ } |
|
|
+ } |
|
|
+ else |
|
|
+ child_task->fn (child_task->fn_data); |
|
|
thr->task = task; |
|
|
} |
|
|
else |
|
|
@@ -741,7 +1279,7 @@ gomp_barrier_handle_tasks (gomp_barrier_ |
|
|
size_t new_tasks |
|
|
= gomp_task_run_post_handle_depend (child_task, team); |
|
|
gomp_task_run_post_remove_parent (child_task); |
|
|
- gomp_clear_parent (child_task->children); |
|
|
+ gomp_clear_parent (&child_task->children_queue); |
|
|
gomp_task_run_post_remove_taskgroup (child_task); |
|
|
to_free = child_task; |
|
|
child_task = NULL; |
|
|
@@ -765,7 +1303,9 @@ gomp_barrier_handle_tasks (gomp_barrier_ |
|
|
} |
|
|
} |
|
|
|
|
|
-/* Called when encountering a taskwait directive. */ |
|
|
+/* Called when encountering a taskwait directive. |
|
|
+ |
|
|
+ Wait for all children of the current task. */ |
|
|
|
|
|
void |
|
|
GOMP_taskwait (void) |
|
|
@@ -785,15 +1325,16 @@ GOMP_taskwait (void) |
|
|
child thread task work function are seen before we exit from |
|
|
GOMP_taskwait. */ |
|
|
if (task == NULL |
|
|
- || __atomic_load_n (&task->children, MEMMODEL_ACQUIRE) == NULL) |
|
|
+ || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE)) |
|
|
return; |
|
|
|
|
|
memset (&taskwait, 0, sizeof (taskwait)); |
|
|
+ bool child_q = false; |
|
|
gomp_mutex_lock (&team->task_lock); |
|
|
while (1) |
|
|
{ |
|
|
bool cancelled = false; |
|
|
- if (task->children == NULL) |
|
|
+ if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED)) |
|
|
{ |
|
|
bool destroy_taskwait = task->taskwait != NULL; |
|
|
task->taskwait = NULL; |
|
|
@@ -807,12 +1348,14 @@ GOMP_taskwait (void) |
|
|
gomp_sem_destroy (&taskwait.taskwait_sem); |
|
|
return; |
|
|
} |
|
|
- if (task->children->kind == GOMP_TASK_WAITING) |
|
|
+ struct gomp_task *next_task |
|
|
+ = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, |
|
|
+ PQ_TEAM, &team->task_queue, &child_q); |
|
|
+ if (next_task->kind == GOMP_TASK_WAITING) |
|
|
{ |
|
|
- child_task = task->children; |
|
|
+ child_task = next_task; |
|
|
cancelled |
|
|
- = gomp_task_run_pre (child_task, task, child_task->taskgroup, |
|
|
- team); |
|
|
+ = gomp_task_run_pre (child_task, task, team); |
|
|
if (__builtin_expect (cancelled, 0)) |
|
|
{ |
|
|
if (to_free) |
|
|
@@ -826,8 +1369,10 @@ GOMP_taskwait (void) |
|
|
} |
|
|
else |
|
|
{ |
|
|
- /* All tasks we are waiting for are already running |
|
|
- in other threads. Wait for them. */ |
|
|
+ /* All tasks we are waiting for are either running in other |
|
|
+ threads, or they are tasks that have not had their |
|
|
+ dependencies met (so they're not even in the queue). Wait |
|
|
+ for them. */ |
|
|
if (task->taskwait == NULL) |
|
|
{ |
|
|
taskwait.in_depend_wait = false; |
|
|
@@ -851,7 +1396,28 @@ GOMP_taskwait (void) |
|
|
if (child_task) |
|
|
{ |
|
|
thr->task = child_task; |
|
|
- child_task->fn (child_task->fn_data); |
|
|
+ if (__builtin_expect (child_task->fn == NULL, 0)) |
|
|
+ { |
|
|
+ if (gomp_target_task_fn (child_task->fn_data)) |
|
|
+ { |
|
|
+ thr->task = task; |
|
|
+ gomp_mutex_lock (&team->task_lock); |
|
|
+ child_task->kind = GOMP_TASK_ASYNC_RUNNING; |
|
|
+ struct gomp_target_task *ttask |
|
|
+ = (struct gomp_target_task *) child_task->fn_data; |
|
|
+ /* If GOMP_PLUGIN_target_task_completion has run already |
|
|
+ in between gomp_target_task_fn and the mutex lock, |
|
|
+ perform the requeuing here. */ |
|
|
+ if (ttask->state == GOMP_TARGET_TASK_FINISHED) |
|
|
+ gomp_target_task_completion (team, child_task); |
|
|
+ else |
|
|
+ ttask->state = GOMP_TARGET_TASK_RUNNING; |
|
|
+ child_task = NULL; |
|
|
+ continue; |
|
|
+ } |
|
|
+ } |
|
|
+ else |
|
|
+ child_task->fn (child_task->fn_data); |
|
|
thr->task = task; |
|
|
} |
|
|
else |
|
|
@@ -862,17 +1428,19 @@ GOMP_taskwait (void) |
|
|
finish_cancelled:; |
|
|
size_t new_tasks |
|
|
= gomp_task_run_post_handle_depend (child_task, team); |
|
|
- child_task->prev_child->next_child = child_task->next_child; |
|
|
- child_task->next_child->prev_child = child_task->prev_child; |
|
|
- if (task->children == child_task) |
|
|
- { |
|
|
- if (child_task->next_child != child_task) |
|
|
- task->children = child_task->next_child; |
|
|
- else |
|
|
- task->children = NULL; |
|
|
+ |
|
|
+ if (child_q) |
|
|
+ { |
|
|
+ priority_queue_remove (PQ_CHILDREN, &task->children_queue, |
|
|
+ child_task, MEMMODEL_RELAXED); |
|
|
+ child_task->pnode[PQ_CHILDREN].next = NULL; |
|
|
+ child_task->pnode[PQ_CHILDREN].prev = NULL; |
|
|
} |
|
|
- gomp_clear_parent (child_task->children); |
|
|
+ |
|
|
+ gomp_clear_parent (&child_task->children_queue); |
|
|
+ |
|
|
gomp_task_run_post_remove_taskgroup (child_task); |
|
|
+ |
|
|
to_free = child_task; |
|
|
child_task = NULL; |
|
|
team->task_count--; |
|
|
@@ -887,10 +1455,20 @@ GOMP_taskwait (void) |
|
|
} |
|
|
} |
|
|
|
|
|
-/* This is like GOMP_taskwait, but we only wait for tasks that the |
|
|
- upcoming task depends on. */ |
|
|
+/* An undeferred task is about to run. Wait for all tasks that this |
|
|
+ undeferred task depends on. |
|
|
|
|
|
-static void |
|
|
+ This is done by first putting all known ready dependencies |
|
|
+ (dependencies that have their own dependencies met) at the top of |
|
|
+ the scheduling queues. Then we iterate through these imminently |
|
|
+ ready tasks (and possibly other high priority tasks), and run them. |
|
|
+ If we run out of ready dependencies to execute, we either wait for |
|
|
+ the remaining dependencies to finish, or wait for them to get |
|
|
+ scheduled so we can run them. |
|
|
+ |
|
|
+ DEPEND is as in GOMP_task. */ |
|
|
+ |
|
|
+void |
|
|
gomp_task_maybe_wait_for_dependencies (void **depend) |
|
|
{ |
|
|
struct gomp_thread *thr = gomp_thread (); |
|
|
@@ -898,7 +1476,6 @@ gomp_task_maybe_wait_for_dependencies (v |
|
|
struct gomp_team *team = thr->ts.team; |
|
|
struct gomp_task_depend_entry elem, *ent = NULL; |
|
|
struct gomp_taskwait taskwait; |
|
|
- struct gomp_task *last_parent_depends_on = NULL; |
|
|
size_t ndepend = (uintptr_t) depend[0]; |
|
|
size_t nout = (uintptr_t) depend[1]; |
|
|
size_t i; |
|
|
@@ -922,32 +1499,11 @@ gomp_task_maybe_wait_for_dependencies (v |
|
|
{ |
|
|
tsk->parent_depends_on = true; |
|
|
++num_awaited; |
|
|
+ /* If dependency TSK itself has no dependencies and is |
|
|
+ ready to run, move it up front so that we run it as |
|
|
+ soon as possible. */ |
|
|
if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING) |
|
|
- { |
|
|
- /* If a task we need to wait for is not already |
|
|
- running and is ready to be scheduled, move it |
|
|
- to front, so that we run it as soon as possible. */ |
|
|
- if (last_parent_depends_on) |
|
|
- { |
|
|
- tsk->prev_child->next_child = tsk->next_child; |
|
|
- tsk->next_child->prev_child = tsk->prev_child; |
|
|
- tsk->prev_child = last_parent_depends_on; |
|
|
- tsk->next_child = last_parent_depends_on->next_child; |
|
|
- tsk->prev_child->next_child = tsk; |
|
|
- tsk->next_child->prev_child = tsk; |
|
|
- } |
|
|
- else if (tsk != task->children) |
|
|
- { |
|
|
- tsk->prev_child->next_child = tsk->next_child; |
|
|
- tsk->next_child->prev_child = tsk->prev_child; |
|
|
- tsk->prev_child = task->children; |
|
|
- tsk->next_child = task->children->next_child; |
|
|
- task->children = tsk; |
|
|
- tsk->prev_child->next_child = tsk; |
|
|
- tsk->next_child->prev_child = tsk; |
|
|
- } |
|
|
- last_parent_depends_on = tsk; |
|
|
- } |
|
|
+ priority_queue_upgrade_task (tsk, task); |
|
|
} |
|
|
} |
|
|
} |
|
|
@@ -959,7 +1515,6 @@ gomp_task_maybe_wait_for_dependencies (v |
|
|
|
|
|
memset (&taskwait, 0, sizeof (taskwait)); |
|
|
taskwait.n_depend = num_awaited; |
|
|
- taskwait.last_parent_depends_on = last_parent_depends_on; |
|
|
gomp_sem_init (&taskwait.taskwait_sem, 0); |
|
|
task->taskwait = &taskwait; |
|
|
|
|
|
@@ -978,12 +1533,30 @@ gomp_task_maybe_wait_for_dependencies (v |
|
|
gomp_sem_destroy (&taskwait.taskwait_sem); |
|
|
return; |
|
|
} |
|
|
- if (task->children->kind == GOMP_TASK_WAITING) |
|
|
+ |
|
|
+ /* Theoretically when we have multiple priorities, we should |
|
|
+ choose between the highest priority item in |
|
|
+ task->children_queue and team->task_queue here, so we should |
|
|
+ use priority_queue_next_task(). However, since we are |
|
|
+ running an undeferred task, perhaps that makes all tasks it |
|
|
+ depends on undeferred, thus a priority of INF? This would |
|
|
+ make it unnecessary to take anything into account here, |
|
|
+ but the dependencies. |
|
|
+ |
|
|
+ On the other hand, if we want to use priority_queue_next_task(), |
|
|
+ care should be taken to only use priority_queue_remove() |
|
|
+ below if the task was actually removed from the children |
|
|
+ queue. */ |
|
|
+ bool ignored; |
|
|
+ struct gomp_task *next_task |
|
|
+ = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, |
|
|
+ PQ_IGNORED, NULL, &ignored); |
|
|
+ |
|
|
+ if (next_task->kind == GOMP_TASK_WAITING) |
|
|
{ |
|
|
- child_task = task->children; |
|
|
+ child_task = next_task; |
|
|
cancelled |
|
|
- = gomp_task_run_pre (child_task, task, child_task->taskgroup, |
|
|
- team); |
|
|
+ = gomp_task_run_pre (child_task, task, team); |
|
|
if (__builtin_expect (cancelled, 0)) |
|
|
{ |
|
|
if (to_free) |
|
|
@@ -996,8 +1569,10 @@ gomp_task_maybe_wait_for_dependencies (v |
|
|
} |
|
|
} |
|
|
else |
|
|
- /* All tasks we are waiting for are already running |
|
|
- in other threads. Wait for them. */ |
|
|
+ /* All tasks we are waiting for are either running in other |
|
|
+ threads, or they are tasks that have not had their |
|
|
+ dependencies met (so they're not even in the queue). Wait |
|
|
+ for them. */ |
|
|
taskwait.in_depend_wait = true; |
|
|
gomp_mutex_unlock (&team->task_lock); |
|
|
if (do_wake) |
|
|
@@ -1014,7 +1589,28 @@ gomp_task_maybe_wait_for_dependencies (v |
|
|
if (child_task) |
|
|
{ |
|
|
thr->task = child_task; |
|
|
- child_task->fn (child_task->fn_data); |
|
|
+ if (__builtin_expect (child_task->fn == NULL, 0)) |
|
|
+ { |
|
|
+ if (gomp_target_task_fn (child_task->fn_data)) |
|
|
+ { |
|
|
+ thr->task = task; |
|
|
+ gomp_mutex_lock (&team->task_lock); |
|
|
+ child_task->kind = GOMP_TASK_ASYNC_RUNNING; |
|
|
+ struct gomp_target_task *ttask |
|
|
+ = (struct gomp_target_task *) child_task->fn_data; |
|
|
+ /* If GOMP_PLUGIN_target_task_completion has run already |
|
|
+ in between gomp_target_task_fn and the mutex lock, |
|
|
+ perform the requeuing here. */ |
|
|
+ if (ttask->state == GOMP_TARGET_TASK_FINISHED) |
|
|
+ gomp_target_task_completion (team, child_task); |
|
|
+ else |
|
|
+ ttask->state = GOMP_TARGET_TASK_RUNNING; |
|
|
+ child_task = NULL; |
|
|
+ continue; |
|
|
+ } |
|
|
+ } |
|
|
+ else |
|
|
+ child_task->fn (child_task->fn_data); |
|
|
thr->task = task; |
|
|
} |
|
|
else |
|
|
@@ -1027,16 +1623,13 @@ gomp_task_maybe_wait_for_dependencies (v |
|
|
= gomp_task_run_post_handle_depend (child_task, team); |
|
|
if (child_task->parent_depends_on) |
|
|
--taskwait.n_depend; |
|
|
- child_task->prev_child->next_child = child_task->next_child; |
|
|
- child_task->next_child->prev_child = child_task->prev_child; |
|
|
- if (task->children == child_task) |
|
|
- { |
|
|
- if (child_task->next_child != child_task) |
|
|
- task->children = child_task->next_child; |
|
|
- else |
|
|
- task->children = NULL; |
|
|
- } |
|
|
- gomp_clear_parent (child_task->children); |
|
|
+ |
|
|
+ priority_queue_remove (PQ_CHILDREN, &task->children_queue, |
|
|
+ child_task, MEMMODEL_RELAXED); |
|
|
+ child_task->pnode[PQ_CHILDREN].next = NULL; |
|
|
+ child_task->pnode[PQ_CHILDREN].prev = NULL; |
|
|
+ |
|
|
+ gomp_clear_parent (&child_task->children_queue); |
|
|
gomp_task_run_post_remove_taskgroup (child_task); |
|
|
to_free = child_task; |
|
|
child_task = NULL; |
|
|
@@ -1069,14 +1662,14 @@ GOMP_taskgroup_start (void) |
|
|
struct gomp_taskgroup *taskgroup; |
|
|
|
|
|
/* If team is NULL, all tasks are executed as |
|
|
- GOMP_TASK_IFFALSE tasks and thus all children tasks of |
|
|
+ GOMP_TASK_UNDEFERRED tasks and thus all children tasks of |
|
|
taskgroup and their descendant tasks will be finished |
|
|
by the time GOMP_taskgroup_end is called. */ |
|
|
if (team == NULL) |
|
|
return; |
|
|
taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup)); |
|
|
taskgroup->prev = task->taskgroup; |
|
|
- taskgroup->children = NULL; |
|
|
+ priority_queue_init (&taskgroup->taskgroup_queue); |
|
|
taskgroup->in_taskgroup_wait = false; |
|
|
taskgroup->cancelled = false; |
|
|
taskgroup->num_children = 0; |
|
|
@@ -1098,6 +1691,17 @@ GOMP_taskgroup_end (void) |
|
|
if (team == NULL) |
|
|
return; |
|
|
taskgroup = task->taskgroup; |
|
|
+ if (__builtin_expect (taskgroup == NULL, 0) |
|
|
+ && thr->ts.level == 0) |
|
|
+ { |
|
|
+ /* This can happen if GOMP_taskgroup_start is called when |
|
|
+ thr->ts.team == NULL, but inside of the taskgroup there |
|
|
+ is #pragma omp target nowait that creates an implicit |
|
|
+ team with a single thread. In this case, we want to wait |
|
|
+ for all outstanding tasks in this team. */ |
|
|
+ gomp_team_barrier_wait (&team->barrier); |
|
|
+ return; |
|
|
+ } |
|
|
|
|
|
/* The acquire barrier on load of taskgroup->num_children here |
|
|
synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup. |
|
|
@@ -1108,19 +1712,25 @@ GOMP_taskgroup_end (void) |
|
|
if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0) |
|
|
goto finish; |
|
|
|
|
|
+ bool unused; |
|
|
gomp_mutex_lock (&team->task_lock); |
|
|
while (1) |
|
|
{ |
|
|
bool cancelled = false; |
|
|
- if (taskgroup->children == NULL) |
|
|
+ if (priority_queue_empty_p (&taskgroup->taskgroup_queue, |
|
|
+ MEMMODEL_RELAXED)) |
|
|
{ |
|
|
if (taskgroup->num_children) |
|
|
{ |
|
|
- if (task->children == NULL) |
|
|
+ if (priority_queue_empty_p (&task->children_queue, |
|
|
+ MEMMODEL_RELAXED)) |
|
|
goto do_wait; |
|
|
- child_task = task->children; |
|
|
- } |
|
|
- else |
|
|
+ child_task |
|
|
+ = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, |
|
|
+ PQ_TEAM, &team->task_queue, |
|
|
+ &unused); |
|
|
+ } |
|
|
+ else |
|
|
{ |
|
|
gomp_mutex_unlock (&team->task_lock); |
|
|
if (to_free) |
|
|
@@ -1132,12 +1742,13 @@ GOMP_taskgroup_end (void) |
|
|
} |
|
|
} |
|
|
else |
|
|
- child_task = taskgroup->children; |
|
|
+ child_task |
|
|
+ = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue, |
|
|
+ PQ_TEAM, &team->task_queue, &unused); |
|
|
if (child_task->kind == GOMP_TASK_WAITING) |
|
|
{ |
|
|
cancelled |
|
|
- = gomp_task_run_pre (child_task, child_task->parent, taskgroup, |
|
|
- team); |
|
|
+ = gomp_task_run_pre (child_task, child_task->parent, team); |
|
|
if (__builtin_expect (cancelled, 0)) |
|
|
{ |
|
|
if (to_free) |
|
|
@@ -1153,8 +1764,10 @@ GOMP_taskgroup_end (void) |
|
|
{ |
|
|
child_task = NULL; |
|
|
do_wait: |
|
|
- /* All tasks we are waiting for are already running |
|
|
- in other threads. Wait for them. */ |
|
|
+ /* All tasks we are waiting for are either running in other |
|
|
+ threads, or they are tasks that have not had their |
|
|
+ dependencies met (so they're not even in the queue). Wait |
|
|
+ for them. */ |
|
|
taskgroup->in_taskgroup_wait = true; |
|
|
} |
|
|
gomp_mutex_unlock (&team->task_lock); |
|
|
@@ -1172,7 +1785,28 @@ GOMP_taskgroup_end (void) |
|
|
if (child_task) |
|
|
{ |
|
|
thr->task = child_task; |
|
|
- child_task->fn (child_task->fn_data); |
|
|
+ if (__builtin_expect (child_task->fn == NULL, 0)) |
|
|
+ { |
|
|
+ if (gomp_target_task_fn (child_task->fn_data)) |
|
|
+ { |
|
|
+ thr->task = task; |
|
|
+ gomp_mutex_lock (&team->task_lock); |
|
|
+ child_task->kind = GOMP_TASK_ASYNC_RUNNING; |
|
|
+ struct gomp_target_task *ttask |
|
|
+ = (struct gomp_target_task *) child_task->fn_data; |
|
|
+ /* If GOMP_PLUGIN_target_task_completion has run already |
|
|
+ in between gomp_target_task_fn and the mutex lock, |
|
|
+ perform the requeuing here. */ |
|
|
+ if (ttask->state == GOMP_TARGET_TASK_FINISHED) |
|
|
+ gomp_target_task_completion (team, child_task); |
|
|
+ else |
|
|
+ ttask->state = GOMP_TARGET_TASK_RUNNING; |
|
|
+ child_task = NULL; |
|
|
+ continue; |
|
|
+ } |
|
|
+ } |
|
|
+ else |
|
|
+ child_task->fn (child_task->fn_data); |
|
|
thr->task = task; |
|
|
} |
|
|
else |
|
|
@@ -1184,7 +1818,7 @@ GOMP_taskgroup_end (void) |
|
|
size_t new_tasks |
|
|
= gomp_task_run_post_handle_depend (child_task, team); |
|
|
gomp_task_run_post_remove_parent (child_task); |
|
|
- gomp_clear_parent (child_task->children); |
|
|
+ gomp_clear_parent (&child_task->children_queue); |
|
|
gomp_task_run_post_remove_taskgroup (child_task); |
|
|
to_free = child_task; |
|
|
child_task = NULL; |
|
|
--- libgomp/libgomp_g.h.jj 2014-05-15 10:56:31.429532978 +0200 |
|
|
+++ libgomp/libgomp_g.h 2016-07-13 16:57:04.422535521 +0200 |
|
|
@@ -29,6 +29,7 @@ |
|
|
#define LIBGOMP_G_H 1 |
|
|
|
|
|
#include <stdbool.h> |
|
|
+#include <stddef.h> |
|
|
|
|
|
/* barrier.c */ |
|
|
|
|
|
@@ -50,6 +51,10 @@ extern bool GOMP_loop_static_start (long |
|
|
extern bool GOMP_loop_dynamic_start (long, long, long, long, long *, long *); |
|
|
extern bool GOMP_loop_guided_start (long, long, long, long, long *, long *); |
|
|
extern bool GOMP_loop_runtime_start (long, long, long, long *, long *); |
|
|
+extern bool GOMP_loop_nonmonotonic_dynamic_start (long, long, long, long, |
|
|
+ long *, long *); |
|
|
+extern bool GOMP_loop_nonmonotonic_guided_start (long, long, long, long, |
|
|
+ long *, long *); |
|
|
|
|
|
extern bool GOMP_loop_ordered_static_start (long, long, long, long, |
|
|
long *, long *); |
|
|
@@ -63,12 +68,23 @@ extern bool GOMP_loop_static_next (long |
|
|
extern bool GOMP_loop_dynamic_next (long *, long *); |
|
|
extern bool GOMP_loop_guided_next (long *, long *); |
|
|
extern bool GOMP_loop_runtime_next (long *, long *); |
|
|
+extern bool GOMP_loop_nonmonotonic_dynamic_next (long *, long *); |
|
|
+extern bool GOMP_loop_nonmonotonic_guided_next (long *, long *); |
|
|
|
|
|
extern bool GOMP_loop_ordered_static_next (long *, long *); |
|
|
extern bool GOMP_loop_ordered_dynamic_next (long *, long *); |
|
|
extern bool GOMP_loop_ordered_guided_next (long *, long *); |
|
|
extern bool GOMP_loop_ordered_runtime_next (long *, long *); |
|
|
|
|
|
+extern bool GOMP_loop_doacross_static_start (unsigned, long *, long, long *, |
|
|
+ long *); |
|
|
+extern bool GOMP_loop_doacross_dynamic_start (unsigned, long *, long, long *, |
|
|
+ long *); |
|
|
+extern bool GOMP_loop_doacross_guided_start (unsigned, long *, long, long *, |
|
|
+ long *); |
|
|
+extern bool GOMP_loop_doacross_runtime_start (unsigned, long *, long *, |
|
|
+ long *); |
|
|
+ |
|
|
extern void GOMP_parallel_loop_static_start (void (*)(void *), void *, |
|
|
unsigned, long, long, long, long); |
|
|
extern void GOMP_parallel_loop_dynamic_start (void (*)(void *), void *, |
|
|
@@ -89,6 +105,12 @@ extern void GOMP_parallel_loop_guided (v |
|
|
extern void GOMP_parallel_loop_runtime (void (*)(void *), void *, |
|
|
unsigned, long, long, long, |
|
|
unsigned); |
|
|
+extern void GOMP_parallel_loop_nonmonotonic_dynamic (void (*)(void *), void *, |
|
|
+ unsigned, long, long, |
|
|
+ long, long, unsigned); |
|
|
+extern void GOMP_parallel_loop_nonmonotonic_guided (void (*)(void *), void *, |
|
|
+ unsigned, long, long, |
|
|
+ long, long, unsigned); |
|
|
|
|
|
extern void GOMP_loop_end (void); |
|
|
extern void GOMP_loop_end_nowait (void); |
|
|
@@ -119,6 +141,18 @@ extern bool GOMP_loop_ull_runtime_start |
|
|
unsigned long long, |
|
|
unsigned long long *, |
|
|
unsigned long long *); |
|
|
+extern bool GOMP_loop_ull_nonmonotonic_dynamic_start (bool, unsigned long long, |
|
|
+ unsigned long long, |
|
|
+ unsigned long long, |
|
|
+ unsigned long long, |
|
|
+ unsigned long long *, |
|
|
+ unsigned long long *); |
|
|
+extern bool GOMP_loop_ull_nonmonotonic_guided_start (bool, unsigned long long, |
|
|
+ unsigned long long, |
|
|
+ unsigned long long, |
|
|
+ unsigned long long, |
|
|
+ unsigned long long *, |
|
|
+ unsigned long long *); |
|
|
|
|
|
extern bool GOMP_loop_ull_ordered_static_start (bool, unsigned long long, |
|
|
unsigned long long, |
|
|
@@ -152,6 +186,10 @@ extern bool GOMP_loop_ull_guided_next (u |
|
|
unsigned long long *); |
|
|
extern bool GOMP_loop_ull_runtime_next (unsigned long long *, |
|
|
unsigned long long *); |
|
|
+extern bool GOMP_loop_ull_nonmonotonic_dynamic_next (unsigned long long *, |
|
|
+ unsigned long long *); |
|
|
+extern bool GOMP_loop_ull_nonmonotonic_guided_next (unsigned long long *, |
|
|
+ unsigned long long *); |
|
|
|
|
|
extern bool GOMP_loop_ull_ordered_static_next (unsigned long long *, |
|
|
unsigned long long *); |
|
|
@@ -162,10 +200,34 @@ extern bool GOMP_loop_ull_ordered_guided |
|
|
extern bool GOMP_loop_ull_ordered_runtime_next (unsigned long long *, |
|
|
unsigned long long *); |
|
|
|
|
|
+extern bool GOMP_loop_ull_doacross_static_start (unsigned, |
|
|
+ unsigned long long *, |
|
|
+ unsigned long long, |
|
|
+ unsigned long long *, |
|
|
+ unsigned long long *); |
|
|
+extern bool GOMP_loop_ull_doacross_dynamic_start (unsigned, |
|
|
+ unsigned long long *, |
|
|
+ unsigned long long, |
|
|
+ unsigned long long *, |
|
|
+ unsigned long long *); |
|
|
+extern bool GOMP_loop_ull_doacross_guided_start (unsigned, |
|
|
+ unsigned long long *, |
|
|
+ unsigned long long, |
|
|
+ unsigned long long *, |
|
|
+ unsigned long long *); |
|
|
+extern bool GOMP_loop_ull_doacross_runtime_start (unsigned, |
|
|
+ unsigned long long *, |
|
|
+ unsigned long long *, |
|
|
+ unsigned long long *); |
|
|
+ |
|
|
/* ordered.c */ |
|
|
|
|
|
extern void GOMP_ordered_start (void); |
|
|
extern void GOMP_ordered_end (void); |
|
|
+extern void GOMP_doacross_post (long *); |
|
|
+extern void GOMP_doacross_wait (long, ...); |
|
|
+extern void GOMP_doacross_ull_post (unsigned long long *); |
|
|
+extern void GOMP_doacross_ull_wait (unsigned long long, ...); |
|
|
|
|
|
/* parallel.c */ |
|
|
|
|
|
@@ -178,7 +240,15 @@ extern bool GOMP_cancellation_point (int |
|
|
/* task.c */ |
|
|
|
|
|
extern void GOMP_task (void (*) (void *), void *, void (*) (void *, void *), |
|
|
- long, long, bool, unsigned, void **); |
|
|
+ long, long, bool, unsigned, void **, int); |
|
|
+extern void GOMP_taskloop (void (*) (void *), void *, |
|
|
+ void (*) (void *, void *), long, long, unsigned, |
|
|
+ unsigned long, int, long, long, long); |
|
|
+extern void GOMP_taskloop_ull (void (*) (void *), void *, |
|
|
+ void (*) (void *, void *), long, long, |
|
|
+ unsigned, unsigned long, int, |
|
|
+ unsigned long long, unsigned long long, |
|
|
+ unsigned long long); |
|
|
extern void GOMP_taskwait (void); |
|
|
extern void GOMP_taskyield (void); |
|
|
extern void GOMP_taskgroup_start (void); |
|
|
@@ -206,11 +276,38 @@ extern void GOMP_single_copy_end (void * |
|
|
|
|
|
extern void GOMP_target (int, void (*) (void *), const void *, |
|
|
size_t, void **, size_t *, unsigned char *); |
|
|
+extern void GOMP_target_ext (int, void (*) (void *), size_t, void **, size_t *, |
|
|
+ unsigned short *, unsigned int, void **, void **); |
|
|
extern void GOMP_target_data (int, const void *, |
|
|
size_t, void **, size_t *, unsigned char *); |
|
|
+extern void GOMP_target_data_ext (int, size_t, void **, size_t *, |
|
|
+ unsigned short *); |
|
|
extern void GOMP_target_end_data (void); |
|
|
extern void GOMP_target_update (int, const void *, |
|
|
size_t, void **, size_t *, unsigned char *); |
|
|
+extern void GOMP_target_update_ext (int, size_t, void **, size_t *, |
|
|
+ unsigned short *, unsigned int, void **); |
|
|
+extern void GOMP_target_enter_exit_data (int, size_t, void **, size_t *, |
|
|
+ unsigned short *, unsigned int, |
|
|
+ void **); |
|
|
extern void GOMP_teams (unsigned int, unsigned int); |
|
|
|
|
|
+/* oacc-parallel.c */ |
|
|
+ |
|
|
+extern void GOACC_parallel_keyed (int, void (*) (void *), size_t, |
|
|
+ void **, size_t *, unsigned short *, ...); |
|
|
+extern void GOACC_parallel (int, void (*) (void *), size_t, void **, size_t *, |
|
|
+ unsigned short *, int, int, int, int, int, ...); |
|
|
+extern void GOACC_data_start (int, size_t, void **, size_t *, |
|
|
+ unsigned short *); |
|
|
+extern void GOACC_data_end (void); |
|
|
+extern void GOACC_enter_exit_data (int, size_t, void **, |
|
|
+ size_t *, unsigned short *, int, int, ...); |
|
|
+extern void GOACC_update (int, size_t, void **, size_t *, |
|
|
+ unsigned short *, int, int, ...); |
|
|
+extern void GOACC_wait (int, int, ...); |
|
|
+extern int GOACC_get_num_threads (void); |
|
|
+extern int GOACC_get_thread_num (void); |
|
|
+extern void GOACC_declare (int, size_t, void **, size_t *, unsigned short *); |
|
|
+ |
|
|
#endif /* LIBGOMP_G_H */ |
|
|
--- libgomp/libgomp.h.jj 2014-08-01 15:59:49.145188127 +0200 |
|
|
+++ libgomp/libgomp.h 2016-07-14 17:40:24.038243456 +0200 |
|
|
@@ -34,12 +34,35 @@ |
|
|
#ifndef LIBGOMP_H |
|
|
#define LIBGOMP_H 1 |
|
|
|
|
|
+#ifndef _LIBGOMP_CHECKING_ |
|
|
+/* Define to 1 to perform internal sanity checks. */ |
|
|
+#define _LIBGOMP_CHECKING_ 0 |
|
|
+#endif |
|
|
+ |
|
|
#include "config.h" |
|
|
#include "gstdint.h" |
|
|
+#include "libgomp-plugin.h" |
|
|
|
|
|
#include <pthread.h> |
|
|
#include <stdbool.h> |
|
|
#include <stdlib.h> |
|
|
+#include <stdarg.h> |
|
|
+ |
|
|
+/* Needed for memset in priority_queue.c. */ |
|
|
+#if _LIBGOMP_CHECKING_ |
|
|
+# ifdef STRING_WITH_STRINGS |
|
|
+# include <string.h> |
|
|
+# include <strings.h> |
|
|
+# else |
|
|
+# ifdef HAVE_STRING_H |
|
|
+# include <string.h> |
|
|
+# else |
|
|
+# ifdef HAVE_STRINGS_H |
|
|
+# include <strings.h> |
|
|
+# endif |
|
|
+# endif |
|
|
+# endif |
|
|
+#endif |
|
|
|
|
|
#ifdef HAVE_ATTRIBUTE_VISIBILITY |
|
|
# pragma GCC visibility push(hidden) |
|
|
@@ -56,6 +79,44 @@ enum memmodel |
|
|
MEMMODEL_SEQ_CST = 5 |
|
|
}; |
|
|
|
|
|
+/* alloc.c */ |
|
|
+ |
|
|
+extern void *gomp_malloc (size_t) __attribute__((malloc)); |
|
|
+extern void *gomp_malloc_cleared (size_t) __attribute__((malloc)); |
|
|
+extern void *gomp_realloc (void *, size_t); |
|
|
+ |
|
|
+/* Avoid conflicting prototypes of alloca() in system headers by using |
|
|
+ GCC's builtin alloca(). */ |
|
|
+#define gomp_alloca(x) __builtin_alloca(x) |
|
|
+ |
|
|
+/* error.c */ |
|
|
+ |
|
|
+extern void gomp_vdebug (int, const char *, va_list); |
|
|
+extern void gomp_debug (int, const char *, ...) |
|
|
+ __attribute__ ((format (printf, 2, 3))); |
|
|
+#define gomp_vdebug(KIND, FMT, VALIST) \ |
|
|
+ do { \ |
|
|
+ if (__builtin_expect (gomp_debug_var, 0)) \ |
|
|
+ (gomp_vdebug) ((KIND), (FMT), (VALIST)); \ |
|
|
+ } while (0) |
|
|
+#define gomp_debug(KIND, ...) \ |
|
|
+ do { \ |
|
|
+ if (__builtin_expect (gomp_debug_var, 0)) \ |
|
|
+ (gomp_debug) ((KIND), __VA_ARGS__); \ |
|
|
+ } while (0) |
|
|
+extern void gomp_verror (const char *, va_list); |
|
|
+extern void gomp_error (const char *, ...) |
|
|
+ __attribute__ ((format (printf, 1, 2))); |
|
|
+extern void gomp_vfatal (const char *, va_list) |
|
|
+ __attribute__ ((noreturn)); |
|
|
+extern void gomp_fatal (const char *, ...) |
|
|
+ __attribute__ ((noreturn, format (printf, 1, 2))); |
|
|
+ |
|
|
+struct gomp_task; |
|
|
+struct gomp_taskgroup; |
|
|
+struct htab; |
|
|
+ |
|
|
+#include "priority_queue.h" |
|
|
#include "sem.h" |
|
|
#include "mutex.h" |
|
|
#include "bar.h" |
|
|
@@ -74,6 +135,44 @@ enum gomp_schedule_type |
|
|
GFS_AUTO |
|
|
}; |
|
|
|
|
|
+struct gomp_doacross_work_share |
|
|
+{ |
|
|
+ union { |
|
|
+ /* chunk_size copy, as ws->chunk_size is multiplied by incr for |
|
|
+ GFS_DYNAMIC. */ |
|
|
+ long chunk_size; |
|
|
+ /* Likewise, but for ull implementation. */ |
|
|
+ unsigned long long chunk_size_ull; |
|
|
+ /* For schedule(static,0) this is the number |
|
|
+ of iterations assigned to the last thread, i.e. number of |
|
|
+ iterations / number of threads. */ |
|
|
+ long q; |
|
|
+ /* Likewise, but for ull implementation. */ |
|
|
+ unsigned long long q_ull; |
|
|
+ }; |
|
|
+ /* Size of each array entry (padded to cache line size). */ |
|
|
+ unsigned long elt_sz; |
|
|
+ /* Number of dimensions in sink vectors. */ |
|
|
+ unsigned int ncounts; |
|
|
+ /* True if the iterations can be flattened. */ |
|
|
+ bool flattened; |
|
|
+ /* Actual array (of elt_sz sized units), aligned to cache line size. |
|
|
+ This is indexed by team_id for GFS_STATIC and outermost iteration |
|
|
+ / chunk_size for other schedules. */ |
|
|
+ unsigned char *array; |
|
|
+ /* These two are only used for schedule(static,0). */ |
|
|
+ /* This one is number of iterations % number of threads. */ |
|
|
+ long t; |
|
|
+ union { |
|
|
+ /* And this one is cached t * (q + 1). */ |
|
|
+ long boundary; |
|
|
+ /* Likewise, but for the ull implementation. */ |
|
|
+ unsigned long long boundary_ull; |
|
|
+ }; |
|
|
+ /* Array of shift counts for each dimension if they can be flattened. */ |
|
|
+ unsigned int shift_counts[]; |
|
|
+}; |
|
|
+ |
|
|
struct gomp_work_share |
|
|
{ |
|
|
/* This member records the SCHEDULE clause to be used for this construct. |
|
|
@@ -105,13 +204,18 @@ struct gomp_work_share |
|
|
}; |
|
|
}; |
|
|
|
|
|
- /* This is a circular queue that details which threads will be allowed |
|
|
- into the ordered region and in which order. When a thread allocates |
|
|
- iterations on which it is going to work, it also registers itself at |
|
|
- the end of the array. When a thread reaches the ordered region, it |
|
|
- checks to see if it is the one at the head of the queue. If not, it |
|
|
- blocks on its RELEASE semaphore. */ |
|
|
- unsigned *ordered_team_ids; |
|
|
+ union { |
|
|
+ /* This is a circular queue that details which threads will be allowed |
|
|
+ into the ordered region and in which order. When a thread allocates |
|
|
+ iterations on which it is going to work, it also registers itself at |
|
|
+ the end of the array. When a thread reaches the ordered region, it |
|
|
+ checks to see if it is the one at the head of the queue. If not, it |
|
|
+ blocks on its RELEASE semaphore. */ |
|
|
+ unsigned *ordered_team_ids; |
|
|
+ |
|
|
+ /* This is a pointer to DOACROSS work share data. */ |
|
|
+ struct gomp_doacross_work_share *doacross; |
|
|
+ }; |
|
|
|
|
|
/* This is the number of threads that have registered themselves in |
|
|
the circular queue ordered_team_ids. */ |
|
|
@@ -230,7 +334,7 @@ struct gomp_task_icv |
|
|
{ |
|
|
unsigned long nthreads_var; |
|
|
enum gomp_schedule_type run_sched_var; |
|
|
- int run_sched_modifier; |
|
|
+ int run_sched_chunk_size; |
|
|
int default_device_var; |
|
|
unsigned int thread_limit_var; |
|
|
bool dyn_var; |
|
|
@@ -246,6 +350,7 @@ extern gomp_mutex_t gomp_managed_threads |
|
|
#endif |
|
|
extern unsigned long gomp_max_active_levels_var; |
|
|
extern bool gomp_cancel_var; |
|
|
+extern int gomp_max_task_priority_var; |
|
|
extern unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var; |
|
|
extern unsigned long gomp_available_cpus, gomp_managed_threads; |
|
|
extern unsigned long *gomp_nthreads_var_list, gomp_nthreads_var_list_len; |
|
|
@@ -253,25 +358,36 @@ extern char *gomp_bind_var_list; |
|
|
extern unsigned long gomp_bind_var_list_len; |
|
|
extern void **gomp_places_list; |
|
|
extern unsigned long gomp_places_list_len; |
|
|
+extern int gomp_debug_var; |
|
|
+extern int goacc_device_num; |
|
|
+extern char *goacc_device_type; |
|
|
|
|
|
enum gomp_task_kind |
|
|
{ |
|
|
+ /* Implicit task. */ |
|
|
GOMP_TASK_IMPLICIT, |
|
|
- GOMP_TASK_IFFALSE, |
|
|
+ /* Undeferred task. */ |
|
|
+ GOMP_TASK_UNDEFERRED, |
|
|
+ /* Task created by GOMP_task and waiting to be run. */ |
|
|
GOMP_TASK_WAITING, |
|
|
- GOMP_TASK_TIED |
|
|
+ /* Task currently executing or scheduled and about to execute. */ |
|
|
+ GOMP_TASK_TIED, |
|
|
+ /* Used for target tasks that have vars mapped and async run started, |
|
|
+ but not yet completed. Once that completes, they will be readded |
|
|
+ into the queues as GOMP_TASK_WAITING in order to perform the var |
|
|
+ unmapping. */ |
|
|
+ GOMP_TASK_ASYNC_RUNNING |
|
|
}; |
|
|
|
|
|
-struct gomp_task; |
|
|
-struct gomp_taskgroup; |
|
|
-struct htab; |
|
|
- |
|
|
struct gomp_task_depend_entry |
|
|
{ |
|
|
+ /* Address of dependency. */ |
|
|
void *addr; |
|
|
struct gomp_task_depend_entry *next; |
|
|
struct gomp_task_depend_entry *prev; |
|
|
+ /* Task that provides the dependency in ADDR. */ |
|
|
struct gomp_task *task; |
|
|
+ /* Depend entry is of type "IN". */ |
|
|
bool is_in; |
|
|
bool redundant; |
|
|
bool redundant_out; |
|
|
@@ -290,8 +406,8 @@ struct gomp_taskwait |
|
|
{ |
|
|
bool in_taskwait; |
|
|
bool in_depend_wait; |
|
|
+ /* Number of tasks we are waiting for. */ |
|
|
size_t n_depend; |
|
|
- struct gomp_task *last_parent_depends_on; |
|
|
gomp_sem_t taskwait_sem; |
|
|
}; |
|
|
|
|
|
@@ -299,20 +415,31 @@ struct gomp_taskwait |
|
|
|
|
|
struct gomp_task |
|
|
{ |
|
|
+ /* Parent of this task. */ |
|
|
struct gomp_task *parent; |
|
|
- struct gomp_task *children; |
|
|
- struct gomp_task *next_child; |
|
|
- struct gomp_task *prev_child; |
|
|
- struct gomp_task *next_queue; |
|
|
- struct gomp_task *prev_queue; |
|
|
- struct gomp_task *next_taskgroup; |
|
|
- struct gomp_task *prev_taskgroup; |
|
|
+ /* Children of this task. */ |
|
|
+ struct priority_queue children_queue; |
|
|
+ /* Taskgroup this task belongs in. */ |
|
|
struct gomp_taskgroup *taskgroup; |
|
|
+ /* Tasks that depend on this task. */ |
|
|
struct gomp_dependers_vec *dependers; |
|
|
struct htab *depend_hash; |
|
|
struct gomp_taskwait *taskwait; |
|
|
+ /* Number of items in DEPEND. */ |
|
|
size_t depend_count; |
|
|
+ /* Number of tasks this task depends on. Once this counter reaches |
|
|
+ 0, we have no unsatisfied dependencies, and this task can be put |
|
|
+ into the various queues to be scheduled. */ |
|
|
size_t num_dependees; |
|
|
+ |
|
|
+ /* Priority of this task. */ |
|
|
+ int priority; |
|
|
+ /* The priority node for this task in each of the different queues. |
|
|
+ We put this here to avoid allocating space for each priority |
|
|
+ node. Then we play offsetof() games to convert between pnode[] |
|
|
+ entries and the gomp_task in which they reside. */ |
|
|
+ struct priority_node pnode[3]; |
|
|
+ |
|
|
struct gomp_task_icv icv; |
|
|
void (*fn) (void *); |
|
|
void *fn_data; |
|
|
@@ -320,20 +447,58 @@ struct gomp_task |
|
|
bool in_tied_task; |
|
|
bool final_task; |
|
|
bool copy_ctors_done; |
|
|
+ /* Set for undeferred tasks with unsatisfied dependencies which |
|
|
+ block further execution of their parent until the dependencies |
|
|
+ are satisfied. */ |
|
|
bool parent_depends_on; |
|
|
+ /* Dependencies provided and/or needed for this task. DEPEND_COUNT |
|
|
+ is the number of items available. */ |
|
|
struct gomp_task_depend_entry depend[]; |
|
|
}; |
|
|
|
|
|
+/* This structure describes a single #pragma omp taskgroup. */ |
|
|
+ |
|
|
struct gomp_taskgroup |
|
|
{ |
|
|
struct gomp_taskgroup *prev; |
|
|
- struct gomp_task *children; |
|
|
+ /* Queue of tasks that belong in this taskgroup. */ |
|
|
+ struct priority_queue taskgroup_queue; |
|
|
bool in_taskgroup_wait; |
|
|
bool cancelled; |
|
|
gomp_sem_t taskgroup_sem; |
|
|
size_t num_children; |
|
|
}; |
|
|
|
|
|
+/* Various states of OpenMP async offloading tasks. */ |
|
|
+enum gomp_target_task_state |
|
|
+{ |
|
|
+ GOMP_TARGET_TASK_DATA, |
|
|
+ GOMP_TARGET_TASK_BEFORE_MAP, |
|
|
+ GOMP_TARGET_TASK_FALLBACK, |
|
|
+ GOMP_TARGET_TASK_READY_TO_RUN, |
|
|
+ GOMP_TARGET_TASK_RUNNING, |
|
|
+ GOMP_TARGET_TASK_FINISHED |
|
|
+}; |
|
|
+ |
|
|
+/* This structure describes a target task. */ |
|
|
+ |
|
|
+struct gomp_target_task |
|
|
+{ |
|
|
+ struct gomp_device_descr *devicep; |
|
|
+ void (*fn) (void *); |
|
|
+ size_t mapnum; |
|
|
+ size_t *sizes; |
|
|
+ unsigned short *kinds; |
|
|
+ unsigned int flags; |
|
|
+ enum gomp_target_task_state state; |
|
|
+ struct target_mem_desc *tgt; |
|
|
+ struct gomp_task *task; |
|
|
+ struct gomp_team *team; |
|
|
+ /* Device-specific target arguments. */ |
|
|
+ void **args; |
|
|
+ void *hostaddrs[]; |
|
|
+}; |
|
|
+ |
|
|
/* This structure describes a "team" of threads. These are the threads |
|
|
that are spawned by a PARALLEL constructs, as well as the work sharing |
|
|
constructs that the team encounters. */ |
|
|
@@ -396,7 +561,8 @@ struct gomp_team |
|
|
struct gomp_work_share work_shares[8]; |
|
|
|
|
|
gomp_mutex_t task_lock; |
|
|
- struct gomp_task *task_queue; |
|
|
+ /* Scheduled tasks. */ |
|
|
+ struct priority_queue task_queue; |
|
|
/* Number of all GOMP_TASK_{WAITING,TIED} tasks in the team. */ |
|
|
unsigned int task_count; |
|
|
/* Number of GOMP_TASK_WAITING tasks currently waiting to be scheduled. */ |
|
|
@@ -451,6 +617,9 @@ struct gomp_thread_pool |
|
|
struct gomp_thread **threads; |
|
|
unsigned threads_size; |
|
|
unsigned threads_used; |
|
|
+ /* The last team is used for non-nested teams to delay their destruction to |
|
|
+ make sure all the threads in the team move on to the pool's barrier before |
|
|
+ the team's barrier is destroyed. */ |
|
|
struct gomp_team *last_team; |
|
|
/* Number of threads running in this contention group. */ |
|
|
unsigned long threads_busy; |
|
|
@@ -519,23 +688,7 @@ extern bool gomp_affinity_same_place (vo |
|
|
extern bool gomp_affinity_finalize_place_list (bool); |
|
|
extern bool gomp_affinity_init_level (int, unsigned long, bool); |
|
|
extern void gomp_affinity_print_place (void *); |
|
|
- |
|
|
-/* alloc.c */ |
|
|
- |
|
|
-extern void *gomp_malloc (size_t) __attribute__((malloc)); |
|
|
-extern void *gomp_malloc_cleared (size_t) __attribute__((malloc)); |
|
|
-extern void *gomp_realloc (void *, size_t); |
|
|
- |
|
|
-/* Avoid conflicting prototypes of alloca() in system headers by using |
|
|
- GCC's builtin alloca(). */ |
|
|
-#define gomp_alloca(x) __builtin_alloca(x) |
|
|
- |
|
|
-/* error.c */ |
|
|
- |
|
|
-extern void gomp_error (const char *, ...) |
|
|
- __attribute__((format (printf, 1, 2))); |
|
|
-extern void gomp_fatal (const char *, ...) |
|
|
- __attribute__((noreturn, format (printf, 1, 2))); |
|
|
+extern void gomp_get_place_proc_ids_8 (int, int64_t *); |
|
|
|
|
|
/* iter.c */ |
|
|
|
|
|
@@ -572,6 +725,9 @@ extern void gomp_ordered_next (void); |
|
|
extern void gomp_ordered_static_init (void); |
|
|
extern void gomp_ordered_static_next (void); |
|
|
extern void gomp_ordered_sync (void); |
|
|
+extern void gomp_doacross_init (unsigned, long *, long); |
|
|
+extern void gomp_doacross_ull_init (unsigned, unsigned long long *, |
|
|
+ unsigned long long); |
|
|
|
|
|
/* parallel.c */ |
|
|
|
|
|
@@ -588,6 +744,12 @@ extern void gomp_init_task (struct gomp_ |
|
|
struct gomp_task_icv *); |
|
|
extern void gomp_end_task (void); |
|
|
extern void gomp_barrier_handle_tasks (gomp_barrier_state_t); |
|
|
+extern void gomp_task_maybe_wait_for_dependencies (void **); |
|
|
+extern bool gomp_create_target_task (struct gomp_device_descr *, |
|
|
+ void (*) (void *), size_t, void **, |
|
|
+ size_t *, unsigned short *, unsigned int, |
|
|
+ void **, void **, |
|
|
+ enum gomp_target_task_state); |
|
|
|
|
|
static void inline |
|
|
gomp_finish_task (struct gomp_task *task) |
|
|
@@ -606,7 +768,213 @@ extern void gomp_free_thread (void *); |
|
|
|
|
|
/* target.c */ |
|
|
|
|
|
+extern void gomp_init_targets_once (void); |
|
|
extern int gomp_get_num_devices (void); |
|
|
+extern bool gomp_target_task_fn (void *); |
|
|
+ |
|
|
+/* Splay tree definitions. */ |
|
|
+typedef struct splay_tree_node_s *splay_tree_node; |
|
|
+typedef struct splay_tree_s *splay_tree; |
|
|
+typedef struct splay_tree_key_s *splay_tree_key; |
|
|
+ |
|
|
+struct target_var_desc { |
|
|
+ /* Splay key. */ |
|
|
+ splay_tree_key key; |
|
|
+ /* True if data should be copied from device to host at the end. */ |
|
|
+ bool copy_from; |
|
|
+ /* True if data always should be copied from device to host at the end. */ |
|
|
+ bool always_copy_from; |
|
|
+ /* Relative offset against key host_start. */ |
|
|
+ uintptr_t offset; |
|
|
+ /* Actual length. */ |
|
|
+ uintptr_t length; |
|
|
+}; |
|
|
+ |
|
|
+struct target_mem_desc { |
|
|
+ /* Reference count. */ |
|
|
+ uintptr_t refcount; |
|
|
+ /* All the splay nodes allocated together. */ |
|
|
+ splay_tree_node array; |
|
|
+ /* Start of the target region. */ |
|
|
+ uintptr_t tgt_start; |
|
|
+ /* End of the target region. */ |
|
|
+ uintptr_t tgt_end; |
|
|
+ /* Handle to free. */ |
|
|
+ void *to_free; |
|
|
+ /* Previous target_mem_desc. */ |
|
|
+ struct target_mem_desc *prev; |
|
|
+ /* Number of items in following list. */ |
|
|
+ size_t list_count; |
|
|
+ |
|
|
+ /* Corresponding target device descriptor. */ |
|
|
+ struct gomp_device_descr *device_descr; |
|
|
+ |
|
|
+ /* List of target items to remove (or decrease refcount) |
|
|
+ at the end of region. */ |
|
|
+ struct target_var_desc list[]; |
|
|
+}; |
|
|
+ |
|
|
+/* Special value for refcount - infinity. */ |
|
|
+#define REFCOUNT_INFINITY (~(uintptr_t) 0) |
|
|
+/* Special value for refcount - tgt_offset contains target address of the |
|
|
+ artificial pointer to "omp declare target link" object. */ |
|
|
+#define REFCOUNT_LINK (~(uintptr_t) 1) |
|
|
+ |
|
|
+struct splay_tree_key_s { |
|
|
+ /* Address of the host object. */ |
|
|
+ uintptr_t host_start; |
|
|
+ /* Address immediately after the host object. */ |
|
|
+ uintptr_t host_end; |
|
|
+ /* Descriptor of the target memory. */ |
|
|
+ struct target_mem_desc *tgt; |
|
|
+ /* Offset from tgt->tgt_start to the start of the target object. */ |
|
|
+ uintptr_t tgt_offset; |
|
|
+ /* Reference count. */ |
|
|
+ uintptr_t refcount; |
|
|
+ /* Pointer to the original mapping of "omp declare target link" object. */ |
|
|
+ splay_tree_key link_key; |
|
|
+}; |
|
|
+ |
|
|
+/* The comparison function. */ |
|
|
+ |
|
|
+static inline int |
|
|
+splay_compare (splay_tree_key x, splay_tree_key y) |
|
|
+{ |
|
|
+ if (x->host_start == x->host_end |
|
|
+ && y->host_start == y->host_end) |
|
|
+ return 0; |
|
|
+ if (x->host_end <= y->host_start) |
|
|
+ return -1; |
|
|
+ if (x->host_start >= y->host_end) |
|
|
+ return 1; |
|
|
+ return 0; |
|
|
+} |
|
|
+ |
|
|
+#include "splay-tree.h" |
|
|
+ |
|
|
+typedef struct acc_dispatch_t |
|
|
+{ |
|
|
+ /* This is a linked list of data mapped using the |
|
|
+ acc_map_data/acc_unmap_data or "acc enter data"/"acc exit data" pragmas. |
|
|
+ Unlike mapped_data in the goacc_thread struct, unmapping can |
|
|
+ happen out-of-order with respect to mapping. */ |
|
|
+ /* This is guarded by the lock in the "outer" struct gomp_device_descr. */ |
|
|
+ struct target_mem_desc *data_environ; |
|
|
+ |
|
|
+ /* Execute. */ |
|
|
+ void (*exec_func) (void (*) (void *), size_t, void **, void **, int, |
|
|
+ unsigned *, void *); |
|
|
+ |
|
|
+ /* Async cleanup callback registration. */ |
|
|
+ void (*register_async_cleanup_func) (void *, int); |
|
|
+ |
|
|
+ /* Asynchronous routines. */ |
|
|
+ int (*async_test_func) (int); |
|
|
+ int (*async_test_all_func) (void); |
|
|
+ void (*async_wait_func) (int); |
|
|
+ void (*async_wait_async_func) (int, int); |
|
|
+ void (*async_wait_all_func) (void); |
|
|
+ void (*async_wait_all_async_func) (int); |
|
|
+ void (*async_set_async_func) (int); |
|
|
+ |
|
|
+ /* Create/destroy TLS data. */ |
|
|
+ void *(*create_thread_data_func) (int); |
|
|
+ void (*destroy_thread_data_func) (void *); |
|
|
+ |
|
|
+ /* NVIDIA target specific routines. */ |
|
|
+ struct { |
|
|
+ void *(*get_current_device_func) (void); |
|
|
+ void *(*get_current_context_func) (void); |
|
|
+ void *(*get_stream_func) (int); |
|
|
+ int (*set_stream_func) (int, void *); |
|
|
+ } cuda; |
|
|
+} acc_dispatch_t; |
|
|
+ |
|
|
+/* Various states of the accelerator device. */ |
|
|
+enum gomp_device_state |
|
|
+{ |
|
|
+ GOMP_DEVICE_UNINITIALIZED, |
|
|
+ GOMP_DEVICE_INITIALIZED, |
|
|
+ GOMP_DEVICE_FINALIZED |
|
|
+}; |
|
|
+ |
|
|
+/* This structure describes accelerator device. |
|
|
+ It contains name of the corresponding libgomp plugin, function handlers for |
|
|
+ interaction with the device, ID-number of the device, and information about |
|
|
+ mapped memory. */ |
|
|
+struct gomp_device_descr |
|
|
+{ |
|
|
+ /* Immutable data, which is only set during initialization, and which is not |
|
|
+ guarded by the lock. */ |
|
|
+ |
|
|
+ /* The name of the device. */ |
|
|
+ const char *name; |
|
|
+ |
|
|
+ /* Capabilities of device (supports OpenACC, OpenMP). */ |
|
|
+ unsigned int capabilities; |
|
|
+ |
|
|
+ /* This is the ID number of device among devices of the same type. */ |
|
|
+ int target_id; |
|
|
+ |
|
|
+ /* This is the TYPE of device. */ |
|
|
+ enum offload_target_type type; |
|
|
+ |
|
|
+ /* Function handlers. */ |
|
|
+ const char *(*get_name_func) (void); |
|
|
+ unsigned int (*get_caps_func) (void); |
|
|
+ int (*get_type_func) (void); |
|
|
+ int (*get_num_devices_func) (void); |
|
|
+ bool (*init_device_func) (int); |
|
|
+ bool (*fini_device_func) (int); |
|
|
+ unsigned (*version_func) (void); |
|
|
+ int (*load_image_func) (int, unsigned, const void *, struct addr_pair **); |
|
|
+ bool (*unload_image_func) (int, unsigned, const void *); |
|
|
+ void *(*alloc_func) (int, size_t); |
|
|
+ bool (*free_func) (int, void *); |
|
|
+ bool (*dev2host_func) (int, void *, const void *, size_t); |
|
|
+ bool (*host2dev_func) (int, void *, const void *, size_t); |
|
|
+ bool (*dev2dev_func) (int, void *, const void *, size_t); |
|
|
+ bool (*can_run_func) (void *); |
|
|
+ void (*run_func) (int, void *, void *, void **); |
|
|
+ void (*async_run_func) (int, void *, void *, void **, void *); |
|
|
+ |
|
|
+ /* Splay tree containing information about mapped memory regions. */ |
|
|
+ struct splay_tree_s mem_map; |
|
|
+ |
|
|
+ /* Mutex for the mutable data. */ |
|
|
+ gomp_mutex_t lock; |
|
|
+ |
|
|
+ /* Current state of the device. OpenACC allows to move from INITIALIZED state |
|
|
+ back to UNINITIALIZED state. OpenMP allows only to move from INITIALIZED |
|
|
+ to FINALIZED state (at program shutdown). */ |
|
|
+ enum gomp_device_state state; |
|
|
+ |
|
|
+ /* OpenACC-specific data and functions. */ |
|
|
+ /* This is mutable because of its mutable data_environ |
|
|
+ member. */ |
|
|
+ acc_dispatch_t openacc; |
|
|
+}; |
|
|
+ |
|
|
+/* Kind of the pragma, for which gomp_map_vars () is called. */ |
|
|
+enum gomp_map_vars_kind |
|
|
+{ |
|
|
+ GOMP_MAP_VARS_OPENACC, |
|
|
+ GOMP_MAP_VARS_TARGET, |
|
|
+ GOMP_MAP_VARS_DATA, |
|
|
+ GOMP_MAP_VARS_ENTER_DATA |
|
|
+}; |
|
|
+ |
|
|
+extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *); |
|
|
+extern void gomp_acc_remove_pointer (void *, bool, int, int); |
|
|
+ |
|
|
+extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *, |
|
|
+ size_t, void **, void **, |
|
|
+ size_t *, void *, bool, |
|
|
+ enum gomp_map_vars_kind); |
|
|
+extern void gomp_unmap_vars (struct target_mem_desc *, bool); |
|
|
+extern void gomp_init_device (struct gomp_device_descr *); |
|
|
+extern void gomp_free_memmap (struct splay_tree_s *); |
|
|
+extern void gomp_unload_device (struct gomp_device_descr *); |
|
|
|
|
|
/* work.c */ |
|
|
|
|
|
@@ -646,8 +1014,28 @@ typedef enum omp_proc_bind_t |
|
|
omp_proc_bind_spread = 4 |
|
|
} omp_proc_bind_t; |
|
|
|
|
|
+typedef enum omp_lock_hint_t |
|
|
+{ |
|
|
+ omp_lock_hint_none = 0, |
|
|
+ omp_lock_hint_uncontended = 1, |
|
|
+ omp_lock_hint_contended = 2, |
|
|
+ omp_lock_hint_nonspeculative = 4, |
|
|
+ omp_lock_hint_speculative = 8, |
|
|
+} omp_lock_hint_t; |
|
|
+ |
|
|
+extern void omp_init_lock_with_hint (omp_lock_t *, omp_lock_hint_t) |
|
|
+ __GOMP_NOTHROW; |
|
|
+extern void omp_init_nest_lock_with_hint (omp_lock_t *, omp_lock_hint_t) |
|
|
+ __GOMP_NOTHROW; |
|
|
+ |
|
|
extern int omp_get_cancellation (void) __GOMP_NOTHROW; |
|
|
extern omp_proc_bind_t omp_get_proc_bind (void) __GOMP_NOTHROW; |
|
|
+extern int omp_get_num_places (void) __GOMP_NOTHROW; |
|
|
+extern int omp_get_place_num_procs (int) __GOMP_NOTHROW; |
|
|
+extern void omp_get_place_proc_ids (int, int *) __GOMP_NOTHROW; |
|
|
+extern int omp_get_place_num (void) __GOMP_NOTHROW; |
|
|
+extern int omp_get_partition_num_places (void) __GOMP_NOTHROW; |
|
|
+extern void omp_get_partition_place_nums (int *) __GOMP_NOTHROW; |
|
|
|
|
|
extern void omp_set_default_device (int) __GOMP_NOTHROW; |
|
|
extern int omp_get_default_device (void) __GOMP_NOTHROW; |
|
|
@@ -656,6 +1044,24 @@ extern int omp_get_num_teams (void) __GO |
|
|
extern int omp_get_team_num (void) __GOMP_NOTHROW; |
|
|
|
|
|
extern int omp_is_initial_device (void) __GOMP_NOTHROW; |
|
|
+extern int omp_get_initial_device (void) __GOMP_NOTHROW; |
|
|
+extern int omp_get_max_task_priority (void) __GOMP_NOTHROW; |
|
|
+ |
|
|
+extern void *omp_target_alloc (__SIZE_TYPE__, int) __GOMP_NOTHROW; |
|
|
+extern void omp_target_free (void *, int) __GOMP_NOTHROW; |
|
|
+extern int omp_target_is_present (void *, int) __GOMP_NOTHROW; |
|
|
+extern int omp_target_memcpy (void *, void *, __SIZE_TYPE__, __SIZE_TYPE__, |
|
|
+ __SIZE_TYPE__, int, int) __GOMP_NOTHROW; |
|
|
+extern int omp_target_memcpy_rect (void *, void *, __SIZE_TYPE__, int, |
|
|
+ const __SIZE_TYPE__ *, |
|
|
+ const __SIZE_TYPE__ *, |
|
|
+ const __SIZE_TYPE__ *, |
|
|
+ const __SIZE_TYPE__ *, |
|
|
+ const __SIZE_TYPE__ *, int, int) |
|
|
+ __GOMP_NOTHROW; |
|
|
+extern int omp_target_associate_ptr (void *, void *, __SIZE_TYPE__, |
|
|
+ __SIZE_TYPE__, int) __GOMP_NOTHROW; |
|
|
+extern int omp_target_disassociate_ptr (void *, int) __GOMP_NOTHROW; |
|
|
|
|
|
#if !defined (HAVE_ATTRIBUTE_VISIBILITY) \ |
|
|
|| !defined (HAVE_ATTRIBUTE_ALIAS) \ |
|
|
@@ -728,4 +1134,34 @@ extern int gomp_test_nest_lock_25 (omp_n |
|
|
# define ialias_call(fn) fn |
|
|
#endif |
|
|
|
|
|
+/* Helper function for priority_node_to_task() and |
|
|
+ task_to_priority_node(). |
|
|
+ |
|
|
+ Return the offset from a task to its priority_node entry. The |
|
|
+ priority_node entry has a type of TYPE. */ |
|
|
+ |
|
|
+static inline size_t |
|
|
+priority_queue_offset (enum priority_queue_type type) |
|
|
+{ |
|
|
+ return offsetof (struct gomp_task, pnode[(int) type]); |
|
|
+} |
|
|
+ |
|
|
+/* Return the task associated with a priority NODE of type TYPE. */ |
|
|
+ |
|
|
+static inline struct gomp_task * |
|
|
+priority_node_to_task (enum priority_queue_type type, |
|
|
+ struct priority_node *node) |
|
|
+{ |
|
|
+ return (struct gomp_task *) ((char *) node - priority_queue_offset (type)); |
|
|
+} |
|
|
+ |
|
|
+/* Return the priority node of type TYPE for a given TASK. */ |
|
|
+ |
|
|
+static inline struct priority_node * |
|
|
+task_to_priority_node (enum priority_queue_type type, |
|
|
+ struct gomp_task *task) |
|
|
+{ |
|
|
+ return (struct priority_node *) ((char *) task |
|
|
+ + priority_queue_offset (type)); |
|
|
+} |
|
|
#endif /* LIBGOMP_H */ |
|
|
--- libgomp/env.c.jj 2014-05-15 10:56:32.420522486 +0200 |
|
|
+++ libgomp/env.c 2016-07-13 16:57:04.437535335 +0200 |
|
|
@@ -27,6 +27,8 @@ |
|
|
|
|
|
#include "libgomp.h" |
|
|
#include "libgomp_f.h" |
|
|
+#include "oacc-int.h" |
|
|
+#include "gomp-constants.h" |
|
|
#include <ctype.h> |
|
|
#include <stdlib.h> |
|
|
#include <stdio.h> |
|
|
@@ -56,7 +58,7 @@ struct gomp_task_icv gomp_global_icv = { |
|
|
.nthreads_var = 1, |
|
|
.thread_limit_var = UINT_MAX, |
|
|
.run_sched_var = GFS_DYNAMIC, |
|
|
- .run_sched_modifier = 1, |
|
|
+ .run_sched_chunk_size = 1, |
|
|
.default_device_var = 0, |
|
|
.dyn_var = false, |
|
|
.nest_var = false, |
|
|
@@ -66,6 +68,7 @@ struct gomp_task_icv gomp_global_icv = { |
|
|
|
|
|
unsigned long gomp_max_active_levels_var = INT_MAX; |
|
|
bool gomp_cancel_var = false; |
|
|
+int gomp_max_task_priority_var = 0; |
|
|
#ifndef HAVE_SYNC_BUILTINS |
|
|
gomp_mutex_t gomp_managed_threads_lock; |
|
|
#endif |
|
|
@@ -76,6 +79,9 @@ char *gomp_bind_var_list; |
|
|
unsigned long gomp_bind_var_list_len; |
|
|
void **gomp_places_list; |
|
|
unsigned long gomp_places_list_len; |
|
|
+int gomp_debug_var; |
|
|
+char *goacc_device_type; |
|
|
+int goacc_device_num; |
|
|
|
|
|
/* Parse the OMP_SCHEDULE environment variable. */ |
|
|
|
|
|
@@ -118,7 +124,7 @@ parse_schedule (void) |
|
|
++env; |
|
|
if (*env == '\0') |
|
|
{ |
|
|
- gomp_global_icv.run_sched_modifier |
|
|
+ gomp_global_icv.run_sched_chunk_size |
|
|
= gomp_global_icv.run_sched_var != GFS_STATIC; |
|
|
return; |
|
|
} |
|
|
@@ -144,7 +150,7 @@ parse_schedule (void) |
|
|
|
|
|
if (value == 0 && gomp_global_icv.run_sched_var != GFS_STATIC) |
|
|
value = 1; |
|
|
- gomp_global_icv.run_sched_modifier = value; |
|
|
+ gomp_global_icv.run_sched_chunk_size = value; |
|
|
return; |
|
|
|
|
|
unknown: |
|
|
@@ -1011,6 +1017,16 @@ parse_affinity (bool ignore) |
|
|
return false; |
|
|
} |
|
|
|
|
|
+static void |
|
|
+parse_acc_device_type (void) |
|
|
+{ |
|
|
+ const char *env = getenv ("ACC_DEVICE_TYPE"); |
|
|
+ |
|
|
+ if (env && *env != '\0') |
|
|
+ goacc_device_type = strdup (env); |
|
|
+ else |
|
|
+ goacc_device_type = NULL; |
|
|
+} |
|
|
|
|
|
static void |
|
|
handle_omp_display_env (unsigned long stacksize, int wait_policy) |
|
|
@@ -1054,7 +1070,7 @@ handle_omp_display_env (unsigned long st |
|
|
|
|
|
fputs ("\nOPENMP DISPLAY ENVIRONMENT BEGIN\n", stderr); |
|
|
|
|
|
- fputs (" _OPENMP = '201307'\n", stderr); |
|
|
+ fputs (" _OPENMP = '201511'\n", stderr); |
|
|
fprintf (stderr, " OMP_DYNAMIC = '%s'\n", |
|
|
gomp_global_icv.dyn_var ? "TRUE" : "FALSE"); |
|
|
fprintf (stderr, " OMP_NESTED = '%s'\n", |
|
|
@@ -1142,6 +1158,8 @@ handle_omp_display_env (unsigned long st |
|
|
gomp_cancel_var ? "TRUE" : "FALSE"); |
|
|
fprintf (stderr, " OMP_DEFAULT_DEVICE = '%d'\n", |
|
|
gomp_global_icv.default_device_var); |
|
|
+ fprintf (stderr, " OMP_MAX_TASK_PRIORITY = '%d'\n", |
|
|
+ gomp_max_task_priority_var); |
|
|
|
|
|
if (verbose) |
|
|
{ |
|
|
@@ -1174,6 +1192,7 @@ initialize_env (void) |
|
|
parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var); |
|
|
parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var); |
|
|
parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true); |
|
|
+ parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true); |
|
|
parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var, |
|
|
true); |
|
|
if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false)) |
|
|
@@ -1181,6 +1200,7 @@ initialize_env (void) |
|
|
gomp_global_icv.thread_limit_var |
|
|
= thread_limit_var > INT_MAX ? UINT_MAX : thread_limit_var; |
|
|
} |
|
|
+ parse_int ("GOMP_DEBUG", &gomp_debug_var, true); |
|
|
#ifndef HAVE_SYNC_BUILTINS |
|
|
gomp_mutex_init (&gomp_managed_threads_lock); |
|
|
#endif |
|
|
@@ -1271,6 +1291,15 @@ initialize_env (void) |
|
|
} |
|
|
|
|
|
handle_omp_display_env (stacksize, wait_policy); |
|
|
+ |
|
|
+ /* OpenACC. */ |
|
|
+ |
|
|
+ if (!parse_int ("ACC_DEVICE_NUM", &goacc_device_num, true)) |
|
|
+ goacc_device_num = 0; |
|
|
+ |
|
|
+ parse_acc_device_type (); |
|
|
+ |
|
|
+ goacc_runtime_initialize (); |
|
|
} |
|
|
|
|
|
|
|
|
@@ -1312,21 +1341,21 @@ omp_get_nested (void) |
|
|
} |
|
|
|
|
|
void |
|
|
-omp_set_schedule (omp_sched_t kind, int modifier) |
|
|
+omp_set_schedule (omp_sched_t kind, int chunk_size) |
|
|
{ |
|
|
struct gomp_task_icv *icv = gomp_icv (true); |
|
|
switch (kind) |
|
|
{ |
|
|
case omp_sched_static: |
|
|
- if (modifier < 1) |
|
|
- modifier = 0; |
|
|
- icv->run_sched_modifier = modifier; |
|
|
+ if (chunk_size < 1) |
|
|
+ chunk_size = 0; |
|
|
+ icv->run_sched_chunk_size = chunk_size; |
|
|
break; |
|
|
case omp_sched_dynamic: |
|
|
case omp_sched_guided: |
|
|
- if (modifier < 1) |
|
|
- modifier = 1; |
|
|
- icv->run_sched_modifier = modifier; |
|
|
+ if (chunk_size < 1) |
|
|
+ chunk_size = 1; |
|
|
+ icv->run_sched_chunk_size = chunk_size; |
|
|
break; |
|
|
case omp_sched_auto: |
|
|
break; |
|
|
@@ -1337,11 +1366,11 @@ omp_set_schedule (omp_sched_t kind, int |
|
|
} |
|
|
|
|
|
void |
|
|
-omp_get_schedule (omp_sched_t *kind, int *modifier) |
|
|
+omp_get_schedule (omp_sched_t *kind, int *chunk_size) |
|
|
{ |
|
|
struct gomp_task_icv *icv = gomp_icv (false); |
|
|
*kind = icv->run_sched_var; |
|
|
- *modifier = icv->run_sched_modifier; |
|
|
+ *chunk_size = icv->run_sched_chunk_size; |
|
|
} |
|
|
|
|
|
int |
|
|
@@ -1377,6 +1406,12 @@ omp_get_cancellation (void) |
|
|
return gomp_cancel_var; |
|
|
} |
|
|
|
|
|
+int |
|
|
+omp_get_max_task_priority (void) |
|
|
+{ |
|
|
+ return gomp_max_task_priority_var; |
|
|
+} |
|
|
+ |
|
|
omp_proc_bind_t |
|
|
omp_get_proc_bind (void) |
|
|
{ |
|
|
@@ -1425,6 +1460,59 @@ omp_is_initial_device (void) |
|
|
return 1; |
|
|
} |
|
|
|
|
|
+int |
|
|
+omp_get_initial_device (void) |
|
|
+{ |
|
|
+ return GOMP_DEVICE_HOST_FALLBACK; |
|
|
+} |
|
|
+ |
|
|
+int |
|
|
+omp_get_num_places (void) |
|
|
+{ |
|
|
+ return gomp_places_list_len; |
|
|
+} |
|
|
+ |
|
|
+int |
|
|
+omp_get_place_num (void) |
|
|
+{ |
|
|
+ if (gomp_places_list == NULL) |
|
|
+ return -1; |
|
|
+ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ if (thr->place == 0) |
|
|
+ gomp_init_affinity (); |
|
|
+ |
|
|
+ return (int) thr->place - 1; |
|
|
+} |
|
|
+ |
|
|
+int |
|
|
+omp_get_partition_num_places (void) |
|
|
+{ |
|
|
+ if (gomp_places_list == NULL) |
|
|
+ return 0; |
|
|
+ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ if (thr->place == 0) |
|
|
+ gomp_init_affinity (); |
|
|
+ |
|
|
+ return thr->ts.place_partition_len; |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+omp_get_partition_place_nums (int *place_nums) |
|
|
+{ |
|
|
+ if (gomp_places_list == NULL) |
|
|
+ return; |
|
|
+ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ if (thr->place == 0) |
|
|
+ gomp_init_affinity (); |
|
|
+ |
|
|
+ unsigned int i; |
|
|
+ for (i = 0; i < thr->ts.place_partition_len; i++) |
|
|
+ *place_nums++ = thr->ts.place_partition_off + i; |
|
|
+} |
|
|
+ |
|
|
ialias (omp_set_dynamic) |
|
|
ialias (omp_set_nested) |
|
|
ialias (omp_set_num_threads) |
|
|
@@ -1444,3 +1532,9 @@ ialias (omp_get_num_devices) |
|
|
ialias (omp_get_num_teams) |
|
|
ialias (omp_get_team_num) |
|
|
ialias (omp_is_initial_device) |
|
|
+ialias (omp_get_initial_device) |
|
|
+ialias (omp_get_max_task_priority) |
|
|
+ialias (omp_get_num_places) |
|
|
+ialias (omp_get_place_num) |
|
|
+ialias (omp_get_partition_num_places) |
|
|
+ialias (omp_get_partition_place_nums) |
|
|
--- libgomp/openacc.h.jj 2016-07-13 16:57:04.432535397 +0200 |
|
|
+++ libgomp/openacc.h 2016-07-13 16:57:04.432535397 +0200 |
|
|
@@ -0,0 +1,131 @@ |
|
|
+/* OpenACC Runtime Library User-facing Declarations |
|
|
+ |
|
|
+ Copyright (C) 2013-2016 Free Software Foundation, Inc. |
|
|
+ |
|
|
+ Contributed by Mentor Embedded. |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+#ifndef _OPENACC_H |
|
|
+#define _OPENACC_H 1 |
|
|
+ |
|
|
+/* The OpenACC standard is silent on whether or not including <openacc.h> |
|
|
+ might or must not include other header files. We chose to include |
|
|
+ some. */ |
|
|
+#include <stddef.h> |
|
|
+ |
|
|
+#ifdef __cplusplus |
|
|
+extern "C" { |
|
|
+#endif |
|
|
+ |
|
|
+#if __cplusplus >= 201103 |
|
|
+# define __GOACC_NOTHROW noexcept |
|
|
+#elif __cplusplus |
|
|
+# define __GOACC_NOTHROW throw () |
|
|
+#else /* Not C++ */ |
|
|
+# define __GOACC_NOTHROW __attribute__ ((__nothrow__)) |
|
|
+#endif |
|
|
+ |
|
|
+/* Types */ |
|
|
+typedef enum acc_device_t { |
|
|
+ /* Keep in sync with include/gomp-constants.h. */ |
|
|
+ acc_device_none = 0, |
|
|
+ acc_device_default = 1, |
|
|
+ acc_device_host = 2, |
|
|
+ /* acc_device_host_nonshm = 3 removed. */ |
|
|
+ acc_device_not_host = 4, |
|
|
+ acc_device_nvidia = 5, |
|
|
+ _ACC_device_hwm, |
|
|
+ /* Ensure enumeration is layout compatible with int. */ |
|
|
+ _ACC_highest = __INT_MAX__, |
|
|
+ _ACC_neg = -1 |
|
|
+} acc_device_t; |
|
|
+ |
|
|
+typedef enum acc_async_t { |
|
|
+ /* Keep in sync with include/gomp-constants.h. */ |
|
|
+ acc_async_noval = -1, |
|
|
+ acc_async_sync = -2 |
|
|
+} acc_async_t; |
|
|
+ |
|
|
+int acc_get_num_devices (acc_device_t) __GOACC_NOTHROW; |
|
|
+void acc_set_device_type (acc_device_t) __GOACC_NOTHROW; |
|
|
+acc_device_t acc_get_device_type (void) __GOACC_NOTHROW; |
|
|
+void acc_set_device_num (int, acc_device_t) __GOACC_NOTHROW; |
|
|
+int acc_get_device_num (acc_device_t) __GOACC_NOTHROW; |
|
|
+int acc_async_test (int) __GOACC_NOTHROW; |
|
|
+int acc_async_test_all (void) __GOACC_NOTHROW; |
|
|
+void acc_wait (int) __GOACC_NOTHROW; |
|
|
+void acc_wait_async (int, int) __GOACC_NOTHROW; |
|
|
+void acc_wait_all (void) __GOACC_NOTHROW; |
|
|
+void acc_wait_all_async (int) __GOACC_NOTHROW; |
|
|
+void acc_init (acc_device_t) __GOACC_NOTHROW; |
|
|
+void acc_shutdown (acc_device_t) __GOACC_NOTHROW; |
|
|
+#ifdef __cplusplus |
|
|
+int acc_on_device (int __arg) __GOACC_NOTHROW; |
|
|
+#else |
|
|
+int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW; |
|
|
+#endif |
|
|
+void *acc_malloc (size_t) __GOACC_NOTHROW; |
|
|
+void acc_free (void *) __GOACC_NOTHROW; |
|
|
+/* Some of these would be more correct with const qualifiers, but |
|
|
+ the standard specifies otherwise. */ |
|
|
+void *acc_copyin (void *, size_t) __GOACC_NOTHROW; |
|
|
+void *acc_present_or_copyin (void *, size_t) __GOACC_NOTHROW; |
|
|
+void *acc_create (void *, size_t) __GOACC_NOTHROW; |
|
|
+void *acc_present_or_create (void *, size_t) __GOACC_NOTHROW; |
|
|
+void acc_copyout (void *, size_t) __GOACC_NOTHROW; |
|
|
+void acc_delete (void *, size_t) __GOACC_NOTHROW; |
|
|
+void acc_update_device (void *, size_t) __GOACC_NOTHROW; |
|
|
+void acc_update_self (void *, size_t) __GOACC_NOTHROW; |
|
|
+void acc_map_data (void *, void *, size_t) __GOACC_NOTHROW; |
|
|
+void acc_unmap_data (void *) __GOACC_NOTHROW; |
|
|
+void *acc_deviceptr (void *) __GOACC_NOTHROW; |
|
|
+void *acc_hostptr (void *) __GOACC_NOTHROW; |
|
|
+int acc_is_present (void *, size_t) __GOACC_NOTHROW; |
|
|
+void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW; |
|
|
+void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW; |
|
|
+ |
|
|
+/* Old names. OpenACC does not specify whether these can or must |
|
|
+ not be macros, inlines or aliases for the new names. */ |
|
|
+#define acc_pcreate acc_present_or_create |
|
|
+#define acc_pcopyin acc_present_or_copyin |
|
|
+ |
|
|
+/* CUDA-specific routines. */ |
|
|
+void *acc_get_current_cuda_device (void) __GOACC_NOTHROW; |
|
|
+void *acc_get_current_cuda_context (void) __GOACC_NOTHROW; |
|
|
+void *acc_get_cuda_stream (int) __GOACC_NOTHROW; |
|
|
+int acc_set_cuda_stream (int, void *) __GOACC_NOTHROW; |
|
|
+ |
|
|
+#ifdef __cplusplus |
|
|
+} |
|
|
+ |
|
|
+/* Forwarding function with correctly typed arg. */ |
|
|
+ |
|
|
+#pragma acc routine seq |
|
|
+inline int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW |
|
|
+{ |
|
|
+ return acc_on_device ((int) __arg); |
|
|
+} |
|
|
+#endif |
|
|
+ |
|
|
+#endif /* _OPENACC_H */ |
|
|
--- libgomp/config/linux/doacross.h.jj 2016-07-13 16:57:18.902355979 +0200 |
|
|
+++ libgomp/config/linux/doacross.h 2016-07-13 16:57:18.902355979 +0200 |
|
|
@@ -0,0 +1,57 @@ |
|
|
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc. |
|
|
+ Contributed by Jakub Jelinek <jakub@redhat.com>. |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+/* This is a Linux specific implementation of doacross spinning. */ |
|
|
+ |
|
|
+#ifndef GOMP_DOACROSS_H |
|
|
+#define GOMP_DOACROSS_H 1 |
|
|
+ |
|
|
+#include "libgomp.h" |
|
|
+#include <errno.h> |
|
|
+#include "wait.h" |
|
|
+ |
|
|
+#ifdef HAVE_ATTRIBUTE_VISIBILITY |
|
|
+# pragma GCC visibility push(hidden) |
|
|
+#endif |
|
|
+ |
|
|
+static inline void doacross_spin (unsigned long *addr, unsigned long expected, |
|
|
+ unsigned long cur) |
|
|
+{ |
|
|
+ /* FIXME: back off depending on how large expected - cur is. */ |
|
|
+ do |
|
|
+ { |
|
|
+ cpu_relax (); |
|
|
+ cur = __atomic_load_n (addr, MEMMODEL_RELAXED); |
|
|
+ if (expected < cur) |
|
|
+ return; |
|
|
+ } |
|
|
+ while (1); |
|
|
+} |
|
|
+ |
|
|
+#ifdef HAVE_ATTRIBUTE_VISIBILITY |
|
|
+# pragma GCC visibility pop |
|
|
+#endif |
|
|
+ |
|
|
+#endif /* GOMP_DOACROSS_H */ |
|
|
--- libgomp/config/posix/doacross.h.jj 2016-07-13 16:57:18.903355966 +0200 |
|
|
+++ libgomp/config/posix/doacross.h 2016-07-13 16:57:18.903355966 +0200 |
|
|
@@ -0,0 +1,62 @@ |
|
|
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc. |
|
|
+ Contributed by Jakub Jelinek <jakub@redhat.com>. |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+/* This is a generic implementation of doacross spinning. */ |
|
|
+ |
|
|
+#ifndef GOMP_DOACROSS_H |
|
|
+#define GOMP_DOACROSS_H 1 |
|
|
+ |
|
|
+#include "libgomp.h" |
|
|
+#include <errno.h> |
|
|
+ |
|
|
+#ifdef HAVE_ATTRIBUTE_VISIBILITY |
|
|
+# pragma GCC visibility push(hidden) |
|
|
+#endif |
|
|
+ |
|
|
+static inline void |
|
|
+cpu_relax (void) |
|
|
+{ |
|
|
+ __asm volatile ("" : : : "memory"); |
|
|
+} |
|
|
+ |
|
|
+static inline void doacross_spin (unsigned long *addr, unsigned long expected, |
|
|
+ unsigned long cur) |
|
|
+{ |
|
|
+ /* FIXME: back off depending on how large expected - cur is. */ |
|
|
+ do |
|
|
+ { |
|
|
+ cpu_relax (); |
|
|
+ cur = __atomic_load_n (addr, MEMMODEL_RELAXED); |
|
|
+ if (expected < cur) |
|
|
+ return; |
|
|
+ } |
|
|
+ while (1); |
|
|
+} |
|
|
+ |
|
|
+#ifdef HAVE_ATTRIBUTE_VISIBILITY |
|
|
+# pragma GCC visibility pop |
|
|
+#endif |
|
|
+ |
|
|
+#endif /* GOMP_DOACROSS_H */ |
|
|
--- libgomp/splay-tree.c.jj 2016-07-13 16:57:18.919355768 +0200 |
|
|
+++ libgomp/splay-tree.c 2016-07-13 16:57:18.919355768 +0200 |
|
|
@@ -0,0 +1,238 @@ |
|
|
+/* A splay-tree datatype. |
|
|
+ Copyright (C) 1998-2016 Free Software Foundation, Inc. |
|
|
+ Contributed by Mark Mitchell (mark@markmitchell.com). |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+/* The splay tree code copied from include/splay-tree.h and adjusted, |
|
|
+ so that all the data lives directly in splay_tree_node_s structure |
|
|
+ and no extra allocations are needed. */ |
|
|
+ |
|
|
+/* For an easily readable description of splay-trees, see: |
|
|
+ |
|
|
+ Lewis, Harry R. and Denenberg, Larry. Data Structures and Their |
|
|
+ Algorithms. Harper-Collins, Inc. 1991. |
|
|
+ |
|
|
+ The major feature of splay trees is that all basic tree operations |
|
|
+ are amortized O(log n) time for a tree with n nodes. */ |
|
|
+ |
|
|
+#include "libgomp.h" |
|
|
+ |
|
|
+/* Rotate the edge joining the left child N with its parent P. PP is the |
|
|
+ grandparent's pointer to P. */ |
|
|
+ |
|
|
+static inline void |
|
|
+rotate_left (splay_tree_node *pp, splay_tree_node p, splay_tree_node n) |
|
|
+{ |
|
|
+ splay_tree_node tmp; |
|
|
+ tmp = n->right; |
|
|
+ n->right = p; |
|
|
+ p->left = tmp; |
|
|
+ *pp = n; |
|
|
+} |
|
|
+ |
|
|
+/* Rotate the edge joining the right child N with its parent P. PP is the |
|
|
+ grandparent's pointer to P. */ |
|
|
+ |
|
|
+static inline void |
|
|
+rotate_right (splay_tree_node *pp, splay_tree_node p, splay_tree_node n) |
|
|
+{ |
|
|
+ splay_tree_node tmp; |
|
|
+ tmp = n->left; |
|
|
+ n->left = p; |
|
|
+ p->right = tmp; |
|
|
+ *pp = n; |
|
|
+} |
|
|
+ |
|
|
+/* Bottom up splay of KEY. */ |
|
|
+ |
|
|
+static void |
|
|
+splay_tree_splay (splay_tree sp, splay_tree_key key) |
|
|
+{ |
|
|
+ if (sp->root == NULL) |
|
|
+ return; |
|
|
+ |
|
|
+ do { |
|
|
+ int cmp1, cmp2; |
|
|
+ splay_tree_node n, c; |
|
|
+ |
|
|
+ n = sp->root; |
|
|
+ cmp1 = splay_compare (key, &n->key); |
|
|
+ |
|
|
+ /* Found. */ |
|
|
+ if (cmp1 == 0) |
|
|
+ return; |
|
|
+ |
|
|
+ /* Left or right? If no child, then we're done. */ |
|
|
+ if (cmp1 < 0) |
|
|
+ c = n->left; |
|
|
+ else |
|
|
+ c = n->right; |
|
|
+ if (!c) |
|
|
+ return; |
|
|
+ |
|
|
+ /* Next one left or right? If found or no child, we're done |
|
|
+ after one rotation. */ |
|
|
+ cmp2 = splay_compare (key, &c->key); |
|
|
+ if (cmp2 == 0 |
|
|
+ || (cmp2 < 0 && !c->left) |
|
|
+ || (cmp2 > 0 && !c->right)) |
|
|
+ { |
|
|
+ if (cmp1 < 0) |
|
|
+ rotate_left (&sp->root, n, c); |
|
|
+ else |
|
|
+ rotate_right (&sp->root, n, c); |
|
|
+ return; |
|
|
+ } |
|
|
+ |
|
|
+ /* Now we have the four cases of double-rotation. */ |
|
|
+ if (cmp1 < 0 && cmp2 < 0) |
|
|
+ { |
|
|
+ rotate_left (&n->left, c, c->left); |
|
|
+ rotate_left (&sp->root, n, n->left); |
|
|
+ } |
|
|
+ else if (cmp1 > 0 && cmp2 > 0) |
|
|
+ { |
|
|
+ rotate_right (&n->right, c, c->right); |
|
|
+ rotate_right (&sp->root, n, n->right); |
|
|
+ } |
|
|
+ else if (cmp1 < 0 && cmp2 > 0) |
|
|
+ { |
|
|
+ rotate_right (&n->left, c, c->right); |
|
|
+ rotate_left (&sp->root, n, n->left); |
|
|
+ } |
|
|
+ else if (cmp1 > 0 && cmp2 < 0) |
|
|
+ { |
|
|
+ rotate_left (&n->right, c, c->left); |
|
|
+ rotate_right (&sp->root, n, n->right); |
|
|
+ } |
|
|
+ } while (1); |
|
|
+} |
|
|
+ |
|
|
+/* Insert a new NODE into SP. The NODE shouldn't exist in the tree. */ |
|
|
+ |
|
|
+attribute_hidden void |
|
|
+splay_tree_insert (splay_tree sp, splay_tree_node node) |
|
|
+{ |
|
|
+ int comparison = 0; |
|
|
+ |
|
|
+ splay_tree_splay (sp, &node->key); |
|
|
+ |
|
|
+ if (sp->root) |
|
|
+ comparison = splay_compare (&sp->root->key, &node->key); |
|
|
+ |
|
|
+ if (sp->root && comparison == 0) |
|
|
+ gomp_fatal ("Duplicate node"); |
|
|
+ else |
|
|
+ { |
|
|
+ /* Insert it at the root. */ |
|
|
+ if (sp->root == NULL) |
|
|
+ node->left = node->right = NULL; |
|
|
+ else if (comparison < 0) |
|
|
+ { |
|
|
+ node->left = sp->root; |
|
|
+ node->right = node->left->right; |
|
|
+ node->left->right = NULL; |
|
|
+ } |
|
|
+ else |
|
|
+ { |
|
|
+ node->right = sp->root; |
|
|
+ node->left = node->right->left; |
|
|
+ node->right->left = NULL; |
|
|
+ } |
|
|
+ |
|
|
+ sp->root = node; |
|
|
+ } |
|
|
+} |
|
|
+ |
|
|
+/* Remove node with KEY from SP. It is not an error if it did not exist. */ |
|
|
+ |
|
|
+attribute_hidden void |
|
|
+splay_tree_remove (splay_tree sp, splay_tree_key key) |
|
|
+{ |
|
|
+ splay_tree_splay (sp, key); |
|
|
+ |
|
|
+ if (sp->root && splay_compare (&sp->root->key, key) == 0) |
|
|
+ { |
|
|
+ splay_tree_node left, right; |
|
|
+ |
|
|
+ left = sp->root->left; |
|
|
+ right = sp->root->right; |
|
|
+ |
|
|
+ /* One of the children is now the root. Doesn't matter much |
|
|
+ which, so long as we preserve the properties of the tree. */ |
|
|
+ if (left) |
|
|
+ { |
|
|
+ sp->root = left; |
|
|
+ |
|
|
+ /* If there was a right child as well, hang it off the |
|
|
+ right-most leaf of the left child. */ |
|
|
+ if (right) |
|
|
+ { |
|
|
+ while (left->right) |
|
|
+ left = left->right; |
|
|
+ left->right = right; |
|
|
+ } |
|
|
+ } |
|
|
+ else |
|
|
+ sp->root = right; |
|
|
+ } |
|
|
+} |
|
|
+ |
|
|
+/* Lookup KEY in SP, returning the stored key if present, and NULL |
|
|
+ otherwise. */ |
|
|
+ |
|
|
+attribute_hidden splay_tree_key |
|
|
+splay_tree_lookup (splay_tree sp, splay_tree_key key) |
|
|
+{ |
|
|
+ splay_tree_splay (sp, key); |
|
|
+ |
|
|
+ if (sp->root && splay_compare (&sp->root->key, key) == 0) |
|
|
+ return &sp->root->key; |
|
|
+ else |
|
|
+ return NULL; |
|
|
+} |
|
|
+ |
|
|
+/* Helper function for splay_tree_foreach. |
|
|
+ |
|
|
+ Run FUNC on the key of every node in the subtree rooted at NODE. */ |
|
|
+ |
|
|
+static void |
|
|
+splay_tree_foreach_internal (splay_tree_node node, splay_tree_callback func, |
|
|
+ void *data) |
|
|
+{ |
|
|
+ if (!node) |
|
|
+ return; |
|
|
+ func (&node->key, data); |
|
|
+ splay_tree_foreach_internal (node->left, func, data); |
|
|
+ /* Yeah, whatever. GCC can fix my tail recursion. */ |
|
|
+ splay_tree_foreach_internal (node->right, func, data); |
|
|
+} |
|
|
+ |
|
|
+/* Run FUNC on each of the nodes in SP. */ |
|
|
+ |
|
|
+attribute_hidden void |
|
|
+splay_tree_foreach (splay_tree sp, splay_tree_callback func, void *data) |
|
|
+{ |
|
|
+ splay_tree_foreach_internal (sp->root, func, data); |
|
|
+} |
|
|
--- libgomp/libgomp-plugin.c.jj 2016-07-13 16:57:04.435535360 +0200 |
|
|
+++ libgomp/libgomp-plugin.c 2016-07-13 16:57:04.435535360 +0200 |
|
|
@@ -0,0 +1,80 @@ |
|
|
+/* Copyright (C) 2014-2016 Free Software Foundation, Inc. |
|
|
+ |
|
|
+ Contributed by Mentor Embedded. |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+/* Exported (non-hidden) functions exposing libgomp interface for plugins. */ |
|
|
+ |
|
|
+#include <stdlib.h> |
|
|
+ |
|
|
+#include "libgomp.h" |
|
|
+#include "libgomp-plugin.h" |
|
|
+ |
|
|
+void * |
|
|
+GOMP_PLUGIN_malloc (size_t size) |
|
|
+{ |
|
|
+ return gomp_malloc (size); |
|
|
+} |
|
|
+ |
|
|
+void * |
|
|
+GOMP_PLUGIN_malloc_cleared (size_t size) |
|
|
+{ |
|
|
+ return gomp_malloc_cleared (size); |
|
|
+} |
|
|
+ |
|
|
+void * |
|
|
+GOMP_PLUGIN_realloc (void *ptr, size_t size) |
|
|
+{ |
|
|
+ return gomp_realloc (ptr, size); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+GOMP_PLUGIN_debug (int kind, const char *msg, ...) |
|
|
+{ |
|
|
+ va_list ap; |
|
|
+ |
|
|
+ va_start (ap, msg); |
|
|
+ gomp_vdebug (kind, msg, ap); |
|
|
+ va_end (ap); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+GOMP_PLUGIN_error (const char *msg, ...) |
|
|
+{ |
|
|
+ va_list ap; |
|
|
+ |
|
|
+ va_start (ap, msg); |
|
|
+ gomp_verror (msg, ap); |
|
|
+ va_end (ap); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+GOMP_PLUGIN_fatal (const char *msg, ...) |
|
|
+{ |
|
|
+ va_list ap; |
|
|
+ |
|
|
+ va_start (ap, msg); |
|
|
+ gomp_vfatal (msg, ap); |
|
|
+ va_end (ap); |
|
|
+} |
|
|
--- libgomp/libgomp-plugin.h.jj 2016-07-13 16:57:04.438535323 +0200 |
|
|
+++ libgomp/libgomp-plugin.h 2016-07-13 16:57:04.438535323 +0200 |
|
|
@@ -0,0 +1,80 @@ |
|
|
+/* Copyright (C) 2014-2016 Free Software Foundation, Inc. |
|
|
+ |
|
|
+ Contributed by Mentor Embedded. |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+/* An interface to various libgomp-internal functions for use by plugins. */ |
|
|
+ |
|
|
+#ifndef LIBGOMP_PLUGIN_H |
|
|
+#define LIBGOMP_PLUGIN_H 1 |
|
|
+ |
|
|
+#include <stddef.h> |
|
|
+#include <stdint.h> |
|
|
+ |
|
|
+#ifdef __cplusplus |
|
|
+extern "C" { |
|
|
+#endif |
|
|
+ |
|
|
+/* Capabilities of offloading devices. */ |
|
|
+#define GOMP_OFFLOAD_CAP_SHARED_MEM (1 << 0) |
|
|
+#define GOMP_OFFLOAD_CAP_NATIVE_EXEC (1 << 1) |
|
|
+#define GOMP_OFFLOAD_CAP_OPENMP_400 (1 << 2) |
|
|
+#define GOMP_OFFLOAD_CAP_OPENACC_200 (1 << 3) |
|
|
+ |
|
|
+/* Type of offload target device. Keep in sync with include/gomp-constants.h. */ |
|
|
+enum offload_target_type |
|
|
+{ |
|
|
+ OFFLOAD_TARGET_TYPE_HOST = 2, |
|
|
+ /* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed. */ |
|
|
+ OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5, |
|
|
+ OFFLOAD_TARGET_TYPE_INTEL_MIC = 6, |
|
|
+ OFFLOAD_TARGET_TYPE_HSA = 7 |
|
|
+}; |
|
|
+ |
|
|
+/* Auxiliary struct, used for transferring pairs of addresses from plugin |
|
|
+ to libgomp. */ |
|
|
+struct addr_pair |
|
|
+{ |
|
|
+ uintptr_t start; |
|
|
+ uintptr_t end; |
|
|
+}; |
|
|
+ |
|
|
+/* Miscellaneous functions. */ |
|
|
+extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc)); |
|
|
+extern void *GOMP_PLUGIN_malloc_cleared (size_t) __attribute__ ((malloc)); |
|
|
+extern void *GOMP_PLUGIN_realloc (void *, size_t); |
|
|
+void GOMP_PLUGIN_target_task_completion (void *); |
|
|
+ |
|
|
+extern void GOMP_PLUGIN_debug (int, const char *, ...) |
|
|
+ __attribute__ ((format (printf, 2, 3))); |
|
|
+extern void GOMP_PLUGIN_error (const char *, ...) |
|
|
+ __attribute__ ((format (printf, 1, 2))); |
|
|
+extern void GOMP_PLUGIN_fatal (const char *, ...) |
|
|
+ __attribute__ ((noreturn, format (printf, 1, 2))); |
|
|
+ |
|
|
+#ifdef __cplusplus |
|
|
+} |
|
|
+#endif |
|
|
+ |
|
|
+#endif |
|
|
--- libgomp/oacc-async.c.jj 2016-07-13 16:57:13.488423109 +0200 |
|
|
+++ libgomp/oacc-async.c 2016-07-13 16:57:13.488423109 +0200 |
|
|
@@ -0,0 +1,107 @@ |
|
|
+/* OpenACC Runtime Library Definitions. |
|
|
+ |
|
|
+ Copyright (C) 2013-2016 Free Software Foundation, Inc. |
|
|
+ |
|
|
+ Contributed by Mentor Embedded. |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+#include <assert.h> |
|
|
+#include "openacc.h" |
|
|
+#include "libgomp.h" |
|
|
+#include "oacc-int.h" |
|
|
+ |
|
|
+int |
|
|
+acc_async_test (int async) |
|
|
+{ |
|
|
+ if (async < acc_async_sync) |
|
|
+ gomp_fatal ("invalid async argument: %d", async); |
|
|
+ |
|
|
+ struct goacc_thread *thr = goacc_thread (); |
|
|
+ |
|
|
+ if (!thr || !thr->dev) |
|
|
+ gomp_fatal ("no device active"); |
|
|
+ |
|
|
+ return thr->dev->openacc.async_test_func (async); |
|
|
+} |
|
|
+ |
|
|
+int |
|
|
+acc_async_test_all (void) |
|
|
+{ |
|
|
+ struct goacc_thread *thr = goacc_thread (); |
|
|
+ |
|
|
+ if (!thr || !thr->dev) |
|
|
+ gomp_fatal ("no device active"); |
|
|
+ |
|
|
+ return thr->dev->openacc.async_test_all_func (); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+acc_wait (int async) |
|
|
+{ |
|
|
+ if (async < acc_async_sync) |
|
|
+ gomp_fatal ("invalid async argument: %d", async); |
|
|
+ |
|
|
+ struct goacc_thread *thr = goacc_thread (); |
|
|
+ |
|
|
+ if (!thr || !thr->dev) |
|
|
+ gomp_fatal ("no device active"); |
|
|
+ |
|
|
+ thr->dev->openacc.async_wait_func (async); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+acc_wait_async (int async1, int async2) |
|
|
+{ |
|
|
+ struct goacc_thread *thr = goacc_thread (); |
|
|
+ |
|
|
+ if (!thr || !thr->dev) |
|
|
+ gomp_fatal ("no device active"); |
|
|
+ |
|
|
+ thr->dev->openacc.async_wait_async_func (async1, async2); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+acc_wait_all (void) |
|
|
+{ |
|
|
+ struct goacc_thread *thr = goacc_thread (); |
|
|
+ |
|
|
+ if (!thr || !thr->dev) |
|
|
+ gomp_fatal ("no device active"); |
|
|
+ |
|
|
+ thr->dev->openacc.async_wait_all_func (); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+acc_wait_all_async (int async) |
|
|
+{ |
|
|
+ if (async < acc_async_sync) |
|
|
+ gomp_fatal ("invalid async argument: %d", async); |
|
|
+ |
|
|
+ struct goacc_thread *thr = goacc_thread (); |
|
|
+ |
|
|
+ if (!thr || !thr->dev) |
|
|
+ gomp_fatal ("no device active"); |
|
|
+ |
|
|
+ thr->dev->openacc.async_wait_all_async_func (async); |
|
|
+} |
|
|
--- libgomp/splay-tree.h.jj 2016-07-13 16:57:18.934355582 +0200 |
|
|
+++ libgomp/splay-tree.h 2016-07-13 16:57:18.934355582 +0200 |
|
|
@@ -0,0 +1,130 @@ |
|
|
+/* A splay-tree datatype. |
|
|
+ Copyright (C) 1998-2016 Free Software Foundation, Inc. |
|
|
+ Contributed by Mark Mitchell (mark@markmitchell.com). |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+/* The splay tree code copied from include/splay-tree.h and adjusted, |
|
|
+ so that all the data lives directly in splay_tree_node_s structure |
|
|
+ and no extra allocations are needed. |
|
|
+ |
|
|
+ Before including this header, files should add: |
|
|
+typedef struct splay_tree_node_s *splay_tree_node; |
|
|
+typedef struct splay_tree_s *splay_tree; |
|
|
+typedef struct splay_tree_key_s *splay_tree_key; |
|
|
+ define splay_tree_key_s structure, and define |
|
|
+ splay_compare inline function. |
|
|
+ |
|
|
+ Alternatively, they can define splay_tree_prefix macro before |
|
|
+ including this header and then all the above types, the |
|
|
+ splay_compare function and the splay_tree_{lookup,insert,remove} |
|
|
+ function will be prefixed by that prefix. If splay_tree_prefix |
|
|
+ macro is defined, this header must be included twice: once where |
|
|
+ you need the header file definitions, and once where you need the |
|
|
+ .c implementation routines. In the latter case, you must also |
|
|
+ define the macro splay_tree_c. See the include of splay-tree.h in |
|
|
+ priority_queue.[hc] for an example. */ |
|
|
+ |
|
|
+/* For an easily readable description of splay-trees, see: |
|
|
+ |
|
|
+ Lewis, Harry R. and Denenberg, Larry. Data Structures and Their |
|
|
+ Algorithms. Harper-Collins, Inc. 1991. |
|
|
+ |
|
|
+ The major feature of splay trees is that all basic tree operations |
|
|
+ are amortized O(log n) time for a tree with n nodes. */ |
|
|
+ |
|
|
+#ifdef splay_tree_prefix |
|
|
+# define splay_tree_name_1(prefix, name) prefix ## _ ## name |
|
|
+# define splay_tree_name(prefix, name) splay_tree_name_1 (prefix, name) |
|
|
+# define splay_tree_node_s \ |
|
|
+ splay_tree_name (splay_tree_prefix, splay_tree_node_s) |
|
|
+# define splay_tree_s \ |
|
|
+ splay_tree_name (splay_tree_prefix, splay_tree_s) |
|
|
+# define splay_tree_key_s \ |
|
|
+ splay_tree_name (splay_tree_prefix, splay_tree_key_s) |
|
|
+# define splay_tree_node \ |
|
|
+ splay_tree_name (splay_tree_prefix, splay_tree_node) |
|
|
+# define splay_tree \ |
|
|
+ splay_tree_name (splay_tree_prefix, splay_tree) |
|
|
+# define splay_tree_key \ |
|
|
+ splay_tree_name (splay_tree_prefix, splay_tree_key) |
|
|
+# define splay_compare \ |
|
|
+ splay_tree_name (splay_tree_prefix, splay_compare) |
|
|
+# define splay_tree_lookup \ |
|
|
+ splay_tree_name (splay_tree_prefix, splay_tree_lookup) |
|
|
+# define splay_tree_insert \ |
|
|
+ splay_tree_name (splay_tree_prefix, splay_tree_insert) |
|
|
+# define splay_tree_remove \ |
|
|
+ splay_tree_name (splay_tree_prefix, splay_tree_remove) |
|
|
+# define splay_tree_foreach \ |
|
|
+ splay_tree_name (splay_tree_prefix, splay_tree_foreach) |
|
|
+# define splay_tree_callback \ |
|
|
+ splay_tree_name (splay_tree_prefix, splay_tree_callback) |
|
|
+#endif |
|
|
+ |
|
|
+#ifndef splay_tree_c |
|
|
+/* Header file definitions and prototypes. */ |
|
|
+ |
|
|
+/* The nodes in the splay tree. */ |
|
|
+struct splay_tree_node_s { |
|
|
+ struct splay_tree_key_s key; |
|
|
+ /* The left and right children, respectively. */ |
|
|
+ splay_tree_node left; |
|
|
+ splay_tree_node right; |
|
|
+}; |
|
|
+ |
|
|
+/* The splay tree. */ |
|
|
+struct splay_tree_s { |
|
|
+ splay_tree_node root; |
|
|
+}; |
|
|
+ |
|
|
+typedef void (*splay_tree_callback) (splay_tree_key, void *); |
|
|
+ |
|
|
+extern splay_tree_key splay_tree_lookup (splay_tree, splay_tree_key); |
|
|
+extern void splay_tree_insert (splay_tree, splay_tree_node); |
|
|
+extern void splay_tree_remove (splay_tree, splay_tree_key); |
|
|
+extern void splay_tree_foreach (splay_tree, splay_tree_callback, void *); |
|
|
+#else /* splay_tree_c */ |
|
|
+# ifdef splay_tree_prefix |
|
|
+# include "splay-tree.c" |
|
|
+# undef splay_tree_name_1 |
|
|
+# undef splay_tree_name |
|
|
+# undef splay_tree_node_s |
|
|
+# undef splay_tree_s |
|
|
+# undef splay_tree_key_s |
|
|
+# undef splay_tree_node |
|
|
+# undef splay_tree |
|
|
+# undef splay_tree_key |
|
|
+# undef splay_compare |
|
|
+# undef splay_tree_lookup |
|
|
+# undef splay_tree_insert |
|
|
+# undef splay_tree_remove |
|
|
+# undef splay_tree_foreach |
|
|
+# undef splay_tree_callback |
|
|
+# undef splay_tree_c |
|
|
+# endif |
|
|
+#endif /* #ifndef splay_tree_c */ |
|
|
+ |
|
|
+#ifdef splay_tree_prefix |
|
|
+# undef splay_tree_prefix |
|
|
+#endif |
|
|
--- libgomp/oacc-plugin.c.jj 2016-07-13 16:57:13.481423196 +0200 |
|
|
+++ libgomp/oacc-plugin.c 2016-07-14 15:40:21.653151873 +0200 |
|
|
@@ -0,0 +1,44 @@ |
|
|
+/* Copyright (C) 2014-2016 Free Software Foundation, Inc. |
|
|
+ |
|
|
+ Contributed by Mentor Embedded. |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+/* Initialize and register OpenACC dispatch table from libgomp plugin. */ |
|
|
+ |
|
|
+#include "libgomp.h" |
|
|
+#include "oacc-plugin.h" |
|
|
+#include "oacc-int.h" |
|
|
+ |
|
|
+/* Plugin-facing entry point. In this file it is an empty stub: PTR and |
|
|
+ ASYNC are ignored and nothing is done. */ |
|
|
+void |
|
|
+GOMP_PLUGIN_async_unmap_vars (void *ptr, int async) |
|
|
+{ |
|
|
+} |
|
|
+ |
|
|
+/* Nominally returns the target-specific part of the TLS data for the |
|
|
+ current thread. In this file it is a stub that always returns NULL. */ |
|
|
+ |
|
|
+void * |
|
|
+GOMP_PLUGIN_acc_thread (void) |
|
|
+{ |
|
|
+ return NULL; |
|
|
+} |
|
|
--- libgomp/oacc-init.c.jj 2016-07-13 16:57:04.423535509 +0200 |
|
|
+++ libgomp/oacc-init.c 2016-07-14 19:06:41.679575688 +0200 |
|
|
@@ -0,0 +1,640 @@ |
|
|
+/* OpenACC Runtime initialization routines |
|
|
+ |
|
|
+ Copyright (C) 2013-2016 Free Software Foundation, Inc. |
|
|
+ |
|
|
+ Contributed by Mentor Embedded. |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+#include "libgomp.h" |
|
|
+#include "oacc-int.h" |
|
|
+#include "openacc.h" |
|
|
+#include <assert.h> |
|
|
+#include <stdlib.h> |
|
|
+#include <strings.h> |
|
|
+#include <stdbool.h> |
|
|
+#include <string.h> |
|
|
+ |
|
|
+/* This lock is used to protect access to cached_base_dev, dispatchers and |
|
|
+ the (abstract) initialisation state of attached offloading devices. */ |
|
|
+ |
|
|
+static gomp_mutex_t acc_device_lock; |
|
|
+ |
|
|
+/* A cached version of the dispatcher for the global "current" accelerator type, |
|
|
+ e.g. used as the default when creating new host threads. This is the |
|
|
+ device-type equivalent of goacc_device_num (which specifies which device to |
|
|
+ use out of potentially several of the same type). If there are several |
|
|
+ devices of a given type, this points at the first one. */ |
|
|
+ |
|
|
+static struct gomp_device_descr *cached_base_dev = NULL; |
|
|
+ |
|
|
+/* Per-thread pointer to the calling thread's goacc_thread block: a __thread |
|
|
+ variable when HAVE_TLS or USE_EMUTLS is defined, otherwise a pthread |
|
|
+ key. */ |
|
|
+#if defined HAVE_TLS || defined USE_EMUTLS |
|
|
+__thread struct goacc_thread *goacc_tls_data; |
|
|
+#else |
|
|
+pthread_key_t goacc_tls_key; |
|
|
+#endif |
|
|
+/* Key created in goacc_runtime_initialize with goacc_destroy_thread as its |
|
|
+ destructor; goacc_new_thread stores the thread block under it. */ |
|
|
+static pthread_key_t goacc_cleanup_key; |
|
|
+ |
|
|
+/* Head of the singly linked list of all goacc_thread blocks (linked through |
|
|
+ their NEXT field), and the lock taken around walks and updates of it. */ |
|
|
+static struct goacc_thread *goacc_threads; |
|
|
+static gomp_mutex_t goacc_thread_lock; |
|
|
+ |
|
|
+/* An array of dispatchers for device types, indexed by the type. This array |
|
|
+ only references "base" devices, and other instances of the same type are |
|
|
+ found by simply indexing from each such device (which are stored linearly, |
|
|
+ grouped by device in target.c:devices). */ |
|
|
+static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 }; |
|
|
+ |
|
|
+/* Record DISP as the dispatcher for its device type. Only a descriptor |
|
|
+ whose target_id is 0 is stored; any other returns immediately. Takes |
|
|
+ acc_device_lock, and asserts that the type is a concrete one and that no |
|
|
+ dispatcher has been registered for that type yet. */ |
|
|
+attribute_hidden void |
|
|
+goacc_register (struct gomp_device_descr *disp) |
|
|
+{ |
|
|
+ /* Only register the 0th device here. */ |
|
|
+ if (disp->target_id != 0) |
|
|
+ return; |
|
|
+ |
|
|
+ gomp_mutex_lock (&acc_device_lock); |
|
|
+ |
|
|
+ assert (acc_device_type (disp->type) != acc_device_none |
|
|
+ && acc_device_type (disp->type) != acc_device_default |
|
|
+ && acc_device_type (disp->type) != acc_device_not_host); |
|
|
+ assert (!dispatchers[disp->type]); |
|
|
+ dispatchers[disp->type] = disp; |
|
|
+ |
|
|
+ gomp_mutex_unlock (&acc_device_lock); |
|
|
+} |
|
|
+ |
|
|
+/* Return the lower-case name of device type TYPE as a string literal. Any |
|
|
+ value not listed below is reported through gomp_fatal. */ |
|
|
+static const char * |
|
|
+name_of_acc_device_t (enum acc_device_t type) |
|
|
+{ |
|
|
+ switch (type) |
|
|
+ { |
|
|
+ case acc_device_none: return "none"; |
|
|
+ case acc_device_default: return "default"; |
|
|
+ case acc_device_host: return "host"; |
|
|
+ case acc_device_not_host: return "not_host"; |
|
|
+ case acc_device_nvidia: return "nvidia"; |
|
|
+ default: gomp_fatal ("unknown device type %u", (unsigned) type); |
|
|
+ } |
|
|
+} |
|
|
+ |
|
|
+/* Map device type D to the registered base dispatcher for that type. |
|
|
+ ACC_DEVICE_LOCK must be held before calling this function. If |
|
|
+ FAIL_IS_ERROR is true, this function releases the lock and raises a fatal |
|
|
+ error if there are no devices of type D, otherwise it returns NULL in |
|
|
+ that case. |
|
|
+ |
|
|
+ acc_device_default resolves to "host" when goacc_device_type names it |
|
|
+ (case-insensitively) or when goacc_device_type is unset; any other name |
|
|
+ is unsupported. acc_device_not_host never resolves here. */ |
|
|
+ |
|
|
+static struct gomp_device_descr * |
|
|
+resolve_device (acc_device_t d, bool fail_is_error) |
|
|
+{ |
|
|
+  acc_device_t d_arg = d; |
|
|
+ |
|
|
+  switch (d) |
|
|
+    { |
|
|
+    case acc_device_default: |
|
|
+      { |
|
|
+        if (goacc_device_type) |
|
|
+          { |
|
|
+            /* Lookup the named device. */ |
|
|
+            if (!strcasecmp (goacc_device_type, "host")) |
|
|
+              { |
|
|
+                d = acc_device_host; |
|
|
+                goto found; |
|
|
+              } |
|
|
+ |
|
|
+            if (fail_is_error) |
|
|
+              { |
|
|
+                gomp_mutex_unlock (&acc_device_lock); |
|
|
+                gomp_fatal ("device type %s not supported", goacc_device_type); |
|
|
+              } |
|
|
+            else |
|
|
+              return NULL; |
|
|
+          } |
|
|
+ |
|
|
+        /* No default device specified: continue as for a non-host request; |
|
|
+           the case below turns an original "default" request into the |
|
|
+           host device. */ |
|
|
+        d = acc_device_not_host; |
|
|
+      } |
|
|
+      /* FALLTHROUGH */ |
|
|
+ |
|
|
+    case acc_device_not_host: |
|
|
+      if (d_arg == acc_device_default) |
|
|
+        { |
|
|
+          d = acc_device_host; |
|
|
+          goto found; |
|
|
+        } |
|
|
+      if (fail_is_error) |
|
|
+        { |
|
|
+          gomp_mutex_unlock (&acc_device_lock); |
|
|
+          gomp_fatal ("no device found"); |
|
|
+        } |
|
|
+      else |
|
|
+        return NULL; |
|
|
+      break; |
|
|
+ |
|
|
+    case acc_device_host: |
|
|
+      break; |
|
|
+ |
|
|
+    default: |
|
|
+      /* dispatchers[] has _ACC_device_hwm elements, so the high-water mark |
|
|
+         itself is already out of range (acc_get_device_num applies the |
|
|
+         same >= test). */ |
|
|
+      if (d >= _ACC_device_hwm) |
|
|
+        { |
|
|
+          if (fail_is_error) |
|
|
+            goto unsupported_device; |
|
|
+          else |
|
|
+            return NULL; |
|
|
+        } |
|
|
+      break; |
|
|
+    } |
|
|
+ found: |
|
|
+ |
|
|
+  assert (d != acc_device_none |
|
|
+          && d != acc_device_default |
|
|
+          && d != acc_device_not_host); |
|
|
+ |
|
|
+  if (dispatchers[d] == NULL && fail_is_error) |
|
|
+    { |
|
|
+    unsupported_device: |
|
|
+      gomp_mutex_unlock (&acc_device_lock); |
|
|
+      gomp_fatal ("device type %s not supported", name_of_acc_device_t (d)); |
|
|
+    } |
|
|
+ |
|
|
+  return dispatchers[d]; |
|
|
+} |
|
|
+ |
|
|
+/* Report a bad device selection through gomp_fatal: when NDEVS is nonzero |
|
|
+ the ordinal ORD is reported as out of range, otherwise the message says |
|
|
+ that no devices of type D are available. */ |
|
|
+static void |
|
|
+acc_dev_num_out_of_range (acc_device_t d, int ord, int ndevs) |
|
|
+{ |
|
|
+  if (ndevs != 0) |
|
|
+    gomp_fatal ("device %u out of range", ord); |
|
|
+  else |
|
|
+    gomp_fatal ("no devices of type %s available", name_of_acc_device_t (d)); |
|
|
+} |
|
|
+ |
|
|
+/* This is called when plugins have been initialized, and serves to call |
|
|
+ (indirectly) the target's device_init hook. Calling multiple times without |
|
|
+ an intervening acc_shutdown_1 call is an error. ACC_DEVICE_LOCK must be |
|
|
+ held before calling this function. |
|
|
+ |
|
|
+ Resolves D to its base device, checks goacc_device_num against the device |
|
|
+ count, then initializes that device instance under its own lock. Returns |
|
|
+ the base device for D. */ |
|
|
+ |
|
|
+static struct gomp_device_descr * |
|
|
+acc_init_1 (acc_device_t d) |
|
|
+{ |
|
|
+ struct gomp_device_descr *base_dev, *acc_dev; |
|
|
+ int ndevs; |
|
|
+ |
|
|
+ base_dev = resolve_device (d, true); |
|
|
+ |
|
|
+ ndevs = base_dev->get_num_devices_func (); |
|
|
+ |
|
|
+ /* NOTE(review): only the upper bound is tested; a negative |
|
|
+ goacc_device_num would pass this check -- confirm it cannot occur. */ |
|
|
+ if (ndevs <= 0 || goacc_device_num >= ndevs) |
|
|
+ acc_dev_num_out_of_range (d, goacc_device_num, ndevs); |
|
|
+ |
|
|
+ acc_dev = &base_dev[goacc_device_num]; |
|
|
+ |
|
|
+ gomp_mutex_lock (&acc_dev->lock); |
|
|
+ if (acc_dev->state == GOMP_DEVICE_INITIALIZED) |
|
|
+ { |
|
|
+ gomp_mutex_unlock (&acc_dev->lock); |
|
|
+ gomp_fatal ("device already active"); |
|
|
+ } |
|
|
+ |
|
|
+ gomp_init_device (acc_dev); |
|
|
+ gomp_mutex_unlock (&acc_dev->lock); |
|
|
+ |
|
|
+ return base_dev; |
|
|
+} |
|
|
+ |
|
|
+/* Shut down every initialized device of type D. First detaches all known |
|
|
+ host threads (under goacc_thread_lock), then finalizes each device |
|
|
+ instance under its own lock. Raises a fatal error if a finalizer fails |
|
|
+ or if no device of this type was initialized. ACC_DEVICE_LOCK must be |
|
|
+ held before calling this function. */ |
|
|
+ |
|
|
+static void |
|
|
+acc_shutdown_1 (acc_device_t d) |
|
|
+{ |
|
|
+ struct gomp_device_descr *base_dev; |
|
|
+ struct goacc_thread *walk; |
|
|
+ int ndevs, i; |
|
|
+ bool devices_active = false; |
|
|
+ |
|
|
+ /* Get the base device for this device type. */ |
|
|
+ base_dev = resolve_device (d, true); |
|
|
+ |
|
|
+ ndevs = base_dev->get_num_devices_func (); |
|
|
+ |
|
|
+ gomp_mutex_lock (&goacc_thread_lock); |
|
|
+ |
|
|
+ /* Free target-specific TLS data and detach every thread from its device. */ |
|
|
+ for (walk = goacc_threads; walk != NULL; walk = walk->next) |
|
|
+ { |
|
|
+ /* NOTE(review): the destroy hook of BASE_DEV is used for every thread, |
|
|
+ whatever device type that thread is attached to -- confirm. */ |
|
|
+ if (walk->target_tls) |
|
|
+ base_dev->openacc.destroy_thread_data_func (walk->target_tls); |
|
|
+ |
|
|
+ walk->target_tls = NULL; |
|
|
+ |
|
|
+ /* A saved bound device means this thread is inside a |
|
|
+ goacc_save_and_set_bind region: user code has done something weird. */ |
|
|
+ if (walk->saved_bound_dev) |
|
|
+ { |
|
|
+ gomp_mutex_unlock (&goacc_thread_lock); |
|
|
+ gomp_fatal ("shutdown during host fallback"); |
|
|
+ } |
|
|
+ |
|
|
+ if (walk->dev) |
|
|
+ { |
|
|
+ gomp_mutex_lock (&walk->dev->lock); |
|
|
+ gomp_free_memmap (&walk->dev->mem_map); |
|
|
+ gomp_mutex_unlock (&walk->dev->lock); |
|
|
+ |
|
|
+ walk->dev = NULL; |
|
|
+ walk->base_dev = NULL; |
|
|
+ } |
|
|
+ } |
|
|
+ |
|
|
+ gomp_mutex_unlock (&goacc_thread_lock); |
|
|
+ |
|
|
+ /* Close all the devices of this type that have been opened. */ |
|
|
+ bool ret = true; |
|
|
+ for (i = 0; i < ndevs; i++) |
|
|
+ { |
|
|
+ struct gomp_device_descr *acc_dev = &base_dev[i]; |
|
|
+ gomp_mutex_lock (&acc_dev->lock); |
|
|
+ if (acc_dev->state == GOMP_DEVICE_INITIALIZED) |
|
|
+ { |
|
|
+ devices_active = true; |
|
|
+ ret &= acc_dev->fini_device_func (acc_dev->target_id); |
|
|
+ acc_dev->state = GOMP_DEVICE_UNINITIALIZED; |
|
|
+ } |
|
|
+ gomp_mutex_unlock (&acc_dev->lock); |
|
|
+ } |
|
|
+ |
|
|
+ /* Failures are reported only after every device has been visited. */ |
|
|
+ if (!ret) |
|
|
+ gomp_fatal ("device finalization failed"); |
|
|
+ |
|
|
+ if (!devices_active) |
|
|
+ gomp_fatal ("no device initialized"); |
|
|
+} |
|
|
+ |
|
|
+/* Allocate the goacc_thread block for the calling thread, publish it |
|
|
+ through the thread-local pointer (or pthread key) and through |
|
|
+ goacc_cleanup_key, and push it on the goacc_threads list under |
|
|
+ goacc_thread_lock. Returns the new block with every field cleared. */ |
|
|
+static struct goacc_thread * |
|
|
+goacc_new_thread (void) |
|
|
+{ |
|
|
+  /* Size the block from the pointee: this is a goacc_thread, not a |
|
|
+     gomp_thread. */ |
|
|
+  struct goacc_thread *thr = gomp_malloc (sizeof *thr); |
|
|
+ |
|
|
+  /* Start from a clean block: goacc_destroy_thread asserts on mapped_data, |
|
|
+     which nothing in this file assigns before that point. */ |
|
|
+  memset (thr, 0, sizeof *thr); |
|
|
+ |
|
|
+#if defined HAVE_TLS || defined USE_EMUTLS |
|
|
+  goacc_tls_data = thr; |
|
|
+#else |
|
|
+  pthread_setspecific (goacc_tls_key, thr); |
|
|
+#endif |
|
|
+ |
|
|
+  /* Registering under the cleanup key arranges for goacc_destroy_thread to |
|
|
+     receive this block (see goacc_runtime_initialize). */ |
|
|
+  pthread_setspecific (goacc_cleanup_key, thr); |
|
|
+ |
|
|
+  gomp_mutex_lock (&goacc_thread_lock); |
|
|
+  thr->next = goacc_threads; |
|
|
+  goacc_threads = thr; |
|
|
+  gomp_mutex_unlock (&goacc_thread_lock); |
|
|
+ |
|
|
+  return thr; |
|
|
+} |
|
|
+ |
|
|
+/* Destructor installed for goacc_cleanup_key. DATA is the goacc_thread |
|
|
+ block of the exiting thread (a null DATA only takes and releases the |
|
|
+ lock). Destroys the target TLS data if the thread still has a device, |
|
|
+ unlinks the block from goacc_threads and frees it, all under |
|
|
+ goacc_thread_lock. */ |
|
|
+static void |
|
|
+goacc_destroy_thread (void *data) |
|
|
+{ |
|
|
+ struct goacc_thread *thr = data, *walk, *prev; |
|
|
+ |
|
|
+ gomp_mutex_lock (&goacc_thread_lock); |
|
|
+ |
|
|
+ if (thr) |
|
|
+ { |
|
|
+ struct gomp_device_descr *acc_dev = thr->dev; |
|
|
+ |
|
|
+ if (acc_dev && thr->target_tls) |
|
|
+ { |
|
|
+ acc_dev->openacc.destroy_thread_data_func (thr->target_tls); |
|
|
+ thr->target_tls = NULL; |
|
|
+ } |
|
|
+ |
|
|
+ /* No data region may still be open when the thread goes away. */ |
|
|
+ assert (!thr->mapped_data); |
|
|
+ |
|
|
+ /* Remove from thread list. */ |
|
|
+ for (prev = NULL, walk = goacc_threads; walk; |
|
|
+ prev = walk, walk = walk->next) |
|
|
+ if (walk == thr) |
|
|
+ { |
|
|
+ if (prev == NULL) |
|
|
+ goacc_threads = walk->next; |
|
|
+ else |
|
|
+ prev->next = walk->next; |
|
|
+ |
|
|
+ free (thr); |
|
|
+ |
|
|
+ break; |
|
|
+ } |
|
|
+ |
|
|
+ /* WALK is non-null only if the loop stopped at THR, i.e. the block was |
|
|
+ found on the list. */ |
|
|
+ assert (walk); |
|
|
+ } |
|
|
+ |
|
|
+ gomp_mutex_unlock (&goacc_thread_lock); |
|
|
+} |
|
|
+ |
|
|
+/* Use the ORD'th device instance for the current host thread (or -1 for the |
|
|
+ current global default). The device (and the runtime) must be initialised |
|
|
+ before calling this function. |
|
|
+ |
|
|
+ Returns without change when the thread is already attached and either |
|
|
+ ORD is negative or it matches the attached device's target_id. Creates |
|
|
+ the thread block on first use and (re)creates the target TLS data. */ |
|
|
+ |
|
|
+void |
|
|
+goacc_attach_host_thread_to_device (int ord) |
|
|
+{ |
|
|
+ struct goacc_thread *thr = goacc_thread (); |
|
|
+ struct gomp_device_descr *acc_dev = NULL, *base_dev = NULL; |
|
|
+ int num_devices; |
|
|
+ |
|
|
+ if (thr && thr->dev && (thr->dev->target_id == ord || ord < 0)) |
|
|
+ return; |
|
|
+ |
|
|
+ if (ord < 0) |
|
|
+ ord = goacc_device_num; |
|
|
+ |
|
|
+ /* Decide which type of device to use. If the current thread has a device |
|
|
+ type already (e.g. set by acc_set_device_type), use that, else use the |
|
|
+ global default. */ |
|
|
+ if (thr && thr->base_dev) |
|
|
+ base_dev = thr->base_dev; |
|
|
+ else |
|
|
+ { |
|
|
+ assert (cached_base_dev); |
|
|
+ base_dev = cached_base_dev; |
|
|
+ } |
|
|
+ |
|
|
+ num_devices = base_dev->get_num_devices_func (); |
|
|
+ if (num_devices <= 0 || ord >= num_devices) |
|
|
+ acc_dev_num_out_of_range (acc_device_type (base_dev->type), ord, |
|
|
+ num_devices); |
|
|
+ |
|
|
+ if (!thr) |
|
|
+ thr = goacc_new_thread (); |
|
|
+ |
|
|
+ thr->base_dev = base_dev; |
|
|
+ thr->dev = acc_dev = &base_dev[ord]; |
|
|
+ thr->saved_bound_dev = NULL; |
|
|
+ |
|
|
+ /* NOTE(review): an existing target_tls is overwritten here without being |
|
|
+ destroyed -- confirm callers never reach this with one still set. */ |
|
|
+ thr->target_tls |
|
|
+ = acc_dev->openacc.create_thread_data_func (ord); |
|
|
+ |
|
|
+ acc_dev->openacc.async_set_async_func (acc_async_sync); |
|
|
+} |
|
|
+ |
|
|
+/* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of |
|
|
+ init/shutdown is per-process or per-thread. We choose per-process. |
|
|
+ |
|
|
+ Initializes device type D under acc_device_lock, caches its base device |
|
|
+ as the global default, then attaches the calling thread to the default |
|
|
+ device number. */ |
|
|
+ |
|
|
+void |
|
|
+acc_init (acc_device_t d) |
|
|
+{ |
|
|
+ gomp_mutex_lock (&acc_device_lock); |
|
|
+ |
|
|
+ cached_base_dev = acc_init_1 (d); |
|
|
+ |
|
|
+ gomp_mutex_unlock (&acc_device_lock); |
|
|
+ |
|
|
+ /* Done outside the lock: attaching takes goacc_thread_lock when it has to |
|
|
+ create the thread block. */ |
|
|
+ goacc_attach_host_thread_to_device (-1); |
|
|
+} |
|
|
+ |
|
|
+ialias (acc_init) |
|
|
+ |
|
|
+/* Shut down all devices of type D; a thin wrapper that runs acc_shutdown_1 |
|
|
+ with acc_device_lock held. */ |
|
|
+void |
|
|
+acc_shutdown (acc_device_t d) |
|
|
+{ |
|
|
+ gomp_mutex_lock (&acc_device_lock); |
|
|
+ |
|
|
+ acc_shutdown_1 (d); |
|
|
+ |
|
|
+ gomp_mutex_unlock (&acc_device_lock); |
|
|
+} |
|
|
+ |
|
|
+ialias (acc_shutdown) |
|
|
+ |
|
|
+/* Return how many devices of type D exist. The answer is 0 for |
|
|
+ acc_device_none, for a type that does not resolve to a dispatcher, and |
|
|
+ whenever the dispatcher reports a negative count. */ |
|
|
+int |
|
|
+acc_get_num_devices (acc_device_t d) |
|
|
+{ |
|
|
+  struct gomp_device_descr *base; |
|
|
+  int count; |
|
|
+ |
|
|
+  if (d != acc_device_none) |
|
|
+    { |
|
|
+      /* Resolution is non-fatal here: a missing type yields NULL. */ |
|
|
+      gomp_mutex_lock (&acc_device_lock); |
|
|
+      base = resolve_device (d, false); |
|
|
+      gomp_mutex_unlock (&acc_device_lock); |
|
|
+ |
|
|
+      if (base != NULL) |
|
|
+        { |
|
|
+          count = base->get_num_devices_func (); |
|
|
+          return count > 0 ? count : 0; |
|
|
+        } |
|
|
+    } |
|
|
+ |
|
|
+  return 0; |
|
|
+} |
|
|
+ |
|
|
+ialias (acc_get_num_devices) |
|
|
+ |
|
|
+/* Set the device type for the current thread only (using the current global |
|
|
+ default device number), initialising that device if necessary. Also set the |
|
|
+ default device type for new threads to D. */ |
|
|
+ |
|
|
+void |
|
|
+acc_set_device_type (acc_device_t d) |
|
|
+{ |
|
|
+ struct gomp_device_descr *base_dev, *acc_dev; |
|
|
+ struct goacc_thread *thr = goacc_thread (); |
|
|
+ |
|
|
+ gomp_mutex_lock (&acc_device_lock); |
|
|
+ |
|
|
+ cached_base_dev = base_dev = resolve_device (d, true); |
|
|
+ /* NOTE(review): unlike acc_init_1 and acc_set_device_num, goacc_device_num |
|
|
+ is not checked against the device count before indexing -- confirm. */ |
|
|
+ acc_dev = &base_dev[goacc_device_num]; |
|
|
+ |
|
|
+ gomp_mutex_lock (&acc_dev->lock); |
|
|
+ if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED) |
|
|
+ gomp_init_device (acc_dev); |
|
|
+ gomp_mutex_unlock (&acc_dev->lock); |
|
|
+ |
|
|
+ gomp_mutex_unlock (&acc_device_lock); |
|
|
+ |
|
|
+ /* We're changing device type: invalidate the current thread's dev and |
|
|
+ base_dev pointers. */ |
|
|
+ if (thr && thr->base_dev != base_dev) |
|
|
+ { |
|
|
+ thr->base_dev = thr->dev = NULL; |
|
|
+ } |
|
|
+ |
|
|
+ /* With base_dev cleared above, this re-attaches through cached_base_dev. */ |
|
|
+ goacc_attach_host_thread_to_device (-1); |
|
|
+} |
|
|
+ |
|
|
+ialias (acc_set_device_type) |
|
|
+ |
|
|
+/* Return the device type in use by the calling thread. A thread that has |
|
|
+ no base device yet gets the type that acc_device_default resolves to |
|
|
+ (resolution failures are fatal). */ |
|
|
+acc_device_t |
|
|
+acc_get_device_type (void) |
|
|
+{ |
|
|
+  struct goacc_thread *self = goacc_thread (); |
|
|
+  struct gomp_device_descr *base = NULL; |
|
|
+  acc_device_t res; |
|
|
+ |
|
|
+  if (self != NULL) |
|
|
+    base = self->base_dev; |
|
|
+ |
|
|
+  if (base == NULL) |
|
|
+    { |
|
|
+      /* Nothing attached yet: ask for the global default. */ |
|
|
+      gomp_mutex_lock (&acc_device_lock); |
|
|
+      base = resolve_device (acc_device_default, true); |
|
|
+      gomp_mutex_unlock (&acc_device_lock); |
|
|
+    } |
|
|
+ |
|
|
+  res = acc_device_type (base->type); |
|
|
+ |
|
|
+  assert (res != acc_device_default |
|
|
+          && res != acc_device_not_host); |
|
|
+ |
|
|
+  return res; |
|
|
+} |
|
|
+ |
|
|
+ialias (acc_get_device_type) |
|
|
+ |
|
|
+/* Return the device number in use for device type D: the target_id of the |
|
|
+ calling thread's device when the thread is attached to a device of that |
|
|
+ type, and the global goacc_device_num otherwise. A type at or beyond |
|
|
+ _ACC_device_hwm is a fatal error. */ |
|
|
+int |
|
|
+acc_get_device_num (acc_device_t d) |
|
|
+{ |
|
|
+  struct goacc_thread *self = goacc_thread (); |
|
|
+  const struct gomp_device_descr *base; |
|
|
+ |
|
|
+  if (d >= _ACC_device_hwm) |
|
|
+    gomp_fatal ("unknown device type %u", (unsigned) d); |
|
|
+ |
|
|
+  gomp_mutex_lock (&acc_device_lock); |
|
|
+  base = resolve_device (d, true); |
|
|
+  gomp_mutex_unlock (&acc_device_lock); |
|
|
+ |
|
|
+  /* Fall back to the global number unless this thread is attached to a |
|
|
+     device of exactly this type. */ |
|
|
+  if (self == NULL || self->base_dev != base || self->dev == NULL) |
|
|
+    return goacc_device_num; |
|
|
+ |
|
|
+  return self->dev->target_id; |
|
|
+} |
|
|
+ |
|
|
+ialias (acc_get_device_num) |
|
|
+ |
|
|
+/* Select device instance ORD of type D for the calling thread and make ORD |
|
|
+ the global default device number. A negative ORD stands for the current |
|
|
+ global default. When D is 0 the thread keeps its current device type; |
|
|
+ otherwise D also becomes the cached default type and the device is |
|
|
+ initialized if it was not already. */ |
|
|
+void |
|
|
+acc_set_device_num (int ord, acc_device_t d) |
|
|
+{ |
|
|
+ struct gomp_device_descr *base_dev, *acc_dev; |
|
|
+ int num_devices; |
|
|
+ |
|
|
+ if (ord < 0) |
|
|
+ ord = goacc_device_num; |
|
|
+ |
|
|
+ if ((int) d == 0) |
|
|
+ /* Set whatever device is being used by the current host thread to use |
|
|
+ device instance ORD. It's unclear if this is supposed to affect other |
|
|
+ host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num). */ |
|
|
+ goacc_attach_host_thread_to_device (ord); |
|
|
+ else |
|
|
+ { |
|
|
+ gomp_mutex_lock (&acc_device_lock); |
|
|
+ |
|
|
+ cached_base_dev = base_dev = resolve_device (d, true); |
|
|
+ |
|
|
+ num_devices = base_dev->get_num_devices_func (); |
|
|
+ |
|
|
+ /* NOTE(review): this fatal path is taken with acc_device_lock still |
|
|
+ held, unlike the fatal paths in resolve_device -- confirm intended. */ |
|
|
+ if (num_devices <= 0 || ord >= num_devices) |
|
|
+ acc_dev_num_out_of_range (d, ord, num_devices); |
|
|
+ |
|
|
+ acc_dev = &base_dev[ord]; |
|
|
+ |
|
|
+ gomp_mutex_lock (&acc_dev->lock); |
|
|
+ if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED) |
|
|
+ gomp_init_device (acc_dev); |
|
|
+ gomp_mutex_unlock (&acc_dev->lock); |
|
|
+ |
|
|
+ gomp_mutex_unlock (&acc_device_lock); |
|
|
+ |
|
|
+ goacc_attach_host_thread_to_device (ord); |
|
|
+ } |
|
|
+ |
|
|
+ goacc_device_num = ord; |
|
|
+} |
|
|
+ |
|
|
+ialias (acc_set_device_num) |
|
|
+ |
|
|
+/* Return 1 when DEV is acc_device_host or acc_device_none, and 0 for any |
|
|
+ other device type. */ |
|
|
+int |
|
|
+acc_on_device (acc_device_t dev) |
|
|
+{ |
|
|
+  switch (dev) |
|
|
+    { |
|
|
+    case acc_device_host: |
|
|
+    case acc_device_none: |
|
|
+      return 1; |
|
|
+    default: |
|
|
+      return 0; |
|
|
+    } |
|
|
+} |
|
|
+ |
|
|
+ialias (acc_on_device) |
|
|
+ |
|
|
+/* One-time set-up of the OpenACC runtime state in this file: the two locks, |
|
|
+ the pthread keys, the cached default device and the thread list; then |
|
|
+ registers the host device. */ |
|
|
+attribute_hidden void |
|
|
+goacc_runtime_initialize (void) |
|
|
+{ |
|
|
+ gomp_mutex_init (&acc_device_lock); |
|
|
+ |
|
|
+#if !(defined HAVE_TLS || defined USE_EMUTLS) |
|
|
+ pthread_key_create (&goacc_tls_key, NULL); |
|
|
+#endif |
|
|
+ |
|
|
+ /* The destructor frees a thread's goacc_thread block. */ |
|
|
+ pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread); |
|
|
+ |
|
|
+ cached_base_dev = NULL; |
|
|
+ |
|
|
+ goacc_threads = NULL; |
|
|
+ gomp_mutex_init (&goacc_thread_lock); |
|
|
+ |
|
|
+ /* Initialize and register the 'host' device type. */ |
|
|
+ goacc_host_init (); |
|
|
+} |
|
|
+ |
|
|
+/* Compiler helper functions */ |
|
|
+ |
|
|
+/* Temporarily rebind the calling thread to the base dispatcher of type D, |
|
|
+ remembering the previous device in saved_bound_dev. Must not be nested: |
|
|
+ a save that is already pending trips the assertion. The thread block is |
|
|
+ dereferenced unchecked, so it must already exist. */ |
|
|
+attribute_hidden void |
|
|
+goacc_save_and_set_bind (acc_device_t d) |
|
|
+{ |
|
|
+ struct goacc_thread *thr = goacc_thread (); |
|
|
+ |
|
|
+ assert (!thr->saved_bound_dev); |
|
|
+ |
|
|
+ thr->saved_bound_dev = thr->dev; |
|
|
+ thr->dev = dispatchers[d]; |
|
|
+} |
|
|
+ |
|
|
+/* Undo goacc_save_and_set_bind: put the saved device back as the calling |
|
|
+ thread's device and clear the saved slot. */ |
|
|
+attribute_hidden void |
|
|
+goacc_restore_bind (void) |
|
|
+{ |
|
|
+ struct goacc_thread *thr = goacc_thread (); |
|
|
+ |
|
|
+ thr->dev = thr->saved_bound_dev; |
|
|
+ thr->saved_bound_dev = NULL; |
|
|
+} |
|
|
+ |
|
|
+/* Entry hook for OpenACC support functions that may be the first to touch |
|
|
+ the runtime, either for the whole process or for a new host thread. A |
|
|
+ thread that already has a device is left alone; otherwise the runtime is |
|
|
+ initialized with the default device type if that has not happened yet, |
|
|
+ or the thread is simply attached to the default device. */ |
|
|
+ |
|
|
+attribute_hidden void |
|
|
+goacc_lazy_initialize (void) |
|
|
+{ |
|
|
+  struct goacc_thread *self = goacc_thread (); |
|
|
+  bool attached = self != NULL && self->dev != NULL; |
|
|
+ |
|
|
+  if (!attached) |
|
|
+    { |
|
|
+      if (cached_base_dev != NULL) |
|
|
+        goacc_attach_host_thread_to_device (-1); |
|
|
+      else |
|
|
+        acc_init (acc_device_default); |
|
|
+    } |
|
|
+} |
|
|
--- libgomp/oacc-int.h.jj 2016-07-13 16:57:04.400535794 +0200 |
|
|
+++ libgomp/oacc-int.h 2016-07-13 16:57:04.400535794 +0200 |
|
|
@@ -0,0 +1,106 @@ |
|
|
+/* OpenACC Runtime - internal declarations |
|
|
+ |
|
|
+ Copyright (C) 2013-2016 Free Software Foundation, Inc. |
|
|
+ |
|
|
+ Contributed by Mentor Embedded. |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+/* This file contains data types and function declarations that are not |
|
|
+ part of the official OpenACC user interface. There are declarations |
|
|
+ in here that are part of the GNU OpenACC ABI, in that the compiler is |
|
|
+ required to know about them and use them. |
|
|
+ |
|
|
+ The convention is that the all caps prefix "GOACC" is used to group items |
|
|
+ that are part of the external ABI, and the lower case prefix "goacc" |
|
|
+ is used to group items that are completely private to the library. */ |
|
|
+ |
|
|
+#ifndef OACC_INT_H |
|
|
+#define OACC_INT_H 1 |
|
|
+ |
|
|
+#include "openacc.h" |
|
|
+#include "config.h" |
|
|
+#include <stddef.h> |
|
|
+#include <stdbool.h> |
|
|
+#include <stdarg.h> |
|
|
+ |
|
|
+#ifdef HAVE_ATTRIBUTE_VISIBILITY |
|
|
+# pragma GCC visibility push(hidden) |
|
|
+#endif |
|
|
+ |
|
|
+/* Convert an offload target type to the OpenACC device type by a plain |
|
|
+ cast. NOTE(review): this relies on the two enumerations using matching |
|
|
+ values -- confirm against their definitions. */ |
|
|
+static inline enum acc_device_t |
|
|
+acc_device_type (enum offload_target_type type) |
|
|
+{ |
|
|
+ return (enum acc_device_t) type; |
|
|
+} |
|
|
+ |
|
|
+/* Per-host-thread OpenACC state. */ |
|
|
+struct goacc_thread |
|
|
+{ |
|
|
+ /* The base device for the current thread. */ |
|
|
+ struct gomp_device_descr *base_dev; |
|
|
+ |
|
|
+ /* The device for the current thread. */ |
|
|
+ struct gomp_device_descr *dev; |
|
|
+ |
|
|
+ /* Device saved by goacc_save_and_set_bind while DEV is temporarily |
|
|
+ rebound; goacc_restore_bind puts it back and clears this field. */ |
|
|
+ struct gomp_device_descr *saved_bound_dev; |
|
|
+ |
|
|
+ /* This is a linked list of data mapped by the "acc data" pragma, following |
|
|
+ strictly push/pop semantics according to lexical scope. */ |
|
|
+ struct target_mem_desc *mapped_data; |
|
|
+ |
|
|
+ /* These structures form a list: this is the next thread in that list. */ |
|
|
+ struct goacc_thread *next; |
|
|
+ |
|
|
+ /* Target-specific data (used by plugin). */ |
|
|
+ void *target_tls; |
|
|
+}; |
|
|
+ |
|
|
+/* goacc_thread returns the calling thread's goacc_thread block: read from |
|
|
+ the __thread variable when HAVE_TLS or USE_EMUTLS is defined, otherwise |
|
|
+ fetched from the goacc_tls_key pthread key. */ |
|
|
+#if defined HAVE_TLS || defined USE_EMUTLS |
|
|
+extern __thread struct goacc_thread *goacc_tls_data; |
|
|
+static inline struct goacc_thread * |
|
|
+goacc_thread (void) |
|
|
+{ |
|
|
+ return goacc_tls_data; |
|
|
+} |
|
|
+#else |
|
|
+extern pthread_key_t goacc_tls_key; |
|
|
+static inline struct goacc_thread * |
|
|
+goacc_thread (void) |
|
|
+{ |
|
|
+ return pthread_getspecific (goacc_tls_key); |
|
|
+} |
|
|
+#endif |
|
|
+ |
|
|
+/* Library-private entry points. They are declared inside the hidden |
|
|
+ visibility region pushed earlier in this header when |
|
|
+ HAVE_ATTRIBUTE_VISIBILITY is defined. */ |
|
|
+void goacc_register (struct gomp_device_descr *) __GOACC_NOTHROW; |
|
|
+void goacc_attach_host_thread_to_device (int); |
|
|
+void goacc_runtime_initialize (void); |
|
|
+void goacc_save_and_set_bind (acc_device_t); |
|
|
+void goacc_restore_bind (void); |
|
|
+void goacc_lazy_initialize (void); |
|
|
+void goacc_host_init (void); |
|
|
+ |
|
|
+#ifdef HAVE_ATTRIBUTE_VISIBILITY |
|
|
+# pragma GCC visibility pop |
|
|
+#endif |
|
|
+ |
|
|
+#endif |
|
|
--- libgomp/oacc-host.c.jj 2016-07-13 16:57:13.489423096 +0200 |
|
|
+++ libgomp/oacc-host.c 2016-07-13 16:57:13.489423096 +0200 |
|
|
@@ -0,0 +1,266 @@ |
|
|
+/* OpenACC Runtime Library: acc_device_host. |
|
|
+ |
|
|
+ Copyright (C) 2013-2016 Free Software Foundation, Inc. |
|
|
+ |
|
|
+ Contributed by Mentor Embedded. |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+#include "libgomp.h" |
|
|
+#include "oacc-int.h" |
|
|
+#include "gomp-constants.h" |
|
|
+ |
|
|
+#include <stdbool.h> |
|
|
+#include <stddef.h> |
|
|
+#include <stdint.h> |
|
|
+ |
|
|
+static struct gomp_device_descr host_dispatch; |
|
|
+ |
|
|
+/* Return the name recorded in host_dispatch. */ |
|
|
+static const char * |
|
|
+host_get_name (void) |
|
|
+{ |
|
|
+ return host_dispatch.name; |
|
|
+} |
|
|
+ |
|
|
+/* Return the capability bits recorded in host_dispatch. */ |
|
|
+static unsigned int |
|
|
+host_get_caps (void) |
|
|
+{ |
|
|
+ return host_dispatch.capabilities; |
|
|
+} |
|
|
+ |
|
|
+/* Return the device type recorded in host_dispatch. */ |
|
|
+static int |
|
|
+host_get_type (void) |
|
|
+{ |
|
|
+ return host_dispatch.type; |
|
|
+} |
|
|
+ |
|
|
+/* The host always reports exactly one device. */ |
|
|
+static int |
|
|
+host_get_num_devices (void) |
|
|
+{ |
|
|
+ return 1; |
|
|
+} |
|
|
+ |
|
|
+/* Nothing to set up for the host: N is ignored and true is returned. */ |
|
|
+static bool |
|
|
+host_init_device (int n __attribute__ ((unused))) |
|
|
+{ |
|
|
+ return true; |
|
|
+} |
|
|
+ |
|
|
+/* Nothing to tear down for the host: N is ignored and true is returned. */ |
|
|
+static bool |
|
|
+host_fini_device (int n __attribute__ ((unused))) |
|
|
+{ |
|
|
+ return true; |
|
|
+} |
|
|
+ |
|
|
+/* Return GOMP_VERSION. */ |
|
|
+static unsigned |
|
|
+host_version (void) |
|
|
+{ |
|
|
+ return GOMP_VERSION; |
|
|
+} |
|
|
+ |
|
|
+/* No-op image loader: every argument is ignored and 0 is returned. |
|
|
+ NOTE(review): presumably the return value counts loaded entries -- |
|
|
+ confirm against the load_image_func contract. */ |
|
|
+static int |
|
|
+host_load_image (int n __attribute__ ((unused)), |
|
|
+ unsigned v __attribute__ ((unused)), |
|
|
+ const void *t __attribute__ ((unused)), |
|
|
+ struct addr_pair **r __attribute__ ((unused))) |
|
|
+{ |
|
|
+ return 0; |
|
|
+} |
|
|
+ |
|
|
+/* No-op image unloader: every argument is ignored and true is returned. */ |
|
|
+static bool |
|
|
+host_unload_image (int n __attribute__ ((unused)), |
|
|
+ unsigned v __attribute__ ((unused)), |
|
|
+ const void *t __attribute__ ((unused))) |
|
|
+{ |
|
|
+ return true; |
|
|
+} |
|
|
+ |
|
|
+/* Allocate S bytes with gomp_malloc; the device number N is ignored. */ |
|
|
+static void * |
|
|
+host_alloc (int n __attribute__ ((unused)), size_t s) |
|
|
+{ |
|
|
+ return gomp_malloc (s); |
|
|
+} |
|
|
+ |
|
|
+/* Release P with free and return true; the device number N is ignored. */ |
|
|
+static bool |
|
|
+host_free (int n __attribute__ ((unused)), void *p) |
|
|
+{ |
|
|
+ free (p); |
|
|
+ return true; |
|
|
+} |
|
|
+ |
|
|
+/* Device-to-host copy hook for the host device: no data is copied, every |
|
|
+ argument is ignored and true is returned. */ |
|
|
+static bool |
|
|
+host_dev2host (int n __attribute__ ((unused)), |
|
|
+ void *h __attribute__ ((unused)), |
|
|
+ const void *d __attribute__ ((unused)), |
|
|
+ size_t s __attribute__ ((unused))) |
|
|
+{ |
|
|
+ return true; |
|
|
+} |
|
|
+ |
|
|
+/* Host-to-device copy hook for the host device: no data is copied, every |
|
|
+ argument is ignored and true is returned. */ |
|
|
+static bool |
|
|
+host_host2dev (int n __attribute__ ((unused)), |
|
|
+ void *d __attribute__ ((unused)), |
|
|
+ const void *h __attribute__ ((unused)), |
|
|
+ size_t s __attribute__ ((unused))) |
|
|
+{ |
|
|
+ return true; |
|
|
+} |
|
|
+ |
|
|
+/* Run FN_PTR directly on the host: it is called as void (*) (void *) with |
|
|
+ VARS as its only argument. N and ARGS are ignored. */ |
|
|
+static void |
|
|
+host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars, |
|
|
+ void **args __attribute__((unused))) |
|
|
+{ |
|
|
+ void (*fn)(void *) = (void (*)(void *)) fn_ptr; |
|
|
+ |
|
|
+ fn (vars); |
|
|
+} |
|
|
+ |
|
|
+/* OpenACC exec hook for the host: call FN synchronously with HOSTADDRS as |
|
|
+ its argument. All other parameters are ignored. */ |
|
|
+static void |
|
|
+host_openacc_exec (void (*fn) (void *), |
|
|
+ size_t mapnum __attribute__ ((unused)), |
|
|
+ void **hostaddrs, |
|
|
+ void **devaddrs __attribute__ ((unused)), |
|
|
+ int async __attribute__ ((unused)), |
|
|
+ unsigned *dims __attribute ((unused)), |
|
|
+ void *targ_mem_desc __attribute__ ((unused))) |
|
|
+{ |
|
|
+ fn (hostaddrs); |
|
|
+} |
|
|
+ |
|
|
+/* Empty hook: both arguments are ignored and nothing is registered. */ |
|
|
+static void |
|
|
+host_openacc_register_async_cleanup (void *targ_mem_desc __attribute__ ((unused)), |
|
|
+ int async __attribute__ ((unused))) |
|
|
+{ |
|
|
+} |
|
|
+ |
|
|
+/* Always returns 1; ASYNC is ignored. */ |
|
|
+static int |
|
|
+host_openacc_async_test (int async __attribute__ ((unused))) |
|
|
+{ |
|
|
+ return 1; |
|
|
+} |
|
|
+ |
|
|
+/* Always returns 1. */ |
|
|
+static int |
|
|
+host_openacc_async_test_all (void) |
|
|
+{ |
|
|
+ return 1; |
|
|
+} |
|
|
+ |
|
|
+/* Empty hook: ASYNC is ignored and the call returns immediately. */ |
|
|
+static void |
|
|
+host_openacc_async_wait (int async __attribute__ ((unused))) |
|
|
+{ |
|
|
+} |
|
|
+ |
|
|
+/* Empty hook: both queue arguments are ignored. */ |
|
|
+static void |
|
|
+host_openacc_async_wait_async (int async1 __attribute__ ((unused)), |
|
|
+ int async2 __attribute__ ((unused))) |
|
|
+{ |
|
|
+} |
|
|
+ |
|
|
+/* Empty hook: returns immediately. */ |
|
|
+static void |
|
|
+host_openacc_async_wait_all (void) |
|
|
+{ |
|
|
+} |
|
|
+ |
|
|
+/* Empty hook: ASYNC is ignored. */ |
|
|
+static void |
|
|
+host_openacc_async_wait_all_async (int async __attribute__ ((unused))) |
|
|
+{ |
|
|
+} |
|
|
+ |
|
|
+/* Empty hook: ASYNC is ignored and no state is recorded. */ |
|
|
+static void |
|
|
+host_openacc_async_set_async (int async __attribute__ ((unused))) |
|
|
+{ |
|
|
+} |
|
|
+ |
|
|
+/* The host keeps no target-specific thread data: ORD is ignored and NULL |
|
|
+ is returned. */ |
|
|
+static void * |
|
|
+host_openacc_create_thread_data (int ord __attribute__ ((unused))) |
|
|
+{ |
|
|
+ return NULL; |
|
|
+} |
|
|
+ |
|
|
+/* Empty hook: TLS_DATA is ignored, matching the NULL that |
|
|
+ host_openacc_create_thread_data hands out. */ |
|
|
+static void |
|
|
+host_openacc_destroy_thread_data (void *tls_data __attribute__ ((unused))) |
|
|
+{ |
|
|
+} |
|
|
+ |
|
|
+/* Device descriptor for the host: every hook points at one of the host_* |
|
|
+ functions in this file, and the CUDA hooks are left null. */ |
|
|
+static struct gomp_device_descr host_dispatch = |
|
|
+ { |
|
|
+ .name = "host", |
|
|
+ .capabilities = (GOMP_OFFLOAD_CAP_SHARED_MEM |
|
|
+ | GOMP_OFFLOAD_CAP_NATIVE_EXEC |
|
|
+ | GOMP_OFFLOAD_CAP_OPENACC_200), |
|
|
+ .target_id = 0, |
|
|
+ .type = OFFLOAD_TARGET_TYPE_HOST, |
|
|
+ |
|
|
+ .get_name_func = host_get_name, |
|
|
+ .get_caps_func = host_get_caps, |
|
|
+ .get_type_func = host_get_type, |
|
|
+ .get_num_devices_func = host_get_num_devices, |
|
|
+ .init_device_func = host_init_device, |
|
|
+ .fini_device_func = host_fini_device, |
|
|
+ .version_func = host_version, |
|
|
+ .load_image_func = host_load_image, |
|
|
+ .unload_image_func = host_unload_image, |
|
|
+ .alloc_func = host_alloc, |
|
|
+ .free_func = host_free, |
|
|
+ .dev2host_func = host_dev2host, |
|
|
+ .host2dev_func = host_host2dev, |
|
|
+ .run_func = host_run, |
|
|
+ |
|
|
+ .mem_map = { NULL }, |
|
|
+ /* .lock initialized in goacc_host_init. */ |
|
|
+ .state = GOMP_DEVICE_UNINITIALIZED, |
|
|
+ |
|
|
+ .openacc = { |
|
|
+ .data_environ = NULL, |
|
|
+ |
|
|
+ .exec_func = host_openacc_exec, |
|
|
+ |
|
|
+ .register_async_cleanup_func = host_openacc_register_async_cleanup, |
|
|
+ |
|
|
+ .async_test_func = host_openacc_async_test, |
|
|
+ .async_test_all_func = host_openacc_async_test_all, |
|
|
+ .async_wait_func = host_openacc_async_wait, |
|
|
+ .async_wait_async_func = host_openacc_async_wait_async, |
|
|
+ .async_wait_all_func = host_openacc_async_wait_all, |
|
|
+ .async_wait_all_async_func = host_openacc_async_wait_all_async, |
|
|
+ .async_set_async_func = host_openacc_async_set_async, |
|
|
+ |
|
|
+ .create_thread_data_func = host_openacc_create_thread_data, |
|
|
+ .destroy_thread_data_func = host_openacc_destroy_thread_data, |
|
|
+ |
|
|
+ .cuda = { |
|
|
+ .get_current_device_func = NULL, |
|
|
+ .get_current_context_func = NULL, |
|
|
+ .get_stream_func = NULL, |
|
|
+ .set_stream_func = NULL, |
|
|
+ } |
|
|
+ } |
|
|
+ }; |
|
|
+ |
|
|
+/* Initialize and register this device type: set up the lock inside |
|
|
+ host_dispatch, then hand the descriptor to goacc_register. */ |
|
|
+void |
|
|
+goacc_host_init (void) |
|
|
+{ |
|
|
+ gomp_mutex_init (&host_dispatch.lock); |
|
|
+ goacc_register (&host_dispatch); |
|
|
+} |
|
|
--- libgomp/oacc-parallel.c.jj 2016-07-13 16:57:04.399535807 +0200 |
|
|
+++ libgomp/oacc-parallel.c 2016-07-14 18:53:06.694996381 +0200 |
|
|
@@ -0,0 +1,241 @@ |
|
|
+/* Copyright (C) 2013-2016 Free Software Foundation, Inc. |
|
|
+ |
|
|
+ Contributed by Mentor Embedded. |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+/* This file handles OpenACC constructs. */ |
|
|
+ |
|
|
+#include "openacc.h" |
|
|
+#include "libgomp.h" |
|
|
+#include "libgomp_g.h" |
|
|
+#include "gomp-constants.h" |
|
|
+#include "oacc-int.h" |
|
|
+#ifdef HAVE_INTTYPES_H |
|
|
+# include <inttypes.h> /* For PRIu64. */ |
|
|
+#endif |
|
|
+#include <string.h> |
|
|
+#include <stdarg.h> |
|
|
+#include <assert.h> |
|
|
+ |
|
|
+static void goacc_wait (int async, int num_waits, va_list *ap); |
|
|
+ |
|
|
+ |
|
|
+/* Launch a possibly offloaded function on DEVICE. FN is the host fn |
|
|
+ address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory |
|
|
+ blocks to be copied to/from the device. Variadic arguments are |
|
|
+ keyed optional parameters terminated with a zero. */ |
|
|
+ |
|
|
+void |
|
|
+GOACC_parallel_keyed (int device, void (*fn) (void *), |
|
|
+ size_t mapnum, void **hostaddrs, size_t *sizes, |
|
|
+ unsigned short *kinds, ...) |
|
|
+{ |
|
|
+ bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; |
|
|
+ struct goacc_thread *thr; |
|
|
+ struct gomp_device_descr *acc_dev; |
|
|
+ |
|
|
+#ifdef HAVE_INTTYPES_H |
|
|
+ gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", |
|
|
+ __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); |
|
|
+#else |
|
|
+ gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", |
|
|
+ __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); |
|
|
+#endif |
|
|
+ goacc_lazy_initialize (); |
|
|
+ |
|
|
+ thr = goacc_thread (); |
|
|
+ acc_dev = thr->dev; |
|
|
+ |
|
|
+ /* Host fallback if "if" clause is false or if the current device is set to |
|
|
+ the host. */ |
|
|
+ if (host_fallback) |
|
|
+ { |
|
|
+ goacc_save_and_set_bind (acc_device_host); |
|
|
+ fn (hostaddrs); |
|
|
+ goacc_restore_bind (); |
|
|
+ return; |
|
|
+ } |
|
|
+ else if (acc_device_type (acc_dev->type) == acc_device_host) |
|
|
+ { |
|
|
+ fn (hostaddrs); |
|
|
+ return; |
|
|
+ } |
|
|
+ |
|
|
+ /* acc_device_host is the only supported device type. */ |
|
|
+} |
|
|
+ |
|
|
+/* Legacy entry point, only provides host execution. */ |
|
|
+ |
|
|
+void |
|
|
+GOACC_parallel (int device, void (*fn) (void *), |
|
|
+ size_t mapnum, void **hostaddrs, size_t *sizes, |
|
|
+ unsigned short *kinds, |
|
|
+ int num_gangs, int num_workers, int vector_length, |
|
|
+ int async, int num_waits, ...) |
|
|
+{ |
|
|
+ goacc_save_and_set_bind (acc_device_host); |
|
|
+ fn (hostaddrs); |
|
|
+ goacc_restore_bind (); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+GOACC_data_start (int device, size_t mapnum, |
|
|
+ void **hostaddrs, size_t *sizes, unsigned short *kinds) |
|
|
+{ |
|
|
+ goacc_lazy_initialize (); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+GOACC_data_end (void) |
|
|
+{ |
|
|
+ gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); |
|
|
+ gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+GOACC_enter_exit_data (int device, size_t mapnum, |
|
|
+ void **hostaddrs, size_t *sizes, unsigned short *kinds, |
|
|
+ int async, int num_waits, ...) |
|
|
+{ |
|
|
+ goacc_lazy_initialize (); |
|
|
+} |
|
|
+ |
|
|
+static void |
|
|
+goacc_wait (int async, int num_waits, va_list *ap) |
|
|
+{ |
|
|
+ struct goacc_thread *thr = goacc_thread (); |
|
|
+ struct gomp_device_descr *acc_dev = thr->dev; |
|
|
+ |
|
|
+ while (num_waits--) |
|
|
+ { |
|
|
+ int qid = va_arg (*ap, int); |
|
|
+ |
|
|
+ if (acc_async_test (qid)) |
|
|
+ continue; |
|
|
+ |
|
|
+ if (async == acc_async_sync) |
|
|
+ acc_wait (qid); |
|
|
+ else if (qid == async) |
|
|
+ ;/* If we're waiting on the same asynchronous queue as we're |
|
|
+ launching on, the queue itself will order work as |
|
|
+ required, so there's no need to wait explicitly. */ |
|
|
+ else |
|
|
+ acc_dev->openacc.async_wait_async_func (qid, async); |
|
|
+ } |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+GOACC_update (int device, size_t mapnum, |
|
|
+ void **hostaddrs, size_t *sizes, unsigned short *kinds, |
|
|
+ int async, int num_waits, ...) |
|
|
+{ |
|
|
+ goacc_lazy_initialize (); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+GOACC_wait (int async, int num_waits, ...) |
|
|
+{ |
|
|
+ if (num_waits) |
|
|
+ { |
|
|
+ va_list ap; |
|
|
+ |
|
|
+ va_start (ap, num_waits); |
|
|
+ goacc_wait (async, num_waits, &ap); |
|
|
+ va_end (ap); |
|
|
+ } |
|
|
+ else if (async == acc_async_sync) |
|
|
+ acc_wait_all (); |
|
|
+ else if (async == acc_async_noval) |
|
|
+ goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval); |
|
|
+} |
|
|
+ |
|
|
+int |
|
|
+GOACC_get_num_threads (void) |
|
|
+{ |
|
|
+ return 1; |
|
|
+} |
|
|
+ |
|
|
+int |
|
|
+GOACC_get_thread_num (void) |
|
|
+{ |
|
|
+ return 0; |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+GOACC_declare (int device, size_t mapnum, |
|
|
+ void **hostaddrs, size_t *sizes, unsigned short *kinds) |
|
|
+{ |
|
|
+ int i; |
|
|
+ |
|
|
+ for (i = 0; i < mapnum; i++) |
|
|
+ { |
|
|
+ unsigned char kind = kinds[i] & 0xff; |
|
|
+ |
|
|
+ if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) |
|
|
+ continue; |
|
|
+ |
|
|
+ switch (kind) |
|
|
+ { |
|
|
+ case GOMP_MAP_FORCE_ALLOC: |
|
|
+ case GOMP_MAP_FORCE_FROM: |
|
|
+ case GOMP_MAP_FORCE_TO: |
|
|
+ case GOMP_MAP_POINTER: |
|
|
+ case GOMP_MAP_DELETE: |
|
|
+ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], |
|
|
+ &kinds[i], 0, 0); |
|
|
+ break; |
|
|
+ |
|
|
+ case GOMP_MAP_FORCE_DEVICEPTR: |
|
|
+ break; |
|
|
+ |
|
|
+ case GOMP_MAP_ALLOC: |
|
|
+ if (!acc_is_present (hostaddrs[i], sizes[i])) |
|
|
+ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], |
|
|
+ &kinds[i], 0, 0); |
|
|
+ break; |
|
|
+ |
|
|
+ case GOMP_MAP_TO: |
|
|
+ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], |
|
|
+ &kinds[i], 0, 0); |
|
|
+ |
|
|
+ break; |
|
|
+ |
|
|
+ case GOMP_MAP_FROM: |
|
|
+ kinds[i] = GOMP_MAP_FORCE_FROM; |
|
|
+ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], |
|
|
+ &kinds[i], 0, 0); |
|
|
+ break; |
|
|
+ |
|
|
+ case GOMP_MAP_FORCE_PRESENT: |
|
|
+ if (!acc_is_present (hostaddrs[i], sizes[i])) |
|
|
+ gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i], |
|
|
+ (unsigned long) sizes[i]); |
|
|
+ break; |
|
|
+ |
|
|
+ default: |
|
|
+ assert (0); |
|
|
+ break; |
|
|
+ } |
|
|
+ } |
|
|
+} |
|
|
--- libgomp/oacc-cuda.c.jj 2016-07-13 16:57:04.432535397 +0200 |
|
|
+++ libgomp/oacc-cuda.c 2016-07-13 16:57:04.432535397 +0200 |
|
|
@@ -0,0 +1,86 @@ |
|
|
+/* OpenACC Runtime Library: CUDA support glue. |
|
|
+ |
|
|
+ Copyright (C) 2014-2016 Free Software Foundation, Inc. |
|
|
+ |
|
|
+ Contributed by Mentor Embedded. |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+#include "openacc.h" |
|
|
+#include "config.h" |
|
|
+#include "libgomp.h" |
|
|
+#include "oacc-int.h" |
|
|
+ |
|
|
+void * |
|
|
+acc_get_current_cuda_device (void) |
|
|
+{ |
|
|
+ struct goacc_thread *thr = goacc_thread (); |
|
|
+ |
|
|
+ if (thr && thr->dev && thr->dev->openacc.cuda.get_current_device_func) |
|
|
+ return thr->dev->openacc.cuda.get_current_device_func (); |
|
|
+ |
|
|
+ return NULL; |
|
|
+} |
|
|
+ |
|
|
+void * |
|
|
+acc_get_current_cuda_context (void) |
|
|
+{ |
|
|
+ struct goacc_thread *thr = goacc_thread (); |
|
|
+ |
|
|
+ if (thr && thr->dev && thr->dev->openacc.cuda.get_current_context_func) |
|
|
+ return thr->dev->openacc.cuda.get_current_context_func (); |
|
|
+ |
|
|
+ return NULL; |
|
|
+} |
|
|
+ |
|
|
+void * |
|
|
+acc_get_cuda_stream (int async) |
|
|
+{ |
|
|
+ struct goacc_thread *thr = goacc_thread (); |
|
|
+ |
|
|
+ if (async < 0) |
|
|
+ return NULL; |
|
|
+ |
|
|
+ if (thr && thr->dev && thr->dev->openacc.cuda.get_stream_func) |
|
|
+ return thr->dev->openacc.cuda.get_stream_func (async); |
|
|
+ |
|
|
+ return NULL; |
|
|
+} |
|
|
+ |
|
|
+int |
|
|
+acc_set_cuda_stream (int async, void *stream) |
|
|
+{ |
|
|
+ struct goacc_thread *thr; |
|
|
+ |
|
|
+ if (async < 0 || stream == NULL) |
|
|
+ return 0; |
|
|
+ |
|
|
+ goacc_lazy_initialize (); |
|
|
+ |
|
|
+ thr = goacc_thread (); |
|
|
+ |
|
|
+ if (thr && thr->dev && thr->dev->openacc.cuda.set_stream_func) |
|
|
+ return thr->dev->openacc.cuda.set_stream_func (async, stream); |
|
|
+ |
|
|
+ return -1; |
|
|
+} |
|
|
--- libgomp/openacc_lib.h.jj 2016-07-13 16:57:13.486423134 +0200 |
|
|
+++ libgomp/openacc_lib.h 2016-07-13 16:57:13.486423134 +0200 |
|
|
@@ -0,0 +1,382 @@ |
|
|
+! OpenACC Runtime Library Definitions. -*- mode: fortran -*- |
|
|
+ |
|
|
+! Copyright (C) 2014-2016 Free Software Foundation, Inc. |
|
|
+ |
|
|
+! Contributed by Tobias Burnus <burnus@net-b.de> |
|
|
+! and Mentor Embedded. |
|
|
+ |
|
|
+! This file is part of the GNU Offloading and Multi Processing Library |
|
|
+! (libgomp). |
|
|
+ |
|
|
+! Libgomp is free software; you can redistribute it and/or modify it |
|
|
+! under the terms of the GNU General Public License as published by |
|
|
+! the Free Software Foundation; either version 3, or (at your option) |
|
|
+! any later version. |
|
|
+ |
|
|
+! Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+! FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+! more details. |
|
|
+ |
|
|
+! Under Section 7 of GPL version 3, you are granted additional |
|
|
+! permissions described in the GCC Runtime Library Exception, version |
|
|
+! 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+! You should have received a copy of the GNU General Public License and |
|
|
+! a copy of the GCC Runtime Library Exception along with this program; |
|
|
+! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+! <http://www.gnu.org/licenses/>. |
|
|
+ |
|
|
+! NOTE: Due to the use of dimension (..), the code only works when compiled |
|
|
+! with -std=f2008ts/gnu/legacy but not with other standard settings. |
|
|
+! Alternatively, the user can use the module version, which permits |
|
|
+! compilation with -std=f95. |
|
|
+ |
|
|
+ integer, parameter :: acc_device_kind = 4 |
|
|
+ |
|
|
+! Keep in sync with include/gomp-constants.h. |
|
|
+ integer (acc_device_kind), parameter :: acc_device_none = 0 |
|
|
+ integer (acc_device_kind), parameter :: acc_device_default = 1 |
|
|
+ integer (acc_device_kind), parameter :: acc_device_host = 2 |
|
|
+! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 |
|
|
+! removed. |
|
|
+ integer (acc_device_kind), parameter :: acc_device_not_host = 4 |
|
|
+ integer (acc_device_kind), parameter :: acc_device_nvidia = 5 |
|
|
+ |
|
|
+ integer, parameter :: acc_handle_kind = 4 |
|
|
+ |
|
|
+! Keep in sync with include/gomp-constants.h. |
|
|
+ integer (acc_handle_kind), parameter :: acc_async_noval = -1 |
|
|
+ integer (acc_handle_kind), parameter :: acc_async_sync = -2 |
|
|
+ |
|
|
+ integer, parameter :: openacc_version = 201306 |
|
|
+ |
|
|
+ interface acc_get_num_devices |
|
|
+ function acc_get_num_devices_h (d) |
|
|
+ import acc_device_kind |
|
|
+ integer acc_get_num_devices_h |
|
|
+ integer (acc_device_kind) d |
|
|
+ end function |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_set_device_type |
|
|
+ subroutine acc_set_device_type_h (d) |
|
|
+ import acc_device_kind |
|
|
+ integer (acc_device_kind) d |
|
|
+ end subroutine |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_get_device_type |
|
|
+ function acc_get_device_type_h () |
|
|
+ import acc_device_kind |
|
|
+ integer (acc_device_kind) acc_get_device_type_h |
|
|
+ end function |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_set_device_num |
|
|
+ subroutine acc_set_device_num_h (n, d) |
|
|
+ import acc_device_kind |
|
|
+ integer n |
|
|
+ integer (acc_device_kind) d |
|
|
+ end subroutine |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_get_device_num |
|
|
+ function acc_get_device_num_h (d) |
|
|
+ import acc_device_kind |
|
|
+ integer acc_get_device_num_h |
|
|
+ integer (acc_device_kind) d |
|
|
+ end function |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_async_test |
|
|
+ function acc_async_test_h (a) |
|
|
+ logical acc_async_test_h |
|
|
+ integer a |
|
|
+ end function |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_async_test_all |
|
|
+ function acc_async_test_all_h () |
|
|
+ logical acc_async_test_all_h |
|
|
+ end function |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_wait |
|
|
+ subroutine acc_wait_h (a) |
|
|
+ integer a |
|
|
+ end subroutine |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_wait_async |
|
|
+ subroutine acc_wait_async_h (a1, a2) |
|
|
+ integer a1, a2 |
|
|
+ end subroutine |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_wait_all |
|
|
+ subroutine acc_wait_all_h () |
|
|
+ end subroutine |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_wait_all_async |
|
|
+ subroutine acc_wait_all_async_h (a) |
|
|
+ integer a |
|
|
+ end subroutine |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_init |
|
|
+ subroutine acc_init_h (devicetype) |
|
|
+ import acc_device_kind |
|
|
+ integer (acc_device_kind) devicetype |
|
|
+ end subroutine |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_shutdown |
|
|
+ subroutine acc_shutdown_h (devicetype) |
|
|
+ import acc_device_kind |
|
|
+ integer (acc_device_kind) devicetype |
|
|
+ end subroutine |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_on_device |
|
|
+ function acc_on_device_h (devicetype) |
|
|
+ import acc_device_kind |
|
|
+ logical acc_on_device_h |
|
|
+ integer (acc_device_kind) devicetype |
|
|
+ end function |
|
|
+ end interface |
|
|
+ |
|
|
+ ! acc_malloc: Only available in C/C++ |
|
|
+ ! acc_free: Only available in C/C++ |
|
|
+ |
|
|
+ interface acc_copyin |
|
|
+ subroutine acc_copyin_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_copyin_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_copyin_array_h (a) |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end subroutine |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_present_or_copyin |
|
|
+ subroutine acc_present_or_copyin_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_present_or_copyin_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_present_or_copyin_array_h (a) |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end subroutine |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_pcopyin |
|
|
+ subroutine acc_pcopyin_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_pcopyin_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_pcopyin_array_h (a) |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end subroutine |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_create |
|
|
+ subroutine acc_create_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_create_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_create_array_h (a) |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end subroutine |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_present_or_create |
|
|
+ subroutine acc_present_or_create_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_present_or_create_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_present_or_create_array_h (a) |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end subroutine |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_pcreate |
|
|
+ subroutine acc_pcreate_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_pcreate_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_pcreate_array_h (a) |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end subroutine |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_copyout |
|
|
+ subroutine acc_copyout_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_copyout_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_copyout_array_h (a) |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end subroutine |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_delete |
|
|
+ subroutine acc_delete_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_delete_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_delete_array_h (a) |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end subroutine |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_update_device |
|
|
+ subroutine acc_update_device_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_update_device_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_update_device_array_h (a) |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end subroutine |
|
|
+ end interface |
|
|
+ |
|
|
+ interface acc_update_self |
|
|
+ subroutine acc_update_self_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_update_self_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_update_self_array_h (a) |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end subroutine |
|
|
+ end interface |
|
|
+ |
|
|
+ ! acc_map_data: Only available in C/C++ |
|
|
+ ! acc_unmap_data: Only available in C/C++ |
|
|
+ ! acc_deviceptr: Only available in C/C++ |
|
|
+ ! acc_hostptr: Only available in C/C++ |
|
|
+ |
|
|
+ interface acc_is_present |
|
|
+ function acc_is_present_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ logical acc_is_present_32_h |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end function |
|
|
+ |
|
|
+ function acc_is_present_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ logical acc_is_present_64_h |
|
|
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end function |
|
|
+ |
|
|
+ function acc_is_present_array_h (a) |
|
|
+ logical acc_is_present_array_h |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end function |
|
|
+ end interface |
|
|
+ |
|
|
+ ! acc_memcpy_to_device: Only available in C/C++ |
|
|
+ ! acc_memcpy_from_device: Only available in C/C++ |
|
|
--- libgomp/gomp-constants.h.jj 2016-07-14 16:02:47.212545826 +0200 |
|
|
+++ libgomp/gomp-constants.h 2016-05-26 21:04:40.000000000 +0200 |
|
|
@@ -0,0 +1,259 @@ |
|
|
+/* Communication between GCC and libgomp. |
|
|
+ |
|
|
+ Copyright (C) 2014-2015 Free Software Foundation, Inc. |
|
|
+ |
|
|
+ Contributed by Mentor Embedded. |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+#ifndef GOMP_CONSTANTS_H |
|
|
+#define GOMP_CONSTANTS_H 1 |
|
|
+ |
|
|
+/* Memory mapping types. */ |
|
|
+ |
|
|
+/* One byte. */ |
|
|
+#define GOMP_MAP_LAST (1 << 8) |
|
|
+ |
|
|
+#define GOMP_MAP_FLAG_TO (1 << 0) |
|
|
+#define GOMP_MAP_FLAG_FROM (1 << 1) |
|
|
+/* Special map kinds, enumerated starting here. */ |
|
|
+#define GOMP_MAP_FLAG_SPECIAL_0 (1 << 2) |
|
|
+#define GOMP_MAP_FLAG_SPECIAL_1 (1 << 3) |
|
|
+#define GOMP_MAP_FLAG_SPECIAL_2 (1 << 4) |
|
|
+#define GOMP_MAP_FLAG_SPECIAL (GOMP_MAP_FLAG_SPECIAL_1 \ |
|
|
+ | GOMP_MAP_FLAG_SPECIAL_0) |
|
|
+/* Flag to force a specific behavior (or else, trigger a run-time error). */ |
|
|
+#define GOMP_MAP_FLAG_FORCE (1 << 7) |
|
|
+ |
|
|
+enum gomp_map_kind |
|
|
+ { |
|
|
+ /* If not already present, allocate. */ |
|
|
+ GOMP_MAP_ALLOC = 0, |
|
|
+ /* ..., and copy to device. */ |
|
|
+ GOMP_MAP_TO = (GOMP_MAP_ALLOC | GOMP_MAP_FLAG_TO), |
|
|
+ /* ..., and copy from device. */ |
|
|
+ GOMP_MAP_FROM = (GOMP_MAP_ALLOC | GOMP_MAP_FLAG_FROM), |
|
|
+ /* ..., and copy to and from device. */ |
|
|
+ GOMP_MAP_TOFROM = (GOMP_MAP_TO | GOMP_MAP_FROM), |
|
|
+ /* The following kind is an internal only map kind, used for pointer based |
|
|
+ array sections. OMP_CLAUSE_SIZE for these is not the pointer size, |
|
|
+ which is implicitly POINTER_SIZE_UNITS, but the bias. */ |
|
|
+ GOMP_MAP_POINTER = (GOMP_MAP_FLAG_SPECIAL_0 | 0), |
|
|
+ /* Also internal, behaves like GOMP_MAP_TO, but additionally any |
|
|
+ GOMP_MAP_POINTER records consecutive after it which have addresses |
|
|
+ falling into that range will not be ignored if GOMP_MAP_TO_PSET wasn't |
|
|
+ mapped already. */ |
|
|
+ GOMP_MAP_TO_PSET = (GOMP_MAP_FLAG_SPECIAL_0 | 1), |
|
|
+ /* Must already be present. */ |
|
|
+ GOMP_MAP_FORCE_PRESENT = (GOMP_MAP_FLAG_SPECIAL_0 | 2), |
|
|
+ /* Deallocate a mapping, without copying from device. */ |
|
|
+ GOMP_MAP_DELETE = (GOMP_MAP_FLAG_SPECIAL_0 | 3), |
|
|
+ /* Is a device pointer. OMP_CLAUSE_SIZE for these is unused; is implicitly |
|
|
+ POINTER_SIZE_UNITS. */ |
|
|
+ GOMP_MAP_FORCE_DEVICEPTR = (GOMP_MAP_FLAG_SPECIAL_1 | 0), |
|
|
+ /* Do not map, copy bits for firstprivate instead. */ |
|
|
+ /* OpenACC device_resident. */ |
|
|
+ GOMP_MAP_DEVICE_RESIDENT = (GOMP_MAP_FLAG_SPECIAL_1 | 1), |
|
|
+ /* OpenACC link. */ |
|
|
+ GOMP_MAP_LINK = (GOMP_MAP_FLAG_SPECIAL_1 | 2), |
|
|
+ /* Do not map, copy bits for firstprivate instead. */ |
|
|
+ GOMP_MAP_FIRSTPRIVATE = (GOMP_MAP_FLAG_SPECIAL | 0), |
|
|
+ /* Similarly, but store the value in the pointer rather than |
|
|
+ pointed by the pointer. */ |
|
|
+ GOMP_MAP_FIRSTPRIVATE_INT = (GOMP_MAP_FLAG_SPECIAL | 1), |
|
|
+ /* Pointer translate host address into device address and copy that |
|
|
+ back to host. */ |
|
|
+ GOMP_MAP_USE_DEVICE_PTR = (GOMP_MAP_FLAG_SPECIAL | 2), |
|
|
+ /* Allocate a zero length array section. Prefer next non-zero length |
|
|
+ mapping over previous non-zero length mapping over zero length mapping |
|
|
+ at the address. If not already mapped, do nothing (and pointer translate |
|
|
+ to NULL). */ |
|
|
+ GOMP_MAP_ZERO_LEN_ARRAY_SECTION = (GOMP_MAP_FLAG_SPECIAL | 3), |
|
|
+ /* Allocate. */ |
|
|
+ GOMP_MAP_FORCE_ALLOC = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_ALLOC), |
|
|
+ /* ..., and copy to device. */ |
|
|
+ GOMP_MAP_FORCE_TO = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_TO), |
|
|
+ /* ..., and copy from device. */ |
|
|
+ GOMP_MAP_FORCE_FROM = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_FROM), |
|
|
+ /* ..., and copy to and from device. */ |
|
|
+ GOMP_MAP_FORCE_TOFROM = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_TOFROM), |
|
|
+ /* If not already present, allocate. And unconditionally copy to |
|
|
+ device. */ |
|
|
+ GOMP_MAP_ALWAYS_TO = (GOMP_MAP_FLAG_SPECIAL_2 | GOMP_MAP_TO), |
|
|
+ /* If not already present, allocate. And unconditionally copy from |
|
|
+ device. */ |
|
|
+ GOMP_MAP_ALWAYS_FROM = (GOMP_MAP_FLAG_SPECIAL_2 |
|
|
+ | GOMP_MAP_FROM), |
|
|
+ /* If not already present, allocate. And unconditionally copy to and from |
|
|
+ device. */ |
|
|
+ GOMP_MAP_ALWAYS_TOFROM = (GOMP_MAP_FLAG_SPECIAL_2 |
|
|
+ | GOMP_MAP_TOFROM), |
|
|
+ /* Map a sparse struct; the address is the base of the structure, alignment |
|
|
+ its required alignment, and size is the number of adjacent entries |
|
|
+ that belong to the struct. The adjacent entries should be sorted by |
|
|
+ increasing address, so it is easy to determine lowest needed address |
|
|
+ (address of the first adjacent entry) and highest needed address |
|
|
+ (address of the last adjacent entry plus its size). */ |
|
|
+ GOMP_MAP_STRUCT = (GOMP_MAP_FLAG_SPECIAL_2 |
|
|
+ | GOMP_MAP_FLAG_SPECIAL | 0), |
|
|
+ /* On a location of a pointer/reference that is assumed to be already mapped |
|
|
+ earlier, store the translated address of the preceding mapping. |
|
|
+ No refcount is bumped by this, and the store is done unconditionally. */ |
|
|
+ GOMP_MAP_ALWAYS_POINTER = (GOMP_MAP_FLAG_SPECIAL_2 |
|
|
+ | GOMP_MAP_FLAG_SPECIAL | 1), |
|
|
+ /* Forced deallocation of zero length array section. */ |
|
|
+ GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION |
|
|
+ = (GOMP_MAP_FLAG_SPECIAL_2 |
|
|
+ | GOMP_MAP_FLAG_SPECIAL | 3), |
|
|
+ /* Decrement usage count and deallocate if zero. */ |
|
|
+ GOMP_MAP_RELEASE = (GOMP_MAP_FLAG_SPECIAL_2 |
|
|
+ | GOMP_MAP_DELETE), |
|
|
+ |
|
|
+ /* Internal to GCC, not used in libgomp. */ |
|
|
+ /* Do not map, but pointer assign a pointer instead. */ |
|
|
+ GOMP_MAP_FIRSTPRIVATE_POINTER = (GOMP_MAP_LAST | 1), |
|
|
+ /* Do not map, but pointer assign a reference instead. */ |
|
|
+ GOMP_MAP_FIRSTPRIVATE_REFERENCE = (GOMP_MAP_LAST | 2) |
|
|
+ }; |
|
|
+ |
|
|
+#define GOMP_MAP_COPY_TO_P(X) \ |
|
|
+ (!((X) & GOMP_MAP_FLAG_SPECIAL) \ |
|
|
+ && ((X) & GOMP_MAP_FLAG_TO)) |
|
|
+ |
|
|
+#define GOMP_MAP_COPY_FROM_P(X) \ |
|
|
+ (!((X) & GOMP_MAP_FLAG_SPECIAL) \ |
|
|
+ && ((X) & GOMP_MAP_FLAG_FROM)) |
|
|
+ |
|
|
+#define GOMP_MAP_POINTER_P(X) \ |
|
|
+ ((X) == GOMP_MAP_POINTER) |
|
|
+ |
|
|
+#define GOMP_MAP_ALWAYS_TO_P(X) \ |
|
|
+ (((X) == GOMP_MAP_ALWAYS_TO) || ((X) == GOMP_MAP_ALWAYS_TOFROM)) |
|
|
+ |
|
|
+#define GOMP_MAP_ALWAYS_FROM_P(X) \ |
|
|
+ (((X) == GOMP_MAP_ALWAYS_FROM) || ((X) == GOMP_MAP_ALWAYS_TOFROM)) |
|
|
+ |
|
|
+#define GOMP_MAP_ALWAYS_P(X) \ |
|
|
+ (GOMP_MAP_ALWAYS_TO_P (X) || ((X) == GOMP_MAP_ALWAYS_FROM)) |
|
|
+ |
|
|
+ |
|
|
+/* Asynchronous behavior. Keep in sync with |
|
|
+ libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_async_t. */ |
|
|
+ |
|
|
+#define GOMP_ASYNC_NOVAL -1 |
|
|
+#define GOMP_ASYNC_SYNC -2 |
|
|
+ |
|
|
+ |
|
|
+/* Device codes. Keep in sync with |
|
|
+ libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_device_t as well as |
|
|
+ libgomp/libgomp-plugin.h. */ |
|
|
+#define GOMP_DEVICE_NONE 0 |
|
|
+#define GOMP_DEVICE_DEFAULT 1 |
|
|
+#define GOMP_DEVICE_HOST 2 |
|
|
+/* #define GOMP_DEVICE_HOST_NONSHM 3 removed. */ |
|
|
+#define GOMP_DEVICE_NOT_HOST 4 |
|
|
+#define GOMP_DEVICE_NVIDIA_PTX 5 |
|
|
+#define GOMP_DEVICE_INTEL_MIC 6 |
|
|
+#define GOMP_DEVICE_HSA 7 |
|
|
+ |
|
|
+#define GOMP_DEVICE_ICV -1 |
|
|
+#define GOMP_DEVICE_HOST_FALLBACK -2 |
|
|
+ |
|
|
+/* GOMP_task/GOMP_taskloop* flags argument. */ |
|
|
+#define GOMP_TASK_FLAG_UNTIED (1 << 0) |
|
|
+#define GOMP_TASK_FLAG_FINAL (1 << 1) |
|
|
+#define GOMP_TASK_FLAG_MERGEABLE (1 << 2) |
|
|
+#define GOMP_TASK_FLAG_DEPEND (1 << 3) |
|
|
+#define GOMP_TASK_FLAG_PRIORITY (1 << 4) |
|
|
+#define GOMP_TASK_FLAG_UP (1 << 8) |
|
|
+#define GOMP_TASK_FLAG_GRAINSIZE (1 << 9) |
|
|
+#define GOMP_TASK_FLAG_IF (1 << 10) |
|
|
+#define GOMP_TASK_FLAG_NOGROUP (1 << 11) |
|
|
+ |
|
|
+/* GOMP_target{_ext,update_ext,enter_exit_data} flags argument. */ |
|
|
+#define GOMP_TARGET_FLAG_NOWAIT (1 << 0) |
|
|
+#define GOMP_TARGET_FLAG_EXIT_DATA (1 << 1) |
|
|
+/* Internal to libgomp. */ |
|
|
+#define GOMP_TARGET_FLAG_UPDATE (1U << 31) |
|
|
+ |
|
|
+/* Versions of libgomp and device-specific plugins. GOMP_VERSION |
|
|
+ should be incremented whenever an ABI-incompatible change is introduced |
|
|
+ to the plugin interface defined in libgomp/libgomp.h. */ |
|
|
+#define GOMP_VERSION 1 |
|
|
+#define GOMP_VERSION_NVIDIA_PTX 1 |
|
|
+#define GOMP_VERSION_INTEL_MIC 0 |
|
|
+#define GOMP_VERSION_HSA 0 |
|
|
+ |
|
|
+#define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV)) |
|
|
+#define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff) |
|
|
+#define GOMP_VERSION_DEV(PACK) ((PACK) & 0xffff) |
|
|
+ |
|
|
+#define GOMP_DIM_GANG 0 |
|
|
+#define GOMP_DIM_WORKER 1 |
|
|
+#define GOMP_DIM_VECTOR 2 |
|
|
+#define GOMP_DIM_MAX 3 |
|
|
+#define GOMP_DIM_MASK(X) (1u << (X)) |
|
|
+ |
|
|
+/* Variadic launch arguments. End of list is marked by a zero. */ |
|
|
+#define GOMP_LAUNCH_DIM 1 /* Launch dimensions, op = mask */ |
|
|
+#define GOMP_LAUNCH_ASYNC 2 /* Async, op = cst val if not MAX */ |
|
|
+#define GOMP_LAUNCH_WAIT 3 /* Waits, op = num waits. */ |
|
|
+#define GOMP_LAUNCH_CODE_SHIFT 28 |
|
|
+#define GOMP_LAUNCH_DEVICE_SHIFT 16 |
|
|
+#define GOMP_LAUNCH_OP_SHIFT 0 |
|
|
+#define GOMP_LAUNCH_PACK(CODE,DEVICE,OP) \ |
|
|
+ (((CODE) << GOMP_LAUNCH_CODE_SHIFT) \ |
|
|
+ | ((DEVICE) << GOMP_LAUNCH_DEVICE_SHIFT) \ |
|
|
+ | ((OP) << GOMP_LAUNCH_OP_SHIFT)) |
|
|
+#define GOMP_LAUNCH_CODE(X) (((X) >> GOMP_LAUNCH_CODE_SHIFT) & 0xf) |
|
|
+#define GOMP_LAUNCH_DEVICE(X) (((X) >> GOMP_LAUNCH_DEVICE_SHIFT) & 0xfff) |
|
|
+#define GOMP_LAUNCH_OP(X) (((X) >> GOMP_LAUNCH_OP_SHIFT) & 0xffff) |
|
|
+#define GOMP_LAUNCH_OP_MAX 0xffff |
|
|
+ |
|
|
+/* Bitmask to apply in order to find out the intended device of a target |
|
|
+ argument. */ |
|
|
+#define GOMP_TARGET_ARG_DEVICE_MASK ((1 << 7) - 1) |
|
|
+/* The target argument is significant for all devices. */ |
|
|
+#define GOMP_TARGET_ARG_DEVICE_ALL 0 |
|
|
+ |
|
|
+/* Flag set when the value is stored in the subsequent element of the |
|
|
+ device-specific argument values. */ |
|
|
+#define GOMP_TARGET_ARG_SUBSEQUENT_PARAM (1 << 7) |
|
|
+ |
|
|
+/* Bitmask to apply to a target argument to find out the value identifier. */ |
|
|
+#define GOMP_TARGET_ARG_ID_MASK (((1 << 8) - 1) << 8) |
|
|
+/* Target argument index of NUM_TEAMS. */ |
|
|
+#define GOMP_TARGET_ARG_NUM_TEAMS (1 << 8) |
|
|
+/* Target argument index of THREAD_LIMIT. */ |
|
|
+#define GOMP_TARGET_ARG_THREAD_LIMIT (2 << 8) |
|
|
+ |
|
|
+/* If the value is directly embedded in the target argument, it should be 16 bits |
|
|
+ at most and shifted by this many bits. */ |
|
|
+#define GOMP_TARGET_ARG_VALUE_SHIFT 16 |
|
|
+ |
|
|
+/* HSA specific data structures. */ |
|
|
+ |
|
|
+/* Identifiers of device-specific target arguments. */ |
|
|
+#define GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES (1 << 8) |
|
|
+ |
|
|
+#endif |
|
|
--- libgomp/oacc-mem.c.jj 2016-07-13 16:57:04.433535385 +0200 |
|
|
+++ libgomp/oacc-mem.c 2016-07-14 15:39:44.644631308 +0200 |
|
|
@@ -0,0 +1,204 @@ |
|
|
+/* OpenACC Runtime initialization routines |
|
|
+ |
|
|
+ Copyright (C) 2013-2016 Free Software Foundation, Inc. |
|
|
+ |
|
|
+ Contributed by Mentor Embedded. |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+#include "openacc.h" |
|
|
+#include "config.h" |
|
|
+#include "libgomp.h" |
|
|
+#include "gomp-constants.h" |
|
|
+#include "oacc-int.h" |
|
|
+#include <stdint.h> |
|
|
+#include <string.h> |
|
|
+#include <assert.h> |
|
|
+ |
|
|
+/* OpenACC is silent on how memory exhaustion is indicated. We return |
|
|
+ NULL. */ |
|
|
+ |
|
|
+void * |
|
|
+acc_malloc (size_t s) |
|
|
+{ |
|
|
+ if (!s) |
|
|
+ return NULL; |
|
|
+ |
|
|
+ goacc_lazy_initialize (); |
|
|
+ return malloc (s); |
|
|
+} |
|
|
+ |
|
|
+/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event |
|
|
+ the device address is mapped. We choose to check if it is mapped, |
|
|
+ and if it is, to unmap it. */ |
|
|
+void |
|
|
+acc_free (void *d) |
|
|
+{ |
|
|
+ return free (d); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+acc_memcpy_to_device (void *d, void *h, size_t s) |
|
|
+{ |
|
|
+ memmove (d, h, s); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+acc_memcpy_from_device (void *h, void *d, size_t s) |
|
|
+{ |
|
|
+ memmove (h, d, s); |
|
|
+} |
|
|
+ |
|
|
+/* Return the device pointer that corresponds to host data H. Or NULL |
|
|
+ if no mapping. */ |
|
|
+ |
|
|
+void * |
|
|
+acc_deviceptr (void *h) |
|
|
+{ |
|
|
+ goacc_lazy_initialize (); |
|
|
+ return h; |
|
|
+} |
|
|
+ |
|
|
+/* Return the host pointer that corresponds to device data D. Or NULL |
|
|
+ if no mapping. */ |
|
|
+ |
|
|
+void * |
|
|
+acc_hostptr (void *d) |
|
|
+{ |
|
|
+ goacc_lazy_initialize (); |
|
|
+ return d; |
|
|
+} |
|
|
+ |
|
|
+/* Return 1 if host data [H,+S] is present on the device. */ |
|
|
+ |
|
|
+int |
|
|
+acc_is_present (void *h, size_t s) |
|
|
+{ |
|
|
+ if (!s || !h) |
|
|
+ return 0; |
|
|
+ |
|
|
+ goacc_lazy_initialize (); |
|
|
+ return h != NULL; |
|
|
+} |
|
|
+ |
|
|
+/* Create a mapping for host [H,+S] -> device [D,+S] */ |
|
|
+ |
|
|
+void |
|
|
+acc_map_data (void *h, void *d, size_t s) |
|
|
+{ |
|
|
+ goacc_lazy_initialize (); |
|
|
+ |
|
|
+ if (d != h) |
|
|
+ gomp_fatal ("cannot map data on shared-memory system"); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+acc_unmap_data (void *h) |
|
|
+{ |
|
|
+} |
|
|
+ |
|
|
+#define FLAG_PRESENT (1 << 0) |
|
|
+#define FLAG_CREATE (1 << 1) |
|
|
+#define FLAG_COPY (1 << 2) |
|
|
+ |
|
|
+static void * |
|
|
+present_create_copy (unsigned f, void *h, size_t s) |
|
|
+{ |
|
|
+ if (!h || !s) |
|
|
+ gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s); |
|
|
+ |
|
|
+ goacc_lazy_initialize (); |
|
|
+ return h; |
|
|
+} |
|
|
+ |
|
|
+void * |
|
|
+acc_create (void *h, size_t s) |
|
|
+{ |
|
|
+ return present_create_copy (FLAG_CREATE, h, s); |
|
|
+} |
|
|
+ |
|
|
+void * |
|
|
+acc_copyin (void *h, size_t s) |
|
|
+{ |
|
|
+ return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s); |
|
|
+} |
|
|
+ |
|
|
+void * |
|
|
+acc_present_or_create (void *h, size_t s) |
|
|
+{ |
|
|
+ return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s); |
|
|
+} |
|
|
+ |
|
|
+void * |
|
|
+acc_present_or_copyin (void *h, size_t s) |
|
|
+{ |
|
|
+ return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s); |
|
|
+} |
|
|
+ |
|
|
+#define FLAG_COPYOUT (1 << 0) |
|
|
+ |
|
|
+static void |
|
|
+delete_copyout (unsigned f, void *h, size_t s, const char *libfnname) |
|
|
+{ |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+acc_delete (void *h , size_t s) |
|
|
+{ |
|
|
+ delete_copyout (0, h, s, __FUNCTION__); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+acc_copyout (void *h, size_t s) |
|
|
+{ |
|
|
+ delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__); |
|
|
+} |
|
|
+ |
|
|
+static void |
|
|
+update_dev_host (int is_dev, void *h, size_t s) |
|
|
+{ |
|
|
+ goacc_lazy_initialize (); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+acc_update_device (void *h, size_t s) |
|
|
+{ |
|
|
+ update_dev_host (1, h, s); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+acc_update_self (void *h, size_t s) |
|
|
+{ |
|
|
+ update_dev_host (0, h, s); |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, |
|
|
+ void *kinds) |
|
|
+{ |
|
|
+} |
|
|
+ |
|
|
+void |
|
|
+gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum) |
|
|
+{ |
|
|
+} |
|
|
--- libgomp/oacc-plugin.h.jj 2016-07-13 16:57:13.487423121 +0200 |
|
|
+++ libgomp/oacc-plugin.h 2016-07-13 16:57:13.487423121 +0200 |
|
|
@@ -0,0 +1,33 @@ |
|
|
+/* Copyright (C) 2014-2016 Free Software Foundation, Inc. |
|
|
+ |
|
|
+ Contributed by Mentor Embedded. |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+#ifndef OACC_PLUGIN_H |
|
|
+#define OACC_PLUGIN_H 1 |
|
|
+ |
|
|
+extern void GOMP_PLUGIN_async_unmap_vars (void *, int); |
|
|
+extern void *GOMP_PLUGIN_acc_thread (void); |
|
|
+ |
|
|
+#endif |
|
|
--- libgomp/taskloop.c.jj 2016-07-13 16:57:18.935355570 +0200 |
|
|
+++ libgomp/taskloop.c 2016-07-13 16:57:18.935355570 +0200 |
|
|
@@ -0,0 +1,340 @@ |
|
|
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc. |
|
|
+ Contributed by Jakub Jelinek <jakub@redhat.com>. |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+/* This file handles the taskloop construct. It is included twice, once |
|
|
+ for the long and once for unsigned long long variant. */ |
|
|
+ |
|
|
+/* Called when encountering a taskloop directive. If IF_CLAUSE is |
|
|
+ false, then we must not delay in executing the task. If UNTIED is true, |
|
|
+ then the task may be executed by any member of the team. */ |
|
|
+ |
|
|
+void |
|
|
+GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), |
|
|
+ long arg_size, long arg_align, unsigned flags, |
|
|
+ unsigned long num_tasks, int priority, |
|
|
+ TYPE start, TYPE end, TYPE step) |
|
|
+{ |
|
|
+ struct gomp_thread *thr = gomp_thread (); |
|
|
+ struct gomp_team *team = thr->ts.team; |
|
|
+ |
|
|
+#ifdef HAVE_BROKEN_POSIX_SEMAPHORES |
|
|
+ /* If pthread_mutex_* is used for omp_*lock*, then each task must be |
|
|
+ tied to one thread all the time. This means UNTIED tasks must be |
|
|
+ tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN |
|
|
+ might be running on different thread than FN. */ |
|
|
+ if (cpyfn) |
|
|
+ flags &= ~GOMP_TASK_FLAG_IF; |
|
|
+ flags &= ~GOMP_TASK_FLAG_UNTIED; |
|
|
+#endif |
|
|
+ |
|
|
+ /* If parallel or taskgroup has been cancelled, don't start new tasks. */ |
|
|
+ if (team && gomp_team_barrier_cancelled (&team->barrier)) |
|
|
+ return; |
|
|
+ |
|
|
+#ifdef TYPE_is_long |
|
|
+ TYPE s = step; |
|
|
+ if (step > 0) |
|
|
+ { |
|
|
+ if (start >= end) |
|
|
+ return; |
|
|
+ s--; |
|
|
+ } |
|
|
+ else |
|
|
+ { |
|
|
+ if (start <= end) |
|
|
+ return; |
|
|
+ s++; |
|
|
+ } |
|
|
+ UTYPE n = (end - start + s) / step; |
|
|
+#else |
|
|
+ UTYPE n; |
|
|
+ if (flags & GOMP_TASK_FLAG_UP) |
|
|
+ { |
|
|
+ if (start >= end) |
|
|
+ return; |
|
|
+ n = (end - start + step - 1) / step; |
|
|
+ } |
|
|
+ else |
|
|
+ { |
|
|
+ if (start <= end) |
|
|
+ return; |
|
|
+ n = (start - end - step - 1) / -step; |
|
|
+ } |
|
|
+#endif |
|
|
+ |
|
|
+ TYPE task_step = step; |
|
|
+ unsigned long nfirst = n; |
|
|
+ if (flags & GOMP_TASK_FLAG_GRAINSIZE) |
|
|
+ { |
|
|
+ unsigned long grainsize = num_tasks; |
|
|
+#ifdef TYPE_is_long |
|
|
+ num_tasks = n / grainsize; |
|
|
+#else |
|
|
+ UTYPE ndiv = n / grainsize; |
|
|
+ num_tasks = ndiv; |
|
|
+ if (num_tasks != ndiv) |
|
|
+ num_tasks = ~0UL; |
|
|
+#endif |
|
|
+ if (num_tasks <= 1) |
|
|
+ { |
|
|
+ num_tasks = 1; |
|
|
+ task_step = end - start; |
|
|
+ } |
|
|
+ else if (num_tasks >= grainsize |
|
|
+#ifndef TYPE_is_long |
|
|
+ && num_tasks != ~0UL |
|
|
+#endif |
|
|
+ ) |
|
|
+ { |
|
|
+ UTYPE mul = num_tasks * grainsize; |
|
|
+ task_step = (TYPE) grainsize * step; |
|
|
+ if (mul != n) |
|
|
+ { |
|
|
+ task_step += step; |
|
|
+ nfirst = n - mul - 1; |
|
|
+ } |
|
|
+ } |
|
|
+ else |
|
|
+ { |
|
|
+ UTYPE div = n / num_tasks; |
|
|
+ UTYPE mod = n % num_tasks; |
|
|
+ task_step = (TYPE) div * step; |
|
|
+ if (mod) |
|
|
+ { |
|
|
+ task_step += step; |
|
|
+ nfirst = mod - 1; |
|
|
+ } |
|
|
+ } |
|
|
+ } |
|
|
+ else |
|
|
+ { |
|
|
+ if (num_tasks == 0) |
|
|
+ num_tasks = team ? team->nthreads : 1; |
|
|
+ if (num_tasks >= n) |
|
|
+ num_tasks = n; |
|
|
+ else |
|
|
+ { |
|
|
+ UTYPE div = n / num_tasks; |
|
|
+ UTYPE mod = n % num_tasks; |
|
|
+ task_step = (TYPE) div * step; |
|
|
+ if (mod) |
|
|
+ { |
|
|
+ task_step += step; |
|
|
+ nfirst = mod - 1; |
|
|
+ } |
|
|
+ } |
|
|
+ } |
|
|
+ |
|
|
+ if (flags & GOMP_TASK_FLAG_NOGROUP) |
|
|
+ { |
|
|
+ if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled) |
|
|
+ return; |
|
|
+ } |
|
|
+ else |
|
|
+ ialias_call (GOMP_taskgroup_start) (); |
|
|
+ |
|
|
+ if (priority > gomp_max_task_priority_var) |
|
|
+ priority = gomp_max_task_priority_var; |
|
|
+ |
|
|
+ if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL |
|
|
+ || (thr->task && thr->task->final_task) |
|
|
+ || team->task_count + num_tasks > 64 * team->nthreads) |
|
|
+ { |
|
|
+ unsigned long i; |
|
|
+ if (__builtin_expect (cpyfn != NULL, 0)) |
|
|
+ { |
|
|
+ struct gomp_task task[num_tasks]; |
|
|
+ struct gomp_task *parent = thr->task; |
|
|
+ arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1); |
|
|
+ char buf[num_tasks * arg_size + arg_align - 1]; |
|
|
+ char *arg = (char *) (((uintptr_t) buf + arg_align - 1) |
|
|
+ & ~(uintptr_t) (arg_align - 1)); |
|
|
+ char *orig_arg = arg; |
|
|
+ for (i = 0; i < num_tasks; i++) |
|
|
+ { |
|
|
+ gomp_init_task (&task[i], parent, gomp_icv (false)); |
|
|
+ task[i].priority = priority; |
|
|
+ task[i].kind = GOMP_TASK_UNDEFERRED; |
|
|
+ task[i].final_task = (thr->task && thr->task->final_task) |
|
|
+ || (flags & GOMP_TASK_FLAG_FINAL); |
|
|
+ if (thr->task) |
|
|
+ { |
|
|
+ task[i].in_tied_task = thr->task->in_tied_task; |
|
|
+ task[i].taskgroup = thr->task->taskgroup; |
|
|
+ } |
|
|
+ thr->task = &task[i]; |
|
|
+ cpyfn (arg, data); |
|
|
+ arg += arg_size; |
|
|
+ } |
|
|
+ arg = orig_arg; |
|
|
+ for (i = 0; i < num_tasks; i++) |
|
|
+ { |
|
|
+ thr->task = &task[i]; |
|
|
+ ((TYPE *)arg)[0] = start; |
|
|
+ start += task_step; |
|
|
+ ((TYPE *)arg)[1] = start; |
|
|
+ if (i == nfirst) |
|
|
+ task_step -= step; |
|
|
+ fn (arg); |
|
|
+ arg += arg_size; |
|
|
+ if (!priority_queue_empty_p (&task[i].children_queue, |
|
|
+ MEMMODEL_RELAXED)) |
|
|
+ { |
|
|
+ gomp_mutex_lock (&team->task_lock); |
|
|
+ gomp_clear_parent (&task[i].children_queue); |
|
|
+ gomp_mutex_unlock (&team->task_lock); |
|
|
+ } |
|
|
+ gomp_end_task (); |
|
|
+ } |
|
|
+ } |
|
|
+ else |
|
|
+ for (i = 0; i < num_tasks; i++) |
|
|
+ { |
|
|
+ struct gomp_task task; |
|
|
+ |
|
|
+ gomp_init_task (&task, thr->task, gomp_icv (false)); |
|
|
+ task.priority = priority; |
|
|
+ task.kind = GOMP_TASK_UNDEFERRED; |
|
|
+ task.final_task = (thr->task && thr->task->final_task) |
|
|
+ || (flags & GOMP_TASK_FLAG_FINAL); |
|
|
+ if (thr->task) |
|
|
+ { |
|
|
+ task.in_tied_task = thr->task->in_tied_task; |
|
|
+ task.taskgroup = thr->task->taskgroup; |
|
|
+ } |
|
|
+ thr->task = &task; |
|
|
+ ((TYPE *)data)[0] = start; |
|
|
+ start += task_step; |
|
|
+ ((TYPE *)data)[1] = start; |
|
|
+ if (i == nfirst) |
|
|
+ task_step -= step; |
|
|
+ fn (data); |
|
|
+ if (!priority_queue_empty_p (&task.children_queue, |
|
|
+ MEMMODEL_RELAXED)) |
|
|
+ { |
|
|
+ gomp_mutex_lock (&team->task_lock); |
|
|
+ gomp_clear_parent (&task.children_queue); |
|
|
+ gomp_mutex_unlock (&team->task_lock); |
|
|
+ } |
|
|
+ gomp_end_task (); |
|
|
+ } |
|
|
+ } |
|
|
+ else |
|
|
+ { |
|
|
+ struct gomp_task *tasks[num_tasks]; |
|
|
+ struct gomp_task *parent = thr->task; |
|
|
+ struct gomp_taskgroup *taskgroup = parent->taskgroup; |
|
|
+ char *arg; |
|
|
+ int do_wake; |
|
|
+ unsigned long i; |
|
|
+ |
|
|
+ for (i = 0; i < num_tasks; i++) |
|
|
+ { |
|
|
+ struct gomp_task *task |
|
|
+ = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1); |
|
|
+ tasks[i] = task; |
|
|
+ arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1) |
|
|
+ & ~(uintptr_t) (arg_align - 1)); |
|
|
+ gomp_init_task (task, parent, gomp_icv (false)); |
|
|
+ task->priority = priority; |
|
|
+ task->kind = GOMP_TASK_UNDEFERRED; |
|
|
+ task->in_tied_task = parent->in_tied_task; |
|
|
+ task->taskgroup = taskgroup; |
|
|
+ thr->task = task; |
|
|
+ if (cpyfn) |
|
|
+ { |
|
|
+ cpyfn (arg, data); |
|
|
+ task->copy_ctors_done = true; |
|
|
+ } |
|
|
+ else |
|
|
+ memcpy (arg, data, arg_size); |
|
|
+ ((TYPE *)arg)[0] = start; |
|
|
+ start += task_step; |
|
|
+ ((TYPE *)arg)[1] = start; |
|
|
+ if (i == nfirst) |
|
|
+ task_step -= step; |
|
|
+ thr->task = parent; |
|
|
+ task->kind = GOMP_TASK_WAITING; |
|
|
+ task->fn = fn; |
|
|
+ task->fn_data = arg; |
|
|
+ task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1; |
|
|
+ } |
|
|
+ gomp_mutex_lock (&team->task_lock); |
|
|
+ /* If parallel or taskgroup has been cancelled, don't start new |
|
|
+ tasks. */ |
|
|
+ if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier) |
|
|
+ || (taskgroup && taskgroup->cancelled)) |
|
|
+ && cpyfn == NULL, 0)) |
|
|
+ { |
|
|
+ gomp_mutex_unlock (&team->task_lock); |
|
|
+ for (i = 0; i < num_tasks; i++) |
|
|
+ { |
|
|
+ gomp_finish_task (tasks[i]); |
|
|
+ free (tasks[i]); |
|
|
+ } |
|
|
+ if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) |
|
|
+ ialias_call (GOMP_taskgroup_end) (); |
|
|
+ return; |
|
|
+ } |
|
|
+ if (taskgroup) |
|
|
+ taskgroup->num_children += num_tasks; |
|
|
+ for (i = 0; i < num_tasks; i++) |
|
|
+ { |
|
|
+ struct gomp_task *task = tasks[i]; |
|
|
+ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, |
|
|
+ task, priority, |
|
|
+ PRIORITY_INSERT_BEGIN, |
|
|
+ /*last_parent_depends_on=*/false, |
|
|
+ task->parent_depends_on); |
|
|
+ if (taskgroup) |
|
|
+ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, |
|
|
+ task, priority, PRIORITY_INSERT_BEGIN, |
|
|
+ /*last_parent_depends_on=*/false, |
|
|
+ task->parent_depends_on); |
|
|
+ priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority, |
|
|
+ PRIORITY_INSERT_END, |
|
|
+ /*last_parent_depends_on=*/false, |
|
|
+ task->parent_depends_on); |
|
|
+ ++team->task_count; |
|
|
+ ++team->task_queued_count; |
|
|
+ } |
|
|
+ gomp_team_barrier_set_task_pending (&team->barrier); |
|
|
+ if (team->task_running_count + !parent->in_tied_task |
|
|
+ < team->nthreads) |
|
|
+ { |
|
|
+ do_wake = team->nthreads - team->task_running_count |
|
|
+ - !parent->in_tied_task; |
|
|
+ if ((unsigned long) do_wake > num_tasks) |
|
|
+ do_wake = num_tasks; |
|
|
+ } |
|
|
+ else |
|
|
+ do_wake = 0; |
|
|
+ gomp_mutex_unlock (&team->task_lock); |
|
|
+ if (do_wake) |
|
|
+ gomp_team_barrier_wake (&team->barrier, do_wake); |
|
|
+ } |
|
|
+ if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) |
|
|
+ ialias_call (GOMP_taskgroup_end) (); |
|
|
+} |
|
|
--- libgomp/priority_queue.h.jj 2016-07-13 16:57:04.438535323 +0200 |
|
|
+++ libgomp/priority_queue.h 2016-07-13 16:57:04.438535323 +0200 |
|
|
@@ -0,0 +1,485 @@ |
|
|
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc. |
|
|
+ Contributed by Aldy Hernandez <aldyh@redhat.com>. |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+/* Header file for a priority queue of GOMP tasks. */ |
|
|
+ |
|
|
+/* ?? Perhaps all the priority_tree_* functions are complex and rare |
|
|
+ enough to go out-of-line and be moved to priority_queue.c. ?? */ |
|
|
+ |
|
|
+#ifndef _PRIORITY_QUEUE_H_ |
|
|
+#define _PRIORITY_QUEUE_H_ |
|
|
+ |
|
|
+/* One task. */ |
|
|
+ |
|
|
+struct priority_node |
|
|
+{ |
|
|
+ /* Next and previous chains in a circular doubly linked list for |
|
|
+ tasks within this task's priority. */ |
|
|
+ struct priority_node *next, *prev; |
|
|
+}; |
|
|
+ |
|
|
+/* All tasks within the same priority. */ |
|
|
+ |
|
|
+struct priority_list |
|
|
+{ |
|
|
+ /* Priority of the tasks in this set. */ |
|
|
+ int priority; |
|
|
+ |
|
|
+ /* Tasks. */ |
|
|
+ struct priority_node *tasks; |
|
|
+ |
|
|
+ /* This points to the last of the higher priority WAITING tasks. |
|
|
+ Remember that for the children queue, we have: |
|
|
+ |
|
|
+ parent_depends_on WAITING tasks. |
|
|
+ !parent_depends_on WAITING tasks. |
|
|
+ TIED tasks. |
|
|
+ |
|
|
+ This is a pointer to the last of the parent_depends_on WAITING |
|
|
+ tasks which are essentially, higher priority items within their |
|
|
+ priority. */ |
|
|
+ struct priority_node *last_parent_depends_on; |
|
|
+}; |
|
|
+ |
|
|
+/* Another splay tree instantiation, for priority_list's. */ |
|
|
+typedef struct prio_splay_tree_node_s *prio_splay_tree_node; |
|
|
+typedef struct prio_splay_tree_s *prio_splay_tree; |
|
|
+typedef struct prio_splay_tree_key_s *prio_splay_tree_key; |
|
|
+struct prio_splay_tree_key_s { |
|
|
+ /* This structure must only contain a priority_list, as we cast |
|
|
+ prio_splay_tree_key to priority_list throughout. */ |
|
|
+ struct priority_list l; |
|
|
+}; |
|
|
+#define splay_tree_prefix prio |
|
|
+#include "splay-tree.h" |
|
|
+ |
|
|
+/* The entry point into a priority queue of tasks. |
|
|
+ |
|
|
+ There are two alternate implementations with which to store tasks: |
|
|
+ as a balanced tree of sorts, or as a simple list of tasks. If |
|
|
+ there are only priority-0 items (ROOT is NULL), we use the simple |
|
|
+ list, otherwise (ROOT is non-NULL) we use the tree. */ |
|
|
+ |
|
|
+struct priority_queue |
|
|
+{ |
|
|
+ /* If t.root != NULL, this is a splay tree of priority_lists to hold |
|
|
+ all tasks. This is only used if multiple priorities are in play, |
|
|
+ otherwise we use the priority_list `l' below to hold all |
|
|
+ (priority-0) tasks. */ |
|
|
+ struct prio_splay_tree_s t; |
|
|
+ |
|
|
+ /* If T above is NULL, only priority-0 items exist, so keep them |
|
|
+ in a simple list. */ |
|
|
+ struct priority_list l; |
|
|
+}; |
|
|
+ |
|
|
+enum priority_insert_type { |
|
|
+ /* Insert at the beginning of a priority list. */ |
|
|
+ PRIORITY_INSERT_BEGIN, |
|
|
+ /* Insert at the end of a priority list. */ |
|
|
+ PRIORITY_INSERT_END |
|
|
+}; |
|
|
+ |
|
|
+/* Used to determine which queue a given priority node belongs in. |
|
|
+ See pnode field of gomp_task. */ |
|
|
+ |
|
|
+enum priority_queue_type |
|
|
+{ |
|
|
+ PQ_TEAM, /* Node belongs in gomp_team's task_queue. */ |
|
|
+ PQ_CHILDREN, /* Node belongs in parent's children_queue. */ |
|
|
+ PQ_TASKGROUP, /* Node belongs in taskgroup->taskgroup_queue. */ |
|
|
+ PQ_IGNORED = 999 |
|
|
+}; |
|
|
+ |
|
|
+/* Priority queue implementation prototypes. */ |
|
|
+ |
|
|
+extern bool priority_queue_task_in_queue_p (enum priority_queue_type, |
|
|
+ struct priority_queue *, |
|
|
+ struct gomp_task *); |
|
|
+extern void priority_queue_dump (enum priority_queue_type, |
|
|
+ struct priority_queue *); |
|
|
+extern void priority_queue_verify (enum priority_queue_type, |
|
|
+ struct priority_queue *, bool); |
|
|
+extern void priority_tree_remove (enum priority_queue_type, |
|
|
+ struct priority_queue *, |
|
|
+ struct priority_node *); |
|
|
+extern struct gomp_task *priority_tree_next_task (enum priority_queue_type, |
|
|
+ struct priority_queue *, |
|
|
+ enum priority_queue_type, |
|
|
+ struct priority_queue *, |
|
|
+ bool *); |
|
|
+ |
|
|
+/* Return TRUE if there is more than one priority in HEAD. This is |
|
|
+ used throughout to choose between the fast path (priority 0 only |
|
|
+ items) and a world with multiple priorities. */ |
|
|
+ |
|
|
+static inline bool |
|
|
+priority_queue_multi_p (struct priority_queue *head) |
|
|
+{ |
|
|
+ return __builtin_expect (head->t.root != NULL, 0); |
|
|
+} |
|
|
+ |
|
|
+/* Initialize a priority queue. */ |
|
|
+ |
|
|
+static inline void |
|
|
+priority_queue_init (struct priority_queue *head) |
|
|
+{ |
|
|
+ head->t.root = NULL; |
|
|
+ /* To save a few microseconds, we don't initialize head->l.priority |
|
|
+ to 0 here. It is implied that priority will be 0 if head->t.root |
|
|
+ == NULL. |
|
|
+ |
|
|
+ priority_tree_insert() will fix this when we encounter multiple |
|
|
+ priorities. */ |
|
|
+ head->l.tasks = NULL; |
|
|
+ head->l.last_parent_depends_on = NULL; |
|
|
+} |
|
|
+ |
|
|
+static inline void |
|
|
+priority_queue_free (struct priority_queue *head) |
|
|
+{ |
|
|
+ /* There's nothing to do, as tasks were freed as they were removed |
|
|
+ in priority_queue_remove. */ |
|
|
+} |
|
|
+ |
|
|
+/* Forward declarations. */ |
|
|
+static inline size_t priority_queue_offset (enum priority_queue_type); |
|
|
+static inline struct gomp_task *priority_node_to_task |
|
|
+ (enum priority_queue_type, |
|
|
+ struct priority_node *); |
|
|
+static inline struct priority_node *task_to_priority_node |
|
|
+ (enum priority_queue_type, |
|
|
+ struct gomp_task *); |
|
|
+ |
|
|
+/* Return TRUE if priority queue HEAD is empty. |
|
|
+ |
|
|
+ MODEL is MEMMODEL_ACQUIRE if we should use an acquire atomic to |
|
|
+ read from the root of the queue, otherwise MEMMODEL_RELAXED if we |
|
|
+ should use a plain load. */ |
|
|
+ |
|
|
+static inline _Bool |
|
|
+priority_queue_empty_p (struct priority_queue *head, enum memmodel model) |
|
|
+{ |
|
|
+ /* Note: The acquire barriers on the loads here synchronize with |
|
|
+ the write of a NULL in gomp_task_run_post_remove_parent. It is |
|
|
+ not necessary that we synchronize with other non-NULL writes at |
|
|
+ this point, but we must ensure that all writes to memory by a |
|
|
+ child thread task work function are seen before we exit from |
|
|
+ GOMP_taskwait. */ |
|
|
+ if (priority_queue_multi_p (head)) |
|
|
+ { |
|
|
+ if (model == MEMMODEL_ACQUIRE) |
|
|
+ return __atomic_load_n (&head->t.root, MEMMODEL_ACQUIRE) == NULL; |
|
|
+ return head->t.root == NULL; |
|
|
+ } |
|
|
+ if (model == MEMMODEL_ACQUIRE) |
|
|
+ return __atomic_load_n (&head->l.tasks, MEMMODEL_ACQUIRE) == NULL; |
|
|
+ return head->l.tasks == NULL; |
|
|
+} |
|
|
+ |
|
|
+/* Look for a given PRIORITY in HEAD. Return it if found, otherwise |
|
|
+ return NULL. This only applies to the tree variant in HEAD. There |
|
|
+ is no point in searching for priorities in HEAD->L. */ |
|
|
+ |
|
|
+static inline struct priority_list * |
|
|
+priority_queue_lookup_priority (struct priority_queue *head, int priority) |
|
|
+{ |
|
|
+ if (head->t.root == NULL) |
|
|
+ return NULL; |
|
|
+ struct prio_splay_tree_key_s k; |
|
|
+ k.l.priority = priority; |
|
|
+ return (struct priority_list *) |
|
|
+ prio_splay_tree_lookup (&head->t, &k); |
|
|
+} |
|
|
+ |
|
|
+/* Insert TASK, with PRIORITY, into the priority list LIST. |
|
|
+ LIST contains items of type TYPE. |
|
|
+ |
|
|
+ If POS is PRIORITY_INSERT_BEGIN, the new task is inserted at the |
|
|
+ top of its respective priority. If POS is PRIORITY_INSERT_END, the |
|
|
+ task is inserted at the end of its priority. |
|
|
+ |
|
|
+ If ADJUST_PARENT_DEPENDS_ON is TRUE, LIST is a children queue, and |
|
|
+ we must keep track of higher and lower priority WAITING tasks by |
|
|
+ keeping the queue's last_parent_depends_on field accurate. This |
|
|
+ only applies to the children queue, and the caller must ensure LIST |
|
|
+ is a children queue in this case. |
|
|
+ |
|
|
+ If ADJUST_PARENT_DEPENDS_ON is TRUE, TASK_IS_PARENT_DEPENDS_ON is |
|
|
+ set to the task's parent_depends_on field. If |
|
|
+ ADJUST_PARENT_DEPENDS_ON is FALSE, this field is irrelevant. |
|
|
+ |
|
|
+ The task's priority_node is linked into LIST; nothing is returned. */ |
|
|
+ |
|
|
+static inline void |
|
|
+priority_list_insert (enum priority_queue_type type, |
|
|
+ struct priority_list *list, |
|
|
+ struct gomp_task *task, |
|
|
+ int priority, |
|
|
+ enum priority_insert_type pos, |
|
|
+ bool adjust_parent_depends_on, |
|
|
+ bool task_is_parent_depends_on) |
|
|
+{ |
|
|
+ struct priority_node *node = task_to_priority_node (type, task); |
|
|
+ if (list->tasks) |
|
|
+ { |
|
|
+ /* If we are keeping track of higher/lower priority items, |
|
|
+ but this is a lower priority WAITING task |
|
|
+ (parent_depends_on != NULL), put it after all ready to |
|
|
+ run tasks. See the comment in |
|
|
+ priority_queue_upgrade_task for a visual on how tasks |
|
|
+ should be organized. */ |
|
|
+ if (adjust_parent_depends_on |
|
|
+ && pos == PRIORITY_INSERT_BEGIN |
|
|
+ && list->last_parent_depends_on |
|
|
+ && !task_is_parent_depends_on) |
|
|
+ { |
|
|
+ struct priority_node *last_parent_depends_on |
|
|
+ = list->last_parent_depends_on; |
|
|
+ node->next = last_parent_depends_on->next; |
|
|
+ node->prev = last_parent_depends_on; |
|
|
+ } |
|
|
+ /* Otherwise, put it at the top/bottom of the queue. */ |
|
|
+ else |
|
|
+ { |
|
|
+ node->next = list->tasks; |
|
|
+ node->prev = list->tasks->prev; |
|
|
+ if (pos == PRIORITY_INSERT_BEGIN) |
|
|
+ list->tasks = node; |
|
|
+ } |
|
|
+ node->next->prev = node; |
|
|
+ node->prev->next = node; |
|
|
+ } |
|
|
+ else |
|
|
+ { |
|
|
+ node->next = node; |
|
|
+ node->prev = node; |
|
|
+ list->tasks = node; |
|
|
+ } |
|
|
+ if (adjust_parent_depends_on |
|
|
+ && list->last_parent_depends_on == NULL |
|
|
+ && task_is_parent_depends_on) |
|
|
+ list->last_parent_depends_on = node; |
|
|
+} |
|
|
+ |
|
|
+/* Tree version of priority_list_insert. */ |
|
|
+ |
|
|
+static inline void |
|
|
+priority_tree_insert (enum priority_queue_type type, |
|
|
+ struct priority_queue *head, |
|
|
+ struct gomp_task *task, |
|
|
+ int priority, |
|
|
+ enum priority_insert_type pos, |
|
|
+ bool adjust_parent_depends_on, |
|
|
+ bool task_is_parent_depends_on) |
|
|
+{ |
|
|
+ if (__builtin_expect (head->t.root == NULL, 0)) |
|
|
+ { |
|
|
+ /* The first time around, transfer any priority 0 items to the |
|
|
+ tree. */ |
|
|
+ if (head->l.tasks != NULL) |
|
|
+ { |
|
|
+ prio_splay_tree_node k = gomp_malloc (sizeof (*k)); |
|
|
+ k->left = NULL; |
|
|
+ k->right = NULL; |
|
|
+ k->key.l.priority = 0; |
|
|
+ k->key.l.tasks = head->l.tasks; |
|
|
+ k->key.l.last_parent_depends_on = head->l.last_parent_depends_on; |
|
|
+ prio_splay_tree_insert (&head->t, k); |
|
|
+ head->l.tasks = NULL; |
|
|
+ } |
|
|
+ } |
|
|
+ struct priority_list *list |
|
|
+ = priority_queue_lookup_priority (head, priority); |
|
|
+ if (!list) |
|
|
+ { |
|
|
+ prio_splay_tree_node k = gomp_malloc (sizeof (*k)); |
|
|
+ k->left = NULL; |
|
|
+ k->right = NULL; |
|
|
+ k->key.l.priority = priority; |
|
|
+ k->key.l.tasks = NULL; |
|
|
+ k->key.l.last_parent_depends_on = NULL; |
|
|
+ prio_splay_tree_insert (&head->t, k); |
|
|
+ list = &k->key.l; |
|
|
+ } |
|
|
+ priority_list_insert (type, list, task, priority, pos, |
|
|
+ adjust_parent_depends_on, |
|
|
+ task_is_parent_depends_on); |
|
|
+} |
|
|
+ |
|
|
+/* Generic version of priority_*_insert. */ |
|
|
+ |
|
|
+static inline void |
|
|
+priority_queue_insert (enum priority_queue_type type, |
|
|
+ struct priority_queue *head, |
|
|
+ struct gomp_task *task, |
|
|
+ int priority, |
|
|
+ enum priority_insert_type pos, |
|
|
+ bool adjust_parent_depends_on, |
|
|
+ bool task_is_parent_depends_on) |
|
|
+{ |
|
|
+#if _LIBGOMP_CHECKING_ |
|
|
+ if (priority_queue_task_in_queue_p (type, head, task)) |
|
|
+ gomp_fatal ("Attempt to insert existing task %p", task); |
|
|
+#endif |
|
|
+ if (priority_queue_multi_p (head) || __builtin_expect (priority > 0, 0)) |
|
|
+ priority_tree_insert (type, head, task, priority, pos, |
|
|
+ adjust_parent_depends_on, |
|
|
+ task_is_parent_depends_on); |
|
|
+ else |
|
|
+ priority_list_insert (type, &head->l, task, priority, pos, |
|
|
+ adjust_parent_depends_on, |
|
|
+ task_is_parent_depends_on); |
|
|
+} |
|
|
+ |
|
|
+/* If multiple priorities are in play, return the highest priority |
|
|
+ task from within Q1 and Q2, while giving preference to tasks from |
|
|
+ Q1. If the returned task is chosen from Q1, *Q1_CHOSEN_P is set to |
|
|
+ TRUE, otherwise it is set to FALSE. |
|
|
+ |
|
|
+ If multiple priorities are not in play (only 0 priorities are |
|
|
+ available), the next task is chosen exclusively from Q1. |
|
|
+ |
|
|
+ As a special case, Q2 can be NULL, in which case, we just choose |
|
|
+ the highest priority WAITING task in Q1. This is an optimization |
|
|
+ to speed up looking through only one queue. |
|
|
+ |
|
|
+ We assume Q1 has at least one item. */ |
|
|
+ |
|
|
+static inline struct gomp_task * |
|
|
+priority_queue_next_task (enum priority_queue_type t1, |
|
|
+ struct priority_queue *q1, |
|
|
+ enum priority_queue_type t2, |
|
|
+ struct priority_queue *q2, |
|
|
+ bool *q1_chosen_p) |
|
|
+{ |
|
|
+#if _LIBGOMP_CHECKING_ |
|
|
+ if (priority_queue_empty_p (q1, MEMMODEL_RELAXED)) |
|
|
+ gomp_fatal ("priority_queue_next_task: Q1 is empty"); |
|
|
+#endif |
|
|
+ if (priority_queue_multi_p (q1)) |
|
|
+ { |
|
|
+ struct gomp_task *t |
|
|
+ = priority_tree_next_task (t1, q1, t2, q2, q1_chosen_p); |
|
|
+ /* If T is NULL, there are no WAITING tasks in Q1. In which |
|
|
+ case, return any old (non-waiting) task which will cause the |
|
|
+ caller to do the right thing when checking T->KIND == |
|
|
+ GOMP_TASK_WAITING. */ |
|
|
+ if (!t) |
|
|
+ { |
|
|
+#if _LIBGOMP_CHECKING_ |
|
|
+ if (*q1_chosen_p == false) |
|
|
+ gomp_fatal ("priority_queue_next_task inconsistency"); |
|
|
+#endif |
|
|
+ return priority_node_to_task (t1, q1->t.root->key.l.tasks); |
|
|
+ } |
|
|
+ return t; |
|
|
+ } |
|
|
+ else |
|
|
+ { |
|
|
+ *q1_chosen_p = true; |
|
|
+ return priority_node_to_task (t1, q1->l.tasks); |
|
|
+ } |
|
|
+} |
|
|
+ |
|
|
+/* Remove NODE from LIST. |
|
|
+ |
|
|
+ If we are removing the one and only item in the list, and MODEL is |
|
|
+ MEMMODEL_RELEASE, use an atomic release to clear the list. |
|
|
+ |
|
|
+ If the list becomes empty after the remove, return TRUE. */ |
|
|
+ |
|
|
+static inline bool |
|
|
+priority_list_remove (struct priority_list *list, |
|
|
+ struct priority_node *node, |
|
|
+ enum memmodel model) |
|
|
+{ |
|
|
+ bool empty = false; |
|
|
+ node->prev->next = node->next; |
|
|
+ node->next->prev = node->prev; |
|
|
+ if (list->tasks == node) |
|
|
+ { |
|
|
+ if (node->next != node) |
|
|
+ list->tasks = node->next; |
|
|
+ else |
|
|
+ { |
|
|
+ /* We access task->children in GOMP_taskwait outside of |
|
|
+ the task lock mutex region, so need a release barrier |
|
|
+ here to ensure memory written by child_task->fn above |
|
|
+ is flushed before the NULL is written. */ |
|
|
+ if (model == MEMMODEL_RELEASE) |
|
|
+ __atomic_store_n (&list->tasks, NULL, MEMMODEL_RELEASE); |
|
|
+ else |
|
|
+ list->tasks = NULL; |
|
|
+ empty = true; |
|
|
+ goto remove_out; |
|
|
+ } |
|
|
+ } |
|
|
+remove_out: |
|
|
+#if _LIBGOMP_CHECKING_ |
|
|
+ memset (node, 0xaf, sizeof (*node)); |
|
|
+#endif |
|
|
+ return empty; |
|
|
+} |
|
|
+ |
|
|
+/* This is the generic version of priority_list_remove. |
|
|
+ |
|
|
+ Remove TASK from priority queue HEAD. HEAD contains tasks of type TYPE. |
|
|
+ |
|
|
+ If we are removing the one and only item in the priority queue and |
|
|
+ MODEL is MEMMODEL_RELEASE, use an atomic release to clear the queue. |
|
|
+ |
|
|
+ If the queue becomes empty after the remove, return TRUE. */ |
|
|
+ |
|
|
+static inline bool |
|
|
+priority_queue_remove (enum priority_queue_type type, |
|
|
+ struct priority_queue *head, |
|
|
+ struct gomp_task *task, |
|
|
+ enum memmodel model) |
|
|
+{ |
|
|
+#if _LIBGOMP_CHECKING_ |
|
|
+ if (!priority_queue_task_in_queue_p (type, head, task)) |
|
|
+ gomp_fatal ("Attempt to remove missing task %p", task); |
|
|
+#endif |
|
|
+ if (priority_queue_multi_p (head)) |
|
|
+ { |
|
|
+ priority_tree_remove (type, head, task_to_priority_node (type, task)); |
|
|
+ if (head->t.root == NULL) |
|
|
+ { |
|
|
+ if (model == MEMMODEL_RELEASE) |
|
|
+ /* Errr, we store NULL twice, the alternative would be to |
|
|
+ use an atomic release directly in the splay tree |
|
|
+ routines. Worth it? */ |
|
|
+ __atomic_store_n (&head->t.root, NULL, MEMMODEL_RELEASE); |
|
|
+ return true; |
|
|
+ } |
|
|
+ return false; |
|
|
+ } |
|
|
+ else |
|
|
+ return priority_list_remove (&head->l, |
|
|
+ task_to_priority_node (type, task), model); |
|
|
+} |
|
|
+ |
|
|
+#endif /* _PRIORITY_QUEUE_H_ */ |
|
|
--- libgomp/priority_queue.c.jj 2016-07-13 16:57:04.435535360 +0200 |
|
|
+++ libgomp/priority_queue.c 2016-07-13 16:57:04.435535360 +0200 |
|
|
@@ -0,0 +1,300 @@ |
|
|
+/* Copyright (C) 2015-2016 Free Software Foundation, Inc. |
|
|
+ Contributed by Aldy Hernandez <aldyh@redhat.com>. |
|
|
+ |
|
|
+ This file is part of the GNU Offloading and Multi Processing Library |
|
|
+ (libgomp). |
|
|
+ |
|
|
+ Libgomp is free software; you can redistribute it and/or modify it |
|
|
+ under the terms of the GNU General Public License as published by |
|
|
+ the Free Software Foundation; either version 3, or (at your option) |
|
|
+ any later version. |
|
|
+ |
|
|
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+ more details. |
|
|
+ |
|
|
+ Under Section 7 of GPL version 3, you are granted additional |
|
|
+ permissions described in the GCC Runtime Library Exception, version |
|
|
+ 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+ You should have received a copy of the GNU General Public License and |
|
|
+ a copy of the GCC Runtime Library Exception along with this program; |
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+ <http://www.gnu.org/licenses/>. */ |
|
|
+ |
|
|
+/* Priority queue implementation of GOMP tasks. */ |
|
|
+ |
|
|
+#include "libgomp.h" |
|
|
+ |
|
|
+#if _LIBGOMP_CHECKING_ |
|
|
+#include <stdio.h> |
|
|
+ |
|
|
+/* Sanity check to verify whether a TASK is in LIST. Return TRUE if |
|
|
+ found, FALSE otherwise. |
|
|
+ |
|
|
+ TYPE is the type of priority queue this task resides in. */ |
|
|
+ |
|
|
+static inline bool |
|
|
+priority_queue_task_in_list_p (enum priority_queue_type type, |
|
|
+ struct priority_list *list, |
|
|
+ struct gomp_task *task) |
|
|
+{ |
|
|
+ struct priority_node *p = list->tasks; |
|
|
+ do |
|
|
+ { |
|
|
+ if (priority_node_to_task (type, p) == task) |
|
|
+ return true; |
|
|
+ p = p->next; |
|
|
+ } |
|
|
+ while (p != list->tasks); |
|
|
+ return false; |
|
|
+} |
|
|
+ |
|
|
+/* Tree version of priority_queue_task_in_list_p. */ |
|
|
+ |
|
|
+static inline bool |
|
|
+priority_queue_task_in_tree_p (enum priority_queue_type type, |
|
|
+ struct priority_queue *head, |
|
|
+ struct gomp_task *task) |
|
|
+{ |
|
|
+ struct priority_list *list |
|
|
+ = priority_queue_lookup_priority (head, task->priority); |
|
|
+ if (!list) |
|
|
+ return false; |
|
|
+ return priority_queue_task_in_list_p (type, list, task); |
|
|
+} |
|
|
+ |
|
|
+/* Generic version of priority_queue_task_in_list_p that works for |
|
|
+ trees or lists. */ |
|
|
+ |
|
|
+bool |
|
|
+priority_queue_task_in_queue_p (enum priority_queue_type type, |
|
|
+ struct priority_queue *head, |
|
|
+ struct gomp_task *task) |
|
|
+{ |
|
|
+ if (priority_queue_empty_p (head, MEMMODEL_RELAXED)) |
|
|
+ return false; |
|
|
+ if (priority_queue_multi_p (head)) |
|
|
+ return priority_queue_task_in_tree_p (type, head, task); |
|
|
+ else |
|
|
+ return priority_queue_task_in_list_p (type, &head->l, task); |
|
|
+} |
|
|
+ |
|
|
+/* Sanity check LIST to make sure the tasks therein are in the right |
|
|
+ order. LIST is a priority list of type TYPE. |
|
|
+ |
|
|
+ The expected order is that GOMP_TASK_WAITING tasks come before |
|
|
+ GOMP_TASK_TIED/GOMP_TASK_ASYNC_RUNNING ones. |
|
|
+ |
|
|
+ If CHECK_DEPS is TRUE, we also check that parent_depends_on WAITING |
|
|
+ tasks come before !parent_depends_on WAITING tasks. This is only |
|
|
+ applicable to the children queue, and the caller is expected to |
|
|
+ ensure that we are verifying the children queue. */ |
|
|
+ |
|
|
+static void |
|
|
+priority_list_verify (enum priority_queue_type type, |
|
|
+ struct priority_list *list, bool check_deps) |
|
|
+{ |
|
|
+ bool seen_tied = false; |
|
|
+ bool seen_plain_waiting = false; |
|
|
+ struct priority_node *p = list->tasks; |
|
|
+ while (1) |
|
|
+ { |
|
|
+ struct gomp_task *t = priority_node_to_task (type, p); |
|
|
+ if (seen_tied && t->kind == GOMP_TASK_WAITING) |
|
|
+ gomp_fatal ("priority_queue_verify: WAITING task after TIED"); |
|
|
+ if (t->kind >= GOMP_TASK_TIED) |
|
|
+ seen_tied = true; |
|
|
+ else if (check_deps && t->kind == GOMP_TASK_WAITING) |
|
|
+ { |
|
|
+ if (t->parent_depends_on) |
|
|
+ { |
|
|
+ if (seen_plain_waiting) |
|
|
+ gomp_fatal ("priority_queue_verify: " |
|
|
+ "parent_depends_on after !parent_depends_on"); |
|
|
+ } |
|
|
+ else |
|
|
+ seen_plain_waiting = true; |
|
|
+ } |
|
|
+ p = p->next; |
|
|
+ if (p == list->tasks) |
|
|
+ break; |
|
|
+ } |
|
|
+} |
|
|
+ |
|
|
+/* Callback data passed to priority_tree_verify_callback. */ |
|
|
+struct cbtype |
|
|
+{ |
|
|
+ enum priority_queue_type type; |
|
|
+ bool check_deps; |
|
|
+}; |
|
|
+ |
|
|
+/* Verify every task in the priority list stored in KEY. |
|
|
+ |
|
|
+ Callback for prio_splay_tree_foreach. */ |
|
|
+ |
|
|
+static void |
|
|
+priority_tree_verify_callback (prio_splay_tree_key key, void *data) |
|
|
+{ |
|
|
+ struct cbtype *cb = (struct cbtype *) data; |
|
|
+ priority_list_verify (cb->type, &key->l, cb->check_deps); |
|
|
+} |
|
|
+ |
|
|
+/* Generic version of priority_list_verify. |
|
|
+ |
|
|
+ Sanity check HEAD to make sure the tasks therein are in the right |
|
|
+ order. The priority_queue holds tasks of type TYPE. |
|
|
+ |
|
|
+ If CHECK_DEPS is TRUE, we also check that parent_depends_on WAITING |
|
|
+ tasks come before !parent_depends_on WAITING tasks. This is only |
|
|
+ applicable to the children queue, and the caller is expected to |
|
|
+ ensure that we are verifying the children queue. */ |
|
|
+ |
|
|
+void |
|
|
+priority_queue_verify (enum priority_queue_type type, |
|
|
+ struct priority_queue *head, bool check_deps) |
|
|
+{ |
|
|
+ if (priority_queue_empty_p (head, MEMMODEL_RELAXED)) |
|
|
+ return; |
|
|
+ if (priority_queue_multi_p (head)) |
|
|
+ { |
|
|
+ struct cbtype cb = { type, check_deps }; |
|
|
+ prio_splay_tree_foreach (&head->t, |
|
|
+ priority_tree_verify_callback, &cb); |
|
|
+ } |
|
|
+ else |
|
|
+ priority_list_verify (type, &head->l, check_deps); |
|
|
+} |
|
|
+#endif /* _LIBGOMP_CHECKING_ */ |
|
|
+ |
|
|
+/* Remove NODE from priority queue HEAD, wherever it may be inside the |
|
|
+ tree. HEAD contains tasks of type TYPE. */ |
|
|
+ |
|
|
+void |
|
|
+priority_tree_remove (enum priority_queue_type type, |
|
|
+ struct priority_queue *head, |
|
|
+ struct priority_node *node) |
|
|
+{ |
|
|
+ /* ?? The only reason this function is not inlined is because we |
|
|
+ need to find the priority within gomp_task (which has not been |
|
|
+ completely defined in the header file). If the lack of inlining |
|
|
+ is a concern, we could pass the priority number as a |
|
|
+ parameter, or we could move this to libgomp.h. */ |
|
|
+ int priority = priority_node_to_task (type, node)->priority; |
|
|
+ |
|
|
+ /* ?? We could avoid this lookup by keeping a pointer to the key in |
|
|
+ the priority_node. */ |
|
|
+ struct priority_list *list |
|
|
+ = priority_queue_lookup_priority (head, priority); |
|
|
+#if _LIBGOMP_CHECKING_ |
|
|
+ if (!list) |
|
|
+ gomp_fatal ("Unable to find priority %d", priority); |
|
|
+#endif |
|
|
+ /* If NODE was the last in its priority, clean up the priority. */ |
|
|
+ if (priority_list_remove (list, node, MEMMODEL_RELAXED)) |
|
|
+ { |
|
|
+ prio_splay_tree_remove (&head->t, (prio_splay_tree_key) list); |
|
|
+ list->tasks = NULL; |
|
|
+#if _LIBGOMP_CHECKING_ |
|
|
+ memset (list, 0xaf, sizeof (*list)); |
|
|
+#endif |
|
|
+ free (list); |
|
|
+ } |
|
|
+} |
|
|
+ |
|
|
+/* Return the highest priority WAITING task in a splay tree NODE. If |
|
|
+ there are no WAITING tasks available, return NULL. |
|
|
+ |
|
|
+ NODE is the root of a subtree of priority lists holding tasks of type TYPE. |
|
|
+ |
|
|
+ The right most node in a tree contains the highest priority. |
|
|
+ Recurse down to find such a node. If the task at that max node is |
|
|
+ not WAITING, bubble back up and look at the remaining tasks |
|
|
+ in-order. */ |
|
|
+ |
|
|
+static struct gomp_task * |
|
|
+priority_tree_next_task_1 (enum priority_queue_type type, |
|
|
+ prio_splay_tree_node node) |
|
|
+{ |
|
|
+ again: |
|
|
+ if (!node) |
|
|
+ return NULL; |
|
|
+ struct gomp_task *ret = priority_tree_next_task_1 (type, node->right); |
|
|
+ if (ret) |
|
|
+ return ret; |
|
|
+ ret = priority_node_to_task (type, node->key.l.tasks); |
|
|
+ if (ret->kind == GOMP_TASK_WAITING) |
|
|
+ return ret; |
|
|
+ node = node->left; |
|
|
+ goto again; |
|
|
+} |
|
|
+ |
|
|
+/* Return the highest priority WAITING task from within Q1 and Q2, |
|
|
+ while giving preference to tasks from Q1. Q1 is a queue containing |
|
|
+ items of type TYPE1. Q2 is a queue containing items of type TYPE2. |
|
|
+ |
|
|
+ Since we are mostly interested in Q1, if there are no WAITING tasks |
|
|
+ in Q1, we don't bother checking Q2, and just return NULL. |
|
|
+ |
|
|
+ As a special case, Q2 can be NULL, in which case, we just choose |
|
|
+ the highest priority WAITING task in Q1. This is an optimization |
|
|
+ to speed up looking through only one queue. |
|
|
+ |
|
|
+ If the returned task is chosen from Q1, *Q1_CHOSEN_P is set to |
|
|
+ TRUE, otherwise it is set to FALSE. */ |
|
|
+ |
|
|
+struct gomp_task * |
|
|
+priority_tree_next_task (enum priority_queue_type type1, |
|
|
+ struct priority_queue *q1, |
|
|
+ enum priority_queue_type type2, |
|
|
+ struct priority_queue *q2, |
|
|
+ bool *q1_chosen_p) |
|
|
+{ |
|
|
+ struct gomp_task *t1 = priority_tree_next_task_1 (type1, q1->t.root); |
|
|
+ if (!t1 |
|
|
+ /* Special optimization when only searching through one queue. */ |
|
|
+ || !q2) |
|
|
+ { |
|
|
+ *q1_chosen_p = true; |
|
|
+ return t1; |
|
|
+ } |
|
|
+ struct gomp_task *t2 = priority_tree_next_task_1 (type2, q2->t.root); |
|
|
+ if (!t2 || t1->priority > t2->priority) |
|
|
+ { |
|
|
+ *q1_chosen_p = true; |
|
|
+ return t1; |
|
|
+ } |
|
|
+ if (t2->priority > t1->priority) |
|
|
+ { |
|
|
+ *q1_chosen_p = false; |
|
|
+ return t2; |
|
|
+ } |
|
|
+ /* If we get here, the priorities are the same, so we must look at |
|
|
+ parent_depends_on to make our decision. */ |
|
|
+#if _LIBGOMP_CHECKING_ |
|
|
+ if (t1 != t2) |
|
|
+ gomp_fatal ("priority_tree_next_task: t1 != t2"); |
|
|
+#endif |
|
|
+ if (t2->parent_depends_on && !t1->parent_depends_on) |
|
|
+ { |
|
|
+ *q1_chosen_p = false; |
|
|
+ return t2; |
|
|
+ } |
|
|
+ *q1_chosen_p = true; |
|
|
+ return t1; |
|
|
+} |
|
|
+ |
|
|
+/* Priority splay trees comparison function. */ |
|
|
+static inline int |
|
|
+prio_splay_compare (prio_splay_tree_key x, prio_splay_tree_key y) |
|
|
+{ |
|
|
+ if (x->l.priority == y->l.priority) |
|
|
+ return 0; |
|
|
+ return x->l.priority < y->l.priority ? -1 : 1; |
|
|
+} |
|
|
+ |
|
|
+/* Define another splay tree instantiation, for priority_list's. */ |
|
|
+#define splay_tree_prefix prio |
|
|
+#define splay_tree_c |
|
|
+#include "splay-tree.h" |
|
|
--- libgomp/openacc.f90.jj 2016-07-13 16:57:04.434535373 +0200 |
|
|
+++ libgomp/openacc.f90 2016-07-14 19:01:54.901230875 +0200 |
|
|
@@ -0,0 +1,911 @@ |
|
|
+! OpenACC Runtime Library Definitions. |
|
|
+ |
|
|
+! Copyright (C) 2014-2016 Free Software Foundation, Inc. |
|
|
+ |
|
|
+! Contributed by Tobias Burnus <burnus@net-b.de> |
|
|
+! and Mentor Embedded. |
|
|
+ |
|
|
+! This file is part of the GNU Offloading and Multi Processing Library |
|
|
+! (libgomp). |
|
|
+ |
|
|
+! Libgomp is free software; you can redistribute it and/or modify it |
|
|
+! under the terms of the GNU General Public License as published by |
|
|
+! the Free Software Foundation; either version 3, or (at your option) |
|
|
+! any later version. |
|
|
+ |
|
|
+! Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
|
|
+! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
|
+! FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
|
+! more details. |
|
|
+ |
|
|
+! Under Section 7 of GPL version 3, you are granted additional |
|
|
+! permissions described in the GCC Runtime Library Exception, version |
|
|
+! 3.1, as published by the Free Software Foundation. |
|
|
+ |
|
|
+! You should have received a copy of the GNU General Public License and |
|
|
+! a copy of the GCC Runtime Library Exception along with this program; |
|
|
+! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
|
|
+! <http://www.gnu.org/licenses/>. |
|
|
+ |
|
|
+module openacc_kinds |
|
|
+ use iso_fortran_env, only: int32 |
|
|
+ implicit none |
|
|
+ |
|
|
+ private :: int32 |
|
|
+ public :: acc_device_kind |
|
|
+ |
|
|
+ integer, parameter :: acc_device_kind = int32 |
|
|
+ |
|
|
+ public :: acc_device_none, acc_device_default, acc_device_host |
|
|
+ public :: acc_device_not_host, acc_device_nvidia |
|
|
+ |
|
|
+ ! Keep in sync with include/gomp-constants.h. |
|
|
+ integer (acc_device_kind), parameter :: acc_device_none = 0 |
|
|
+ integer (acc_device_kind), parameter :: acc_device_default = 1 |
|
|
+ integer (acc_device_kind), parameter :: acc_device_host = 2 |
|
|
+ ! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed. |
|
|
+ integer (acc_device_kind), parameter :: acc_device_not_host = 4 |
|
|
+ integer (acc_device_kind), parameter :: acc_device_nvidia = 5 |
|
|
+ |
|
|
+ public :: acc_handle_kind |
|
|
+ |
|
|
+ integer, parameter :: acc_handle_kind = int32 |
|
|
+ |
|
|
+ public :: acc_async_noval, acc_async_sync |
|
|
+ |
|
|
+ ! Keep in sync with include/gomp-constants.h. |
|
|
+ integer (acc_handle_kind), parameter :: acc_async_noval = -1 |
|
|
+ integer (acc_handle_kind), parameter :: acc_async_sync = -2 |
|
|
+ |
|
|
+end module |
|
|
+ |
|
|
+module openacc_internal |
|
|
+ use openacc_kinds |
|
|
+ implicit none |
|
|
+ |
|
|
+ interface |
|
|
+ function acc_get_num_devices_h (d) |
|
|
+ import |
|
|
+ integer acc_get_num_devices_h |
|
|
+ integer (acc_device_kind) d |
|
|
+ end function |
|
|
+ |
|
|
+ subroutine acc_set_device_type_h (d) |
|
|
+ import |
|
|
+ integer (acc_device_kind) d |
|
|
+ end subroutine |
|
|
+ |
|
|
+ function acc_get_device_type_h () |
|
|
+ import |
|
|
+ integer (acc_device_kind) acc_get_device_type_h |
|
|
+ end function |
|
|
+ |
|
|
+ subroutine acc_set_device_num_h (n, d) |
|
|
+ import |
|
|
+ integer n |
|
|
+ integer (acc_device_kind) d |
|
|
+ end subroutine |
|
|
+ |
|
|
+ function acc_get_device_num_h (d) |
|
|
+ import |
|
|
+ integer acc_get_device_num_h |
|
|
+ integer (acc_device_kind) d |
|
|
+ end function |
|
|
+ |
|
|
+ function acc_async_test_h (a) |
|
|
+ logical acc_async_test_h |
|
|
+ integer a |
|
|
+ end function |
|
|
+ |
|
|
+ function acc_async_test_all_h () |
|
|
+ logical acc_async_test_all_h |
|
|
+ end function |
|
|
+ |
|
|
+ subroutine acc_wait_h (a) |
|
|
+ integer a |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_wait_async_h (a1, a2) |
|
|
+ integer a1, a2 |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_wait_all_h () |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_wait_all_async_h (a) |
|
|
+ integer a |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_init_h (d) |
|
|
+ import |
|
|
+ integer (acc_device_kind) d |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_shutdown_h (d) |
|
|
+ import |
|
|
+ integer (acc_device_kind) d |
|
|
+ end subroutine |
|
|
+ |
|
|
+ function acc_on_device_h (d) |
|
|
+ import |
|
|
+ integer (acc_device_kind) d |
|
|
+ logical acc_on_device_h |
|
|
+ end function |
|
|
+ |
|
|
+ subroutine acc_copyin_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_copyin_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_copyin_array_h (a) |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_present_or_copyin_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_present_or_copyin_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_present_or_copyin_array_h (a) |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_create_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_create_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_create_array_h (a) |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_present_or_create_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_present_or_create_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_present_or_create_array_h (a) |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_copyout_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_copyout_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_copyout_array_h (a) |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_delete_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_delete_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_delete_array_h (a) |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_update_device_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_update_device_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_update_device_array_h (a) |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_update_self_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_update_self_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_update_self_array_h (a) |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end subroutine |
|
|
+ |
|
|
+ function acc_is_present_32_h (a, len) |
|
|
+ use iso_c_binding, only: c_int32_t |
|
|
+ logical acc_is_present_32_h |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int32_t) len |
|
|
+ end function |
|
|
+ |
|
|
+ function acc_is_present_64_h (a, len) |
|
|
+ use iso_c_binding, only: c_int64_t |
|
|
+ logical acc_is_present_64_h |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_int64_t) len |
|
|
+ end function |
|
|
+ |
|
|
+ function acc_is_present_array_h (a) |
|
|
+ logical acc_is_present_array_h |
|
|
+ type (*), dimension (..), contiguous :: a |
|
|
+ end function |
|
|
+ end interface |
|
|
+ |
|
|
+ interface |
|
|
+ function acc_get_num_devices_l (d) & |
|
|
+ bind (C, name = "acc_get_num_devices") |
|
|
+ use iso_c_binding, only: c_int |
|
|
+ integer (c_int) :: acc_get_num_devices_l |
|
|
+ integer (c_int), value :: d |
|
|
+ end function |
|
|
+ |
|
|
+ subroutine acc_set_device_type_l (d) & |
|
|
+ bind (C, name = "acc_set_device_type") |
|
|
+ use iso_c_binding, only: c_int |
|
|
+ integer (c_int), value :: d |
|
|
+ end subroutine |
|
|
+ |
|
|
+ function acc_get_device_type_l () & |
|
|
+ bind (C, name = "acc_get_device_type") |
|
|
+ use iso_c_binding, only: c_int |
|
|
+ integer (c_int) :: acc_get_device_type_l |
|
|
+ end function |
|
|
+ |
|
|
+ subroutine acc_set_device_num_l (n, d) & |
|
|
+ bind (C, name = "acc_set_device_num") |
|
|
+ use iso_c_binding, only: c_int |
|
|
+ integer (c_int), value :: n, d |
|
|
+ end subroutine |
|
|
+ |
|
|
+ function acc_get_device_num_l (d) & |
|
|
+ bind (C, name = "acc_get_device_num") |
|
|
+ use iso_c_binding, only: c_int |
|
|
+ integer (c_int) :: acc_get_device_num_l |
|
|
+ integer (c_int), value :: d |
|
|
+ end function |
|
|
+ |
|
|
+ function acc_async_test_l (a) & |
|
|
+ bind (C, name = "acc_async_test") |
|
|
+ use iso_c_binding, only: c_int |
|
|
+ integer (c_int) :: acc_async_test_l |
|
|
+ integer (c_int), value :: a |
|
|
+ end function |
|
|
+ |
|
|
+ function acc_async_test_all_l () & |
|
|
+ bind (C, name = "acc_async_test_all") |
|
|
+ use iso_c_binding, only: c_int |
|
|
+ integer (c_int) :: acc_async_test_all_l |
|
|
+ end function |
|
|
+ |
|
|
+ subroutine acc_wait_l (a) & |
|
|
+ bind (C, name = "acc_wait") |
|
|
+ use iso_c_binding, only: c_int |
|
|
+ integer (c_int), value :: a |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_wait_async_l (a1, a2) & |
|
|
+ bind (C, name = "acc_wait_async") |
|
|
+ use iso_c_binding, only: c_int |
|
|
+ integer (c_int), value :: a1, a2 |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_wait_all_l () & |
|
|
+ bind (C, name = "acc_wait_all") |
|
|
+ use iso_c_binding, only: c_int |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_wait_all_async_l (a) & |
|
|
+ bind (C, name = "acc_wait_all_async") |
|
|
+ use iso_c_binding, only: c_int |
|
|
+ integer (c_int), value :: a |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_init_l (d) & |
|
|
+ bind (C, name = "acc_init") |
|
|
+ use iso_c_binding, only: c_int |
|
|
+ integer (c_int), value :: d |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_shutdown_l (d) & |
|
|
+ bind (C, name = "acc_shutdown") |
|
|
+ use iso_c_binding, only: c_int |
|
|
+ integer (c_int), value :: d |
|
|
+ end subroutine |
|
|
+ |
|
|
+ function acc_on_device_l (d) & |
|
|
+ bind (C, name = "acc_on_device") |
|
|
+ use iso_c_binding, only: c_int |
|
|
+ integer (c_int) :: acc_on_device_l |
|
|
+ integer (c_int), value :: d |
|
|
+ end function |
|
|
+ |
|
|
+ subroutine acc_copyin_l (a, len) & |
|
|
+ bind (C, name = "acc_copyin") |
|
|
+ use iso_c_binding, only: c_size_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_size_t), value :: len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_present_or_copyin_l (a, len) & |
|
|
+ bind (C, name = "acc_present_or_copyin") |
|
|
+ use iso_c_binding, only: c_size_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_size_t), value :: len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_create_l (a, len) & |
|
|
+ bind (C, name = "acc_create") |
|
|
+ use iso_c_binding, only: c_size_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_size_t), value :: len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_present_or_create_l (a, len) & |
|
|
+ bind (C, name = "acc_present_or_create") |
|
|
+ use iso_c_binding, only: c_size_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_size_t), value :: len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_copyout_l (a, len) & |
|
|
+ bind (C, name = "acc_copyout") |
|
|
+ use iso_c_binding, only: c_size_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_size_t), value :: len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_delete_l (a, len) & |
|
|
+ bind (C, name = "acc_delete") |
|
|
+ use iso_c_binding, only: c_size_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_size_t), value :: len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_update_device_l (a, len) & |
|
|
+ bind (C, name = "acc_update_device") |
|
|
+ use iso_c_binding, only: c_size_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_size_t), value :: len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ subroutine acc_update_self_l (a, len) & |
|
|
+ bind (C, name = "acc_update_self") |
|
|
+ use iso_c_binding, only: c_size_t |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_size_t), value :: len |
|
|
+ end subroutine |
|
|
+ |
|
|
+ function acc_is_present_l (a, len) & |
|
|
+ bind (C, name = "acc_is_present") |
|
|
+ use iso_c_binding, only: c_int32_t, c_size_t |
|
|
+ integer (c_int32_t) :: acc_is_present_l |
|
|
+ type (*), dimension (*) :: a |
|
|
+ integer (c_size_t), value :: len |
|
|
+ end function |
|
|
+ end interface |
|
|
+end module |
|
|
+ |
|
|
+! Module openacc: uses openacc_kinds and openacc_internal, defines the
+! openacc_version constant, and binds every public generic name below to
+! its specific procedure(s), whose names end in "_h".
+module openacc
+  use openacc_kinds
+  use openacc_internal
+  implicit none
+
+  public :: openacc_version
+
+  public :: acc_get_num_devices
+  public :: acc_set_device_type
+  public :: acc_get_device_type
+  public :: acc_set_device_num
+  public :: acc_get_device_num
+  public :: acc_async_test
+  public :: acc_async_test_all
+  public :: acc_wait
+  public :: acc_wait_async
+  public :: acc_wait_all
+  public :: acc_wait_all_async
+  public :: acc_init
+  public :: acc_shutdown
+  public :: acc_on_device
+  public :: acc_copyin
+  public :: acc_present_or_copyin
+  public :: acc_pcopyin
+  public :: acc_create
+  public :: acc_present_or_create
+  public :: acc_pcreate
+  public :: acc_copyout
+  public :: acc_delete
+  public :: acc_update_device
+  public :: acc_update_self
+  public :: acc_is_present
+
+  integer, parameter :: openacc_version = 201306
+
+  interface acc_get_num_devices
+    procedure :: acc_get_num_devices_h
+  end interface acc_get_num_devices
+
+  interface acc_set_device_type
+    procedure :: acc_set_device_type_h
+  end interface acc_set_device_type
+
+  interface acc_get_device_type
+    procedure :: acc_get_device_type_h
+  end interface acc_get_device_type
+
+  interface acc_set_device_num
+    procedure :: acc_set_device_num_h
+  end interface acc_set_device_num
+
+  interface acc_get_device_num
+    procedure :: acc_get_device_num_h
+  end interface acc_get_device_num
+
+  interface acc_async_test
+    procedure :: acc_async_test_h
+  end interface acc_async_test
+
+  interface acc_async_test_all
+    procedure :: acc_async_test_all_h
+  end interface acc_async_test_all
+
+  interface acc_wait
+    procedure :: acc_wait_h
+  end interface acc_wait
+
+  interface acc_wait_async
+    procedure :: acc_wait_async_h
+  end interface acc_wait_async
+
+  interface acc_wait_all
+    procedure :: acc_wait_all_h
+  end interface acc_wait_all
+
+  interface acc_wait_all_async
+    procedure :: acc_wait_all_async_h
+  end interface acc_wait_all_async
+
+  interface acc_init
+    procedure :: acc_init_h
+  end interface acc_init
+
+  interface acc_shutdown
+    procedure :: acc_shutdown_h
+  end interface acc_shutdown
+
+  interface acc_on_device
+    procedure :: acc_on_device_h
+  end interface acc_on_device
+
+  ! acc_malloc and acc_free: only available in C/C++.
+
+  ! As a vendor extension, the following generics accept both 32-bit and
+  ! 64-bit integers for the "size" argument; the OpenACC standard only
+  ! permits default-kind integers, which are of kind 4 (i.e. 32 bits).
+  ! Additionally, the two-argument version also accepts arrays as its
+  ! argument, and the one-argument version also accepts scalars.  Note
+  ! that the code assumes that the arrays are contiguous.
+
+  interface acc_copyin
+    procedure :: acc_copyin_32_h, &
+                 acc_copyin_64_h, &
+                 acc_copyin_array_h
+  end interface acc_copyin
+
+  interface acc_present_or_copyin
+    procedure :: acc_present_or_copyin_32_h, &
+                 acc_present_or_copyin_64_h, &
+                 acc_present_or_copyin_array_h
+  end interface acc_present_or_copyin
+
+  ! acc_pcopyin is bound to the same specifics as acc_present_or_copyin.
+  interface acc_pcopyin
+    procedure :: acc_present_or_copyin_32_h, &
+                 acc_present_or_copyin_64_h, &
+                 acc_present_or_copyin_array_h
+  end interface acc_pcopyin
+
+  interface acc_create
+    procedure :: acc_create_32_h, &
+                 acc_create_64_h, &
+                 acc_create_array_h
+  end interface acc_create
+
+  interface acc_present_or_create
+    procedure :: acc_present_or_create_32_h, &
+                 acc_present_or_create_64_h, &
+                 acc_present_or_create_array_h
+  end interface acc_present_or_create
+
+  ! acc_pcreate is bound to the same specifics as acc_present_or_create.
+  interface acc_pcreate
+    procedure :: acc_present_or_create_32_h, &
+                 acc_present_or_create_64_h, &
+                 acc_present_or_create_array_h
+  end interface acc_pcreate
+
+  interface acc_copyout
+    procedure :: acc_copyout_32_h, &
+                 acc_copyout_64_h, &
+                 acc_copyout_array_h
+  end interface acc_copyout
+
+  interface acc_delete
+    procedure :: acc_delete_32_h, &
+                 acc_delete_64_h, &
+                 acc_delete_array_h
+  end interface acc_delete
+
+  interface acc_update_device
+    procedure :: acc_update_device_32_h, &
+                 acc_update_device_64_h, &
+                 acc_update_device_array_h
+  end interface acc_update_device
+
+  interface acc_update_self
+    procedure :: acc_update_self_32_h, &
+                 acc_update_self_64_h, &
+                 acc_update_self_array_h
+  end interface acc_update_self
+
+  ! acc_map_data, acc_unmap_data, acc_deviceptr and acc_hostptr:
+  ! only available in C/C++.
+
+  interface acc_is_present
+    procedure :: acc_is_present_32_h, &
+                 acc_is_present_64_h, &
+                 acc_is_present_array_h
+  end interface acc_is_present
+
+  ! acc_memcpy_to_device and acc_memcpy_from_device:
+  ! only available in C/C++.
+
+end module openacc
|
|
+ |
|
|
+! Returns the integer value of acc_get_num_devices_l (d), where D is an
+! integer of kind acc_device_kind.
+function acc_get_num_devices_h (d) result (res)
+  use openacc_kinds
+  use openacc_internal, only: acc_get_num_devices_l
+  integer (acc_device_kind) :: d
+  integer :: res
+
+  res = acc_get_num_devices_l (d)
+end function acc_get_num_devices_h
|
|
+ |
|
|
+! Forwards D (an integer of kind acc_device_kind) unchanged to
+! acc_set_device_type_l.
+subroutine acc_set_device_type_h (d)
+  use openacc_kinds
+  use openacc_internal, only: acc_set_device_type_l
+  integer (acc_device_kind) :: d
+
+  call acc_set_device_type_l (d)
+end subroutine acc_set_device_type_h
|
|
+ |
|
|
+! Returns the value of acc_get_device_type_l () as an integer of kind
+! acc_device_kind.
+function acc_get_device_type_h () result (res)
+  use openacc_kinds
+  use openacc_internal, only: acc_get_device_type_l
+  integer (acc_device_kind) :: res
+
+  res = acc_get_device_type_l ()
+end function acc_get_device_type_h
|
|
+ |
|
|
+! Forwards N (default integer) and D (integer of kind acc_device_kind),
+! in that order, to acc_set_device_num_l.
+subroutine acc_set_device_num_h (n, d)
+  use openacc_kinds
+  use openacc_internal, only: acc_set_device_num_l
+  integer (acc_device_kind) :: d
+  integer :: n
+
+  call acc_set_device_num_l (n, d)
+end subroutine acc_set_device_num_h
|
|
+ |
|
|
+! Returns the integer value of acc_get_device_num_l (d), where D is an
+! integer of kind acc_device_kind.
+function acc_get_device_num_h (d) result (res)
+  use openacc_kinds
+  use openacc_internal, only: acc_get_device_num_l
+  integer (acc_device_kind) :: d
+  integer :: res
+
+  res = acc_get_device_num_l (d)
+end function acc_get_device_num_h
|
|
+ |
|
|
+! Logical wrapper around acc_async_test_l: the result is .TRUE. exactly
+! when acc_async_test_l (a) equals 1, and .FALSE. for any other value.
+function acc_async_test_h (a) result (res)
+  use openacc_internal, only: acc_async_test_l
+  integer :: a
+  logical :: res
+
+  res = acc_async_test_l (a) == 1
+end function acc_async_test_h
|
|
+ |
|
|
+! Logical wrapper around acc_async_test_all_l: the result is .TRUE.
+! exactly when acc_async_test_all_l () equals 1, and .FALSE. otherwise.
+function acc_async_test_all_h () result (res)
+  use openacc_internal, only: acc_async_test_all_l
+  logical :: res
+
+  res = acc_async_test_all_l () == 1
+end function acc_async_test_all_h
|
|
+ |
|
|
+! Forwards the default-integer argument A unchanged to acc_wait_l.
+subroutine acc_wait_h (a)
+  use openacc_internal, only: acc_wait_l
+  integer :: a
+
+  call acc_wait_l (a)
+end subroutine acc_wait_h
|
|
+ |
|
|
+! Forwards the two default-integer arguments A1 and A2, in that order,
+! to acc_wait_async_l.
+subroutine acc_wait_async_h (a1, a2)
+  use openacc_internal, only: acc_wait_async_l
+  integer :: a1
+  integer :: a2
+
+  call acc_wait_async_l (a1, a2)
+end subroutine acc_wait_async_h
|
|
+ |
|
|
+! Argument-less wrapper that simply calls acc_wait_all_l.
+subroutine acc_wait_all_h ()
+  use openacc_internal, only: acc_wait_all_l
+
+  call acc_wait_all_l ()
+end subroutine acc_wait_all_h
|
|
+ |
|
|
+! Forwards the default-integer argument A unchanged to
+! acc_wait_all_async_l.
+subroutine acc_wait_all_async_h (a)
+  use openacc_internal, only: acc_wait_all_async_l
+  integer :: a
+
+  call acc_wait_all_async_l (a)
+end subroutine acc_wait_all_async_h
|
|
+ |
|
|
+! Forwards D (an integer of kind acc_device_kind) unchanged to acc_init_l.
+subroutine acc_init_h (d)
+  use openacc_kinds
+  use openacc_internal, only: acc_init_l
+  integer (acc_device_kind) :: d
+
+  call acc_init_l (d)
+end subroutine acc_init_h
|
|
+ |
|
|
+! Forwards D (an integer of kind acc_device_kind) unchanged to
+! acc_shutdown_l.
+subroutine acc_shutdown_h (d)
+  use openacc_kinds
+  use openacc_internal, only: acc_shutdown_l
+  integer (acc_device_kind) :: d
+
+  call acc_shutdown_l (d)
+end subroutine acc_shutdown_h
|
|
+ |
|
|
+! Logical wrapper around acc_on_device_l: the result is .TRUE. exactly
+! when acc_on_device_l (d) equals 1, and .FALSE. for any other value.
+! D is an integer of kind acc_device_kind.
+function acc_on_device_h (d) result (res)
+  use openacc_kinds
+  use openacc_internal, only: acc_on_device_l
+  logical :: res
+  integer (acc_device_kind) :: d
+
+  res = acc_on_device_l (d) == 1
+end function acc_on_device_h
|
|
+ |
|
|
+! Two-argument form with a c_int32_t length: LEN is converted to kind
+! c_size_t and passed, together with A, to acc_copyin_l.
+subroutine acc_copyin_32_h (a, len)
+  use openacc_internal, only: acc_copyin_l
+  use iso_c_binding, only: c_size_t, c_int32_t
+  integer (c_int32_t) :: len
+  type (*), dimension (*) :: a
+
+  call acc_copyin_l (a, int (len, c_size_t))
+end subroutine acc_copyin_32_h
|
|
+ |
|
|
+! Two-argument form with a c_int64_t length: LEN is converted to kind
+! c_size_t and passed, together with A, to acc_copyin_l.
+subroutine acc_copyin_64_h (a, len)
+  use openacc_internal, only: acc_copyin_l
+  use iso_c_binding, only: c_size_t, c_int64_t
+  integer (c_int64_t) :: len
+  type (*), dimension (*) :: a
+
+  call acc_copyin_l (a, int (len, c_size_t))
+end subroutine acc_copyin_64_h
|
|
+ |
|
|
+! One-argument form: A is an assumed-type, assumed-rank dummy declared
+! contiguous; sizeof (a) is passed as the length to acc_copyin_l.
+subroutine acc_copyin_array_h (a)
+  use openacc_internal, only: acc_copyin_l
+  type (*), contiguous, dimension (..) :: a
+
+  call acc_copyin_l (a, sizeof (a))
+end subroutine acc_copyin_array_h
|
|
+ |
|
|
+! Two-argument form with a c_int32_t length: LEN is converted to kind
+! c_size_t and passed, together with A, to acc_present_or_copyin_l.
+subroutine acc_present_or_copyin_32_h (a, len)
+  use openacc_internal, only: acc_present_or_copyin_l
+  use iso_c_binding, only: c_size_t, c_int32_t
+  integer (c_int32_t) :: len
+  type (*), dimension (*) :: a
+
+  call acc_present_or_copyin_l (a, int (len, c_size_t))
+end subroutine acc_present_or_copyin_32_h
|
|
+ |
|
|
+! Two-argument form with a c_int64_t length: LEN is converted to kind
+! c_size_t and passed, together with A, to acc_present_or_copyin_l.
+subroutine acc_present_or_copyin_64_h (a, len)
+  use openacc_internal, only: acc_present_or_copyin_l
+  use iso_c_binding, only: c_size_t, c_int64_t
+  integer (c_int64_t) :: len
+  type (*), dimension (*) :: a
+
+  call acc_present_or_copyin_l (a, int (len, c_size_t))
+end subroutine acc_present_or_copyin_64_h
|
|
+ |
|
|
+! One-argument form: A is an assumed-type, assumed-rank dummy declared
+! contiguous; sizeof (a) is passed as the length to
+! acc_present_or_copyin_l.
+subroutine acc_present_or_copyin_array_h (a)
+  use openacc_internal, only: acc_present_or_copyin_l
+  type (*), contiguous, dimension (..) :: a
+
+  call acc_present_or_copyin_l (a, sizeof (a))
+end subroutine acc_present_or_copyin_array_h
|
|
+ |
|
|
+! Two-argument form with a c_int32_t length: LEN is converted to kind
+! c_size_t and passed, together with A, to acc_create_l.
+subroutine acc_create_32_h (a, len)
+  use openacc_internal, only: acc_create_l
+  use iso_c_binding, only: c_size_t, c_int32_t
+  integer (c_int32_t) :: len
+  type (*), dimension (*) :: a
+
+  call acc_create_l (a, int (len, c_size_t))
+end subroutine acc_create_32_h
|
|
+ |
|
|
+! Two-argument form with a c_int64_t length: LEN is converted to kind
+! c_size_t and passed, together with A, to acc_create_l.
+subroutine acc_create_64_h (a, len)
+  use openacc_internal, only: acc_create_l
+  use iso_c_binding, only: c_size_t, c_int64_t
+  integer (c_int64_t) :: len
+  type (*), dimension (*) :: a
+
+  call acc_create_l (a, int (len, c_size_t))
+end subroutine acc_create_64_h
|
|
+ |
|
|
+! One-argument form: A is an assumed-type, assumed-rank dummy declared
+! contiguous; sizeof (a) is passed as the length to acc_create_l.
+subroutine acc_create_array_h (a)
+  use openacc_internal, only: acc_create_l
+  type (*), contiguous, dimension (..) :: a
+
+  call acc_create_l (a, sizeof (a))
+end subroutine acc_create_array_h
|
|
+ |
|
|
+! Two-argument form with a c_int32_t length: LEN is converted to kind
+! c_size_t and passed, together with A, to acc_present_or_create_l.
+subroutine acc_present_or_create_32_h (a, len)
+  use openacc_internal, only: acc_present_or_create_l
+  use iso_c_binding, only: c_size_t, c_int32_t
+  integer (c_int32_t) :: len
+  type (*), dimension (*) :: a
+
+  call acc_present_or_create_l (a, int (len, c_size_t))
+end subroutine acc_present_or_create_32_h
|
|
+ |
|
|
+! Two-argument form with a c_int64_t length: LEN is converted to kind
+! c_size_t and passed, together with A, to acc_present_or_create_l.
+subroutine acc_present_or_create_64_h (a, len)
+  use openacc_internal, only: acc_present_or_create_l
+  use iso_c_binding, only: c_size_t, c_int64_t
+  integer (c_int64_t) :: len
+  type (*), dimension (*) :: a
+
+  call acc_present_or_create_l (a, int (len, c_size_t))
+end subroutine acc_present_or_create_64_h
|
|
+ |
|
|
+! One-argument form: A is an assumed-type, assumed-rank dummy declared
+! contiguous; sizeof (a) is passed as the length to
+! acc_present_or_create_l.
+subroutine acc_present_or_create_array_h (a)
+  use openacc_internal, only: acc_present_or_create_l
+  type (*), contiguous, dimension (..) :: a
+
+  call acc_present_or_create_l (a, sizeof (a))
+end subroutine acc_present_or_create_array_h
|
|
+ |
|
|
+! Two-argument form with a c_int32_t length: LEN is converted to kind
+! c_size_t and passed, together with A, to acc_copyout_l.
+subroutine acc_copyout_32_h (a, len)
+  use openacc_internal, only: acc_copyout_l
+  use iso_c_binding, only: c_size_t, c_int32_t
+  integer (c_int32_t) :: len
+  type (*), dimension (*) :: a
+
+  call acc_copyout_l (a, int (len, c_size_t))
+end subroutine acc_copyout_32_h
|
|
+ |
|
|
+! Two-argument form with a c_int64_t length: LEN is converted to kind
+! c_size_t and passed, together with A, to acc_copyout_l.
+subroutine acc_copyout_64_h (a, len)
+  use openacc_internal, only: acc_copyout_l
+  use iso_c_binding, only: c_size_t, c_int64_t
+  integer (c_int64_t) :: len
+  type (*), dimension (*) :: a
+
+  call acc_copyout_l (a, int (len, c_size_t))
+end subroutine acc_copyout_64_h
|
|
+ |
|
|
+! One-argument form: A is an assumed-type, assumed-rank dummy declared
+! contiguous; sizeof (a) is passed as the length to acc_copyout_l.
+subroutine acc_copyout_array_h (a)
+  use openacc_internal, only: acc_copyout_l
+  type (*), contiguous, dimension (..) :: a
+
+  call acc_copyout_l (a, sizeof (a))
+end subroutine acc_copyout_array_h
|
|
+ |
|
|
+! Two-argument form with a c_int32_t length: LEN is converted to kind
+! c_size_t and passed, together with A, to acc_delete_l.
+subroutine acc_delete_32_h (a, len)
+  use openacc_internal, only: acc_delete_l
+  use iso_c_binding, only: c_size_t, c_int32_t
+  integer (c_int32_t) :: len
+  type (*), dimension (*) :: a
+
+  call acc_delete_l (a, int (len, c_size_t))
+end subroutine acc_delete_32_h
|
|
+ |
|
|
+! Two-argument form with a c_int64_t length: LEN is converted to kind
+! c_size_t and passed, together with A, to acc_delete_l.
+subroutine acc_delete_64_h (a, len)
+  use openacc_internal, only: acc_delete_l
+  use iso_c_binding, only: c_size_t, c_int64_t
+  integer (c_int64_t) :: len
+  type (*), dimension (*) :: a
+
+  call acc_delete_l (a, int (len, c_size_t))
+end subroutine acc_delete_64_h
|
|
+ |
|
|
+! One-argument form: A is an assumed-type, assumed-rank dummy declared
+! contiguous; sizeof (a) is passed as the length to acc_delete_l.
+subroutine acc_delete_array_h (a)
+  use openacc_internal, only: acc_delete_l
+  type (*), contiguous, dimension (..) :: a
+
+  call acc_delete_l (a, sizeof (a))
+end subroutine acc_delete_array_h
|
|
+ |
|
|
+! Two-argument form with a c_int32_t length: LEN is converted to kind
+! c_size_t and passed, together with A, to acc_update_device_l.
+subroutine acc_update_device_32_h (a, len)
+  use openacc_internal, only: acc_update_device_l
+  use iso_c_binding, only: c_size_t, c_int32_t
+  integer (c_int32_t) :: len
+  type (*), dimension (*) :: a
+
+  call acc_update_device_l (a, int (len, c_size_t))
+end subroutine acc_update_device_32_h
|
|
+ |
|
|
+! Two-argument form with a c_int64_t length: LEN is converted to kind
+! c_size_t and passed, together with A, to acc_update_device_l.
+subroutine acc_update_device_64_h (a, len)
+  use openacc_internal, only: acc_update_device_l
+  use iso_c_binding, only: c_size_t, c_int64_t
+  integer (c_int64_t) :: len
+  type (*), dimension (*) :: a
+
+  call acc_update_device_l (a, int (len, c_size_t))
+end subroutine acc_update_device_64_h
|
|
+ |
|
|
+! One-argument form: A is an assumed-type, assumed-rank dummy declared
+! contiguous; sizeof (a) is passed as the length to acc_update_device_l.
+subroutine acc_update_device_array_h (a)
+  use openacc_internal, only: acc_update_device_l
+  type (*), contiguous, dimension (..) :: a
+
+  call acc_update_device_l (a, sizeof (a))
+end subroutine acc_update_device_array_h
|
|
+ |
|
|
+! Two-argument form with a c_int32_t length: LEN is converted to kind
+! c_size_t and passed, together with A, to acc_update_self_l.
+subroutine acc_update_self_32_h (a, len)
+  use openacc_internal, only: acc_update_self_l
+  use iso_c_binding, only: c_size_t, c_int32_t
+  integer (c_int32_t) :: len
+  type (*), dimension (*) :: a
+
+  call acc_update_self_l (a, int (len, c_size_t))
+end subroutine acc_update_self_32_h
|
|
+ |
|
|
+! Two-argument form with a c_int64_t length: LEN is converted to kind
+! c_size_t and passed, together with A, to acc_update_self_l.
+subroutine acc_update_self_64_h (a, len)
+  use openacc_internal, only: acc_update_self_l
+  use iso_c_binding, only: c_size_t, c_int64_t
+  integer (c_int64_t) :: len
+  type (*), dimension (*) :: a
+
+  call acc_update_self_l (a, int (len, c_size_t))
+end subroutine acc_update_self_64_h
|
|
+ |
|
|
+! One-argument form: A is an assumed-type, assumed-rank dummy declared
+! contiguous; sizeof (a) is passed as the length to acc_update_self_l.
+subroutine acc_update_self_array_h (a)
+  use openacc_internal, only: acc_update_self_l
+  type (*), contiguous, dimension (..) :: a
+
+  call acc_update_self_l (a, sizeof (a))
+end subroutine acc_update_self_array_h
|
|
+ |
|
|
+! Two-argument form with a c_int32_t length: LEN is converted to kind
+! c_size_t and passed, together with A, to acc_is_present_l.  The result
+! is .TRUE. exactly when that call returns 1, and .FALSE. otherwise.
+function acc_is_present_32_h (a, len) result (res)
+  use openacc_internal, only: acc_is_present_l
+  use iso_c_binding, only: c_size_t, c_int32_t
+  integer (c_int32_t) :: len
+  type (*), dimension (*) :: a
+  logical :: res
+
+  res = acc_is_present_l (a, int (len, c_size_t)) == 1
+end function acc_is_present_32_h
|
|
+ |
|
|
+! Two-argument form with a c_int64_t length: LEN is converted to kind
+! c_size_t and passed, together with A, to acc_is_present_l.  The result
+! is .TRUE. exactly when that call returns 1, and .FALSE. otherwise.
+function acc_is_present_64_h (a, len) result (res)
+  use openacc_internal, only: acc_is_present_l
+  use iso_c_binding, only: c_size_t, c_int64_t
+  integer (c_int64_t) :: len
+  type (*), dimension (*) :: a
+  logical :: res
+
+  res = acc_is_present_l (a, int (len, c_size_t)) == 1
+end function acc_is_present_64_h
|
|
+ |
|
|
+! One-argument form: A is an assumed-type, assumed-rank dummy declared
+! contiguous; sizeof (a) is passed as the length to acc_is_present_l.
+! The result is .TRUE. exactly when that call returns 1.
+function acc_is_present_array_h (a) result (res)
+  use openacc_internal, only: acc_is_present_l
+  type (*), contiguous, dimension (..) :: a
+  logical :: res
+
+  res = acc_is_present_l (a, sizeof (a)) .eq. 1
+end function acc_is_present_array_h
|
|
|
|