Partial backport:

Skipped elide.h since rw-locks are not backported.

commit ca6e601a9d4a72b3699cca15bad12ac1716bf49a
Author: Torvald Riegel <triegel@redhat.com>
Date: Wed Nov 30 17:53:11 2016 +0100

Use C11-like atomics instead of plain memory accesses in x86 lock elision.

This uses atomic operations to access lock elision metadata that is accessed
concurrently (i.e., adapt_count fields).  The data is smaller than a word but
is accessed only with atomic loads and stores; therefore, we add support for
shorter-size atomic loads and stores too.

* include/atomic.h (__atomic_check_size_ls): New.
(atomic_load_relaxed, atomic_load_acquire, atomic_store_relaxed,
atomic_store_release): Use it.
* sysdeps/x86/elide.h (ACCESS_ONCE): Remove.
(elision_adapt, ELIDE_LOCK): Use atomics.
* sysdeps/unix/sysv/linux/x86/elision-lock.c (__lll_lock_elision): Use
atomics and improve code comments.
* sysdeps/unix/sysv/linux/x86/elision-trylock.c
(__lll_trylock_elision): Likewise.

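For reference (this note and the sketch are not part of the patch): the
adapt_count accesses introduced below reduce to GCC's __atomic_load_n and
__atomic_store_n builtins with __ATOMIC_RELAXED, as the atomic_load_relaxed
and atomic_store_relaxed macros in include/atomic.h show.  The standalone
sketch mirrors that pattern; the names decrement_hint and the file-scope
adapt_count variable are made up for illustration only.

    #include <stdio.h>

    /* Hypothetical stand-in for the per-mutex elision counter; in glibc it
       is the short adapt_count field stored next to the futex word.  */
    static short adapt_count;

    /* Decrement the hint counter the way the patched code does: a relaxed
       load followed by a relaxed store.  Concurrent decrements may lose
       updates, which is acceptable because the value is only a hint.  */
    static void
    decrement_hint (short *adapt)
    {
      short val = __atomic_load_n (adapt, __ATOMIC_RELAXED);
      __atomic_store_n (adapt, val - 1, __ATOMIC_RELAXED);
    }

    int
    main (void)
    {
      __atomic_store_n (&adapt_count, 3, __ATOMIC_RELAXED);
      while (__atomic_load_n (&adapt_count, __ATOMIC_RELAXED) > 0)
        decrement_hint (&adapt_count);
      printf ("adapt_count = %d\n", (int) adapt_count);
      return 0;
    }

Plain loads and stores on a field that is written concurrently are a data
race under the C11 model; relaxed-MO atomics avoid that while still compiling
to ordinary MOV instructions on x86.  Relaxed MO is enough here because, as
the new comments note, adapt_count is only a hint and lost updates are
harmless.
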
Index: glibc-2.17-c758a686/include/atomic.h
===================================================================
--- glibc-2.17-c758a686.orig/include/atomic.h
+++ glibc-2.17-c758a686/include/atomic.h
@@ -567,6 +567,20 @@ void __atomic_link_error (void);
if (sizeof (*mem) != 4) \
__atomic_link_error ();
# endif
+/* We additionally provide 8b and 16b atomic loads and stores; we do not yet
+ need other atomic operations of such sizes, and restricting the support to
+ loads and stores makes this easier for archs that do not have native
+ support for atomic operations to less-than-word-sized data. */
+# if __HAVE_64B_ATOMICS == 1
+# define __atomic_check_size_ls(mem) \
+ if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && (sizeof (*mem) != 4) \
+ && (sizeof (*mem) != 8)) \
+ __atomic_link_error ();
+# else
+# define __atomic_check_size_ls(mem) \
+ if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && sizeof (*mem) != 4) \
+ __atomic_link_error ();
+# endif

# define atomic_thread_fence_acquire() \
__atomic_thread_fence (__ATOMIC_ACQUIRE)
@@ -576,18 +590,20 @@ void __atomic_link_error (void);
__atomic_thread_fence (__ATOMIC_SEQ_CST)

# define atomic_load_relaxed(mem) \
- ({ __atomic_check_size((mem)); __atomic_load_n ((mem), __ATOMIC_RELAXED); })
+ ({ __atomic_check_size_ls((mem)); \
+ __atomic_load_n ((mem), __ATOMIC_RELAXED); })
# define atomic_load_acquire(mem) \
- ({ __atomic_check_size((mem)); __atomic_load_n ((mem), __ATOMIC_ACQUIRE); })
+ ({ __atomic_check_size_ls((mem)); \
+ __atomic_load_n ((mem), __ATOMIC_ACQUIRE); })

# define atomic_store_relaxed(mem, val) \
do { \
- __atomic_check_size((mem)); \
+ __atomic_check_size_ls((mem)); \
__atomic_store_n ((mem), (val), __ATOMIC_RELAXED); \
} while (0)
# define atomic_store_release(mem, val) \
do { \
- __atomic_check_size((mem)); \
+ __atomic_check_size_ls((mem)); \
__atomic_store_n ((mem), (val), __ATOMIC_RELEASE); \
} while (0)

Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-lock.c
===================================================================
--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/elision-lock.c
+++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-lock.c
@@ -44,7 +44,13 @@
int
__lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
{
- if (*adapt_count <= 0)
+ /* adapt_count can be accessed concurrently; these accesses can be both
+ inside of transactions (if critical sections are nested and the outer
+ critical section uses lock elision) and outside of transactions. Thus,
+ we need to use atomic accesses to avoid data races. However, the
+ value of adapt_count is just a hint, so relaxed MO accesses are
+ sufficient. */
+ if (atomic_load_relaxed (adapt_count) <= 0)
{
unsigned status;
int try_xbegin;
@@ -70,15 +76,20 @@ __lll_lock_elision (int *futex, short *a
&& _XABORT_CODE (status) == _ABORT_LOCK_BUSY)
{
/* Right now we skip here. Better would be to wait a bit
- and retry. This likely needs some spinning. */
- if (*adapt_count != aconf.skip_lock_busy)
- *adapt_count = aconf.skip_lock_busy;
+ and retry. This likely needs some spinning. See
+ above for why relaxed MO is sufficient. */
+ if (atomic_load_relaxed (adapt_count)
+ != aconf.skip_lock_busy)
+ atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
}
/* Internal abort. There is no chance for retry.
Use the normal locking and next time use lock.
- Be careful to avoid writing to the lock. */
- else if (*adapt_count != aconf.skip_lock_internal_abort)
- *adapt_count = aconf.skip_lock_internal_abort;
+ Be careful to avoid writing to the lock. See above for why
+ relaxed MO is sufficient. */
+ else if (atomic_load_relaxed (adapt_count)
+ != aconf.skip_lock_internal_abort)
+ atomic_store_relaxed (adapt_count,
+ aconf.skip_lock_internal_abort);
break;
}
}
@@ -87,7 +98,8 @@ __lll_lock_elision (int *futex, short *a
{
/* Use a normal lock until the threshold counter runs out.
Lost updates possible. */
- (*adapt_count)--;
+ atomic_store_relaxed (adapt_count,
+ atomic_load_relaxed (adapt_count) - 1);
}

/* Use a normal lock here. */
Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c
===================================================================
--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c
+++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c
@@ -36,8 +36,10 @@ __lll_trylock_elision (int *futex, short
return an error. */
_xabort (_ABORT_NESTED_TRYLOCK);

- /* Only try a transaction if it's worth it. */
- if (*adapt_count <= 0)
+ /* Only try a transaction if it's worth it. See __lll_lock_elision for
+ why we need atomic accesses. Relaxed MO is sufficient because this is
+ just a hint. */
+ if (atomic_load_relaxed (adapt_count) <= 0)
{
unsigned status;

@@ -55,16 +57,18 @@ __lll_trylock_elision (int *futex, short
if (!(status & _XABORT_RETRY))
{
/* Internal abort. No chance for retry. For future
- locks don't try speculation for some time. */
- if (*adapt_count != aconf.skip_trylock_internal_abort)
- *adapt_count = aconf.skip_trylock_internal_abort;
+ locks don't try speculation for some time. See above for MO. */
+ if (atomic_load_relaxed (adapt_count)
+ != aconf.skip_lock_internal_abort)
+ atomic_store_relaxed (adapt_count, aconf.skip_lock_internal_abort);
}
/* Could do some retries here. */
}
else
{
- /* Lost updates are possible, but harmless. */
- (*adapt_count)--;
+ /* Lost updates are possible but harmless (see above). */
+ atomic_store_relaxed (adapt_count,
+ atomic_load_relaxed (adapt_count) - 1);
}

return lll_trylock (*futex);
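
For reference (not part of the patch): __atomic_check_size_ls above relies on
the usual glibc trick of calling a function that is declared but never
defined, so an unsupported access size is caught at link time while supported
sizes compile to nothing.  A minimal sketch of the same pattern, using the
hypothetical names example_link_error and load_relaxed_checked and assuming
GCC statement expressions plus the __atomic builtins:

    #include <stdio.h>

    /* Deliberately never defined: if a call to it survives, linking fails.
       glibc's __atomic_link_error () is used the same way.  */
    extern void example_link_error (void);

    /* Allow only 1-, 2-, 4- and 8-byte relaxed loads.  The sizeof checks are
       compile-time constants, so for supported types the dead call is folded
       away and the program links (build with -O to be safe; glibc is always
       built with optimization); for anything else the link breaks.  */
    #define load_relaxed_checked(mem) \
      ({ if (sizeof (*(mem)) != 1 && sizeof (*(mem)) != 2 \
             && sizeof (*(mem)) != 4 && sizeof (*(mem)) != 8) \
           example_link_error (); \
         __atomic_load_n ((mem), __ATOMIC_RELAXED); })

    static short counter = 42;

    int
    main (void)
    {
      /* sizeof (short) == 2, so this compiles and links cleanly.  */
      printf ("counter = %d\n", (int) load_relaxed_checked (&counter));
      return 0;
    }

Passing a type with an unsupported size would leave the call to
example_link_error in place and the build would fail at link time, which is
the same way the patch rejects unsupported sizes for atomic_load_relaxed and
friends.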