[2557] | 1 | From efccdcdb63a7f7cc7cc1816f0d5e2524eb084c72 Mon Sep 17 00:00:00 2001 |
---|
| 2 | From: Thomas Gleixner <tglx@linutronix.de> |
---|
| 3 | Date: Tue, 3 Jun 2014 12:27:08 +0000 |
---|
| 4 | Subject: [PATCH 4/4] futex: Make lookup_pi_state more robust |
---|
| 5 | |
---|
| 6 | commit 54a217887a7b658e2650c3feff22756ab80c7339 upstream. |
---|
| 7 | |
---|
| 8 | The current implementation of lookup_pi_state has ambigous handling of |
---|
| 9 | the TID value 0 in the user space futex. We can get into the kernel |
---|
| 10 | even if the TID value is 0, because either there is a stale waiters bit |
---|
| 11 | or the owner died bit is set or we are called from the requeue_pi path |
---|
| 12 | or from user space just for fun. |
---|
| 13 | |
---|
| 14 | The current code avoids an explicit sanity check for pid = 0 in case |
---|
| 15 | that kernel internal state (waiters) are found for the user space |
---|
| 16 | address. This can lead to state leakage and worse under some |
---|
| 17 | circumstances. |
---|
| 18 | |
---|
| 19 | Handle the cases explicit: |
---|
| 20 | |
---|
| 21 | Waiter | pi_state | pi->owner | uTID | uODIED | ? |
---|
| 22 | |
---|
| 23 | [1] NULL | --- | --- | 0 | 0/1 | Valid |
---|
| 24 | [2] NULL | --- | --- | >0 | 0/1 | Valid |
---|
| 25 | |
---|
| 26 | [3] Found | NULL | -- | Any | 0/1 | Invalid |
---|
| 27 | |
---|
| 28 | [4] Found | Found | NULL | 0 | 1 | Valid |
---|
| 29 | [5] Found | Found | NULL | >0 | 1 | Invalid |
---|
| 30 | |
---|
| 31 | [6] Found | Found | task | 0 | 1 | Valid |
---|
| 32 | |
---|
| 33 | [7] Found | Found | NULL | Any | 0 | Invalid |
---|
| 34 | |
---|
| 35 | [8] Found | Found | task | ==taskTID | 0/1 | Valid |
---|
| 36 | [9] Found | Found | task | 0 | 0 | Invalid |
---|
| 37 | [10] Found | Found | task | !=taskTID | 0/1 | Invalid |
---|
| 38 | |
---|
| 39 | [1] Indicates that the kernel can acquire the futex atomically. We |
---|
| 40 | came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. |
---|
| 41 | |
---|
| 42 | [2] Valid, if TID does not belong to a kernel thread. If no matching |
---|
| 43 | thread is found then it indicates that the owner TID has died. |
---|
| 44 | |
---|
| 45 | [3] Invalid. The waiter is queued on a non PI futex |
---|
| 46 | |
---|
| 47 | [4] Valid state after exit_robust_list(), which sets the user space |
---|
| 48 | value to FUTEX_WAITERS | FUTEX_OWNER_DIED. |
---|
| 49 | |
---|
| 50 | [5] The user space value got manipulated between exit_robust_list() |
---|
| 51 | and exit_pi_state_list() |
---|
| 52 | |
---|
| 53 | [6] Valid state after exit_pi_state_list() which sets the new owner in |
---|
| 54 | the pi_state but cannot access the user space value. |
---|
| 55 | |
---|
| 56 | [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set. |
---|
| 57 | |
---|
| 58 | [8] Owner and user space value match |
---|
| 59 | |
---|
| 60 | [9] There is no transient state which sets the user space TID to 0 |
---|
| 61 | except exit_robust_list(), but this is indicated by the |
---|
| 62 | FUTEX_OWNER_DIED bit. See [4] |
---|
| 63 | |
---|
| 64 | [10] There is no transient state which leaves owner and user space |
---|
| 65 | TID out of sync. |
---|
| 66 | |
---|
| 67 | Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
---|
| 68 | Cc: Kees Cook <keescook@chromium.org> |
---|
| 69 | Cc: Will Drewry <wad@chromium.org> |
---|
| 70 | Cc: Darren Hart <dvhart@linux.intel.com> |
---|
| 71 | Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
---|
| 72 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
---|
| 73 | --- |
---|
| 74 | kernel/futex.c | 134 ++++++++++++++++++++++++++++++++++++++++++++------------ |
---|
| 75 | 1 file changed, 106 insertions(+), 28 deletions(-) |
---|
| 76 | |
---|
| 77 | diff --git a/kernel/futex.c b/kernel/futex.c |
---|
| 78 | index 9720c42..625a4e6 100644 |
---|
| 79 | --- a/kernel/futex.c |
---|
| 80 | +++ b/kernel/futex.c |
---|
| 81 | @@ -592,10 +592,58 @@ void exit_pi_state_list(struct task_struct *curr) |
---|
| 82 | raw_spin_unlock_irq(&curr->pi_lock); |
---|
| 83 | } |
---|
| 84 | |
---|
| 85 | +/* |
---|
| 86 | + * We need to check the following states: |
---|
| 87 | + * |
---|
| 88 | + * Waiter | pi_state | pi->owner | uTID | uODIED | ? |
---|
| 89 | + * |
---|
| 90 | + * [1] NULL | --- | --- | 0 | 0/1 | Valid |
---|
| 91 | + * [2] NULL | --- | --- | >0 | 0/1 | Valid |
---|
| 92 | + * |
---|
| 93 | + * [3] Found | NULL | -- | Any | 0/1 | Invalid |
---|
| 94 | + * |
---|
| 95 | + * [4] Found | Found | NULL | 0 | 1 | Valid |
---|
| 96 | + * [5] Found | Found | NULL | >0 | 1 | Invalid |
---|
| 97 | + * |
---|
| 98 | + * [6] Found | Found | task | 0 | 1 | Valid |
---|
| 99 | + * |
---|
| 100 | + * [7] Found | Found | NULL | Any | 0 | Invalid |
---|
| 101 | + * |
---|
| 102 | + * [8] Found | Found | task | ==taskTID | 0/1 | Valid |
---|
| 103 | + * [9] Found | Found | task | 0 | 0 | Invalid |
---|
| 104 | + * [10] Found | Found | task | !=taskTID | 0/1 | Invalid |
---|
| 105 | + * |
---|
| 106 | + * [1] Indicates that the kernel can acquire the futex atomically. We |
---|
| 107 | + * came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. |
---|
| 108 | + * |
---|
| 109 | + * [2] Valid, if TID does not belong to a kernel thread. If no matching |
---|
| 110 | + * thread is found then it indicates that the owner TID has died. |
---|
| 111 | + * |
---|
| 112 | + * [3] Invalid. The waiter is queued on a non PI futex |
---|
| 113 | + * |
---|
| 114 | + * [4] Valid state after exit_robust_list(), which sets the user space |
---|
| 115 | + * value to FUTEX_WAITERS | FUTEX_OWNER_DIED. |
---|
| 116 | + * |
---|
| 117 | + * [5] The user space value got manipulated between exit_robust_list() |
---|
| 118 | + * and exit_pi_state_list() |
---|
| 119 | + * |
---|
| 120 | + * [6] Valid state after exit_pi_state_list() which sets the new owner in |
---|
| 121 | + * the pi_state but cannot access the user space value. |
---|
| 122 | + * |
---|
| 123 | + * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set. |
---|
| 124 | + * |
---|
| 125 | + * [8] Owner and user space value match |
---|
| 126 | + * |
---|
| 127 | + * [9] There is no transient state which sets the user space TID to 0 |
---|
| 128 | + * except exit_robust_list(), but this is indicated by the |
---|
| 129 | + * FUTEX_OWNER_DIED bit. See [4] |
---|
| 130 | + * |
---|
| 131 | + * [10] There is no transient state which leaves owner and user space |
---|
| 132 | + * TID out of sync. |
---|
| 133 | + */ |
---|
| 134 | static int |
---|
| 135 | lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, |
---|
| 136 | - union futex_key *key, struct futex_pi_state **ps, |
---|
| 137 | - struct task_struct *task) |
---|
| 138 | + union futex_key *key, struct futex_pi_state **ps) |
---|
| 139 | { |
---|
| 140 | struct futex_pi_state *pi_state = NULL; |
---|
| 141 | struct futex_q *this, *next; |
---|
| 142 | @@ -608,12 +656,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, |
---|
| 143 | plist_for_each_entry_safe(this, next, head, list) { |
---|
| 144 | if (match_futex(&this->key, key)) { |
---|
| 145 | /* |
---|
| 146 | - * Another waiter already exists - bump up |
---|
| 147 | - * the refcount and return its pi_state: |
---|
| 148 | + * Sanity check the waiter before increasing |
---|
| 149 | + * the refcount and attaching to it. |
---|
| 150 | */ |
---|
| 151 | pi_state = this->pi_state; |
---|
| 152 | /* |
---|
| 153 | - * Userspace might have messed up non-PI and PI futexes |
---|
| 154 | + * Userspace might have messed up non-PI and |
---|
| 155 | + * PI futexes [3] |
---|
| 156 | */ |
---|
| 157 | if (unlikely(!pi_state)) |
---|
| 158 | return -EINVAL; |
---|
| 159 | @@ -621,44 +670,70 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, |
---|
| 160 | WARN_ON(!atomic_read(&pi_state->refcount)); |
---|
| 161 | |
---|
| 162 | /* |
---|
| 163 | - * When pi_state->owner is NULL then the owner died |
---|
| 164 | - * and another waiter is on the fly. pi_state->owner |
---|
| 165 | - * is fixed up by the task which acquires |
---|
| 166 | - * pi_state->rt_mutex. |
---|
| 167 | - * |
---|
| 168 | - * We do not check for pid == 0 which can happen when |
---|
| 169 | - * the owner died and robust_list_exit() cleared the |
---|
| 170 | - * TID. |
---|
| 171 | + * Handle the owner died case: |
---|
| 172 | */ |
---|
| 173 | - if (pid && pi_state->owner) { |
---|
| 174 | + if (uval & FUTEX_OWNER_DIED) { |
---|
| 175 | /* |
---|
| 176 | - * Bail out if user space manipulated the |
---|
| 177 | - * futex value. |
---|
| 178 | + * exit_pi_state_list sets owner to NULL and |
---|
| 179 | + * wakes the topmost waiter. The task which |
---|
| 180 | + * acquires the pi_state->rt_mutex will fixup |
---|
| 181 | + * owner. |
---|
| 182 | */ |
---|
| 183 | - if (pid != task_pid_vnr(pi_state->owner)) |
---|
| 184 | + if (!pi_state->owner) { |
---|
| 185 | + /* |
---|
| 186 | + * No pi state owner, but the user |
---|
| 187 | + * space TID is not 0. Inconsistent |
---|
| 188 | + * state. [5] |
---|
| 189 | + */ |
---|
| 190 | + if (pid) |
---|
| 191 | + return -EINVAL; |
---|
| 192 | + /* |
---|
| 193 | + * Take a ref on the state and |
---|
| 194 | + * return. [4] |
---|
| 195 | + */ |
---|
| 196 | + goto out_state; |
---|
| 197 | + } |
---|
| 198 | + |
---|
| 199 | + /* |
---|
| 200 | + * If TID is 0, then either the dying owner |
---|
| 201 | + * has not yet executed exit_pi_state_list() |
---|
| 202 | + * or some waiter acquired the rtmutex in the |
---|
| 203 | + * pi state, but did not yet fixup the TID in |
---|
| 204 | + * user space. |
---|
| 205 | + * |
---|
| 206 | + * Take a ref on the state and return. [6] |
---|
| 207 | + */ |
---|
| 208 | + if (!pid) |
---|
| 209 | + goto out_state; |
---|
| 210 | + } else { |
---|
| 211 | + /* |
---|
| 212 | + * If the owner died bit is not set, |
---|
| 213 | + * then the pi_state must have an |
---|
| 214 | + * owner. [7] |
---|
| 215 | + */ |
---|
| 216 | + if (!pi_state->owner) |
---|
| 217 | return -EINVAL; |
---|
| 218 | } |
---|
| 219 | |
---|
| 220 | /* |
---|
| 221 | - * Protect against a corrupted uval. If uval |
---|
| 222 | - * is 0x80000000 then pid is 0 and the waiter |
---|
| 223 | - * bit is set. So the deadlock check in the |
---|
| 224 | - * calling code has failed and we did not fall |
---|
| 225 | - * into the check above due to !pid. |
---|
| 226 | + * Bail out if user space manipulated the |
---|
| 227 | + * futex value. If pi state exists then the |
---|
| 228 | + * owner TID must be the same as the user |
---|
| 229 | + * space TID. [9/10] |
---|
| 230 | */ |
---|
| 231 | - if (task && pi_state->owner == task) |
---|
| 232 | - return -EDEADLK; |
---|
| 233 | + if (pid != task_pid_vnr(pi_state->owner)) |
---|
| 234 | + return -EINVAL; |
---|
| 235 | |
---|
| 236 | + out_state: |
---|
| 237 | atomic_inc(&pi_state->refcount); |
---|
| 238 | *ps = pi_state; |
---|
| 239 | - |
---|
| 240 | return 0; |
---|
| 241 | } |
---|
| 242 | } |
---|
| 243 | |
---|
| 244 | /* |
---|
| 245 | * We are the first waiter - try to look up the real owner and attach |
---|
| 246 | - * the new pi_state to it, but bail out when TID = 0 |
---|
| 247 | + * the new pi_state to it, but bail out when TID = 0 [1] |
---|
| 248 | */ |
---|
| 249 | if (!pid) |
---|
| 250 | return -ESRCH; |
---|
| 251 | @@ -691,6 +766,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, |
---|
| 252 | return ret; |
---|
| 253 | } |
---|
| 254 | |
---|
| 255 | + /* |
---|
| 256 | + * No existing pi state. First waiter. [2] |
---|
| 257 | + */ |
---|
| 258 | pi_state = alloc_pi_state(); |
---|
| 259 | |
---|
| 260 | /* |
---|
| 261 | @@ -811,7 +889,7 @@ retry: |
---|
| 262 | * We dont have the lock. Look up the PI state (or create it if |
---|
| 263 | * we are the first waiter): |
---|
| 264 | */ |
---|
| 265 | - ret = lookup_pi_state(uval, hb, key, ps, task); |
---|
| 266 | + ret = lookup_pi_state(uval, hb, key, ps); |
---|
| 267 | |
---|
| 268 | if (unlikely(ret)) { |
---|
| 269 | switch (ret) { |
---|
| 270 | @@ -1414,7 +1492,7 @@ retry_private: |
---|
| 271 | * rereading and handing potential crap to |
---|
| 272 | * lookup_pi_state. |
---|
| 273 | */ |
---|
| 274 | - ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL); |
---|
| 275 | + ret = lookup_pi_state(ret, hb2, &key2, &pi_state); |
---|
| 276 | } |
---|
| 277 | |
---|
| 278 | switch (ret) { |
---|
| 279 | -- |
---|
| 280 | 1.7.10.4 |
---|
| 281 | |
---|