1 /* 2 * linux/mm/oom_kill.c 3 * 4 * Copyright (C) 1998,2000 Rik van Riel 5 * Thanks go out to Claus Fischer for some serious inspiration and 6 * for goading me into coding this file... 7 * 8 * The routines in this file are used to kill a process when 9 * we're seriously out of memory. This gets called from kswapd() 10 * in linux/mm/vmscan.c when we really run out of memory. 11 * 12 * Since we won't call these routines often (on a well-configured 13 * machine) this file will double as a 'coding guide' and a signpost 14 * for newbie kernel hackers. It features several pointers to major 15 * kernel subsystems and hints as to where to find out what things do. 16 */ 17 18 #include <linux/mm.h> 19 #include <linux/sched.h> 20 #include <linux/swap.h> 21 #include <linux/timex.h> 22 #include <linux/jiffies.h> 23 24 /* #define DEBUG */ 25 26 /** 27 * oom_badness - calculate a numeric value for how bad this task has been 28 * @p: task struct of which task we should calculate 29 * 30 * The formula used is relatively simple and documented inline in the 31 * function. The main rationale is that we want to select a good task 32 * to kill when we run out of memory. 33 * 34 * Good in this context means that: 35 * 1) we lose the minimum amount of work done 36 * 2) we recover a large amount of memory 37 * 3) we don't kill anything innocent of eating tons of memory 38 * 4) we want to kill the minimum amount of processes (one) 39 * 5) we try to kill the process the user expects us to kill, this 40 * algorithm has been meticulously tuned to meet the principle 41 * of least surprise ... (be careful when you change it) 42 */ 43 44 static int badness(struct task_struct *p) 45 { 46 int points, cpu_time, run_time, s; 47 48 if (!p->mm) 49 return 0; 50 51 if (p->flags & PF_MEMDIE) 52 return 0; 53 /* 54 * The memory size of the process is the basis for the badness. 55 */ 56 points = p->mm->total_vm; 57 58 /* 59 * CPU time is in seconds and run time is in minutes. There is no 60 * particular reason for this other than that it turned out to work 61 * very well in practice. 62 */ 63 cpu_time = (p->utime + p->stime) >> (SHIFT_HZ + 3); 64 run_time = (get_jiffies_64() - p->start_time) >> (SHIFT_HZ + 10); 65 66 s = int_sqrt(cpu_time); 67 if (s) 68 points /= s; 69 s = int_sqrt(int_sqrt(run_time)); 70 if (s) 71 points /= s; 72 73 /* 74 * Niced processes are most likely less important, so double 75 * their badness points. 76 */ 77 if (task_nice(p) > 0) 78 points *= 2; 79 80 /* 81 * Superuser processes are usually more important, so we make it 82 * less likely that we kill those. 83 */ 84 if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_ADMIN) || 85 p->uid == 0 || p->euid == 0) 86 points /= 4; 87 88 /* 89 * We don't want to kill a process with direct hardware access. 90 * Not only could that mess up the hardware, but usually users 91 * tend to only have this flag set on applications they think 92 * of as important. 93 */ 94 if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) 95 points /= 4; 96 #ifdef DEBUG 97 printk(KERN_DEBUG "OOMkill: task %d (%s) got %d points\n", 98 p->pid, p->comm, points); 99 #endif 100 return points; 101 } 102 103 /* 104 * Simple selection loop. We chose the process with the highest 105 * number of 'points'. We expect the caller will lock the tasklist. 106 * 107 * (not docbooked, we don't want this one cluttering up the manual) 108 */ 109 static struct task_struct * select_bad_process(void) 110 { 111 int maxpoints = 0; 112 struct task_struct *g, *p; 113 struct task_struct *chosen = NULL; 114 115 do_each_thread(g, p) 116 if (p->pid) { 117 int points = badness(p); 118 if (points > maxpoints) { 119 chosen = p; 120 maxpoints = points; 121 } 122 if (p->flags & PF_SWAPOFF) 123 return p; 124 } 125 while_each_thread(g, p); 126 return chosen; 127 } 128 129 /** 130 * We must be careful though to never send SIGKILL a process with 131 * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that 132 * we select a process with CAP_SYS_RAW_IO set). 133 */ 134 static void __oom_kill_task(task_t *p) 135 { 136 task_lock(p); 137 if (!p->mm || p->mm == &init_mm) { 138 WARN_ON(1); 139 printk(KERN_WARNING "tried to kill an mm-less task!\n"); 140 task_unlock(p); 141 return; 142 } 143 task_unlock(p); 144 printk(KERN_ERR "Out of Memory: Killed process %d (%s).\n", p->pid, p->comm); 145 146 /* 147 * We give our sacrificial lamb high priority and access to 148 * all the memory it needs. That way it should be able to 149 * exit() and clear out its resources quickly... 150 */ 151 p->time_slice = HZ; 152 p->flags |= PF_MEMALLOC | PF_MEMDIE; 153 154 /* This process has hardware access, be more careful. */ 155 if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) { 156 force_sig(SIGTERM, p); 157 } else { 158 force_sig(SIGKILL, p); 159 } 160 } 161 162 static struct mm_struct *oom_kill_task(task_t *p) 163 { 164 struct mm_struct *mm = get_task_mm(p); 165 if (!mm || mm == &init_mm) 166 return NULL; 167 __oom_kill_task(p); 168 return mm; 169 } 170 171 172 /** 173 * oom_kill - kill the "best" process when we run out of memory 174 * 175 * If we run out of memory, we have the choice between either 176 * killing a random task (bad), letting the system crash (worse) 177 * OR try to be smart about which process to kill. Note that we 178 * don't have to be perfect here, we just have to be good. 179 */ 180 static void oom_kill(void) 181 { 182 struct mm_struct *mm; 183 struct task_struct *g, *p, *q; 184 185 read_lock(&tasklist_lock); 186 retry: 187 p = select_bad_process(); 188 189 /* Found nothing?!?! Either we hang forever, or we panic. */ 190 if (!p) { 191 show_free_areas(); 192 panic("Out of memory and no killable processes...\n"); 193 } 194 195 mm = oom_kill_task(p); 196 if (!mm) 197 goto retry; 198 /* 199 * kill all processes that share the ->mm (i.e. all threads), 200 * but are in a different thread group 201 */ 202 do_each_thread(g, q) 203 if (q->mm == mm && q->tgid != p->tgid) 204 __oom_kill_task(q); 205 while_each_thread(g, q); 206 if (!p->mm) 207 printk(KERN_INFO "Fixed up OOM kill of mm-less task\n"); 208 read_unlock(&tasklist_lock); 209 mmput(mm); 210 211 /* 212 * Make kswapd go out of the way, so "p" has a good chance of 213 * killing itself before someone else gets the chance to ask 214 * for more memory. 215 */ 216 yield(); 217 return; 218 } 219 220 /** 221 * out_of_memory - is the system out of memory? 222 */ 223 void out_of_memory(int gfp_mask) 224 { 225 /* 226 * oom_lock protects out_of_memory()'s static variables. 227 * It's a global lock; this is not performance-critical. 228 */ 229 static spinlock_t oom_lock = SPIN_LOCK_UNLOCKED; 230 static unsigned long first, last, count, lastkill; 231 unsigned long now, since; 232 233 spin_lock(&oom_lock); 234 now = jiffies; 235 since = now - last; 236 last = now; 237 238 /* 239 * If it's been a long time since last failure, 240 * we're not oom. 241 */ 242 if (since > 5*HZ) 243 goto reset; 244 245 /* 246 * If we haven't tried for at least one second, 247 * we're not really oom. 248 */ 249 since = now - first; 250 if (since < HZ) 251 goto out_unlock; 252 253 /* 254 * If we have gotten only a few failures, 255 * we're not really oom. 256 */ 257 if (++count < 10) 258 goto out_unlock; 259 260 /* 261 * If we just killed a process, wait a while 262 * to give that task a chance to exit. This 263 * avoids killing multiple processes needlessly. 264 */ 265 since = now - lastkill; 266 if (since < HZ*5) 267 goto out_unlock; 268 269 /* 270 * Ok, really out of memory. Kill something. 271 */ 272 lastkill = now; 273 274 printk("oom-killer: gfp_mask=0x%x\n", gfp_mask); 275 show_free_areas(); 276 277 /* oom_kill() sleeps */ 278 spin_unlock(&oom_lock); 279 oom_kill(); 280 spin_lock(&oom_lock); 281 282 reset: 283 /* 284 * We dropped the lock above, so check to be sure the variable 285 * first only ever increases to prevent false OOM's. 286 */ 287 if (time_after(now, first)) 288 first = now; 289 count = 0; 290 291 out_unlock: 292 spin_unlock(&oom_lock); 293 } 294
This page was automatically generated by LXR 0.3.1. • Linux is a registered trademark of Linus Torvalds