1 /* 2 * linux/mm/swap_state.c 3 * 4 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 5 * Swap reorganised 29.12.95, Stephen Tweedie 6 * 7 * Rewritten to use page cache, (C) 1998 Stephen Tweedie 8 */ 9 #include <linux/module.h> 10 #include <linux/mm.h> 11 #include <linux/kernel_stat.h> 12 #include <linux/swap.h> 13 #include <linux/init.h> 14 #include <linux/pagemap.h> 15 #include <linux/buffer_head.h> 16 #include <linux/backing-dev.h> 17 18 #include <asm/pgtable.h> 19 20 /* 21 * swapper_space is a fiction, retained to simplify the path through 22 * vmscan's shrink_list, to make sync_page look nicer, and to allow 23 * future use of radix_tree tags in the swap cache. 24 */ 25 static struct address_space_operations swap_aops = { 26 .writepage = swap_writepage, 27 .sync_page = block_sync_page, 28 .set_page_dirty = __set_page_dirty_nobuffers, 29 }; 30 31 static struct backing_dev_info swap_backing_dev_info = { 32 .memory_backed = 1, /* Does not contribute to dirty memory */ 33 .unplug_io_fn = swap_unplug_io_fn, 34 }; 35 36 struct address_space swapper_space = { 37 .page_tree = RADIX_TREE_INIT(GFP_ATOMIC), 38 .tree_lock = SPIN_LOCK_UNLOCKED, 39 .a_ops = &swap_aops, 40 .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), 41 .backing_dev_info = &swap_backing_dev_info, 42 }; 43 EXPORT_SYMBOL(swapper_space); 44 45 #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) 46 47 static struct { 48 unsigned long add_total; 49 unsigned long del_total; 50 unsigned long find_success; 51 unsigned long find_total; 52 unsigned long noent_race; 53 unsigned long exist_race; 54 } swap_cache_info; 55 56 void show_swap_cache_info(void) 57 { 58 printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n", 59 swap_cache_info.add_total, swap_cache_info.del_total, 60 swap_cache_info.find_success, swap_cache_info.find_total, 61 swap_cache_info.noent_race, swap_cache_info.exist_race); 62 } 63 64 /* 65 * __add_to_swap_cache resembles add_to_page_cache on 
swapper_space, 66 * but sets SwapCache flag and private instead of mapping and index. 67 */ 68 static int __add_to_swap_cache(struct page *page, 69 swp_entry_t entry, int gfp_mask) 70 { 71 int error; 72 73 BUG_ON(PageSwapCache(page)); 74 BUG_ON(PagePrivate(page)); 75 error = radix_tree_preload(gfp_mask); 76 if (!error) { 77 spin_lock_irq(&swapper_space.tree_lock); 78 error = radix_tree_insert(&swapper_space.page_tree, 79 entry.val, page); 80 if (!error) { 81 page_cache_get(page); 82 SetPageLocked(page); 83 SetPageSwapCache(page); 84 page->private = entry.val; 85 total_swapcache_pages++; 86 pagecache_acct(1); 87 } 88 spin_unlock_irq(&swapper_space.tree_lock); 89 radix_tree_preload_end(); 90 } 91 return error; 92 } 93 94 static int add_to_swap_cache(struct page *page, swp_entry_t entry) 95 { 96 int error; 97 98 if (!swap_duplicate(entry)) { 99 INC_CACHE_INFO(noent_race); 100 return -ENOENT; 101 } 102 error = __add_to_swap_cache(page, entry, GFP_KERNEL); 103 /* 104 * Anon pages are already on the LRU, we don't run lru_cache_add here. 105 */ 106 if (error) { 107 swap_free(entry); 108 if (error == -EEXIST) 109 INC_CACHE_INFO(exist_race); 110 return error; 111 } 112 INC_CACHE_INFO(add_total); 113 return 0; 114 } 115 116 /* 117 * This must be called only on pages that have 118 * been verified to be in the swap cache. 119 */ 120 void __delete_from_swap_cache(struct page *page) 121 { 122 BUG_ON(!PageLocked(page)); 123 BUG_ON(!PageSwapCache(page)); 124 BUG_ON(PageWriteback(page)); 125 126 radix_tree_delete(&swapper_space.page_tree, page->private); 127 page->private = 0; 128 ClearPageSwapCache(page); 129 total_swapcache_pages--; 130 pagecache_acct(-1); 131 INC_CACHE_INFO(del_total); 132 } 133 134 /** 135 * add_to_swap - allocate swap space for a page 136 * @page: page we want to move to swap 137 * 138 * Allocate swap space for the page and add the page to the 139 * swap cache. Caller needs to hold the page lock. 
140 */ 141 int add_to_swap(struct page * page) 142 { 143 swp_entry_t entry; 144 int pf_flags; 145 int err; 146 147 if (!PageLocked(page)) 148 BUG(); 149 150 for (;;) { 151 entry = get_swap_page(); 152 if (!entry.val) 153 return 0; 154 155 /* Radix-tree node allocations are performing 156 * GFP_ATOMIC allocations under PF_MEMALLOC. 157 * They can completely exhaust the page allocator. 158 * 159 * So PF_MEMALLOC is dropped here. This causes the slab 160 * allocations to fail earlier, so radix-tree nodes will 161 * then be allocated from the mempool reserves. 162 * 163 * We're still using __GFP_HIGH for radix-tree node 164 * allocations, so some of the emergency pools are available, 165 * just not all of them. 166 */ 167 168 pf_flags = current->flags; 169 current->flags &= ~PF_MEMALLOC; 170 171 /* 172 * Add it to the swap cache and mark it dirty 173 */ 174 err = __add_to_swap_cache(page, entry, GFP_ATOMIC); 175 176 if (pf_flags & PF_MEMALLOC) 177 current->flags |= PF_MEMALLOC; 178 179 switch (err) { 180 case 0: /* Success */ 181 SetPageUptodate(page); 182 SetPageDirty(page); 183 INC_CACHE_INFO(add_total); 184 return 1; 185 case -EEXIST: 186 /* Raced with "speculative" read_swap_cache_async */ 187 INC_CACHE_INFO(exist_race); 188 swap_free(entry); 189 continue; 190 default: 191 /* -ENOMEM radix-tree allocation failure */ 192 swap_free(entry); 193 return 0; 194 } 195 } 196 } 197 198 /* 199 * This must be called only on pages that have 200 * been verified to be in the swap cache and locked. 201 * It will never put the page into the free list, 202 * the caller has a reference on the page. 
203 */ 204 void delete_from_swap_cache(struct page *page) 205 { 206 swp_entry_t entry; 207 208 BUG_ON(!PageSwapCache(page)); 209 BUG_ON(!PageLocked(page)); 210 BUG_ON(PageWriteback(page)); 211 BUG_ON(PagePrivate(page)); 212 213 entry.val = page->private; 214 215 spin_lock_irq(&swapper_space.tree_lock); 216 __delete_from_swap_cache(page); 217 spin_unlock_irq(&swapper_space.tree_lock); 218 219 swap_free(entry); 220 page_cache_release(page); 221 } 222 223 /* 224 * Strange swizzling function only for use by shmem_writepage 225 */ 226 int move_to_swap_cache(struct page *page, swp_entry_t entry) 227 { 228 int err = __add_to_swap_cache(page, entry, GFP_ATOMIC); 229 if (!err) { 230 remove_from_page_cache(page); 231 page_cache_release(page); /* pagecache ref */ 232 if (!swap_duplicate(entry)) 233 BUG(); 234 SetPageDirty(page); 235 INC_CACHE_INFO(add_total); 236 } else if (err == -EEXIST) 237 INC_CACHE_INFO(exist_race); 238 return err; 239 } 240 241 /* 242 * Strange swizzling function for shmem_getpage (and shmem_unuse) 243 */ 244 int move_from_swap_cache(struct page *page, unsigned long index, 245 struct address_space *mapping) 246 { 247 int err = add_to_page_cache(page, mapping, index, GFP_ATOMIC); 248 if (!err) { 249 delete_from_swap_cache(page); 250 /* shift page from clean_pages to dirty_pages list */ 251 ClearPageDirty(page); 252 set_page_dirty(page); 253 } 254 return err; 255 } 256 257 /* 258 * If we are the only user, then try to free up the swap cache. 259 * 260 * Its ok to check for PageSwapCache without the page lock 261 * here because we are going to recheck again inside 262 * exclusive_swap_page() _with_ the lock. 263 * - Marcelo 264 */ 265 static inline void free_swap_cache(struct page *page) 266 { 267 if (PageSwapCache(page) && !TestSetPageLocked(page)) { 268 remove_exclusive_swap_page(page); 269 unlock_page(page); 270 } 271 } 272 273 /* 274 * Perform a free_page(), also freeing any swap cache associated with 275 * this page if it is the last user of the page. 
Can not do a lock_page, 276 * as we are holding the page_table_lock spinlock. 277 */ 278 void free_page_and_swap_cache(struct page *page) 279 { 280 free_swap_cache(page); 281 page_cache_release(page); 282 } 283 284 /* 285 * Passed an array of pages, drop them all from swapcache and then release 286 * them. They are removed from the LRU and freed if this is their last use. 287 */ 288 void free_pages_and_swap_cache(struct page **pages, int nr) 289 { 290 int chunk = 16; 291 struct page **pagep = pages; 292 293 lru_add_drain(); 294 while (nr) { 295 int todo = min(chunk, nr); 296 int i; 297 298 for (i = 0; i < todo; i++) 299 free_swap_cache(pagep[i]); 300 release_pages(pagep, todo, 0); 301 pagep += todo; 302 nr -= todo; 303 } 304 } 305 306 /* 307 * Lookup a swap entry in the swap cache. A found page will be returned 308 * unlocked and with its refcount incremented - we rely on the kernel 309 * lock getting page table operations atomic even if we drop the page 310 * lock before returning. 311 */ 312 struct page * lookup_swap_cache(swp_entry_t entry) 313 { 314 struct page *page; 315 316 spin_lock_irq(&swapper_space.tree_lock); 317 page = radix_tree_lookup(&swapper_space.page_tree, entry.val); 318 if (page) { 319 page_cache_get(page); 320 INC_CACHE_INFO(find_success); 321 } 322 spin_unlock_irq(&swapper_space.tree_lock); 323 INC_CACHE_INFO(find_total); 324 return page; 325 } 326 327 /* 328 * Locate a page of swap in physical memory, reserving swap cache space 329 * and reading the disk if it is not already cached. 330 * A failure return means that either the page allocation failed or that 331 * the swap entry is no longer in use. 332 */ 333 struct page *read_swap_cache_async(swp_entry_t entry, 334 struct vm_area_struct *vma, unsigned long addr) 335 { 336 struct page *found_page, *new_page = NULL; 337 int err; 338 339 do { 340 /* 341 * First check the swap cache. 
Since this is normally 342 * called after lookup_swap_cache() failed, re-calling 343 * that would confuse statistics. 344 */ 345 spin_lock_irq(&swapper_space.tree_lock); 346 found_page = radix_tree_lookup(&swapper_space.page_tree, 347 entry.val); 348 if (found_page) 349 page_cache_get(found_page); 350 spin_unlock_irq(&swapper_space.tree_lock); 351 if (found_page) 352 break; 353 354 /* 355 * Get a new page to read into from swap. 356 */ 357 if (!new_page) { 358 new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr); 359 if (!new_page) 360 break; /* Out of memory */ 361 } 362 363 /* 364 * Associate the page with swap entry in the swap cache. 365 * May fail (-ENOENT) if swap entry has been freed since 366 * our caller observed it. May fail (-EEXIST) if there 367 * is already a page associated with this entry in the 368 * swap cache: added by a racing read_swap_cache_async, 369 * or by try_to_swap_out (or shmem_writepage) re-using 370 * the just freed swap entry for an existing page. 371 * May fail (-ENOMEM) if radix-tree node allocation failed. 372 */ 373 err = add_to_swap_cache(new_page, entry); 374 if (!err) { 375 /* 376 * Initiate read into locked page and return. 377 */ 378 lru_cache_add_active(new_page); 379 swap_readpage(NULL, new_page); 380 return new_page; 381 } 382 } while (err != -ENOENT && err != -ENOMEM); 383 384 if (new_page) 385 page_cache_release(new_page); 386 return found_page; 387 } 388
/* This page was automatically generated by LXR 0.3.1.  Linux is a registered trademark of Linus Torvalds. */