1 /* 2 * High memory handling common code and variables. 3 * 4 * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de 5 * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de 6 * 7 * 8 * Redesigned the x86 32-bit VM architecture to deal with 9 * 64-bit physical space. With current x86 CPUs this 10 * means up to 64 Gigabytes physical RAM. 11 * 12 * Rewrote high memory support to move the page cache into 13 * high memory. Implemented permanent (schedulable) kmaps 14 * based on Linus' idea. 15 * 16 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> 17 */ 18 19 #include <linux/mm.h> 20 #include <linux/module.h> 21 #include <linux/swap.h> 22 #include <linux/bio.h> 23 #include <linux/pagemap.h> 24 #include <linux/mempool.h> 25 #include <linux/blkdev.h> 26 #include <linux/init.h> 27 #include <linux/hash.h> 28 #include <linux/highmem.h> 29 #include <asm/tlbflush.h> 30 31 static mempool_t *page_pool, *isa_page_pool; 32 33 static void *page_pool_alloc(int gfp_mask, void *data) 34 { 35 int gfp = gfp_mask | (int) (long) data; 36 37 return alloc_page(gfp); 38 } 39 40 static void page_pool_free(void *page, void *data) 41 { 42 __free_page(page); 43 } 44 45 /* 46 * Virtual_count is not a pure "count". 47 * 0 means that it is not mapped, and has not been mapped 48 * since a TLB flush - it is usable. 49 * 1 means that there are no users, but it has been mapped 50 * since the last TLB flush - so we can't use it. 51 * n means that there are (n-1) current users of it. 52 */ 53 #ifdef CONFIG_HIGHMEM 54 static int pkmap_count[LAST_PKMAP]; 55 static unsigned int last_pkmap_nr; 56 static spinlock_t kmap_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; 57 58 pte_t * pkmap_page_table; 59 60 static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); 61 62 static void flush_all_zero_pkmaps(void) 63 { 64 int i; 65 66 flush_cache_kmaps(); 67 68 for (i = 0; i < LAST_PKMAP; i++) { 69 struct page *page; 70 71 /* 72 * zero means we don't have anything to do, 73 * >1 means that it is still in use. 
Only 74 * a count of 1 means that it is free but 75 * needs to be unmapped 76 */ 77 if (pkmap_count[i] != 1) 78 continue; 79 pkmap_count[i] = 0; 80 81 /* sanity check */ 82 if (pte_none(pkmap_page_table[i])) 83 BUG(); 84 85 /* 86 * Don't need an atomic fetch-and-clear op here; 87 * no-one has the page mapped, and cannot get at 88 * its virtual address (and hence PTE) without first 89 * getting the kmap_lock (which is held here). 90 * So no dangers, even with speculative execution. 91 */ 92 page = pte_page(pkmap_page_table[i]); 93 pte_clear(&pkmap_page_table[i]); 94 95 set_page_address(page, NULL); 96 } 97 flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP)); 98 } 99 100 static inline unsigned long map_new_virtual(struct page *page) 101 { 102 unsigned long vaddr; 103 int count; 104 105 start: 106 count = LAST_PKMAP; 107 /* Find an empty entry */ 108 for (;;) { 109 last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK; 110 if (!last_pkmap_nr) { 111 flush_all_zero_pkmaps(); 112 count = LAST_PKMAP; 113 } 114 if (!pkmap_count[last_pkmap_nr]) 115 break; /* Found a usable entry */ 116 if (--count) 117 continue; 118 119 /* 120 * Sleep for somebody else to unmap their entries 121 */ 122 { 123 DECLARE_WAITQUEUE(wait, current); 124 125 __set_current_state(TASK_UNINTERRUPTIBLE); 126 add_wait_queue(&pkmap_map_wait, &wait); 127 spin_unlock(&kmap_lock); 128 schedule(); 129 remove_wait_queue(&pkmap_map_wait, &wait); 130 spin_lock(&kmap_lock); 131 132 /* Somebody else might have mapped it while we slept */ 133 if (page_address(page)) 134 return (unsigned long)page_address(page); 135 136 /* Re-start */ 137 goto start; 138 } 139 } 140 vaddr = PKMAP_ADDR(last_pkmap_nr); 141 set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot)); 142 143 pkmap_count[last_pkmap_nr] = 1; 144 set_page_address(page, (void *)vaddr); 145 146 return vaddr; 147 } 148 149 void fastcall *kmap_high(struct page *page) 150 { 151 unsigned long vaddr; 152 153 /* 154 * For highmem pages, we 
can't trust "virtual" until 155 * after we have the lock. 156 * 157 * We cannot call this from interrupts, as it may block 158 */ 159 spin_lock(&kmap_lock); 160 vaddr = (unsigned long)page_address(page); 161 if (!vaddr) 162 vaddr = map_new_virtual(page); 163 pkmap_count[PKMAP_NR(vaddr)]++; 164 if (pkmap_count[PKMAP_NR(vaddr)] < 2) 165 BUG(); 166 spin_unlock(&kmap_lock); 167 return (void*) vaddr; 168 } 169 170 EXPORT_SYMBOL(kmap_high); 171 172 void fastcall kunmap_high(struct page *page) 173 { 174 unsigned long vaddr; 175 unsigned long nr; 176 int need_wakeup; 177 178 spin_lock(&kmap_lock); 179 vaddr = (unsigned long)page_address(page); 180 if (!vaddr) 181 BUG(); 182 nr = PKMAP_NR(vaddr); 183 184 /* 185 * A count must never go down to zero 186 * without a TLB flush! 187 */ 188 need_wakeup = 0; 189 switch (--pkmap_count[nr]) { 190 case 0: 191 BUG(); 192 case 1: 193 /* 194 * Avoid an unnecessary wake_up() function call. 195 * The common case is pkmap_count[] == 1, but 196 * no waiters. 197 * The tasks queued in the wait-queue are guarded 198 * by both the lock in the wait-queue-head and by 199 * the kmap_lock. As the kmap_lock is held here, 200 * no need for the wait-queue-head's lock. Simply 201 * test if the queue is empty. 
202 */ 203 need_wakeup = waitqueue_active(&pkmap_map_wait); 204 } 205 spin_unlock(&kmap_lock); 206 207 /* do wake-up, if needed, race-free outside of the spin lock */ 208 if (need_wakeup) 209 wake_up(&pkmap_map_wait); 210 } 211 212 EXPORT_SYMBOL(kunmap_high); 213 214 #define POOL_SIZE 64 215 216 static __init int init_emergency_pool(void) 217 { 218 struct sysinfo i; 219 si_meminfo(&i); 220 si_swapinfo(&i); 221 222 if (!i.totalhigh) 223 return 0; 224 225 page_pool = mempool_create(POOL_SIZE, page_pool_alloc, page_pool_free, NULL); 226 if (!page_pool) 227 BUG(); 228 printk("highmem bounce pool size: %d pages\n", POOL_SIZE); 229 230 return 0; 231 } 232 233 __initcall(init_emergency_pool); 234 235 /* 236 * highmem version, map in to vec 237 */ 238 static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom) 239 { 240 unsigned long flags; 241 unsigned char *vto; 242 243 local_irq_save(flags); 244 vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ); 245 memcpy(vto + to->bv_offset, vfrom, to->bv_len); 246 kunmap_atomic(vto, KM_BOUNCE_READ); 247 local_irq_restore(flags); 248 } 249 250 #else /* CONFIG_HIGHMEM */ 251 252 #define bounce_copy_vec(to, vfrom) \ 253 memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len) 254 255 #endif 256 257 #define ISA_POOL_SIZE 16 258 259 /* 260 * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA 261 * as the max address, so check if the pool has already been created. 262 */ 263 int init_emergency_isa_pool(void) 264 { 265 if (isa_page_pool) 266 return 0; 267 268 isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc, page_pool_free, (void *) __GFP_DMA); 269 if (!isa_page_pool) 270 BUG(); 271 272 printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE); 273 return 0; 274 } 275 276 /* 277 * Simple bounce buffer support for highmem pages. Depending on the 278 * queue gfp mask set, *to may or may not be a highmem page. 
kmap it 279 * always, it will do the Right Thing 280 */ 281 static void copy_to_high_bio_irq(struct bio *to, struct bio *from) 282 { 283 unsigned char *vfrom; 284 struct bio_vec *tovec, *fromvec; 285 int i; 286 287 __bio_for_each_segment(tovec, to, i, 0) { 288 fromvec = from->bi_io_vec + i; 289 290 /* 291 * not bounced 292 */ 293 if (tovec->bv_page == fromvec->bv_page) 294 continue; 295 296 /* 297 * fromvec->bv_offset and fromvec->bv_len might have been 298 * modified by the block layer, so use the original copy, 299 * bounce_copy_vec already uses tovec->bv_len 300 */ 301 vfrom = page_address(fromvec->bv_page) + tovec->bv_offset; 302 303 bounce_copy_vec(tovec, vfrom); 304 } 305 } 306 307 static void bounce_end_io(struct bio *bio, mempool_t *pool) 308 { 309 struct bio *bio_orig = bio->bi_private; 310 struct bio_vec *bvec, *org_vec; 311 int i, err = 0; 312 313 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 314 err = -EIO; 315 316 /* 317 * free up bounce indirect pages used 318 */ 319 __bio_for_each_segment(bvec, bio, i, 0) { 320 org_vec = bio_orig->bi_io_vec + i; 321 if (bvec->bv_page == org_vec->bv_page) 322 continue; 323 324 mempool_free(bvec->bv_page, pool); 325 } 326 327 bio_endio(bio_orig, bio_orig->bi_size, err); 328 bio_put(bio); 329 } 330 331 static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done,int err) 332 { 333 if (bio->bi_size) 334 return 1; 335 336 bounce_end_io(bio, page_pool); 337 return 0; 338 } 339 340 static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err) 341 { 342 if (bio->bi_size) 343 return 1; 344 345 bounce_end_io(bio, isa_page_pool); 346 return 0; 347 } 348 349 static void __bounce_end_io_read(struct bio *bio, mempool_t *pool) 350 { 351 struct bio *bio_orig = bio->bi_private; 352 353 if (test_bit(BIO_UPTODATE, &bio->bi_flags)) 354 copy_to_high_bio_irq(bio_orig, bio); 355 356 bounce_end_io(bio, pool); 357 } 358 359 static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err) 360 
{ 361 if (bio->bi_size) 362 return 1; 363 364 __bounce_end_io_read(bio, page_pool); 365 return 0; 366 } 367 368 static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err) 369 { 370 if (bio->bi_size) 371 return 1; 372 373 __bounce_end_io_read(bio, isa_page_pool); 374 return 0; 375 } 376 377 static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig, 378 mempool_t *pool) 379 { 380 struct page *page; 381 struct bio *bio = NULL; 382 int i, rw = bio_data_dir(*bio_orig); 383 struct bio_vec *to, *from; 384 385 bio_for_each_segment(from, *bio_orig, i) { 386 page = from->bv_page; 387 388 /* 389 * is destination page below bounce pfn? 390 */ 391 if (page_to_pfn(page) < q->bounce_pfn) 392 continue; 393 394 /* 395 * irk, bounce it 396 */ 397 if (!bio) 398 bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt); 399 400 to = bio->bi_io_vec + i; 401 402 to->bv_page = mempool_alloc(pool, q->bounce_gfp); 403 to->bv_len = from->bv_len; 404 to->bv_offset = from->bv_offset; 405 406 if (rw == WRITE) { 407 char *vto, *vfrom; 408 409 vto = page_address(to->bv_page) + to->bv_offset; 410 vfrom = kmap(from->bv_page) + from->bv_offset; 411 memcpy(vto, vfrom, to->bv_len); 412 kunmap(from->bv_page); 413 } 414 } 415 416 /* 417 * no pages bounced 418 */ 419 if (!bio) 420 return; 421 422 /* 423 * at least one page was bounced, fill in possible non-highmem 424 * pages 425 */ 426 bio_for_each_segment(from, *bio_orig, i) { 427 to = bio_iovec_idx(bio, i); 428 if (!to->bv_page) { 429 to->bv_page = from->bv_page; 430 to->bv_len = from->bv_len; 431 to->bv_offset = from->bv_offset; 432 } 433 } 434 435 bio->bi_bdev = (*bio_orig)->bi_bdev; 436 bio->bi_flags |= (1 << BIO_BOUNCED); 437 bio->bi_sector = (*bio_orig)->bi_sector; 438 bio->bi_rw = (*bio_orig)->bi_rw; 439 440 bio->bi_vcnt = (*bio_orig)->bi_vcnt; 441 bio->bi_idx = (*bio_orig)->bi_idx; 442 bio->bi_size = (*bio_orig)->bi_size; 443 444 if (pool == page_pool) { 445 bio->bi_end_io = bounce_end_io_write; 446 if (rw == 
READ) 447 bio->bi_end_io = bounce_end_io_read; 448 } else { 449 bio->bi_end_io = bounce_end_io_write_isa; 450 if (rw == READ) 451 bio->bi_end_io = bounce_end_io_read_isa; 452 } 453 454 bio->bi_private = *bio_orig; 455 *bio_orig = bio; 456 } 457 458 void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig) 459 { 460 mempool_t *pool; 461 462 /* 463 * for non-isa bounce case, just check if the bounce pfn is equal 464 * to or bigger than the highest pfn in the system -- in that case, 465 * don't waste time iterating over bio segments 466 */ 467 if (!(q->bounce_gfp & GFP_DMA)) { 468 if (q->bounce_pfn >= blk_max_pfn) 469 return; 470 pool = page_pool; 471 } else { 472 BUG_ON(!isa_page_pool); 473 pool = isa_page_pool; 474 } 475 476 /* 477 * slow path 478 */ 479 __blk_queue_bounce(q, bio_orig, pool); 480 } 481 482 EXPORT_SYMBOL(blk_queue_bounce); 483 484 #if defined(HASHED_PAGE_VIRTUAL) 485 486 #define PA_HASH_ORDER 7 487 488 /* 489 * Describes one page->virtual association 490 */ 491 struct page_address_map { 492 struct page *page; 493 void *virtual; 494 struct list_head list; 495 }; 496 497 /* 498 * page_address_map freelist, allocated from page_address_maps. 
499 */ 500 static struct list_head page_address_pool; /* freelist */ 501 static spinlock_t pool_lock; /* protects page_address_pool */ 502 503 /* 504 * Hash table bucket 505 */ 506 static struct page_address_slot { 507 struct list_head lh; /* List of page_address_maps */ 508 spinlock_t lock; /* Protect this bucket's list */ 509 } ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER]; 510 511 static struct page_address_slot *page_slot(struct page *page) 512 { 513 return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)]; 514 } 515 516 void *page_address(struct page *page) 517 { 518 unsigned long flags; 519 void *ret; 520 struct page_address_slot *pas; 521 522 if (!PageHighMem(page)) 523 return lowmem_page_address(page); 524 525 pas = page_slot(page); 526 ret = NULL; 527 spin_lock_irqsave(&pas->lock, flags); 528 if (!list_empty(&pas->lh)) { 529 struct page_address_map *pam; 530 531 list_for_each_entry(pam, &pas->lh, list) { 532 if (pam->page == page) { 533 ret = pam->virtual; 534 goto done; 535 } 536 } 537 } 538 done: 539 spin_unlock_irqrestore(&pas->lock, flags); 540 return ret; 541 } 542 543 EXPORT_SYMBOL(page_address); 544 545 void set_page_address(struct page *page, void *virtual) 546 { 547 unsigned long flags; 548 struct page_address_slot *pas; 549 struct page_address_map *pam; 550 551 BUG_ON(!PageHighMem(page)); 552 553 pas = page_slot(page); 554 if (virtual) { /* Add */ 555 BUG_ON(list_empty(&page_address_pool)); 556 557 spin_lock_irqsave(&pool_lock, flags); 558 pam = list_entry(page_address_pool.next, 559 struct page_address_map, list); 560 list_del(&pam->list); 561 spin_unlock_irqrestore(&pool_lock, flags); 562 563 pam->page = page; 564 pam->virtual = virtual; 565 566 spin_lock_irqsave(&pas->lock, flags); 567 list_add_tail(&pam->list, &pas->lh); 568 spin_unlock_irqrestore(&pas->lock, flags); 569 } else { /* Remove */ 570 spin_lock_irqsave(&pas->lock, flags); 571 list_for_each_entry(pam, &pas->lh, list) { 572 if (pam->page == page) { 573 
list_del(&pam->list); 574 spin_unlock_irqrestore(&pas->lock, flags); 575 spin_lock_irqsave(&pool_lock, flags); 576 list_add_tail(&pam->list, &page_address_pool); 577 spin_unlock_irqrestore(&pool_lock, flags); 578 goto done; 579 } 580 } 581 spin_unlock_irqrestore(&pas->lock, flags); 582 } 583 done: 584 return; 585 } 586 587 static struct page_address_map page_address_maps[LAST_PKMAP]; 588 589 void __init page_address_init(void) 590 { 591 int i; 592 593 INIT_LIST_HEAD(&page_address_pool); 594 for (i = 0; i < ARRAY_SIZE(page_address_maps); i++) 595 list_add(&page_address_maps[i].list, &page_address_pool); 596 for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) { 597 INIT_LIST_HEAD(&page_address_htable[i].lh); 598 spin_lock_init(&page_address_htable[i].lock); 599 } 600 spin_lock_init(&pool_lock); 601 } 602 603 #endif /* defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) */ 604
/* This page was automatically generated by LXR 0.3.1. • Linux is a registered trademark of Linus Torvalds */