1 /* 2 * linux/mm/madvise.c 3 * 4 * Copyright (C) 1999 Linus Torvalds 5 * Copyright (C) 2002 Christoph Hellwig 6 */ 7 8 #include <linux/mman.h> 9 #include <linux/pagemap.h> 10 11 12 /* 13 * We can potentially split a vm area into separate 14 * areas, each area with its own behavior. 15 */ 16 static long madvise_behavior(struct vm_area_struct * vma, unsigned long start, 17 unsigned long end, int behavior) 18 { 19 struct mm_struct * mm = vma->vm_mm; 20 int error; 21 22 if (start != vma->vm_start) { 23 error = split_vma(mm, vma, start, 1); 24 if (error) 25 return -EAGAIN; 26 } 27 28 if (end != vma->vm_end) { 29 error = split_vma(mm, vma, end, 0); 30 if (error) 31 return -EAGAIN; 32 } 33 34 /* 35 * vm_flags is protected by the mmap_sem held in write mode. 36 */ 37 VM_ClearReadHint(vma); 38 39 switch (behavior) { 40 case MADV_SEQUENTIAL: 41 vma->vm_flags |= VM_SEQ_READ; 42 break; 43 case MADV_RANDOM: 44 vma->vm_flags |= VM_RAND_READ; 45 break; 46 default: 47 break; 48 } 49 50 return 0; 51 } 52 53 /* 54 * Schedule all required I/O operations. Do not wait for completion. 55 */ 56 static long madvise_willneed(struct vm_area_struct * vma, 57 unsigned long start, unsigned long end) 58 { 59 struct file *file = vma->vm_file; 60 61 if (!file) 62 return -EBADF; 63 64 start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 65 if (end > vma->vm_end) 66 end = vma->vm_end; 67 end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 68 69 force_page_cache_readahead(file->f_mapping, 70 file, start, max_sane_readahead(end - start)); 71 return 0; 72 } 73 74 /* 75 * Application no longer needs these pages. If the pages are dirty, 76 * it's OK to just throw them away. The app will be more careful about 77 * data it wants to keep. Be sure to free swap resources too. 
The 78 * zap_page_range call sets things up for refill_inactive to actually free 79 * these pages later if no one else has touched them in the meantime, 80 * although we could add these pages to a global reuse list for 81 * refill_inactive to pick up before reclaiming other pages. 82 * 83 * NB: This interface discards data rather than pushes it out to swap, 84 * as some implementations do. This has performance implications for 85 * applications like large transactional databases which want to discard 86 * pages in anonymous maps after committing to backing store the data 87 * that was kept in them. There is no reason to write this data out to 88 * the swap area if the application is discarding it. 89 * 90 * An interface that causes the system to free clean pages and flush 91 * dirty pages is already available as msync(MS_INVALIDATE). 92 */ 93 static long madvise_dontneed(struct vm_area_struct * vma, 94 unsigned long start, unsigned long end) 95 { 96 if (vma->vm_flags & VM_LOCKED) 97 return -EINVAL; 98 99 if (unlikely(vma->vm_flags & VM_NONLINEAR)) { 100 struct zap_details details = { 101 .nonlinear_vma = vma, 102 .last_index = ULONG_MAX, 103 }; 104 zap_page_range(vma, start, end - start, &details); 105 } else 106 zap_page_range(vma, start, end - start, NULL); 107 return 0; 108 } 109 110 static long madvise_vma(struct vm_area_struct * vma, unsigned long start, 111 unsigned long end, int behavior) 112 { 113 long error = -EBADF; 114 115 switch (behavior) { 116 case MADV_NORMAL: 117 case MADV_SEQUENTIAL: 118 case MADV_RANDOM: 119 error = madvise_behavior(vma, start, end, behavior); 120 break; 121 122 case MADV_WILLNEED: 123 error = madvise_willneed(vma, start, end); 124 break; 125 126 case MADV_DONTNEED: 127 error = madvise_dontneed(vma, start, end); 128 break; 129 130 default: 131 error = -EINVAL; 132 break; 133 } 134 135 return error; 136 } 137 138 /* 139 * The madvise(2) system call. 
140 * 141 * Applications can use madvise() to advise the kernel how it should 142 * handle paging I/O in this VM area. The idea is to help the kernel 143 * use appropriate read-ahead and caching techniques. The information 144 * provided is advisory only, and can be safely disregarded by the 145 * kernel without affecting the correct operation of the application. 146 * 147 * behavior values: 148 * MADV_NORMAL - the default behavior is to read clusters. This 149 * results in some read-ahead and read-behind. 150 * MADV_RANDOM - the system should read the minimum amount of data 151 * on any access, since it is unlikely that the appli- 152 * cation will need more than what it asks for. 153 * MADV_SEQUENTIAL - pages in the given range will probably be accessed 154 * once, so they can be aggressively read ahead, and 155 * can be freed soon after they are accessed. 156 * MADV_WILLNEED - the application is notifying the system to read 157 * some pages ahead. 158 * MADV_DONTNEED - the application is finished with the given range, 159 * so the kernel can free resources associated with it. 160 * 161 * return values: 162 * zero - success 163 * -EINVAL - start + len < 0, start is not page-aligned, 164 * "behavior" is not a valid value, or application 165 * is attempting to release locked or shared pages. 166 * -ENOMEM - addresses in the specified range are not currently 167 * mapped, or are outside the AS of the process. 168 * -EIO - an I/O error occurred while paging in data. 169 * -EBADF - map exists, but area maps something that isn't a file. 170 * -EAGAIN - a kernel resource was temporarily unavailable. 
171 */ 172 asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior) 173 { 174 unsigned long end; 175 struct vm_area_struct * vma; 176 int unmapped_error = 0; 177 int error = -EINVAL; 178 size_t len; 179 180 down_write(¤t->mm->mmap_sem); 181 182 if (start & ~PAGE_MASK) 183 goto out; 184 len = (len_in + ~PAGE_MASK) & PAGE_MASK; 185 186 /* Check to see whether len was rounded up from small -ve to zero */ 187 if (len_in && !len) 188 goto out; 189 190 end = start + len; 191 if (end < start) 192 goto out; 193 194 error = 0; 195 if (end == start) 196 goto out; 197 198 /* 199 * If the interval [start,end) covers some unmapped address 200 * ranges, just ignore them, but return -ENOMEM at the end. 201 */ 202 vma = find_vma(current->mm, start); 203 for (;;) { 204 /* Still start < end. */ 205 error = -ENOMEM; 206 if (!vma) 207 goto out; 208 209 /* Here start < vma->vm_end. */ 210 if (start < vma->vm_start) { 211 unmapped_error = -ENOMEM; 212 start = vma->vm_start; 213 } 214 215 /* Here vma->vm_start <= start < vma->vm_end. */ 216 if (end <= vma->vm_end) { 217 if (start < end) { 218 error = madvise_vma(vma, start, end, 219 behavior); 220 if (error) 221 goto out; 222 } 223 error = unmapped_error; 224 goto out; 225 } 226 227 /* Here vma->vm_start <= start < vma->vm_end < end. */ 228 error = madvise_vma(vma, start, vma->vm_end, behavior); 229 if (error) 230 goto out; 231 start = vma->vm_end; 232 vma = vma->vm_next; 233 } 234 235 out: 236 up_write(¤t->mm->mmap_sem); 237 return error; 238 } 239
/* This page was automatically generated by LXR 0.3.1.  Linux is a registered trademark of Linus Torvalds. */