GNU Linux-libre 4.14.295-gnu1
[releases.git] / kernel / sysctl.c
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20
21 #include <linux/module.h>
22 #include <linux/aio.h>
23 #include <linux/mm.h>
24 #include <linux/swap.h>
25 #include <linux/slab.h>
26 #include <linux/sysctl.h>
27 #include <linux/bitmap.h>
28 #include <linux/signal.h>
29 #include <linux/printk.h>
30 #include <linux/proc_fs.h>
31 #include <linux/security.h>
32 #include <linux/ctype.h>
33 #include <linux/kmemleak.h>
34 #include <linux/fs.h>
35 #include <linux/init.h>
36 #include <linux/kernel.h>
37 #include <linux/kobject.h>
38 #include <linux/net.h>
39 #include <linux/sysrq.h>
40 #include <linux/highuid.h>
41 #include <linux/writeback.h>
42 #include <linux/ratelimit.h>
43 #include <linux/compaction.h>
44 #include <linux/hugetlb.h>
45 #include <linux/initrd.h>
46 #include <linux/key.h>
47 #include <linux/times.h>
48 #include <linux/limits.h>
49 #include <linux/dcache.h>
50 #include <linux/dnotify.h>
51 #include <linux/syscalls.h>
52 #include <linux/vmstat.h>
53 #include <linux/nfs_fs.h>
54 #include <linux/acpi.h>
55 #include <linux/reboot.h>
56 #include <linux/ftrace.h>
57 #include <linux/perf_event.h>
58 #include <linux/kprobes.h>
59 #include <linux/pipe_fs_i.h>
60 #include <linux/oom.h>
61 #include <linux/kmod.h>
62 #include <linux/capability.h>
63 #include <linux/binfmts.h>
64 #include <linux/sched/sysctl.h>
65 #include <linux/sched/coredump.h>
66 #include <linux/kexec.h>
67 #include <linux/bpf.h>
68 #include <linux/mount.h>
69
70 #include <linux/uaccess.h>
71 #include <asm/processor.h>
72
73 #ifdef CONFIG_X86
74 #include <asm/nmi.h>
75 #include <asm/stacktrace.h>
76 #include <asm/io.h>
77 #endif
78 #ifdef CONFIG_SPARC
79 #include <asm/setup.h>
80 #endif
81 #ifdef CONFIG_BSD_PROCESS_ACCT
82 #include <linux/acct.h>
83 #endif
84 #ifdef CONFIG_RT_MUTEXES
85 #include <linux/rtmutex.h>
86 #endif
87 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
88 #include <linux/lockdep.h>
89 #endif
90 #ifdef CONFIG_CHR_DEV_SG
91 #include <scsi/sg.h>
92 #endif
93
94 #ifdef CONFIG_LOCKUP_DETECTOR
95 #include <linux/nmi.h>
96 #endif
97
98 #if defined(CONFIG_SYSCTL)
99
100 /* External variables not in a header file. */
101 extern int suid_dumpable;
102 #ifdef CONFIG_COREDUMP
103 extern int core_uses_pid;
104 extern char core_pattern[];
105 extern unsigned int core_pipe_limit;
106 #endif
107 extern int pid_max;
108 extern int pid_max_min, pid_max_max;
109 extern int percpu_pagelist_fraction;
110 extern int latencytop_enabled;
111 extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
112 #ifndef CONFIG_MMU
113 extern int sysctl_nr_trim_pages;
114 #endif
115
116 /*
 * Constants used for minimum and maximum.
 *
 * Table entries below take the address of these objects in .extra1 and
 * .extra2 (the min_*/max_* pairs in kern_table go to .extra1/.extra2
 * respectively), which is why plain integer literals cannot be used there.
 * Several are marked __maybe_unused, and some are wrapped in the same
 * CONFIG_* guard as the code that refers to them.
 */
117 #ifdef CONFIG_LOCKUP_DETECTOR
118 static int sixty = 60;
119 #endif
120
121 static int __maybe_unused neg_one = -1;	/* .extra1 of "sysctl_writes_strict" */
122
123 static int zero;	/* no initializer: static storage, so value 0 */
124 static int __maybe_unused one = 1;	/* .extra2 of the 0/1 switches, .extra1 of "at least 1" entries */
125 static int __maybe_unused two = 2;	/* .extra2 of "kptr_restrict" */
126 static int __maybe_unused four = 4;
127 static unsigned long zero_ul;
128 static unsigned long one_ul = 1;
129 static unsigned long long_max = LONG_MAX;
130 static int one_hundred = 100;
131 static int one_thousand = 1000;
132 #ifdef CONFIG_PRINTK
133 static int ten_thousand = 10000;	/* .extra2 of "printk_delay" */
134 #endif
135 #ifdef CONFIG_PERF_EVENTS
136 static int six_hundred_forty_kb = 640 * 1024;
137 #endif
138
139 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
140 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
141
142 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
143 static int maxolduid = 65535;
144 static int minolduid;	/* no initializer: value 0 */
145
146 static int ngroups_max = NGROUPS_MAX;
147 static const int cap_last_cap = CAP_LAST_CAP;
148
149 /*this is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs */
150 #ifdef CONFIG_DETECT_HUNG_TASK
151 static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
152 #endif
153
154 #ifdef CONFIG_INOTIFY_USER
155 #include <linux/inotify.h>
156 #endif
157 #ifdef CONFIG_SPARC
158 #endif
159
160 #ifdef __hppa__
161 extern int pwrsw_enabled;
162 #endif
163
164 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
165 extern int unaligned_enabled;
166 #endif
167
168 #ifdef CONFIG_IA64
169 extern int unaligned_dump_stack;
170 #endif
171
172 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
173 extern int no_unaligned_warning;
174 #endif
175
176 #ifdef CONFIG_PROC_SYSCTL
177
178 /**
179  * enum sysctl_writes_mode - supported sysctl write modes
180  *
181  * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
182  *      to be written, and multiple writes on the same sysctl file descriptor
183  *      will rewrite the sysctl value, regardless of file position. No warning
184  *      is issued when the initial position is not 0.
185  * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
186  *      not 0.
187  * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
188  *      file position 0 and the value must be fully contained in the buffer
189  *      sent to the write syscall. If dealing with strings respect the file
190  *      position, but restrict this to the max length of the buffer, anything
191  *      past the max length will be ignored. Multiple writes will append
192  *      to the buffer.
193  *
194  * These write modes control how current file position affects the behavior of
195  * updating sysctl values through the proc interface on each write.
196  */
197 enum sysctl_writes_mode {
198         SYSCTL_WRITES_LEGACY            = -1,
199         SYSCTL_WRITES_WARN              = 0,
200         SYSCTL_WRITES_STRICT            = 1,
201 };
202
/*
 * Write mode currently in effect; starts out as SYSCTL_WRITES_STRICT.
 * kern_table exposes it as "sysctl_writes_strict" via proc_dointvec_minmax
 * with .extra1 = &neg_one and .extra2 = &one, i.e. the range spanned by the
 * three enumerators above.
 */
203 static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
204
205 static int proc_do_cad_pid(struct ctl_table *table, int write,
206                   void __user *buffer, size_t *lenp, loff_t *ppos);
207 static int proc_taint(struct ctl_table *table, int write,
208                                void __user *buffer, size_t *lenp, loff_t *ppos);
209 #endif
210
211 #ifdef CONFIG_PRINTK
212 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
213                                 void __user *buffer, size_t *lenp, loff_t *ppos);
214 #endif
215
216 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
217                 void __user *buffer, size_t *lenp, loff_t *ppos);
218 #ifdef CONFIG_COREDUMP
219 static int proc_dostring_coredump(struct ctl_table *table, int write,
220                 void __user *buffer, size_t *lenp, loff_t *ppos);
221 #endif
222
223 #ifdef CONFIG_MAGIC_SYSRQ
224 /* Note: sysrq code uses its own private copy */
225 static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
226
/*
 * proc handler installed on the "sysrq" entry of kern_table, whose .data
 * points at __sysrq_enabled.
 *
 * The integer parsing/formatting is delegated to proc_dointvec().  If that
 * fails its error code is returned unchanged.  After a successful write the
 * value now held in __sysrq_enabled is handed to sysrq_toggle_support();
 * reads do not call it.  Returns 0 on success.
 */
227 static int sysrq_sysctl_handler(struct ctl_table *table, int write,
228                                 void __user *buffer, size_t *lenp,
229                                 loff_t *ppos)
230 {
231         int error;
232
233         error = proc_dointvec(table, write, buffer, lenp, ppos);
234         if (error)
235                 return error;
236
        /* Only a write can have changed the value, so only then propagate it. */
237         if (write)
238                 sysrq_toggle_support(__sysrq_enabled);
239
240         return 0;
241 }
242
243 #endif
244
245 #ifdef CONFIG_BPF_SYSCALL
246
/*
 * Default implementation of the notification hook that bpf_unpriv_handler()
 * calls with the knob's value: it deliberately does nothing.  The symbol is
 * __weak, so another definition of unpriv_ebpf_notify() elsewhere in the
 * kernel takes precedence at link time.
 * NOTE(review): no overriding definition is visible in this file; confirm
 * which code supplies one.
 */
247 void __weak unpriv_ebpf_notify(int new_state)
248 {
249 }
250
/*
 * proc handler for an int knob that becomes read-only once it holds 1.
 *
 * @table:  table entry; ->data must point at the int being controlled
 * @write:  non-zero for a write, zero for a read
 * @buffer: user-space buffer, passed through to proc_dointvec_minmax()
 * @lenp:   in/out length, passed through
 * @ppos:   in/out file position, passed through
 *
 * Writes require CAP_SYS_ADMIN.  Parsing is done on a local copy of the
 * value through a copy of @table, so the live value is only updated after
 * the new value has been accepted.  If the value was 1 on entry ("locked"),
 * a write of anything other than 1 is refused and the live value is left
 * untouched.
 *
 * unpriv_ebpf_notify() is called with the local copy on every path that
 * does not return early, which includes plain reads.
 *
 * Returns -EPERM for an unprivileged write or an attempt to leave the
 * locked state, otherwise the result of proc_dointvec_minmax().
 */
251 static int bpf_unpriv_handler(struct ctl_table *table, int write,
252                              void __user *buffer, size_t *lenp, loff_t *ppos)
253 {
254         int ret, unpriv_enable = *(int *)table->data;
255         bool locked_state = unpriv_enable == 1;
256         struct ctl_table tmp = *table;
257
258         if (write && !capable(CAP_SYS_ADMIN))
259                 return -EPERM;
260
        /* Let the generic helper read/write the local copy, not the live value. */
261         tmp.data = &unpriv_enable;
262         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
263         if (write && !ret) {
                /* Once set to 1 the knob may not be moved to any other value. */
264                 if (locked_state && unpriv_enable != 1)
265                         return -EPERM;
266                 *(int *)table->data = unpriv_enable;
267         }
268
269         unpriv_ebpf_notify(unpriv_enable);
270
271         return ret;
272 }
273 #endif
274
275 static struct ctl_table kern_table[];
276 static struct ctl_table vm_table[];
277 static struct ctl_table fs_table[];
278 static struct ctl_table debug_table[];
279 static struct ctl_table dev_table[];
280 extern struct ctl_table random_table[];
281 #ifdef CONFIG_EPOLL
282 extern struct ctl_table epoll_table[];
283 #endif
284
285 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
286 int sysctl_legacy_va_layout;
287 #endif
288
289 /* The default sysctl tables: */
290
/*
 * Root table: one entry per top-level directory ("kernel", "vm", "fs",
 * "debug", "dev").  Each entry sets only a name, mode 0555 and a .child
 * pointer to the table that holds that directory's entries; none has .data
 * or .proc_handler.  The final `{ }` is an all-zero entry, the conventional
 * end-of-table marker for ctl_table arrays.
 */
291 static struct ctl_table sysctl_base_table[] = {
292         {
293                 .procname       = "kernel",
294                 .mode           = 0555,
295                 .child          = kern_table,
296         },
297         {
298                 .procname       = "vm",
299                 .mode           = 0555,
300                 .child          = vm_table,
301         },
302         {
303                 .procname       = "fs",
304                 .mode           = 0555,
305                 .child          = fs_table,
306         },
307         {
308                 .procname       = "debug",
309                 .mode           = 0555,
310                 .child          = debug_table,
311         },
312         {
313                 .procname       = "dev",
314                 .mode           = 0555,
315                 .child          = dev_table,
316         },
317         { }
318 };
319
320 #ifdef CONFIG_SCHED_DEBUG
/*
 * Lower/upper bounds referenced through .extra1/.extra2 by the scheduler
 * entries of kern_table.  The granularity pair is used by both
 * "sched_min_granularity_ns" and "sched_latency_ns"; the wakeup pair by
 * "sched_wakeup_granularity_ns"; the tunable-scaling pair (SMP only) by
 * "sched_tunable_scaling".
 */
321 static int min_sched_granularity_ns = 100000;           /* 100 usecs */
322 static int max_sched_granularity_ns = NSEC_PER_SEC;     /* 1 second */
323 static int min_wakeup_granularity_ns;                   /* 0 usecs */
324 static int max_wakeup_granularity_ns = NSEC_PER_SEC;    /* 1 second */
325 #ifdef CONFIG_SMP
326 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
327 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
328 #endif /* CONFIG_SMP */
329 #endif /* CONFIG_SCHED_DEBUG */
330
331 #ifdef CONFIG_COMPACTION
/*
 * Range limits 0 (no initializer) .. 1000 for an "extfrag threshold" value.
 * NOTE(review): the table entry that points at these is outside the part of
 * the file visible here; presumably an entry in vm_table - confirm.
 */
332 static int min_extfrag_threshold;
333 static int max_extfrag_threshold = 1000;
334 #endif
335
336 static struct ctl_table kern_table[] = {
337         {
338                 .procname       = "sched_child_runs_first",
339                 .data           = &sysctl_sched_child_runs_first,
340                 .maxlen         = sizeof(unsigned int),
341                 .mode           = 0644,
342                 .proc_handler   = proc_dointvec,
343         },
344 #ifdef CONFIG_SCHED_DEBUG
345         {
346                 .procname       = "sched_min_granularity_ns",
347                 .data           = &sysctl_sched_min_granularity,
348                 .maxlen         = sizeof(unsigned int),
349                 .mode           = 0644,
350                 .proc_handler   = sched_proc_update_handler,
351                 .extra1         = &min_sched_granularity_ns,
352                 .extra2         = &max_sched_granularity_ns,
353         },
354         {
355                 .procname       = "sched_latency_ns",
356                 .data           = &sysctl_sched_latency,
357                 .maxlen         = sizeof(unsigned int),
358                 .mode           = 0644,
359                 .proc_handler   = sched_proc_update_handler,
360                 .extra1         = &min_sched_granularity_ns,
361                 .extra2         = &max_sched_granularity_ns,
362         },
363         {
364                 .procname       = "sched_wakeup_granularity_ns",
365                 .data           = &sysctl_sched_wakeup_granularity,
366                 .maxlen         = sizeof(unsigned int),
367                 .mode           = 0644,
368                 .proc_handler   = sched_proc_update_handler,
369                 .extra1         = &min_wakeup_granularity_ns,
370                 .extra2         = &max_wakeup_granularity_ns,
371         },
372 #ifdef CONFIG_SMP
373         {
374                 .procname       = "sched_tunable_scaling",
375                 .data           = &sysctl_sched_tunable_scaling,
376                 .maxlen         = sizeof(enum sched_tunable_scaling),
377                 .mode           = 0644,
378                 .proc_handler   = sched_proc_update_handler,
379                 .extra1         = &min_sched_tunable_scaling,
380                 .extra2         = &max_sched_tunable_scaling,
381         },
382         {
383                 .procname       = "sched_migration_cost_ns",
384                 .data           = &sysctl_sched_migration_cost,
385                 .maxlen         = sizeof(unsigned int),
386                 .mode           = 0644,
387                 .proc_handler   = proc_dointvec,
388         },
389         {
390                 .procname       = "sched_nr_migrate",
391                 .data           = &sysctl_sched_nr_migrate,
392                 .maxlen         = sizeof(unsigned int),
393                 .mode           = 0644,
394                 .proc_handler   = proc_dointvec,
395         },
396         {
397                 .procname       = "sched_time_avg_ms",
398                 .data           = &sysctl_sched_time_avg,
399                 .maxlen         = sizeof(unsigned int),
400                 .mode           = 0644,
401                 .proc_handler   = proc_dointvec_minmax,
402                 .extra1         = &one,
403         },
404 #ifdef CONFIG_SCHEDSTATS
405         {
406                 .procname       = "sched_schedstats",
407                 .data           = NULL,
408                 .maxlen         = sizeof(unsigned int),
409                 .mode           = 0644,
410                 .proc_handler   = sysctl_schedstats,
411                 .extra1         = &zero,
412                 .extra2         = &one,
413         },
414 #endif /* CONFIG_SCHEDSTATS */
415 #endif /* CONFIG_SMP */
416 #ifdef CONFIG_NUMA_BALANCING
417         {
418                 .procname       = "numa_balancing_scan_delay_ms",
419                 .data           = &sysctl_numa_balancing_scan_delay,
420                 .maxlen         = sizeof(unsigned int),
421                 .mode           = 0644,
422                 .proc_handler   = proc_dointvec,
423         },
424         {
425                 .procname       = "numa_balancing_scan_period_min_ms",
426                 .data           = &sysctl_numa_balancing_scan_period_min,
427                 .maxlen         = sizeof(unsigned int),
428                 .mode           = 0644,
429                 .proc_handler   = proc_dointvec,
430         },
431         {
432                 .procname       = "numa_balancing_scan_period_max_ms",
433                 .data           = &sysctl_numa_balancing_scan_period_max,
434                 .maxlen         = sizeof(unsigned int),
435                 .mode           = 0644,
436                 .proc_handler   = proc_dointvec,
437         },
438         {
439                 .procname       = "numa_balancing_scan_size_mb",
440                 .data           = &sysctl_numa_balancing_scan_size,
441                 .maxlen         = sizeof(unsigned int),
442                 .mode           = 0644,
443                 .proc_handler   = proc_dointvec_minmax,
444                 .extra1         = &one,
445         },
446         {
447                 .procname       = "numa_balancing",
448                 .data           = NULL, /* filled in by handler */
449                 .maxlen         = sizeof(unsigned int),
450                 .mode           = 0644,
451                 .proc_handler   = sysctl_numa_balancing,
452                 .extra1         = &zero,
453                 .extra2         = &one,
454         },
455 #endif /* CONFIG_NUMA_BALANCING */
456 #endif /* CONFIG_SCHED_DEBUG */
457         {
458                 .procname       = "sched_rt_period_us",
459                 .data           = &sysctl_sched_rt_period,
460                 .maxlen         = sizeof(unsigned int),
461                 .mode           = 0644,
462                 .proc_handler   = sched_rt_handler,
463         },
464         {
465                 .procname       = "sched_rt_runtime_us",
466                 .data           = &sysctl_sched_rt_runtime,
467                 .maxlen         = sizeof(int),
468                 .mode           = 0644,
469                 .proc_handler   = sched_rt_handler,
470         },
471         {
472                 .procname       = "sched_rr_timeslice_ms",
473                 .data           = &sysctl_sched_rr_timeslice,
474                 .maxlen         = sizeof(int),
475                 .mode           = 0644,
476                 .proc_handler   = sched_rr_handler,
477         },
478 #ifdef CONFIG_SCHED_AUTOGROUP
479         {
480                 .procname       = "sched_autogroup_enabled",
481                 .data           = &sysctl_sched_autogroup_enabled,
482                 .maxlen         = sizeof(unsigned int),
483                 .mode           = 0644,
484                 .proc_handler   = proc_dointvec_minmax,
485                 .extra1         = &zero,
486                 .extra2         = &one,
487         },
488 #endif
489 #ifdef CONFIG_CFS_BANDWIDTH
490         {
491                 .procname       = "sched_cfs_bandwidth_slice_us",
492                 .data           = &sysctl_sched_cfs_bandwidth_slice,
493                 .maxlen         = sizeof(unsigned int),
494                 .mode           = 0644,
495                 .proc_handler   = proc_dointvec_minmax,
496                 .extra1         = &one,
497         },
498 #endif
499 #ifdef CONFIG_PROVE_LOCKING
500         {
501                 .procname       = "prove_locking",
502                 .data           = &prove_locking,
503                 .maxlen         = sizeof(int),
504                 .mode           = 0644,
505                 .proc_handler   = proc_dointvec,
506         },
507 #endif
508 #ifdef CONFIG_LOCK_STAT
509         {
510                 .procname       = "lock_stat",
511                 .data           = &lock_stat,
512                 .maxlen         = sizeof(int),
513                 .mode           = 0644,
514                 .proc_handler   = proc_dointvec,
515         },
516 #endif
517         {
518                 .procname       = "panic",
519                 .data           = &panic_timeout,
520                 .maxlen         = sizeof(int),
521                 .mode           = 0644,
522                 .proc_handler   = proc_dointvec,
523         },
524 #ifdef CONFIG_COREDUMP
525         {
526                 .procname       = "core_uses_pid",
527                 .data           = &core_uses_pid,
528                 .maxlen         = sizeof(int),
529                 .mode           = 0644,
530                 .proc_handler   = proc_dointvec,
531         },
532         {
533                 .procname       = "core_pattern",
534                 .data           = core_pattern,
535                 .maxlen         = CORENAME_MAX_SIZE,
536                 .mode           = 0644,
537                 .proc_handler   = proc_dostring_coredump,
538         },
539         {
540                 .procname       = "core_pipe_limit",
541                 .data           = &core_pipe_limit,
542                 .maxlen         = sizeof(unsigned int),
543                 .mode           = 0644,
544                 .proc_handler   = proc_dointvec,
545         },
546 #endif
547 #ifdef CONFIG_PROC_SYSCTL
548         {
549                 .procname       = "tainted",
550                 .maxlen         = sizeof(long),
551                 .mode           = 0644,
552                 .proc_handler   = proc_taint,
553         },
554         {
555                 .procname       = "sysctl_writes_strict",
556                 .data           = &sysctl_writes_strict,
557                 .maxlen         = sizeof(int),
558                 .mode           = 0644,
559                 .proc_handler   = proc_dointvec_minmax,
560                 .extra1         = &neg_one,
561                 .extra2         = &one,
562         },
563 #endif
564 #ifdef CONFIG_LATENCYTOP
565         {
566                 .procname       = "latencytop",
567                 .data           = &latencytop_enabled,
568                 .maxlen         = sizeof(int),
569                 .mode           = 0644,
570                 .proc_handler   = sysctl_latencytop,
571         },
572 #endif
573 #ifdef CONFIG_BLK_DEV_INITRD
574         {
575                 .procname       = "real-root-dev",
576                 .data           = &real_root_dev,
577                 .maxlen         = sizeof(int),
578                 .mode           = 0644,
579                 .proc_handler   = proc_dointvec,
580         },
581 #endif
582         {
583                 .procname       = "print-fatal-signals",
584                 .data           = &print_fatal_signals,
585                 .maxlen         = sizeof(int),
586                 .mode           = 0644,
587                 .proc_handler   = proc_dointvec,
588         },
589 #ifdef CONFIG_SPARC
590         {
591                 .procname       = "reboot-cmd",
592                 .data           = reboot_command,
593                 .maxlen         = 256,
594                 .mode           = 0644,
595                 .proc_handler   = proc_dostring,
596         },
597         {
598                 .procname       = "stop-a",
599                 .data           = &stop_a_enabled,
600                 .maxlen         = sizeof (int),
601                 .mode           = 0644,
602                 .proc_handler   = proc_dointvec,
603         },
604         {
605                 .procname       = "scons-poweroff",
606                 .data           = &scons_pwroff,
607                 .maxlen         = sizeof (int),
608                 .mode           = 0644,
609                 .proc_handler   = proc_dointvec,
610         },
611 #endif
612 #ifdef CONFIG_SPARC64
613         {
614                 .procname       = "tsb-ratio",
615                 .data           = &sysctl_tsb_ratio,
616                 .maxlen         = sizeof (int),
617                 .mode           = 0644,
618                 .proc_handler   = proc_dointvec,
619         },
620 #endif
621 #ifdef __hppa__
622         {
623                 .procname       = "soft-power",
624                 .data           = &pwrsw_enabled,
625                 .maxlen         = sizeof (int),
626                 .mode           = 0644,
627                 .proc_handler   = proc_dointvec,
628         },
629 #endif
630 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
631         {
632                 .procname       = "unaligned-trap",
633                 .data           = &unaligned_enabled,
634                 .maxlen         = sizeof (int),
635                 .mode           = 0644,
636                 .proc_handler   = proc_dointvec,
637         },
638 #endif
639         {
640                 .procname       = "ctrl-alt-del",
641                 .data           = &C_A_D,
642                 .maxlen         = sizeof(int),
643                 .mode           = 0644,
644                 .proc_handler   = proc_dointvec,
645         },
646 #ifdef CONFIG_FUNCTION_TRACER
647         {
648                 .procname       = "ftrace_enabled",
649                 .data           = &ftrace_enabled,
650                 .maxlen         = sizeof(int),
651                 .mode           = 0644,
652                 .proc_handler   = ftrace_enable_sysctl,
653         },
654 #endif
655 #ifdef CONFIG_STACK_TRACER
656         {
657                 .procname       = "stack_tracer_enabled",
658                 .data           = &stack_tracer_enabled,
659                 .maxlen         = sizeof(int),
660                 .mode           = 0644,
661                 .proc_handler   = stack_trace_sysctl,
662         },
663 #endif
664 #ifdef CONFIG_TRACING
665         {
666                 .procname       = "ftrace_dump_on_oops",
667                 .data           = &ftrace_dump_on_oops,
668                 .maxlen         = sizeof(int),
669                 .mode           = 0644,
670                 .proc_handler   = proc_dointvec,
671         },
672         {
673                 .procname       = "traceoff_on_warning",
674                 .data           = &__disable_trace_on_warning,
675                 .maxlen         = sizeof(__disable_trace_on_warning),
676                 .mode           = 0644,
677                 .proc_handler   = proc_dointvec,
678         },
679         {
680                 .procname       = "tracepoint_printk",
681                 .data           = &tracepoint_printk,
682                 .maxlen         = sizeof(tracepoint_printk),
683                 .mode           = 0644,
684                 .proc_handler   = tracepoint_printk_sysctl,
685         },
686 #endif
687 #ifdef CONFIG_KEXEC_CORE
688         {
689                 .procname       = "kexec_load_disabled",
690                 .data           = &kexec_load_disabled,
691                 .maxlen         = sizeof(int),
692                 .mode           = 0644,
693                 /* only handle a transition from default "0" to "1" */
694                 .proc_handler   = proc_dointvec_minmax,
695                 .extra1         = &one,
696                 .extra2         = &one,
697         },
698 #endif
699 #ifdef CONFIG_MODULES
700         {
701                 .procname       = "modprobe",
702                 .data           = &modprobe_path,
703                 .maxlen         = KMOD_PATH_LEN,
704                 .mode           = 0644,
705                 .proc_handler   = proc_dostring,
706         },
707         {
708                 .procname       = "modules_disabled",
709                 .data           = &modules_disabled,
710                 .maxlen         = sizeof(int),
711                 .mode           = 0644,
712                 /* only handle a transition from default "0" to "1" */
713                 .proc_handler   = proc_dointvec_minmax,
714                 .extra1         = &one,
715                 .extra2         = &one,
716         },
717 #endif
718 #ifdef CONFIG_UEVENT_HELPER
719         {
720                 .procname       = "hotplug",
721                 .data           = &uevent_helper,
722                 .maxlen         = UEVENT_HELPER_PATH_LEN,
723                 .mode           = 0644,
724                 .proc_handler   = proc_dostring,
725         },
726 #endif
727 #ifdef CONFIG_CHR_DEV_SG
728         {
729                 .procname       = "sg-big-buff",
730                 .data           = &sg_big_buff,
731                 .maxlen         = sizeof (int),
732                 .mode           = 0444,
733                 .proc_handler   = proc_dointvec,
734         },
735 #endif
736 #ifdef CONFIG_BSD_PROCESS_ACCT
737         {
738                 .procname       = "acct",
739                 .data           = &acct_parm,
740                 .maxlen         = 3*sizeof(int),
741                 .mode           = 0644,
742                 .proc_handler   = proc_dointvec,
743         },
744 #endif
745 #ifdef CONFIG_MAGIC_SYSRQ
746         {
747                 .procname       = "sysrq",
748                 .data           = &__sysrq_enabled,
749                 .maxlen         = sizeof (int),
750                 .mode           = 0644,
751                 .proc_handler   = sysrq_sysctl_handler,
752         },
753 #endif
754 #ifdef CONFIG_PROC_SYSCTL
755         {
756                 .procname       = "cad_pid",
757                 .data           = NULL,
758                 .maxlen         = sizeof (int),
759                 .mode           = 0600,
760                 .proc_handler   = proc_do_cad_pid,
761         },
762 #endif
763         {
764                 .procname       = "threads-max",
765                 .data           = NULL,
766                 .maxlen         = sizeof(int),
767                 .mode           = 0644,
768                 .proc_handler   = sysctl_max_threads,
769         },
770         {
771                 .procname       = "random",
772                 .mode           = 0555,
773                 .child          = random_table,
774         },
775         {
776                 .procname       = "usermodehelper",
777                 .mode           = 0555,
778                 .child          = usermodehelper_table,
779         },
780         {
781                 .procname       = "overflowuid",
782                 .data           = &overflowuid,
783                 .maxlen         = sizeof(int),
784                 .mode           = 0644,
785                 .proc_handler   = proc_dointvec_minmax,
786                 .extra1         = &minolduid,
787                 .extra2         = &maxolduid,
788         },
789         {
790                 .procname       = "overflowgid",
791                 .data           = &overflowgid,
792                 .maxlen         = sizeof(int),
793                 .mode           = 0644,
794                 .proc_handler   = proc_dointvec_minmax,
795                 .extra1         = &minolduid,
796                 .extra2         = &maxolduid,
797         },
798 #ifdef CONFIG_S390
799 #ifdef CONFIG_MATHEMU
800         {
801                 .procname       = "ieee_emulation_warnings",
802                 .data           = &sysctl_ieee_emulation_warnings,
803                 .maxlen         = sizeof(int),
804                 .mode           = 0644,
805                 .proc_handler   = proc_dointvec,
806         },
807 #endif
808         {
809                 .procname       = "userprocess_debug",
810                 .data           = &show_unhandled_signals,
811                 .maxlen         = sizeof(int),
812                 .mode           = 0644,
813                 .proc_handler   = proc_dointvec,
814         },
815 #endif
816         {
817                 .procname       = "pid_max",
818                 .data           = &pid_max,
819                 .maxlen         = sizeof (int),
820                 .mode           = 0644,
821                 .proc_handler   = proc_dointvec_minmax,
822                 .extra1         = &pid_max_min,
823                 .extra2         = &pid_max_max,
824         },
825         {
826                 .procname       = "panic_on_oops",
827                 .data           = &panic_on_oops,
828                 .maxlen         = sizeof(int),
829                 .mode           = 0644,
830                 .proc_handler   = proc_dointvec,
831         },
832 #if defined CONFIG_PRINTK
833         {
834                 .procname       = "printk",
835                 .data           = &console_loglevel,
836                 .maxlen         = 4*sizeof(int),
837                 .mode           = 0644,
838                 .proc_handler   = proc_dointvec,
839         },
840         {
841                 .procname       = "printk_ratelimit",
842                 .data           = &printk_ratelimit_state.interval,
843                 .maxlen         = sizeof(int),
844                 .mode           = 0644,
845                 .proc_handler   = proc_dointvec_jiffies,
846         },
847         {
848                 .procname       = "printk_ratelimit_burst",
849                 .data           = &printk_ratelimit_state.burst,
850                 .maxlen         = sizeof(int),
851                 .mode           = 0644,
852                 .proc_handler   = proc_dointvec,
853         },
854         {
855                 .procname       = "printk_delay",
856                 .data           = &printk_delay_msec,
857                 .maxlen         = sizeof(int),
858                 .mode           = 0644,
859                 .proc_handler   = proc_dointvec_minmax,
860                 .extra1         = &zero,
861                 .extra2         = &ten_thousand,
862         },
863         {
864                 .procname       = "printk_devkmsg",
865                 .data           = devkmsg_log_str,
866                 .maxlen         = DEVKMSG_STR_MAX_SIZE,
867                 .mode           = 0644,
868                 .proc_handler   = devkmsg_sysctl_set_loglvl,
869         },
870         {
871                 .procname       = "dmesg_restrict",
872                 .data           = &dmesg_restrict,
873                 .maxlen         = sizeof(int),
874                 .mode           = 0644,
875                 .proc_handler   = proc_dointvec_minmax_sysadmin,
876                 .extra1         = &zero,
877                 .extra2         = &one,
878         },
879         {
880                 .procname       = "kptr_restrict",
881                 .data           = &kptr_restrict,
882                 .maxlen         = sizeof(int),
883                 .mode           = 0644,
884                 .proc_handler   = proc_dointvec_minmax_sysadmin,
885                 .extra1         = &zero,
886                 .extra2         = &two,
887         },
888 #endif
889         {
890                 .procname       = "ngroups_max",
891                 .data           = &ngroups_max,
892                 .maxlen         = sizeof (int),
893                 .mode           = 0444,
894                 .proc_handler   = proc_dointvec,
895         },
896         {
897                 .procname       = "cap_last_cap",
898                 .data           = (void *)&cap_last_cap,
899                 .maxlen         = sizeof(int),
900                 .mode           = 0444,
901                 .proc_handler   = proc_dointvec,
902         },
903 #if defined(CONFIG_LOCKUP_DETECTOR)
904         {
905                 .procname       = "watchdog",
906                 .data           = &watchdog_user_enabled,
907                 .maxlen         = sizeof(int),
908                 .mode           = 0644,
909                 .proc_handler   = proc_watchdog,
910                 .extra1         = &zero,
911                 .extra2         = &one,
912         },
913         {
914                 .procname       = "watchdog_thresh",
915                 .data           = &watchdog_thresh,
916                 .maxlen         = sizeof(int),
917                 .mode           = 0644,
918                 .proc_handler   = proc_watchdog_thresh,
919                 .extra1         = &zero,
920                 .extra2         = &sixty,
921         },
922         {
923                 .procname       = "nmi_watchdog",
924                 .data           = &nmi_watchdog_user_enabled,
925                 .maxlen         = sizeof(int),
926                 .mode           = NMI_WATCHDOG_SYSCTL_PERM,
927                 .proc_handler   = proc_nmi_watchdog,
928                 .extra1         = &zero,
929                 .extra2         = &one,
930         },
931         {
932                 .procname       = "watchdog_cpumask",
933                 .data           = &watchdog_cpumask_bits,
934                 .maxlen         = NR_CPUS,
935                 .mode           = 0644,
936                 .proc_handler   = proc_watchdog_cpumask,
937         },
938 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
939         {
940                 .procname       = "soft_watchdog",
941                 .data           = &soft_watchdog_user_enabled,
942                 .maxlen         = sizeof(int),
943                 .mode           = 0644,
944                 .proc_handler   = proc_soft_watchdog,
945                 .extra1         = &zero,
946                 .extra2         = &one,
947         },
948         {
949                 .procname       = "softlockup_panic",
950                 .data           = &softlockup_panic,
951                 .maxlen         = sizeof(int),
952                 .mode           = 0644,
953                 .proc_handler   = proc_dointvec_minmax,
954                 .extra1         = &zero,
955                 .extra2         = &one,
956         },
957 #ifdef CONFIG_SMP
958         {
959                 .procname       = "softlockup_all_cpu_backtrace",
960                 .data           = &sysctl_softlockup_all_cpu_backtrace,
961                 .maxlen         = sizeof(int),
962                 .mode           = 0644,
963                 .proc_handler   = proc_dointvec_minmax,
964                 .extra1         = &zero,
965                 .extra2         = &one,
966         },
967 #endif /* CONFIG_SMP */
968 #endif
969 #ifdef CONFIG_HARDLOCKUP_DETECTOR
970         {
971                 .procname       = "hardlockup_panic",
972                 .data           = &hardlockup_panic,
973                 .maxlen         = sizeof(int),
974                 .mode           = 0644,
975                 .proc_handler   = proc_dointvec_minmax,
976                 .extra1         = &zero,
977                 .extra2         = &one,
978         },
979 #ifdef CONFIG_SMP
980         {
981                 .procname       = "hardlockup_all_cpu_backtrace",
982                 .data           = &sysctl_hardlockup_all_cpu_backtrace,
983                 .maxlen         = sizeof(int),
984                 .mode           = 0644,
985                 .proc_handler   = proc_dointvec_minmax,
986                 .extra1         = &zero,
987                 .extra2         = &one,
988         },
989 #endif /* CONFIG_SMP */
990 #endif
991 #endif
992
993 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
994         {
995                 .procname       = "unknown_nmi_panic",
996                 .data           = &unknown_nmi_panic,
997                 .maxlen         = sizeof (int),
998                 .mode           = 0644,
999                 .proc_handler   = proc_dointvec,
1000         },
1001 #endif
1002 #if defined(CONFIG_X86)
1003         {
1004                 .procname       = "panic_on_unrecovered_nmi",
1005                 .data           = &panic_on_unrecovered_nmi,
1006                 .maxlen         = sizeof(int),
1007                 .mode           = 0644,
1008                 .proc_handler   = proc_dointvec,
1009         },
1010         {
1011                 .procname       = "panic_on_io_nmi",
1012                 .data           = &panic_on_io_nmi,
1013                 .maxlen         = sizeof(int),
1014                 .mode           = 0644,
1015                 .proc_handler   = proc_dointvec,
1016         },
1017 #ifdef CONFIG_DEBUG_STACKOVERFLOW
1018         {
1019                 .procname       = "panic_on_stackoverflow",
1020                 .data           = &sysctl_panic_on_stackoverflow,
1021                 .maxlen         = sizeof(int),
1022                 .mode           = 0644,
1023                 .proc_handler   = proc_dointvec,
1024         },
1025 #endif
1026         {
1027                 .procname       = "bootloader_type",
1028                 .data           = &bootloader_type,
1029                 .maxlen         = sizeof (int),
1030                 .mode           = 0444,
1031                 .proc_handler   = proc_dointvec,
1032         },
1033         {
1034                 .procname       = "bootloader_version",
1035                 .data           = &bootloader_version,
1036                 .maxlen         = sizeof (int),
1037                 .mode           = 0444,
1038                 .proc_handler   = proc_dointvec,
1039         },
1040         {
1041                 .procname       = "io_delay_type",
1042                 .data           = &io_delay_type,
1043                 .maxlen         = sizeof(int),
1044                 .mode           = 0644,
1045                 .proc_handler   = proc_dointvec,
1046         },
1047 #endif
1048 #if defined(CONFIG_MMU)
1049         {
1050                 .procname       = "randomize_va_space",
1051                 .data           = &randomize_va_space,
1052                 .maxlen         = sizeof(int),
1053                 .mode           = 0644,
1054                 .proc_handler   = proc_dointvec,
1055         },
1056 #endif
1057 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
1058         {
1059                 .procname       = "spin_retry",
1060                 .data           = &spin_retry,
1061                 .maxlen         = sizeof (int),
1062                 .mode           = 0644,
1063                 .proc_handler   = proc_dointvec,
1064         },
1065 #endif
1066 #if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1067         {
1068                 .procname       = "acpi_video_flags",
1069                 .data           = &acpi_realmode_flags,
1070                 .maxlen         = sizeof (unsigned long),
1071                 .mode           = 0644,
1072                 .proc_handler   = proc_doulongvec_minmax,
1073         },
1074 #endif
1075 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1076         {
1077                 .procname       = "ignore-unaligned-usertrap",
1078                 .data           = &no_unaligned_warning,
1079                 .maxlen         = sizeof (int),
1080                 .mode           = 0644,
1081                 .proc_handler   = proc_dointvec,
1082         },
1083 #endif
1084 #ifdef CONFIG_IA64
1085         {
1086                 .procname       = "unaligned-dump-stack",
1087                 .data           = &unaligned_dump_stack,
1088                 .maxlen         = sizeof (int),
1089                 .mode           = 0644,
1090                 .proc_handler   = proc_dointvec,
1091         },
1092 #endif
1093 #ifdef CONFIG_DETECT_HUNG_TASK
1094         {
1095                 .procname       = "hung_task_panic",
1096                 .data           = &sysctl_hung_task_panic,
1097                 .maxlen         = sizeof(int),
1098                 .mode           = 0644,
1099                 .proc_handler   = proc_dointvec_minmax,
1100                 .extra1         = &zero,
1101                 .extra2         = &one,
1102         },
1103         {
1104                 .procname       = "hung_task_check_count",
1105                 .data           = &sysctl_hung_task_check_count,
1106                 .maxlen         = sizeof(int),
1107                 .mode           = 0644,
1108                 .proc_handler   = proc_dointvec_minmax,
1109                 .extra1         = &zero,
1110         },
1111         {
1112                 .procname       = "hung_task_timeout_secs",
1113                 .data           = &sysctl_hung_task_timeout_secs,
1114                 .maxlen         = sizeof(unsigned long),
1115                 .mode           = 0644,
1116                 .proc_handler   = proc_dohung_task_timeout_secs,
1117                 .extra2         = &hung_task_timeout_max,
1118         },
1119         {
1120                 .procname       = "hung_task_warnings",
1121                 .data           = &sysctl_hung_task_warnings,
1122                 .maxlen         = sizeof(int),
1123                 .mode           = 0644,
1124                 .proc_handler   = proc_dointvec_minmax,
1125                 .extra1         = &neg_one,
1126         },
1127 #endif
1128 #ifdef CONFIG_RT_MUTEXES
1129         {
1130                 .procname       = "max_lock_depth",
1131                 .data           = &max_lock_depth,
1132                 .maxlen         = sizeof(int),
1133                 .mode           = 0644,
1134                 .proc_handler   = proc_dointvec,
1135         },
1136 #endif
1137         {
1138                 .procname       = "poweroff_cmd",
1139                 .data           = &poweroff_cmd,
1140                 .maxlen         = POWEROFF_CMD_PATH_LEN,
1141                 .mode           = 0644,
1142                 .proc_handler   = proc_dostring,
1143         },
1144 #ifdef CONFIG_KEYS
1145         {
1146                 .procname       = "keys",
1147                 .mode           = 0555,
1148                 .child          = key_sysctls,
1149         },
1150 #endif
1151 #ifdef CONFIG_PERF_EVENTS
1152         /*
1153          * User-space scripts rely on the existence of this file
1154          * as a feature check for perf_events being enabled.
1155          *
1156          * So it's an ABI, do not remove!
1157          */
1158         {
1159                 .procname       = "perf_event_paranoid",
1160                 .data           = &sysctl_perf_event_paranoid,
1161                 .maxlen         = sizeof(sysctl_perf_event_paranoid),
1162                 .mode           = 0644,
1163                 .proc_handler   = proc_dointvec,
1164         },
1165         {
1166                 .procname       = "perf_event_mlock_kb",
1167                 .data           = &sysctl_perf_event_mlock,
1168                 .maxlen         = sizeof(sysctl_perf_event_mlock),
1169                 .mode           = 0644,
1170                 .proc_handler   = proc_dointvec,
1171         },
1172         {
1173                 .procname       = "perf_event_max_sample_rate",
1174                 .data           = &sysctl_perf_event_sample_rate,
1175                 .maxlen         = sizeof(sysctl_perf_event_sample_rate),
1176                 .mode           = 0644,
1177                 .proc_handler   = perf_proc_update_handler,
1178                 .extra1         = &one,
1179         },
1180         {
1181                 .procname       = "perf_cpu_time_max_percent",
1182                 .data           = &sysctl_perf_cpu_time_max_percent,
1183                 .maxlen         = sizeof(sysctl_perf_cpu_time_max_percent),
1184                 .mode           = 0644,
1185                 .proc_handler   = perf_cpu_time_max_percent_handler,
1186                 .extra1         = &zero,
1187                 .extra2         = &one_hundred,
1188         },
1189         {
1190                 .procname       = "perf_event_max_stack",
1191                 .data           = &sysctl_perf_event_max_stack,
1192                 .maxlen         = sizeof(sysctl_perf_event_max_stack),
1193                 .mode           = 0644,
1194                 .proc_handler   = perf_event_max_stack_handler,
1195                 .extra1         = &zero,
1196                 .extra2         = &six_hundred_forty_kb,
1197         },
1198         {
1199                 .procname       = "perf_event_max_contexts_per_stack",
1200                 .data           = &sysctl_perf_event_max_contexts_per_stack,
1201                 .maxlen         = sizeof(sysctl_perf_event_max_contexts_per_stack),
1202                 .mode           = 0644,
1203                 .proc_handler   = perf_event_max_stack_handler,
1204                 .extra1         = &zero,
1205                 .extra2         = &one_thousand,
1206         },
1207 #endif
1208         {
1209                 .procname       = "panic_on_warn",
1210                 .data           = &panic_on_warn,
1211                 .maxlen         = sizeof(int),
1212                 .mode           = 0644,
1213                 .proc_handler   = proc_dointvec_minmax,
1214                 .extra1         = &zero,
1215                 .extra2         = &one,
1216         },
1217 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1218         {
1219                 .procname       = "timer_migration",
1220                 .data           = &sysctl_timer_migration,
1221                 .maxlen         = sizeof(unsigned int),
1222                 .mode           = 0644,
1223                 .proc_handler   = timer_migration_handler,
1224                 .extra1         = &zero,
1225                 .extra2         = &one,
1226         },
1227 #endif
1228 #ifdef CONFIG_BPF_SYSCALL
1229         {
1230                 .procname       = "unprivileged_bpf_disabled",
1231                 .data           = &sysctl_unprivileged_bpf_disabled,
1232                 .maxlen         = sizeof(sysctl_unprivileged_bpf_disabled),
1233                 .mode           = 0644,
1234                 .proc_handler   = bpf_unpriv_handler,
1235                 .extra1         = &zero,
1236                 .extra2         = &two,
1237         },
1238 #endif
1239 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
1240         {
1241                 .procname       = "panic_on_rcu_stall",
1242                 .data           = &sysctl_panic_on_rcu_stall,
1243                 .maxlen         = sizeof(sysctl_panic_on_rcu_stall),
1244                 .mode           = 0644,
1245                 .proc_handler   = proc_dointvec_minmax,
1246                 .extra1         = &zero,
1247                 .extra2         = &one,
1248         },
1249 #endif
1250         { }
1251 };
1252
1253 static struct ctl_table vm_table[] = {
1254         {
1255                 .procname       = "overcommit_memory",
1256                 .data           = &sysctl_overcommit_memory,
1257                 .maxlen         = sizeof(sysctl_overcommit_memory),
1258                 .mode           = 0644,
1259                 .proc_handler   = proc_dointvec_minmax,
1260                 .extra1         = &zero,
1261                 .extra2         = &two,
1262         },
1263         {
1264                 .procname       = "panic_on_oom",
1265                 .data           = &sysctl_panic_on_oom,
1266                 .maxlen         = sizeof(sysctl_panic_on_oom),
1267                 .mode           = 0644,
1268                 .proc_handler   = proc_dointvec_minmax,
1269                 .extra1         = &zero,
1270                 .extra2         = &two,
1271         },
1272         {
1273                 .procname       = "oom_kill_allocating_task",
1274                 .data           = &sysctl_oom_kill_allocating_task,
1275                 .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
1276                 .mode           = 0644,
1277                 .proc_handler   = proc_dointvec,
1278         },
1279         {
1280                 .procname       = "oom_dump_tasks",
1281                 .data           = &sysctl_oom_dump_tasks,
1282                 .maxlen         = sizeof(sysctl_oom_dump_tasks),
1283                 .mode           = 0644,
1284                 .proc_handler   = proc_dointvec,
1285         },
1286         {
1287                 .procname       = "overcommit_ratio",
1288                 .data           = &sysctl_overcommit_ratio,
1289                 .maxlen         = sizeof(sysctl_overcommit_ratio),
1290                 .mode           = 0644,
1291                 .proc_handler   = overcommit_ratio_handler,
1292         },
1293         {
1294                 .procname       = "overcommit_kbytes",
1295                 .data           = &sysctl_overcommit_kbytes,
1296                 .maxlen         = sizeof(sysctl_overcommit_kbytes),
1297                 .mode           = 0644,
1298                 .proc_handler   = overcommit_kbytes_handler,
1299         },
1300         {
1301                 .procname       = "page-cluster", 
1302                 .data           = &page_cluster,
1303                 .maxlen         = sizeof(int),
1304                 .mode           = 0644,
1305                 .proc_handler   = proc_dointvec_minmax,
1306                 .extra1         = &zero,
1307         },
1308         {
1309                 .procname       = "dirty_background_ratio",
1310                 .data           = &dirty_background_ratio,
1311                 .maxlen         = sizeof(dirty_background_ratio),
1312                 .mode           = 0644,
1313                 .proc_handler   = dirty_background_ratio_handler,
1314                 .extra1         = &zero,
1315                 .extra2         = &one_hundred,
1316         },
1317         {
1318                 .procname       = "dirty_background_bytes",
1319                 .data           = &dirty_background_bytes,
1320                 .maxlen         = sizeof(dirty_background_bytes),
1321                 .mode           = 0644,
1322                 .proc_handler   = dirty_background_bytes_handler,
1323                 .extra1         = &one_ul,
1324         },
1325         {
1326                 .procname       = "dirty_ratio",
1327                 .data           = &vm_dirty_ratio,
1328                 .maxlen         = sizeof(vm_dirty_ratio),
1329                 .mode           = 0644,
1330                 .proc_handler   = dirty_ratio_handler,
1331                 .extra1         = &zero,
1332                 .extra2         = &one_hundred,
1333         },
1334         {
1335                 .procname       = "dirty_bytes",
1336                 .data           = &vm_dirty_bytes,
1337                 .maxlen         = sizeof(vm_dirty_bytes),
1338                 .mode           = 0644,
1339                 .proc_handler   = dirty_bytes_handler,
1340                 .extra1         = &dirty_bytes_min,
1341         },
1342         {
1343                 .procname       = "dirty_writeback_centisecs",
1344                 .data           = &dirty_writeback_interval,
1345                 .maxlen         = sizeof(dirty_writeback_interval),
1346                 .mode           = 0644,
1347                 .proc_handler   = dirty_writeback_centisecs_handler,
1348         },
1349         {
1350                 .procname       = "dirty_expire_centisecs",
1351                 .data           = &dirty_expire_interval,
1352                 .maxlen         = sizeof(dirty_expire_interval),
1353                 .mode           = 0644,
1354                 .proc_handler   = proc_dointvec_minmax,
1355                 .extra1         = &zero,
1356         },
1357         {
1358                 .procname       = "dirtytime_expire_seconds",
1359                 .data           = &dirtytime_expire_interval,
1360                 .maxlen         = sizeof(dirty_expire_interval),
1361                 .mode           = 0644,
1362                 .proc_handler   = dirtytime_interval_handler,
1363                 .extra1         = &zero,
1364         },
1365         {
1366                 .procname       = "nr_pdflush_threads",
1367                 .mode           = 0444 /* read-only */,
1368                 .proc_handler   = pdflush_proc_obsolete,
1369         },
1370         {
1371                 .procname       = "swappiness",
1372                 .data           = &vm_swappiness,
1373                 .maxlen         = sizeof(vm_swappiness),
1374                 .mode           = 0644,
1375                 .proc_handler   = proc_dointvec_minmax,
1376                 .extra1         = &zero,
1377                 .extra2         = &one_hundred,
1378         },
1379 #ifdef CONFIG_HUGETLB_PAGE
1380         {
1381                 .procname       = "nr_hugepages",
1382                 .data           = NULL,
1383                 .maxlen         = sizeof(unsigned long),
1384                 .mode           = 0644,
1385                 .proc_handler   = hugetlb_sysctl_handler,
1386         },
1387 #ifdef CONFIG_NUMA
1388         {
1389                 .procname       = "nr_hugepages_mempolicy",
1390                 .data           = NULL,
1391                 .maxlen         = sizeof(unsigned long),
1392                 .mode           = 0644,
1393                 .proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1394         },
1395 #endif
1396          {
1397                 .procname       = "hugetlb_shm_group",
1398                 .data           = &sysctl_hugetlb_shm_group,
1399                 .maxlen         = sizeof(gid_t),
1400                 .mode           = 0644,
1401                 .proc_handler   = proc_dointvec,
1402          },
1403          {
1404                 .procname       = "hugepages_treat_as_movable",
1405                 .data           = &hugepages_treat_as_movable,
1406                 .maxlen         = sizeof(int),
1407                 .mode           = 0644,
1408                 .proc_handler   = proc_dointvec,
1409         },
1410         {
1411                 .procname       = "nr_overcommit_hugepages",
1412                 .data           = NULL,
1413                 .maxlen         = sizeof(unsigned long),
1414                 .mode           = 0644,
1415                 .proc_handler   = hugetlb_overcommit_handler,
1416         },
1417 #endif
1418         {
1419                 .procname       = "lowmem_reserve_ratio",
1420                 .data           = &sysctl_lowmem_reserve_ratio,
1421                 .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
1422                 .mode           = 0644,
1423                 .proc_handler   = lowmem_reserve_ratio_sysctl_handler,
1424         },
1425         {
1426                 .procname       = "drop_caches",
1427                 .data           = &sysctl_drop_caches,
1428                 .maxlen         = sizeof(int),
1429                 .mode           = 0200,
1430                 .proc_handler   = drop_caches_sysctl_handler,
1431                 .extra1         = &one,
1432                 .extra2         = &four,
1433         },
1434 #ifdef CONFIG_COMPACTION
1435         {
1436                 .procname       = "compact_memory",
1437                 .data           = &sysctl_compact_memory,
1438                 .maxlen         = sizeof(int),
1439                 .mode           = 0200,
1440                 .proc_handler   = sysctl_compaction_handler,
1441         },
1442         {
1443                 .procname       = "extfrag_threshold",
1444                 .data           = &sysctl_extfrag_threshold,
1445                 .maxlen         = sizeof(int),
1446                 .mode           = 0644,
1447                 .proc_handler   = sysctl_extfrag_handler,
1448                 .extra1         = &min_extfrag_threshold,
1449                 .extra2         = &max_extfrag_threshold,
1450         },
1451         {
1452                 .procname       = "compact_unevictable_allowed",
1453                 .data           = &sysctl_compact_unevictable_allowed,
1454                 .maxlen         = sizeof(int),
1455                 .mode           = 0644,
1456                 .proc_handler   = proc_dointvec,
1457                 .extra1         = &zero,
1458                 .extra2         = &one,
1459         },
1460
1461 #endif /* CONFIG_COMPACTION */
1462         {
1463                 .procname       = "min_free_kbytes",
1464                 .data           = &min_free_kbytes,
1465                 .maxlen         = sizeof(min_free_kbytes),
1466                 .mode           = 0644,
1467                 .proc_handler   = min_free_kbytes_sysctl_handler,
1468                 .extra1         = &zero,
1469         },
1470         {
1471                 .procname       = "watermark_scale_factor",
1472                 .data           = &watermark_scale_factor,
1473                 .maxlen         = sizeof(watermark_scale_factor),
1474                 .mode           = 0644,
1475                 .proc_handler   = watermark_scale_factor_sysctl_handler,
1476                 .extra1         = &one,
1477                 .extra2         = &one_thousand,
1478         },
1479         {
1480                 .procname       = "percpu_pagelist_fraction",
1481                 .data           = &percpu_pagelist_fraction,
1482                 .maxlen         = sizeof(percpu_pagelist_fraction),
1483                 .mode           = 0644,
1484                 .proc_handler   = percpu_pagelist_fraction_sysctl_handler,
1485                 .extra1         = &zero,
1486         },
1487 #ifdef CONFIG_MMU
1488         {
1489                 .procname       = "max_map_count",
1490                 .data           = &sysctl_max_map_count,
1491                 .maxlen         = sizeof(sysctl_max_map_count),
1492                 .mode           = 0644,
1493                 .proc_handler   = proc_dointvec_minmax,
1494                 .extra1         = &zero,
1495         },
1496 #else
1497         {
1498                 .procname       = "nr_trim_pages",
1499                 .data           = &sysctl_nr_trim_pages,
1500                 .maxlen         = sizeof(sysctl_nr_trim_pages),
1501                 .mode           = 0644,
1502                 .proc_handler   = proc_dointvec_minmax,
1503                 .extra1         = &zero,
1504         },
1505 #endif
1506         {
1507                 .procname       = "laptop_mode",
1508                 .data           = &laptop_mode,
1509                 .maxlen         = sizeof(laptop_mode),
1510                 .mode           = 0644,
1511                 .proc_handler   = proc_dointvec_jiffies,
1512         },
1513         {
1514                 .procname       = "block_dump",
1515                 .data           = &block_dump,
1516                 .maxlen         = sizeof(block_dump),
1517                 .mode           = 0644,
1518                 .proc_handler   = proc_dointvec,
1519                 .extra1         = &zero,
1520         },
1521         {
1522                 .procname       = "vfs_cache_pressure",
1523                 .data           = &sysctl_vfs_cache_pressure,
1524                 .maxlen         = sizeof(sysctl_vfs_cache_pressure),
1525                 .mode           = 0644,
1526                 .proc_handler   = proc_dointvec,
1527                 .extra1         = &zero,
1528         },
1529 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1530         {
1531                 .procname       = "legacy_va_layout",
1532                 .data           = &sysctl_legacy_va_layout,
1533                 .maxlen         = sizeof(sysctl_legacy_va_layout),
1534                 .mode           = 0644,
1535                 .proc_handler   = proc_dointvec,
1536                 .extra1         = &zero,
1537         },
1538 #endif
1539 #ifdef CONFIG_NUMA
1540         {
1541                 .procname       = "zone_reclaim_mode",
1542                 .data           = &node_reclaim_mode,
1543                 .maxlen         = sizeof(node_reclaim_mode),
1544                 .mode           = 0644,
1545                 .proc_handler   = proc_dointvec,
1546                 .extra1         = &zero,
1547         },
1548         {
1549                 .procname       = "min_unmapped_ratio",
1550                 .data           = &sysctl_min_unmapped_ratio,
1551                 .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1552                 .mode           = 0644,
1553                 .proc_handler   = sysctl_min_unmapped_ratio_sysctl_handler,
1554                 .extra1         = &zero,
1555                 .extra2         = &one_hundred,
1556         },
1557         {
1558                 .procname       = "min_slab_ratio",
1559                 .data           = &sysctl_min_slab_ratio,
1560                 .maxlen         = sizeof(sysctl_min_slab_ratio),
1561                 .mode           = 0644,
1562                 .proc_handler   = sysctl_min_slab_ratio_sysctl_handler,
1563                 .extra1         = &zero,
1564                 .extra2         = &one_hundred,
1565         },
1566 #endif
1567 #ifdef CONFIG_SMP
1568         {
1569                 .procname       = "stat_interval",
1570                 .data           = &sysctl_stat_interval,
1571                 .maxlen         = sizeof(sysctl_stat_interval),
1572                 .mode           = 0644,
1573                 .proc_handler   = proc_dointvec_jiffies,
1574         },
1575         {
1576                 .procname       = "stat_refresh",
1577                 .data           = NULL,
1578                 .maxlen         = 0,
1579                 .mode           = 0600,
1580                 .proc_handler   = vmstat_refresh,
1581         },
1582 #endif
1583 #ifdef CONFIG_MMU
1584         {
1585                 .procname       = "mmap_min_addr",
1586                 .data           = &dac_mmap_min_addr,
1587                 .maxlen         = sizeof(unsigned long),
1588                 .mode           = 0644,
1589                 .proc_handler   = mmap_min_addr_handler,
1590         },
1591 #endif
1592 #ifdef CONFIG_NUMA
1593         {
1594                 .procname       = "numa_zonelist_order",
1595                 .data           = &numa_zonelist_order,
1596                 .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1597                 .mode           = 0644,
1598                 .proc_handler   = numa_zonelist_order_handler,
1599         },
1600 #endif
1601 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1602    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1603         {
1604                 .procname       = "vdso_enabled",
1605 #ifdef CONFIG_X86_32
1606                 .data           = &vdso32_enabled,
1607                 .maxlen         = sizeof(vdso32_enabled),
1608 #else
1609                 .data           = &vdso_enabled,
1610                 .maxlen         = sizeof(vdso_enabled),
1611 #endif
1612                 .mode           = 0644,
1613                 .proc_handler   = proc_dointvec,
1614                 .extra1         = &zero,
1615         },
1616 #endif
1617 #ifdef CONFIG_HIGHMEM
1618         {
1619                 .procname       = "highmem_is_dirtyable",
1620                 .data           = &vm_highmem_is_dirtyable,
1621                 .maxlen         = sizeof(vm_highmem_is_dirtyable),
1622                 .mode           = 0644,
1623                 .proc_handler   = proc_dointvec_minmax,
1624                 .extra1         = &zero,
1625                 .extra2         = &one,
1626         },
1627 #endif
1628 #ifdef CONFIG_MEMORY_FAILURE
1629         {
1630                 .procname       = "memory_failure_early_kill",
1631                 .data           = &sysctl_memory_failure_early_kill,
1632                 .maxlen         = sizeof(sysctl_memory_failure_early_kill),
1633                 .mode           = 0644,
1634                 .proc_handler   = proc_dointvec_minmax,
1635                 .extra1         = &zero,
1636                 .extra2         = &one,
1637         },
1638         {
1639                 .procname       = "memory_failure_recovery",
1640                 .data           = &sysctl_memory_failure_recovery,
1641                 .maxlen         = sizeof(sysctl_memory_failure_recovery),
1642                 .mode           = 0644,
1643                 .proc_handler   = proc_dointvec_minmax,
1644                 .extra1         = &zero,
1645                 .extra2         = &one,
1646         },
1647 #endif
1648         {
1649                 .procname       = "user_reserve_kbytes",
1650                 .data           = &sysctl_user_reserve_kbytes,
1651                 .maxlen         = sizeof(sysctl_user_reserve_kbytes),
1652                 .mode           = 0644,
1653                 .proc_handler   = proc_doulongvec_minmax,
1654         },
1655         {
1656                 .procname       = "admin_reserve_kbytes",
1657                 .data           = &sysctl_admin_reserve_kbytes,
1658                 .maxlen         = sizeof(sysctl_admin_reserve_kbytes),
1659                 .mode           = 0644,
1660                 .proc_handler   = proc_doulongvec_minmax,
1661         },
1662 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
1663         {
1664                 .procname       = "mmap_rnd_bits",
1665                 .data           = &mmap_rnd_bits,
1666                 .maxlen         = sizeof(mmap_rnd_bits),
1667                 .mode           = 0600,
1668                 .proc_handler   = proc_dointvec_minmax,
1669                 .extra1         = (void *)&mmap_rnd_bits_min,
1670                 .extra2         = (void *)&mmap_rnd_bits_max,
1671         },
1672 #endif
1673 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
1674         {
1675                 .procname       = "mmap_rnd_compat_bits",
1676                 .data           = &mmap_rnd_compat_bits,
1677                 .maxlen         = sizeof(mmap_rnd_compat_bits),
1678                 .mode           = 0600,
1679                 .proc_handler   = proc_dointvec_minmax,
1680                 .extra1         = (void *)&mmap_rnd_compat_bits_min,
1681                 .extra2         = (void *)&mmap_rnd_compat_bits_max,
1682         },
1683 #endif
1684         { }
1685 };
1686
/*
 * fs_table - sysctl entries for filesystem-related tunables and statistics.
 *
 * Each entry names a file (.procname), the kernel object backing it
 * (.data / .maxlen), its permission bits (.mode) and the handler invoked
 * on access (.proc_handler).  Entries using a *_minmax handler may also set
 * .extra1 / .extra2 -- presumably the lower / upper bound applied to
 * written values (confirm against the handler); a bound that is not listed
 * is simply left unset.  Several entries only exist when the matching
 * CONFIG_* option is enabled.
 */
1687 static struct ctl_table fs_table[] = {
/* "inode-nr" and "inode-state" share inodes_stat; only the exported length differs. */
1688         {
1689                 .procname       = "inode-nr",
1690                 .data           = &inodes_stat,
1691                 .maxlen         = 2*sizeof(long),
1692                 .mode           = 0444,
1693                 .proc_handler   = proc_nr_inodes,
1694         },
1695         {
1696                 .procname       = "inode-state",
1697                 .data           = &inodes_stat,
1698                 .maxlen         = 7*sizeof(long),
1699                 .mode           = 0444,
1700                 .proc_handler   = proc_nr_inodes,
1701         },
/* "file-nr" exposes the whole files_stat object read-only; "file-max" exposes only its max_files member, writable. */
1702         {
1703                 .procname       = "file-nr",
1704                 .data           = &files_stat,
1705                 .maxlen         = sizeof(files_stat),
1706                 .mode           = 0444,
1707                 .proc_handler   = proc_nr_files,
1708         },
1709         {
1710                 .procname       = "file-max",
1711                 .data           = &files_stat.max_files,
1712                 .maxlen         = sizeof(files_stat.max_files),
1713                 .mode           = 0644,
1714                 .proc_handler   = proc_doulongvec_minmax,
1715                 .extra1         = &zero_ul,
1716                 .extra2         = &long_max,
1717         },
/* Bounds for "nr_open" come from variables rather than the shared constants. */
1718         {
1719                 .procname       = "nr_open",
1720                 .data           = &sysctl_nr_open,
1721                 .maxlen         = sizeof(unsigned int),
1722                 .mode           = 0644,
1723                 .proc_handler   = proc_dointvec_minmax,
1724                 .extra1         = &sysctl_nr_open_min,
1725                 .extra2         = &sysctl_nr_open_max,
1726         },
1727         {
1728                 .procname       = "dentry-state",
1729                 .data           = &dentry_stat,
1730                 .maxlen         = 6*sizeof(long),
1731                 .mode           = 0444,
1732                 .proc_handler   = proc_nr_dentry,
1733         },
/* Both overflow IDs use the same minolduid..maxolduid limits. */
1734         {
1735                 .procname       = "overflowuid",
1736                 .data           = &fs_overflowuid,
1737                 .maxlen         = sizeof(int),
1738                 .mode           = 0644,
1739                 .proc_handler   = proc_dointvec_minmax,
1740                 .extra1         = &minolduid,
1741                 .extra2         = &maxolduid,
1742         },
1743         {
1744                 .procname       = "overflowgid",
1745                 .data           = &fs_overflowgid,
1746                 .maxlen         = sizeof(int),
1747                 .mode           = 0644,
1748                 .proc_handler   = proc_dointvec_minmax,
1749                 .extra1         = &minolduid,
1750                 .extra2         = &maxolduid,
1751         },
1752 #ifdef CONFIG_FILE_LOCKING
1753         {
1754                 .procname       = "leases-enable",
1755                 .data           = &leases_enable,
1756                 .maxlen         = sizeof(int),
1757                 .mode           = 0644,
1758                 .proc_handler   = proc_dointvec,
1759         },
1760 #endif
1761 #ifdef CONFIG_DNOTIFY
1762         {
1763                 .procname       = "dir-notify-enable",
1764                 .data           = &dir_notify_enable,
1765                 .maxlen         = sizeof(int),
1766                 .mode           = 0644,
1767                 .proc_handler   = proc_dointvec,
1768         },
1769 #endif
/*
 * Everything from here to the matching #endif (lease-break-time, the aio
 * counters, and the inotify / epoll sub-tables) is only built with
 * CONFIG_MMU, in addition to each entry's own option.
 */
1770 #ifdef CONFIG_MMU
1771 #ifdef CONFIG_FILE_LOCKING
1772         {
1773                 .procname       = "lease-break-time",
1774                 .data           = &lease_break_time,
1775                 .maxlen         = sizeof(int),
1776                 .mode           = 0644,
1777                 .proc_handler   = proc_dointvec,
1778         },
1779 #endif
1780 #ifdef CONFIG_AIO
1781         {
1782                 .procname       = "aio-nr",
1783                 .data           = &aio_nr,
1784                 .maxlen         = sizeof(aio_nr),
1785                 .mode           = 0444,
1786                 .proc_handler   = proc_doulongvec_minmax,
1787         },
1788         {
1789                 .procname       = "aio-max-nr",
1790                 .data           = &aio_max_nr,
1791                 .maxlen         = sizeof(aio_max_nr),
1792                 .mode           = 0644,
1793                 .proc_handler   = proc_doulongvec_minmax,
1794         },
1795 #endif /* CONFIG_AIO */
/* Entries with .child and no .data/.proc_handler refer to another ctl_table (mode 0555). */
1796 #ifdef CONFIG_INOTIFY_USER
1797         {
1798                 .procname       = "inotify",
1799                 .mode           = 0555,
1800                 .child          = inotify_table,
1801         },
1802 #endif  
1803 #ifdef CONFIG_EPOLL
1804         {
1805                 .procname       = "epoll",
1806                 .mode           = 0555,
1807                 .child          = epoll_table,
1808         },
1809 #endif
1810 #endif
/* The protected_* switches are mode 0600: owner read/write only. */
1811         {
1812                 .procname       = "protected_symlinks",
1813                 .data           = &sysctl_protected_symlinks,
1814                 .maxlen         = sizeof(int),
1815                 .mode           = 0600,
1816                 .proc_handler   = proc_dointvec_minmax,
1817                 .extra1         = &zero,
1818                 .extra2         = &one,
1819         },
1820         {
1821                 .procname       = "protected_hardlinks",
1822                 .data           = &sysctl_protected_hardlinks,
1823                 .maxlen         = sizeof(int),
1824                 .mode           = 0600,
1825                 .proc_handler   = proc_dointvec_minmax,
1826                 .extra1         = &zero,
1827                 .extra2         = &one,
1828         },
1829         {
1830                 .procname       = "protected_fifos",
1831                 .data           = &sysctl_protected_fifos,
1832                 .maxlen         = sizeof(int),
1833                 .mode           = 0600,
1834                 .proc_handler   = proc_dointvec_minmax,
1835                 .extra1         = &zero,
1836                 .extra2         = &two,
1837         },
1838         {
1839                 .procname       = "protected_regular",
1840                 .data           = &sysctl_protected_regular,
1841                 .maxlen         = sizeof(int),
1842                 .mode           = 0600,
1843                 .proc_handler   = proc_dointvec_minmax,
1844                 .extra1         = &zero,
1845                 .extra2         = &two,
1846         },
/* suid_dumpable goes through its own handler, proc_dointvec_minmax_coredump. */
1847         {
1848                 .procname       = "suid_dumpable",
1849                 .data           = &suid_dumpable,
1850                 .maxlen         = sizeof(int),
1851                 .mode           = 0644,
1852                 .proc_handler   = proc_dointvec_minmax_coredump,
1853                 .extra1         = &zero,
1854                 .extra2         = &two,
1855         },
1856 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1857         {
1858                 .procname       = "binfmt_misc",
1859                 .mode           = 0555,
1860                 .child          = sysctl_mount_point,
1861         },
1862 #endif
/* pipe-max-size uses the dedicated pipe_proc_fn handler and sets only .extra1 (pipe_min_size). */
1863         {
1864                 .procname       = "pipe-max-size",
1865                 .data           = &pipe_max_size,
1866                 .maxlen         = sizeof(pipe_max_size),
1867                 .mode           = 0644,
1868                 .proc_handler   = &pipe_proc_fn,
1869                 .extra1         = &pipe_min_size,
1870         },
1871         {
1872                 .procname       = "pipe-user-pages-hard",
1873                 .data           = &pipe_user_pages_hard,
1874                 .maxlen         = sizeof(pipe_user_pages_hard),
1875                 .mode           = 0644,
1876                 .proc_handler   = proc_doulongvec_minmax,
1877         },
1878         {
1879                 .procname       = "pipe-user-pages-soft",
1880                 .data           = &pipe_user_pages_soft,
1881                 .maxlen         = sizeof(pipe_user_pages_soft),
1882                 .mode           = 0644,
1883                 .proc_handler   = proc_doulongvec_minmax,
1884         },
/* mount-max sets only .extra1 (&one); no .extra2 is given. */
1885         {
1886                 .procname       = "mount-max",
1887                 .data           = &sysctl_mount_max,
1888                 .maxlen         = sizeof(unsigned int),
1889                 .mode           = 0644,
1890                 .proc_handler   = proc_dointvec_minmax,
1891                 .extra1         = &one,
1892         },
/* Empty entry closing the table. */
1893         { }
1894 };
1895
1896 static struct ctl_table debug_table[] = {
1897 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
1898         {
1899                 .procname       = "exception-trace",
1900                 .data           = &show_unhandled_signals,
1901                 .maxlen         = sizeof(int),
1902                 .mode           = 0644,
1903                 .proc_handler   = proc_dointvec
1904         },
1905 #endif
1906 #if defined(CONFIG_OPTPROBES)
1907         {
1908                 .procname       = "kprobes-optimization",
1909                 .data           = &sysctl_kprobes_optimization,
1910                 .maxlen         = sizeof(int),
1911                 .mode           = 0644,
1912                 .proc_handler   = proc_kprobes_optimization_handler,
1913                 .extra1         = &zero,
1914                 .extra2         = &one,
1915         },
1916 #endif
1917         { }
1918 };
1919
1920 static struct ctl_table dev_table[] = {
1921         { }
1922 };
1923
1924 int __init sysctl_init(void)
1925 {
1926         struct ctl_table_header *hdr;
1927
1928         hdr = register_sysctl_table(sysctl_base_table);
1929         kmemleak_not_leak(hdr);
1930         return 0;
1931 }
1932
1933 #endif /* CONFIG_SYSCTL */
1934
1935 /*
1936  * /proc/sys support
1937  */
1938
1939 #ifdef CONFIG_PROC_SYSCTL
1940
1941 static int _proc_do_string(char *data, int maxlen, int write,
1942                            char __user *buffer,
1943                            size_t *lenp, loff_t *ppos)
1944 {
1945         size_t len;
1946         char __user *p;
1947         char c;
1948
1949         if (!data || !maxlen || !*lenp) {
1950                 *lenp = 0;
1951                 return 0;
1952         }
1953
1954         if (write) {
1955                 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
1956                         /* Only continue writes not past the end of buffer. */
1957                         len = strlen(data);
1958                         if (len > maxlen - 1)
1959                                 len = maxlen - 1;
1960
1961                         if (*ppos > len)
1962                                 return 0;
1963                         len = *ppos;
1964                 } else {
1965                         /* Start writing from beginning of buffer. */
1966                         len = 0;
1967                 }
1968
1969                 *ppos += *lenp;
1970                 p = buffer;
1971                 while ((p - buffer) < *lenp && len < maxlen - 1) {
1972                         if (get_user(c, p++))
1973                                 return -EFAULT;
1974                         if (c == 0 || c == '\n')
1975                                 break;
1976                         data[len++] = c;
1977                 }
1978                 data[len] = 0;
1979         } else {
1980                 len = strlen(data);
1981                 if (len > maxlen)
1982                         len = maxlen;
1983
1984                 if (*ppos > len) {
1985                         *lenp = 0;
1986                         return 0;
1987                 }
1988
1989                 data += *ppos;
1990                 len  -= *ppos;
1991
1992                 if (len > *lenp)
1993                         len = *lenp;
1994                 if (len)
1995                         if (copy_to_user(buffer, data, len))
1996                                 return -EFAULT;
1997                 if (len < *lenp) {
1998                         if (put_user('\n', buffer + len))
1999                                 return -EFAULT;
2000                         len++;
2001                 }
2002                 *lenp = len;
2003                 *ppos += len;
2004         }
2005         return 0;
2006 }
2007
2008 static void warn_sysctl_write(struct ctl_table *table)
2009 {
2010         pr_warn_once("%s wrote to %s when file position was not 0!\n"
2011                 "This will not be supported in the future. To silence this\n"
2012                 "warning, set kernel.sysctl_writes_strict = -1\n",
2013                 current->comm, table->procname);
2014 }
2015
2016 /**
2017  * proc_first_pos_non_zero_ignore - check if firs position is allowed
2018  * @ppos: file position
2019  * @table: the sysctl table
2020  *
2021  * Returns true if the first position is non-zero and the sysctl_writes_strict
2022  * mode indicates this is not allowed for numeric input types. String proc
2023  * hadlers can ignore the return value.
2024  */
2025 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
2026                                            struct ctl_table *table)
2027 {
2028         if (!*ppos)
2029                 return false;
2030
2031         switch (sysctl_writes_strict) {
2032         case SYSCTL_WRITES_STRICT:
2033                 return true;
2034         case SYSCTL_WRITES_WARN:
2035                 warn_sysctl_write(table);
2036                 return false;
2037         default:
2038                 return false;
2039         }
2040 }
2041
2042 /**
2043  * proc_dostring - read a string sysctl
2044  * @table: the sysctl table
2045  * @write: %TRUE if this is a write to the sysctl file
2046  * @buffer: the user buffer
2047  * @lenp: the size of the user buffer
2048  * @ppos: file position
2049  *
2050  * Reads/writes a string from/to the user buffer. If the kernel
2051  * buffer provided is not large enough to hold the string, the
2052  * string is truncated. The copied string is %NULL-terminated.
2053  * If the string is being read by the user process, it is copied
2054  * and a newline '\n' is added. It is truncated if the buffer is
2055  * not large enough.
2056  *
2057  * Returns 0 on success.
2058  */
2059 int proc_dostring(struct ctl_table *table, int write,
2060                   void __user *buffer, size_t *lenp, loff_t *ppos)
2061 {
2062         if (write)
2063                 proc_first_pos_non_zero_ignore(ppos, table);
2064
2065         return _proc_do_string((char *)(table->data), table->maxlen, write,
2066                                (char __user *)buffer, lenp, ppos);
2067 }
2068
/*
 * Advance *@buf to the position returned by skip_spaces() and return the
 * number of bytes that were stepped over.
 */
static size_t proc_skip_spaces(char **buf)
{
        char *start = *buf;

        *buf = skip_spaces(start);

        return *buf - start;
}
2077
/*
 * Step *@buf past leading occurrences of @v, consuming at most *@size
 * bytes; *@size is reduced by the number of bytes skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
        for (; *size && **buf == v; --*size)
                ++*buf;
}
2087
#define TMPBUFLEN 22
/**
 * proc_get_long - parse an ASCII formatted integer from a kernel buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * At most TMPBUFLEN - 1 bytes of @buf are examined.  On success %0 is
 * returned and @buf and @size are updated by the number of bytes consumed.
 * If @tr is non-NULL and a trailing character exists (size is non-zero
 * after returning from this function), @tr receives that character.
 * -EINVAL is returned for empty input, a non-digit start, a number that
 * fills the whole scratch buffer, or a trailer not listed in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
                          unsigned long *val, bool *neg,
                          const char *perm_tr, unsigned perm_tr_len, char *tr)
{
        char scratch[TMPBUFLEN];
        char *cur = scratch;
        bool negative;
        int used;

        if (!*size)
                return -EINVAL;

        /* Work on a NUL-terminated private copy of the input's head. */
        used = *size;
        if (used > TMPBUFLEN - 1)
                used = TMPBUFLEN - 1;
        memcpy(scratch, *buf, used);
        scratch[used] = 0;

        /* A lone '-' is not a sign; it then fails the digit check below. */
        negative = (*cur == '-' && *size > 1);
        if (negative)
                cur++;
        *neg = negative;

        if (!isdigit(*cur))
                return -EINVAL;

        *val = simple_strtoul(cur, &cur, 0);
        used = cur - scratch;

        /*
         * The character following a number that fills the scratch buffer
         * is unknown, so invalid input (e.g. 1234...a) or two integers
         * (e.g. 123...1) could slip through.  Reject numbers that large.
         */
        if (used == TMPBUFLEN - 1)
                return -EINVAL;

        if (used < *size && perm_tr_len &&
            !memchr(perm_tr, *cur, perm_tr_len))
                return -EINVAL;

        if (tr && (used < *size))
                *tr = *cur;

        *buf += used;
        *size -= used;

        return 0;
}
2152
2153 /**
2154  * proc_put_long - converts an integer to a decimal ASCII formatted string
2155  *
2156  * @buf: the user buffer
2157  * @size: the size of the user buffer
2158  * @val: the integer to be converted
2159  * @neg: sign of the number, %TRUE for negative
2160  *
2161  * In case of success %0 is returned and @buf and @size are updated with
2162  * the amount of bytes written.
2163  */
2164 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2165                           bool neg)
2166 {
2167         int len;
2168         char tmp[TMPBUFLEN], *p = tmp;
2169
2170         sprintf(p, "%s%lu", neg ? "-" : "", val);
2171         len = strlen(tmp);
2172         if (len > *size)
2173                 len = *size;
2174         if (copy_to_user(*buf, tmp, len))
2175                 return -EFAULT;
2176         *size -= len;
2177         *buf += len;
2178         return 0;
2179 }
2180 #undef TMPBUFLEN
2181
2182 static int proc_put_char(void __user **buf, size_t *size, char c)
2183 {
2184         if (*size) {
2185                 char __user **buffer = (char __user **)buf;
2186                 if (put_user(c, *buffer))
2187                         return -EFAULT;
2188                 (*size)--, (*buffer)++;
2189                 *buf = *buffer;
2190         }
2191         return 0;
2192 }
2193
/*
 * Default conversion between the sign/magnitude pair (*@negp, *@lvalp)
 * used by the parser/formatter and the int stored at @valp.
 *
 * write != 0: store the value in *@valp; -EINVAL if it does not fit an int.
 * write == 0: split *@valp into sign (*@negp) and magnitude (*@lvalp).
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
                                 int *valp,
                                 int write, void *data)
{
        if (!write) {
                int cur = *valp;

                *negp = cur < 0;
                *lvalp = *negp ? -(unsigned long)cur : (unsigned long)cur;
                return 0;
        }

        /* A negative value may have a magnitude one larger than INT_MAX. */
        if (*lvalp > (unsigned long) INT_MAX + (*negp ? 1 : 0))
                return -EINVAL;

        *valp = *negp ? -*lvalp : *lvalp;

        return 0;
}
2220
/*
 * Default conversion between the unsigned long used by the
 * parser/formatter (*@lvalp) and the unsigned int stored at @valp.
 *
 * write != 0: store *@lvalp in *@valp; -EINVAL if it exceeds UINT_MAX.
 * write == 0: load *@valp into *@lvalp.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
                                  unsigned int *valp,
                                  int write, void *data)
{
        if (!write) {
                *lvalp = (unsigned long)*valp;
                return 0;
        }

        if (*lvalp > UINT_MAX)
                return -EINVAL;

        *valp = *lvalp;

        return 0;
}
2235
/*
 * Characters accepted as the trailer directly after a parsed number: the
 * integer handlers below pass this array to proc_get_long() as its
 * permitted-trailer set.  It is a plain char array without a NUL
 * terminator; callers give sizeof(proc_wspace_sep) as its length.
 */
2236 static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
2237
/*
 * __do_proc_dointvec - read or write a vector of ints via a conversion hook.
 * @tbl_data: the int array backing the sysctl
 * @table: the sysctl table entry; @table->maxlen gives the array size in bytes
 * @write: non-zero to parse user input into the array, zero to format it out
 * @buffer: the user buffer
 * @lenp: in: size of the user buffer; out: number of bytes consumed/produced
 * @ppos: file position, advanced by the final *@lenp
 * @conv: converts between (sign, magnitude) and the stored int; when NULL,
 *        do_proc_dointvec_conv is used
 * @data: opaque argument handed through to @conv
 *
 * On a read, values are separated by '\t' and followed by '\n' if room
 * remains.  On a write, whitespace-separated numbers are parsed one per
 * array slot.
 *
 * Returns 0 on success, a negative errno otherwise; a write from which
 * no value at all was stored returns the pending error or -EINVAL.
 */
2238 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2239                   int write, void __user *buffer,
2240                   size_t *lenp, loff_t *ppos,
2241                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2242                               int write, void *data),
2243                   void *data)
2244 {
2245         int *i, vleft, first = 1, err = 0;
2246         size_t left;
2247         char *kbuf = NULL, *p;
2248         
        /* Nothing to do: no backing data, zero length, or a read at a non-zero position. */
2249         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2250                 *lenp = 0;
2251                 return 0;
2252         }
2253         
2254         i = (int *) tbl_data;
        /* vleft: number of ints that fit in maxlen; left: bytes of user buffer not yet handled. */
2255         vleft = table->maxlen / sizeof(*i);
2256         left = *lenp;
2257
2258         if (!conv)
2259                 conv = do_proc_dointvec_conv;
2260
2261         if (write) {
                /* Ignored write: skip parsing, but still advance *ppos by the full *lenp at "out". */
2262                 if (proc_first_pos_non_zero_ignore(ppos, table))
2263                         goto out;
2264
                /* Cap the input at PAGE_SIZE - 1 bytes and copy it into a NUL-terminated kernel buffer. */
2265                 if (left > PAGE_SIZE - 1)
2266                         left = PAGE_SIZE - 1;
2267                 p = kbuf = memdup_user_nul(buffer, left);
2268                 if (IS_ERR(kbuf))
2269                         return PTR_ERR(kbuf);
2270         }
2271
        /* One array element per iteration; "first" is cleared once an element has been handled. */
2272         for (; left && vleft--; i++, first=0) {
2273                 unsigned long lval;
2274                 bool neg;
2275
2276                 if (write) {
2277                         left -= proc_skip_spaces(&p);
2278
2279                         if (!left)
2280                                 break;
2281                         err = proc_get_long(&p, &left, &lval, &neg,
2282                                              proc_wspace_sep,
2283                                              sizeof(proc_wspace_sep), NULL);
2284                         if (err)
2285                                 break;
2286                         if (conv(&neg, &lval, i, 1, data)) {
2287                                 err = -EINVAL;
2288                                 break;
2289                         }
2290                 } else {
2291                         if (conv(&neg, &lval, i, 0, data)) {
2292                                 err = -EINVAL;
2293                                 break;
2294                         }
                        /* Separate every value after the first with a tab. */
2295                         if (!first)
2296                                 err = proc_put_char(&buffer, &left, '\t');
2297                         if (err)
2298                                 break;
2299                         err = proc_put_long(&buffer, &left, lval, neg);
2300                         if (err)
2301                                 break;
2302                 }
2303         }
2304
        /* Read: close the output with a newline if at least one value was emitted and room remains. */
2305         if (!write && !first && left && !err)
2306                 err = proc_put_char(&buffer, &left, '\n');
        /* Write: trailing whitespace counts as consumed input. */
2307         if (write && !err && left)
2308                 left -= proc_skip_spaces(&p);
2309         if (write) {
2310                 kfree(kbuf);
                /* No element was stored at all: report the pending error, or -EINVAL if there is none. */
2311                 if (first)
2312                         return err ? : -EINVAL;
2313         }
        /* Report only the bytes actually handled. */
2314         *lenp -= left;
2315 out:
2316         *ppos += *lenp;
2317         return err;
2318 }
2319
2320 static int do_proc_dointvec(struct ctl_table *table, int write,
2321                   void __user *buffer, size_t *lenp, loff_t *ppos,
2322                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2323                               int write, void *data),
2324                   void *data)
2325 {
2326         return __do_proc_dointvec(table->data, table, write,
2327                         buffer, lenp, ppos, conv, data);
2328 }
2329
2330 static int do_proc_douintvec_w(unsigned int *tbl_data,
2331                                struct ctl_table *table,
2332                                void __user *buffer,
2333                                size_t *lenp, loff_t *ppos,
2334                                int (*conv)(unsigned long *lvalp,
2335                                            unsigned int *valp,
2336                                            int write, void *data),
2337                                void *data)
2338 {
2339         unsigned long lval;
2340         int err = 0;
2341         size_t left;
2342         bool neg;
2343         char *kbuf = NULL, *p;
2344
2345         left = *lenp;
2346
2347         if (proc_first_pos_non_zero_ignore(ppos, table))
2348                 goto bail_early;
2349
2350         if (left > PAGE_SIZE - 1)
2351                 left = PAGE_SIZE - 1;
2352
2353         p = kbuf = memdup_user_nul(buffer, left);
2354         if (IS_ERR(kbuf))
2355                 return -EINVAL;
2356
2357         left -= proc_skip_spaces(&p);
2358         if (!left) {
2359                 err = -EINVAL;
2360                 goto out_free;
2361         }
2362
2363         err = proc_get_long(&p, &left, &lval, &neg,
2364                              proc_wspace_sep,
2365                              sizeof(proc_wspace_sep), NULL);
2366         if (err || neg) {
2367                 err = -EINVAL;
2368                 goto out_free;
2369         }
2370
2371         if (conv(&lval, tbl_data, 1, data)) {
2372                 err = -EINVAL;
2373                 goto out_free;
2374         }
2375
2376         if (!err && left)
2377                 left -= proc_skip_spaces(&p);
2378
2379 out_free:
2380         kfree(kbuf);
2381         if (err)
2382                 return -EINVAL;
2383
2384         return 0;
2385
2386         /* This is in keeping with old __do_proc_dointvec() */
2387 bail_early:
2388         *ppos += *lenp;
2389         return err;
2390 }
2391
2392 static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
2393                                size_t *lenp, loff_t *ppos,
2394                                int (*conv)(unsigned long *lvalp,
2395                                            unsigned int *valp,
2396                                            int write, void *data),
2397                                void *data)
2398 {
2399         unsigned long lval;
2400         int err = 0;
2401         size_t left;
2402
2403         left = *lenp;
2404
2405         if (conv(&lval, tbl_data, 0, data)) {
2406                 err = -EINVAL;
2407                 goto out;
2408         }
2409
2410         err = proc_put_long(&buffer, &left, lval, false);
2411         if (err || !left)
2412                 goto out;
2413
2414         err = proc_put_char(&buffer, &left, '\n');
2415
2416 out:
2417         *lenp -= left;
2418         *ppos += *lenp;
2419
2420         return err;
2421 }
2422
2423 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
2424                                int write, void __user *buffer,
2425                                size_t *lenp, loff_t *ppos,
2426                                int (*conv)(unsigned long *lvalp,
2427                                            unsigned int *valp,
2428                                            int write, void *data),
2429                                void *data)
2430 {
2431         unsigned int *i, vleft;
2432
2433         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2434                 *lenp = 0;
2435                 return 0;
2436         }
2437
2438         i = (unsigned int *) tbl_data;
2439         vleft = table->maxlen / sizeof(*i);
2440
2441         /*
2442          * Arrays are not supported, keep this simple. *Do not* add
2443          * support for them.
2444          */
2445         if (vleft != 1) {
2446                 *lenp = 0;
2447                 return -EINVAL;
2448         }
2449
2450         if (!conv)
2451                 conv = do_proc_douintvec_conv;
2452
2453         if (write)
2454                 return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
2455                                            conv, data);
2456         return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
2457 }
2458
2459 static int do_proc_douintvec(struct ctl_table *table, int write,
2460                              void __user *buffer, size_t *lenp, loff_t *ppos,
2461                              int (*conv)(unsigned long *lvalp,
2462                                          unsigned int *valp,
2463                                          int write, void *data),
2464                              void *data)
2465 {
2466         return __do_proc_douintvec(table->data, table, write,
2467                                    buffer, lenp, ppos, conv, data);
2468 }
2469
2470 /**
2471  * proc_dointvec - read a vector of integers
2472  * @table: the sysctl table
2473  * @write: %TRUE if this is a write to the sysctl file
2474  * @buffer: the user buffer
2475  * @lenp: the size of the user buffer
2476  * @ppos: file position
2477  *
2478  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2479  * values from/to the user buffer, treated as an ASCII string. 
2480  *
2481  * Returns 0 on success.
2482  */
2483 int proc_dointvec(struct ctl_table *table, int write,
2484                      void __user *buffer, size_t *lenp, loff_t *ppos)
2485 {
2486         return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
2487 }
2488
2489 /**
2490  * proc_douintvec - read a vector of unsigned integers
2491  * @table: the sysctl table
2492  * @write: %TRUE if this is a write to the sysctl file
2493  * @buffer: the user buffer
2494  * @lenp: the size of the user buffer
2495  * @ppos: file position
2496  *
2497  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2498  * values from/to the user buffer, treated as an ASCII string.
2499  *
2500  * Returns 0 on success.
2501  */
2502 int proc_douintvec(struct ctl_table *table, int write,
2503                      void __user *buffer, size_t *lenp, loff_t *ppos)
2504 {
2505         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2506                                  do_proc_douintvec_conv, NULL);
2507 }
2508
2509 /*
2510  * Taint values can only be increased
2511  * This means we can safely use a temporary.
2512  */
2513 static int proc_taint(struct ctl_table *table, int write,
2514                                void __user *buffer, size_t *lenp, loff_t *ppos)
2515 {
2516         struct ctl_table t;
2517         unsigned long tmptaint = get_taint();
2518         int err;
2519
2520         if (write && !capable(CAP_SYS_ADMIN))
2521                 return -EPERM;
2522
2523         t = *table;
2524         t.data = &tmptaint;
2525         err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2526         if (err < 0)
2527                 return err;
2528
2529         if (write) {
2530                 /*
2531                  * Poor man's atomic or. Not worth adding a primitive
2532                  * to everyone's atomic.h for this
2533                  */
2534                 int i;
2535                 for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2536                         if ((tmptaint >> i) & 1)
2537                                 add_taint(i, LOCKDEP_STILL_OK);
2538                 }
2539         }
2540
2541         return err;
2542 }
2543
2544 #ifdef CONFIG_PRINTK
2545 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
2546                                 void __user *buffer, size_t *lenp, loff_t *ppos)
2547 {
2548         if (write && !capable(CAP_SYS_ADMIN))
2549                 return -EPERM;
2550
2551         return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2552 }
2553 #endif
2554
/*
 * Optional bounds for do_proc_dointvec_minmax_conv(); a NULL pointer means
 * that side is unbounded.
 */
struct do_proc_dointvec_minmax_conv_param {
        int *min;
        int *max;
};

/*
 * Convert between the (sign, magnitude) pair used by the parser and a
 * signed int.
 *
 * On write, the magnitude in *lvalp with sign *negp is checked to fit in an
 * int and to lie within the bounds given by @data before it is stored in
 * *valp; -EINVAL is returned and *valp is left untouched otherwise.
 *
 * On read, *valp is split into *negp and its magnitude in *lvalp.
 *
 * Returns 0 on success.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
                                        int *valp,
                                        int write, void *data)
{
        struct do_proc_dointvec_minmax_conv_param *param = data;
        int val;

        if (!write) {
                val = *valp;
                if (val < 0) {
                        *negp = true;
                        *lvalp = -(unsigned long)val;
                } else {
                        *negp = false;
                        *lvalp = (unsigned long)val;
                }
                return 0;
        }

        if (*negp) {
                /* The most negative int has magnitude INT_MAX + 1. */
                if (*lvalp > (unsigned long) INT_MAX + 1)
                        return -EINVAL;
                val = -*lvalp;
        } else {
                if (*lvalp > (unsigned long) INT_MAX)
                        return -EINVAL;
                val = *lvalp;
        }

        if (param->min && val < *param->min)
                return -EINVAL;
        if (param->max && val > *param->max)
                return -EINVAL;

        *valp = val;
        return 0;
}
2592
2593 /**
2594  * proc_dointvec_minmax - read a vector of integers with min/max values
2595  * @table: the sysctl table
2596  * @write: %TRUE if this is a write to the sysctl file
2597  * @buffer: the user buffer
2598  * @lenp: the size of the user buffer
2599  * @ppos: file position
2600  *
2601  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2602  * values from/to the user buffer, treated as an ASCII string.
2603  *
2604  * This routine will ensure the values are within the range specified by
2605  * table->extra1 (min) and table->extra2 (max).
2606  *
2607  * Returns 0 on success.
2608  */
2609 int proc_dointvec_minmax(struct ctl_table *table, int write,
2610                   void __user *buffer, size_t *lenp, loff_t *ppos)
2611 {
2612         struct do_proc_dointvec_minmax_conv_param param = {
2613                 .min = (int *) table->extra1,
2614                 .max = (int *) table->extra2,
2615         };
2616         return do_proc_dointvec(table, write, buffer, lenp, ppos,
2617                                 do_proc_dointvec_minmax_conv, &param);
2618 }
2619
/*
 * Optional bounds for do_proc_douintvec_minmax_conv(); a NULL pointer means
 * that side is unbounded.
 */
struct do_proc_douintvec_minmax_conv_param {
        unsigned int *min;
        unsigned int *max;
};

/*
 * Convert between the unsigned long used by the parser and an unsigned int.
 *
 * On write, *lvalp must fit in an unsigned int (-EINVAL otherwise) and lie
 * within the bounds given by @data (-ERANGE otherwise) before it is stored
 * in *valp. The overflow check is done before narrowing, so the bounds are
 * never compared against a wrapped-around value. *valp is left untouched
 * on error.
 *
 * On read, *valp is widened into *lvalp.
 *
 * Returns 0 on success.
 */
static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
                                         unsigned int *valp,
                                         int write, void *data)
{
        struct do_proc_douintvec_minmax_conv_param *param = data;
        unsigned int val;

        if (!write) {
                val = *valp;
                *lvalp = (unsigned long) val;
                return 0;
        }

        /* Reject values that would be truncated by the assignment below. */
        if (*lvalp > UINT_MAX)
                return -EINVAL;
        val = *lvalp;

        if ((param->min && *param->min > val) ||
            (param->max && *param->max < val))
                return -ERANGE;

        *valp = val;
        return 0;
}
2648
2649 /**
2650  * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
2651  * @table: the sysctl table
2652  * @write: %TRUE if this is a write to the sysctl file
2653  * @buffer: the user buffer
2654  * @lenp: the size of the user buffer
2655  * @ppos: file position
2656  *
2657  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2658  * values from/to the user buffer, treated as an ASCII string. Negative
2659  * strings are not allowed.
2660  *
2661  * This routine will ensure the values are within the range specified by
2662  * table->extra1 (min) and table->extra2 (max). There is a final sanity
2663  * check for UINT_MAX to avoid having to support wrap around uses from
2664  * userspace.
2665  *
2666  * Returns 0 on success.
2667  */
2668 int proc_douintvec_minmax(struct ctl_table *table, int write,
2669                           void __user *buffer, size_t *lenp, loff_t *ppos)
2670 {
2671         struct do_proc_douintvec_minmax_conv_param param = {
2672                 .min = (unsigned int *) table->extra1,
2673                 .max = (unsigned int *) table->extra2,
2674         };
2675         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2676                                  do_proc_douintvec_minmax_conv, &param);
2677 }
2678
/*
 * Log a warning when suid_dumpable is SUID_DUMP_ROOT while core_pattern
 * starts with neither '/' nor '|'. Compiles to an empty function without
 * CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
        if (suid_dumpable != SUID_DUMP_ROOT)
                return;

        /* An absolute path or a pipe handler is considered safe. */
        if (core_pattern[0] == '/' || core_pattern[0] == '|')
                return;

        printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
        );
#endif
}
2692
2693 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2694                 void __user *buffer, size_t *lenp, loff_t *ppos)
2695 {
2696         int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2697         if (!error)
2698                 validate_coredump_safety();
2699         return error;
2700 }
2701
2702 #ifdef CONFIG_COREDUMP
2703 static int proc_dostring_coredump(struct ctl_table *table, int write,
2704                   void __user *buffer, size_t *lenp, loff_t *ppos)
2705 {
2706         int error = proc_dostring(table, write, buffer, lenp, ppos);
2707         if (!error)
2708                 validate_coredump_safety();
2709         return error;
2710 }
2711 #endif
2712
2713 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2714                                      void __user *buffer,
2715                                      size_t *lenp, loff_t *ppos,
2716                                      unsigned long convmul,
2717                                      unsigned long convdiv)
2718 {
2719         unsigned long *i, *min, *max;
2720         int vleft, first = 1, err = 0;
2721         size_t left;
2722         char *kbuf = NULL, *p;
2723
2724         if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2725                 *lenp = 0;
2726                 return 0;
2727         }
2728
2729         i = (unsigned long *) data;
2730         min = (unsigned long *) table->extra1;
2731         max = (unsigned long *) table->extra2;
2732         vleft = table->maxlen / sizeof(unsigned long);
2733         left = *lenp;
2734
2735         if (write) {
2736                 if (proc_first_pos_non_zero_ignore(ppos, table))
2737                         goto out;
2738
2739                 if (left > PAGE_SIZE - 1)
2740                         left = PAGE_SIZE - 1;
2741                 p = kbuf = memdup_user_nul(buffer, left);
2742                 if (IS_ERR(kbuf))
2743                         return PTR_ERR(kbuf);
2744         }
2745
2746         for (; left && vleft--; i++, first = 0) {
2747                 unsigned long val;
2748
2749                 if (write) {
2750                         bool neg;
2751
2752                         left -= proc_skip_spaces(&p);
2753                         if (!left)
2754                                 break;
2755
2756                         err = proc_get_long(&p, &left, &val, &neg,
2757                                              proc_wspace_sep,
2758                                              sizeof(proc_wspace_sep), NULL);
2759                         if (err)
2760                                 break;
2761                         if (neg)
2762                                 continue;
2763                         val = convmul * val / convdiv;
2764                         if ((min && val < *min) || (max && val > *max)) {
2765                                 err = -EINVAL;
2766                                 break;
2767                         }
2768                         *i = val;
2769                 } else {
2770                         val = convdiv * (*i) / convmul;
2771                         if (!first) {
2772                                 err = proc_put_char(&buffer, &left, '\t');
2773                                 if (err)
2774                                         break;
2775                         }
2776                         err = proc_put_long(&buffer, &left, val, false);
2777                         if (err)
2778                                 break;
2779                 }
2780         }
2781
2782         if (!write && !first && left && !err)
2783                 err = proc_put_char(&buffer, &left, '\n');
2784         if (write && !err)
2785                 left -= proc_skip_spaces(&p);
2786         if (write) {
2787                 kfree(kbuf);
2788                 if (first)
2789                         return err ? : -EINVAL;
2790         }
2791         *lenp -= left;
2792 out:
2793         *ppos += *lenp;
2794         return err;
2795 }
2796
2797 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2798                                      void __user *buffer,
2799                                      size_t *lenp, loff_t *ppos,
2800                                      unsigned long convmul,
2801                                      unsigned long convdiv)
2802 {
2803         return __do_proc_doulongvec_minmax(table->data, table, write,
2804                         buffer, lenp, ppos, convmul, convdiv);
2805 }
2806
2807 /**
2808  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2809  * @table: the sysctl table
2810  * @write: %TRUE if this is a write to the sysctl file
2811  * @buffer: the user buffer
2812  * @lenp: the size of the user buffer
2813  * @ppos: file position
2814  *
2815  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2816  * values from/to the user buffer, treated as an ASCII string.
2817  *
2818  * This routine will ensure the values are within the range specified by
2819  * table->extra1 (min) and table->extra2 (max).
2820  *
2821  * Returns 0 on success.
2822  */
2823 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2824                            void __user *buffer, size_t *lenp, loff_t *ppos)
2825 {
2826     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2827 }
2828
2829 /**
2830  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2831  * @table: the sysctl table
2832  * @write: %TRUE if this is a write to the sysctl file
2833  * @buffer: the user buffer
2834  * @lenp: the size of the user buffer
2835  * @ppos: file position
2836  *
2837  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2838  * values from/to the user buffer, treated as an ASCII string. The values
2839  * are treated as milliseconds, and converted to jiffies when they are stored.
2840  *
2841  * This routine will ensure the values are within the range specified by
2842  * table->extra1 (min) and table->extra2 (max).
2843  *
2844  * Returns 0 on success.
2845  */
2846 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2847                                       void __user *buffer,
2848                                       size_t *lenp, loff_t *ppos)
2849 {
2850     return do_proc_doulongvec_minmax(table, write, buffer,
2851                                      lenp, ppos, HZ, 1000l);
2852 }
2853
2854
2855 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2856                                          int *valp,
2857                                          int write, void *data)
2858 {
2859         if (write) {
2860                 if (*lvalp > INT_MAX / HZ)
2861                         return 1;
2862                 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2863         } else {
2864                 int val = *valp;
2865                 unsigned long lval;
2866                 if (val < 0) {
2867                         *negp = true;
2868                         lval = -(unsigned long)val;
2869                 } else {
2870                         *negp = false;
2871                         lval = (unsigned long)val;
2872                 }
2873                 *lvalp = lval / HZ;
2874         }
2875         return 0;
2876 }
2877
2878 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2879                                                 int *valp,
2880                                                 int write, void *data)
2881 {
2882         if (write) {
2883                 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2884                         return 1;
2885                 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2886         } else {
2887                 int val = *valp;
2888                 unsigned long lval;
2889                 if (val < 0) {
2890                         *negp = true;
2891                         lval = -(unsigned long)val;
2892                 } else {
2893                         *negp = false;
2894                         lval = (unsigned long)val;
2895                 }
2896                 *lvalp = jiffies_to_clock_t(lval);
2897         }
2898         return 0;
2899 }
2900
2901 static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
2902                                             int *valp,
2903                                             int write, void *data)
2904 {
2905         if (write) {
2906                 unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
2907
2908                 if (jif > INT_MAX)
2909                         return 1;
2910                 *valp = (int)jif;
2911         } else {
2912                 int val = *valp;
2913                 unsigned long lval;
2914                 if (val < 0) {
2915                         *negp = true;
2916                         lval = -(unsigned long)val;
2917                 } else {
2918                         *negp = false;
2919                         lval = (unsigned long)val;
2920                 }
2921                 *lvalp = jiffies_to_msecs(lval);
2922         }
2923         return 0;
2924 }
2925
2926 /**
2927  * proc_dointvec_jiffies - read a vector of integers as seconds
2928  * @table: the sysctl table
2929  * @write: %TRUE if this is a write to the sysctl file
2930  * @buffer: the user buffer
2931  * @lenp: the size of the user buffer
2932  * @ppos: file position
2933  *
2934  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2935  * values from/to the user buffer, treated as an ASCII string. 
2936  * The values read are assumed to be in seconds, and are converted into
2937  * jiffies.
2938  *
2939  * Returns 0 on success.
2940  */
2941 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2942                           void __user *buffer, size_t *lenp, loff_t *ppos)
2943 {
2944     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2945                             do_proc_dointvec_jiffies_conv,NULL);
2946 }
2947
2948 /**
2949  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2950  * @table: the sysctl table
2951  * @write: %TRUE if this is a write to the sysctl file
2952  * @buffer: the user buffer
2953  * @lenp: the size of the user buffer
2954  * @ppos: pointer to the file position
2955  *
2956  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2957  * values from/to the user buffer, treated as an ASCII string. 
2958  * The values read are assumed to be in 1/USER_HZ seconds, and 
2959  * are converted into jiffies.
2960  *
2961  * Returns 0 on success.
2962  */
2963 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2964                                  void __user *buffer, size_t *lenp, loff_t *ppos)
2965 {
2966     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2967                             do_proc_dointvec_userhz_jiffies_conv,NULL);
2968 }
2969
2970 /**
2971  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2972  * @table: the sysctl table
2973  * @write: %TRUE if this is a write to the sysctl file
2974  * @buffer: the user buffer
2975  * @lenp: the size of the user buffer
2976  * @ppos: file position
2977  * @ppos: the current position in the file
2978  *
2979  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2980  * values from/to the user buffer, treated as an ASCII string. 
2981  * The values read are assumed to be in 1/1000 seconds, and 
2982  * are converted into jiffies.
2983  *
2984  * Returns 0 on success.
2985  */
2986 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2987                              void __user *buffer, size_t *lenp, loff_t *ppos)
2988 {
2989         return do_proc_dointvec(table, write, buffer, lenp, ppos,
2990                                 do_proc_dointvec_ms_jiffies_conv, NULL);
2991 }
2992
2993 static int proc_do_cad_pid(struct ctl_table *table, int write,
2994                            void __user *buffer, size_t *lenp, loff_t *ppos)
2995 {
2996         struct pid *new_pid;
2997         pid_t tmp;
2998         int r;
2999
3000         tmp = pid_vnr(cad_pid);
3001
3002         r = __do_proc_dointvec(&tmp, table, write, buffer,
3003                                lenp, ppos, NULL, NULL);
3004         if (r || !write)
3005                 return r;
3006
3007         new_pid = find_get_pid(tmp);
3008         if (!new_pid)
3009                 return -ESRCH;
3010
3011         put_pid(xchg(&cad_pid, new_pid));
3012         return 0;
3013 }
3014
3015 /**
3016  * proc_do_large_bitmap - read/write from/to a large bitmap
3017  * @table: the sysctl table
3018  * @write: %TRUE if this is a write to the sysctl file
3019  * @buffer: the user buffer
3020  * @lenp: the size of the user buffer
3021  * @ppos: file position
3022  *
3023  * The bitmap is stored at table->data and the bitmap length (in bits)
3024  * in table->maxlen.
3025  *
3026  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
3027  * large bitmaps may be represented in a compact manner. Writing into
3028  * the file will clear the bitmap then update it with the given input.
3029  *
3030  * Returns 0 on success.
3031  */
3032 int proc_do_large_bitmap(struct ctl_table *table, int write,
3033                          void __user *buffer, size_t *lenp, loff_t *ppos)
3034 {
3035         int err = 0;
3036         bool first = 1;
3037         size_t left = *lenp;
3038         unsigned long bitmap_len = table->maxlen;
3039         unsigned long *bitmap = *(unsigned long **) table->data;
3040         unsigned long *tmp_bitmap = NULL;
3041         char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
3042
3043         if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
3044                 *lenp = 0;
3045                 return 0;
3046         }
3047
3048         if (write) {
3049                 char *kbuf, *p;
3050
3051                 if (left > PAGE_SIZE - 1)
3052                         left = PAGE_SIZE - 1;
3053
3054                 p = kbuf = memdup_user_nul(buffer, left);
3055                 if (IS_ERR(kbuf))
3056                         return PTR_ERR(kbuf);
3057
3058                 tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
3059                                      GFP_KERNEL);
3060                 if (!tmp_bitmap) {
3061                         kfree(kbuf);
3062                         return -ENOMEM;
3063                 }
3064                 proc_skip_char(&p, &left, '\n');
3065                 while (!err && left) {
3066                         unsigned long val_a, val_b;
3067                         bool neg;
3068
3069                         err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
3070                                              sizeof(tr_a), &c);
3071                         if (err)
3072                                 break;
3073                         if (val_a >= bitmap_len || neg) {
3074                                 err = -EINVAL;
3075                                 break;
3076                         }
3077
3078                         val_b = val_a;
3079                         if (left) {
3080                                 p++;
3081                                 left--;
3082                         }
3083
3084                         if (c == '-') {
3085                                 err = proc_get_long(&p, &left, &val_b,
3086                                                      &neg, tr_b, sizeof(tr_b),
3087                                                      &c);
3088                                 if (err)
3089                                         break;
3090                                 if (val_b >= bitmap_len || neg ||
3091                                     val_a > val_b) {
3092                                         err = -EINVAL;
3093                                         break;
3094                                 }
3095                                 if (left) {
3096                                         p++;
3097                                         left--;
3098                                 }
3099                         }
3100
3101                         bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
3102                         first = 0;
3103                         proc_skip_char(&p, &left, '\n');
3104                 }
3105                 kfree(kbuf);
3106         } else {
3107                 unsigned long bit_a, bit_b = 0;
3108
3109                 while (left) {
3110                         bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
3111                         if (bit_a >= bitmap_len)
3112                                 break;
3113                         bit_b = find_next_zero_bit(bitmap, bitmap_len,
3114                                                    bit_a + 1) - 1;
3115
3116                         if (!first) {
3117                                 err = proc_put_char(&buffer, &left, ',');
3118                                 if (err)
3119                                         break;
3120                         }
3121                         err = proc_put_long(&buffer, &left, bit_a, false);
3122                         if (err)
3123                                 break;
3124                         if (bit_a != bit_b) {
3125                                 err = proc_put_char(&buffer, &left, '-');
3126                                 if (err)
3127                                         break;
3128                                 err = proc_put_long(&buffer, &left, bit_b, false);
3129                                 if (err)
3130                                         break;
3131                         }
3132
3133                         first = 0; bit_b++;
3134                 }
3135                 if (!err)
3136                         err = proc_put_char(&buffer, &left, '\n');
3137         }
3138
3139         if (!err) {
3140                 if (write) {
3141                         if (*ppos)
3142                                 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
3143                         else
3144                                 bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
3145                 }
3146                 kfree(tmp_bitmap);
3147                 *lenp -= left;
3148                 *ppos += *lenp;
3149                 return 0;
3150         } else {
3151                 kfree(tmp_bitmap);
3152                 return err;
3153         }
3154 }
3155
3156 #else /* CONFIG_PROC_SYSCTL */
3157
3158 int proc_dostring(struct ctl_table *table, int write,
3159                   void __user *buffer, size_t *lenp, loff_t *ppos)
3160 {
3161         return -ENOSYS;
3162 }
3163
3164 int proc_dointvec(struct ctl_table *table, int write,
3165                   void __user *buffer, size_t *lenp, loff_t *ppos)
3166 {
3167         return -ENOSYS;
3168 }
3169
3170 int proc_douintvec(struct ctl_table *table, int write,
3171                   void __user *buffer, size_t *lenp, loff_t *ppos)
3172 {
3173         return -ENOSYS;
3174 }
3175
3176 int proc_dointvec_minmax(struct ctl_table *table, int write,
3177                     void __user *buffer, size_t *lenp, loff_t *ppos)
3178 {
3179         return -ENOSYS;
3180 }
3181
3182 int proc_douintvec_minmax(struct ctl_table *table, int write,
3183                           void __user *buffer, size_t *lenp, loff_t *ppos)
3184 {
3185         return -ENOSYS;
3186 }
3187
3188 int proc_dointvec_jiffies(struct ctl_table *table, int write,
3189                     void __user *buffer, size_t *lenp, loff_t *ppos)
3190 {
3191         return -ENOSYS;
3192 }
3193
3194 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3195                     void __user *buffer, size_t *lenp, loff_t *ppos)
3196 {
3197         return -ENOSYS;
3198 }
3199
3200 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3201                              void __user *buffer, size_t *lenp, loff_t *ppos)
3202 {
3203         return -ENOSYS;
3204 }
3205
3206 int proc_doulongvec_minmax(struct ctl_table *table, int write,
3207                     void __user *buffer, size_t *lenp, loff_t *ppos)
3208 {
3209         return -ENOSYS;
3210 }
3211
3212 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
3213                                       void __user *buffer,
3214                                       size_t *lenp, loff_t *ppos)
3215 {
3216     return -ENOSYS;
3217 }
3218
3219
3220 #endif /* CONFIG_PROC_SYSCTL */
3221
/*
 * The handlers exported below are each defined twice, once per
 * CONFIG_PROC_SYSCTL branch, so the exports are collected here rather
 * than placed after every definition. Exception granted :-)
 */
3226 EXPORT_SYMBOL(proc_dointvec);
3227 EXPORT_SYMBOL(proc_douintvec);
3228 EXPORT_SYMBOL(proc_dointvec_jiffies);
3229 EXPORT_SYMBOL(proc_dointvec_minmax);
3230 EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
3231 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
3232 EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
3233 EXPORT_SYMBOL(proc_dostring);
3234 EXPORT_SYMBOL(proc_doulongvec_minmax);
3235 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);