diff -urN linux-2.4.20/Documentation/Configure.help linux-2.4.20/Documentation/Configure.help --- linux-2.4.20/Documentation/Configure.help 2002-11-28 18:53:08.000000000 -0500 +++ linux-2.4.20/Documentation/Configure.help 2003-04-06 15:55:38.000000000 -0400 @@ -2569,6 +2569,20 @@ If you want to compile it as a module, say M here and read Documentation/modules.txt. If unsure, say `N'. +stealth networking support +CONFIG_IP_NF_MATCH_STEALTH + Enabling this option will drop all syn packets coming to unserved tcp + ports as well as all packets coming to unserved udp ports. If you + are using your system to route any type of packets (ie. via NAT) + you should put this module at the end of your ruleset, since it will + drop packets that aren't going to ports that are listening on your + machine itself, it doesn't take into account that the packet might be + destined for someone on your internal network if you're using NAT for + instance. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + MAC address match support CONFIG_IP_NF_MATCH_MAC MAC matching allows you to match packets based on the source @@ -21427,6 +21441,865 @@ "Area6" will work for most boards. For ADX, select "Area5". +Grsecurity +CONFIG_GRKERNSEC + If you say Y here, you will be able to configure many features that + will enhance the security of your system. It is highly recommended + that you say Y here and read through the help for each option so + you fully understand the features and can evaluate their usefulness + for your machine. + +Additional security levels +CONFIG_GRKERNSEC_LOW + + Low additional security + ----------------------------------------------------------------------- + If you choose this option, several of the grsecurity options will + be enabled that will give you greater protection against a number + of attacks, while assuring that none of your software will have any + conflicts with the additional security measures. 
If you run a lot of + unusual software, or you are having problems with the higher security + levels, you should say Y here. With this option, the following features + are enabled: + + linking restrictions + fifo restrictions + random pids + enforcing nproc on execve() + restricted dmesg + random ip ids + enforced chdir("/") on chroot + + Medium additional security + ----------------------------------------------------------------------- + If you say Y here, several features in addition to those included in the + low additional security level will be enabled. These features provide + even more security to your system, though in rare cases they may + be incompatible with very old or poorly written software. If you + enable this option, make sure that your auth service (identd) is + running as gid 10 (usually group wheel). With this option the following + features (in addition to those provided in the low additional security + level) will be enabled: + + random tcp source ports + altered ping ids + failed fork logging + time change logging + signal logging + deny mounts in chroot + deny double chrooting + deny sysctl writes in chroot + deny mknod in chroot + deny access to abstract AF_UNIX sockets out of chroot + deny pivot_root in chroot + denied writes of /dev/kmem, /dev/mem, and /dev/port + /proc restrictions with special gid set to 10 (usually wheel) + address space layout randomization + + High additional security + ---------------------------------------------------------------------- + If you say Y here, many of the features of grsecurity will be enabled, + that will protect you against virtually all kinds of attacks against + your system. The much heightened security comes at a cost of an + increased chance of incompatibilities with rare software on your + machine. It is highly recommended that you view + and read about each option. Since + this security level enables PaX, you should also view + and read about the PaX project. 
While + you are there, download chpax.c and run chpax -p on binaries that cause + problems with PaX. Also remember that since the /proc restrictions are + enabled, you must run your identd as group wheel (gid 10). + This security level enables the following features in addition to those + listed in the low and medium security levels: + + additional /proc restrictions + chmod restrictions in chroot + no signals, ptrace, or viewing processes outside of chroot + capability restrictions in chroot + deny fchdir out of chroot + priority restrictions in chroot + segmentation-based implementation of PaX + mprotect restrictions + removal of /proc/<pid>/[maps|mem] + kernel stack randomization + mount/unmount/remount logging + kernel symbol hiding + +Customized additional security +CONFIG_GRKERNSEC_CUSTOM + If you say Y here, you will be able to configure every grsecurity + option, which allows you to enable many more features that aren't + covered in the basic security levels. These additional features include + TPE, socket restrictions, and the sysctl system for grsecurity. It is + advised that you read through the help for each option to determine its + usefulness in your situation. + +Enforce non-executable pages +CONFIG_GRKERNSEC_PAX_NOEXEC + By design some architectures do not allow for protecting memory + pages against execution or even if they do, Linux does not make + use of this feature. In practice this means that if a page is + readable (such as the stack or heap) it is also executable. + + There is a well known exploit technique that makes use of this + fact and a common programming mistake where an attacker can + introduce code of his choice somewhere in the attacked program's + memory (typically the stack or the heap) and then execute it. + + If the attacked program was running with different (typically + higher) privileges than that of the attacker, then he can elevate + his own privilege level (e.g. 
get a root shell, write to files for + which he does not have write access to, etc). + + Enabling this option will let you choose from various features + that prevent the injection and execution of 'foreign' code in + a program. + + This will also break programs that rely on the old behaviour and + expect that dynamically allocated memory via the malloc() family + of functions is executable (which it is not). Notable examples + are the XFree86 4.x server, the java runtime and wine. + + NOTE: you can use the 'chpax' utility to enable/disable this + feature on a per file basis. chpax is available at + + +Paging based non-executable pages +CONFIG_GRKERNSEC_PAX_PAGEEXEC + This implementation is based on the paging feature of the CPU. + On i386 it has a variable performance impact on applications + depending on their memory usage pattern. You should carefully + test your applications before using this feature in production. + On alpha, parisc, sparc and sparc64 there is no performance + impact. + +Segmentation based non-executable pages +CONFIG_GRKERNSEC_PAX_SEGMEXEC + This implementation is based on the segmentation feature of the + CPU and has little performance impact, however applications will + be limited to a 1.5 GB address space instead of the normal 3 GB. + +Emulate trampolines +CONFIG_GRKERNSEC_PAX_EMUTRAMP + There are some programs and libraries that for one reason or + another attempt to execute special small code snippets from + non-executable memory pages. Most notable examples are the + signal handler return code generated by the kernel itself and + the GCC trampolines. + + If you enabled CONFIG_GRKERNSEC_PAX_PAGEEXEC or + CONFIG_GRKERNSEC_PAX_SEGMEXEC then such programs will no longer + work under your kernel. + + As a remedy you can say Y here and use the 'chpax' utility to + enable trampoline emulation for the affected programs yet still + have the protection provided by the non-executable pages. 
+ + Alternatively you can say N here and use the 'chpax' utility + to disable CONFIG_GRKERNSEC_PAX_PAGEEXEC and + CONFIG_GRKERNSEC_PAX_SEGMEXEC for the affected files. + + NOTE: enabling this feature *may* open up a loophole in the + protection provided by non-executable pages that an attacker + could abuse. Therefore the best solution is to not have any + files on your system that would require this option. This can + be achieved by not using libc5 (which relies on the kernel + signal handler return code) and not using or rewriting programs + that make use of the nested function implementation of GCC. + Skilled users can just fix GCC itself so that it implements + nested function calls in a way that does not interfere with PaX. + +Automatically emulate sigreturn trampolines +CONFIG_GRKERNSEC_PAX_EMUSIGRT + Enabling this option will have the kernel automatically detect + and emulate signal return trampolines executing on the stack + that would otherwise lead to task termination. + + This solution is intended as a temporary one for users with + legacy versions of libc (libc5, glibc 2.0, uClibc before 0.9.17, + Modula-3 runtime, etc) or executables linked to such, basically + everything that does not specify its own SA_RESTORER function in + normal executable memory like glibc 2.1+ does. + + NOTE: this feature cannot be disabled on a per executable basis + and since it *does* open up a loophole in the protection provided + by non-executable pages, the best solution is to not have any + files on your system that would require this option. + +Restrict mprotect() +CONFIG_GRKERNSEC_PAX_MPROTECT + Enabling this option will prevent programs from + - changing the executable status of memory pages that were + not originally created as executable, + - making read-only executable pages writable again, + - creating executable pages from anonymous memory. + + You should say Y here to complete the protection provided by + the enforcement of non-executable pages. 
+ + NOTE: you can use the 'chpax' utility to control this + feature on a per file basis. chpax is available at + + +Disallow ELF text relocations +CONFIG_GRKERNSEC_PAX_NOELFRELOCS + Non-executable pages and mprotect() restrictions are effective + in preventing the introduction of new executable code into an + attacked task's address space. There remain only two avenues + for this kind of attack: if the attacker can execute already + existing code in the attacked task then he can either have it + create and mmap() a file containing his code or have it mmap() + an already existing ELF library that does not have position + independent code in it and use mprotect() on it to make it + writable and copy his code there. While protecting against + the former approach is beyond PaX, the latter can be prevented + by having only PIC ELF libraries on one's system (which do not + need to relocate their code). If you are sure this is your case, + then enable this option otherwise be careful as you may not even + be able to boot or log on your system (for example, some PAM + modules are erroneously compiled as non-PIC by default). + + NOTE: if you are using dynamic ELF executables (as suggested + when using ASLR) then you must have made sure that you linked + your files using the PIC version of crt1 (the et_dyn.zip package + referenced there has already been updated to support this). + +Enforce non-executable kernel pages +CONFIG_GRKERNSEC_PAX_KERNEXEC + This is the kernel land equivalent of PAGEEXEC and MPROTECT, + that is, enabling this option will make it harder to inject + and execute 'foreign' code in kernel memory itself. + +Address Space Layout Randomization +CONFIG_GRKERNSEC_PAX_ASLR + Many if not most exploit techniques rely on the knowledge of + certain addresses in the attacked program. The following options + will allow the kernel to apply a certain amount of randomization + to specific parts of the program thereby forcing an attacker to + guess them in most cases. 
Any failed guess will most likely crash + the attacked program which allows the kernel to detect such attempts + and react on them. PaX itself provides no reaction mechanisms, + instead it is strongly encouraged that you make use of grsecurity's + built-in crash detection features or develop one yourself. + + By saying Y here you can choose to randomize the following areas: + - top of the task's kernel stack + - top of the task's userland stack + - base address for mmap() requests that do not specify one + (this includes all libraries) + - base address of the main executable + + It is strongly recommended to say Y here as address space layout + randomization has negligible impact on performance yet it provides + a very effective protection. + + NOTE: you can use the 'chpax' utility to control most of these features + on a per file basis. + +Randomize kernel stack base +CONFIG_GRKERNSEC_PAX_RANDKSTACK + By saying Y here the kernel will randomize every task's kernel + stack on every system call. This will not only force an attacker + to guess it but also prevent him from making use of possible + leaked information about it. + + Since the kernel stack is a rather scarce resource, randomization + may cause unexpected stack overflows, therefore you should very + carefully test your system. Note that once enabled in the kernel + configuration, this feature cannot be disabled on a per file basis. + +Randomize user stack base +CONFIG_GRKERNSEC_PAX_RANDUSTACK + By saying Y here the kernel will randomize every task's userland + stack. The randomization is done in two steps where the second + one may apply a big amount of shift to the top of the stack and + cause problems for programs that want to use lots of memory (more + than 2.5 GB if SEGMEXEC is not active, or 1.25 GB when it is). + For this reason the second step can be controlled by 'chpax' on + a per file basis. 
+ +Randomize ET_EXEC base +CONFIG_GRKERNSEC_PAX_RANDEXEC + By saying Y here the kernel will randomize the base address of normal + ET_EXEC ELF executables as well. This is accomplished by mapping the + executable in memory in a special way which also allows for detecting + attackers who attempt to execute its code for their purposes. Since + this special mapping causes performance degradation and the attack + detection may create false alarms as well, you should carefully test + your executables when this feature is enabled. + + This solution is intended only as a temporary one until you relink + your programs as a dynamic ELF file. + + NOTE: you can use the 'chpax' utility to control this feature + on a per file basis. + +Allow ELF ET_EXEC text relocations +CONFIG_GRKERNSEC_PAX_ETEXECRELOCS + On some architectures like the alpha there are incorrectly + created applications that require text relocations and would + not work without enabling this option. If you are an alpha + user, you should enable this option and disable it once you + have made sure that none of your applications need it. + +Automatically emulate ELF PLT +CONFIG_GRKERNSEC_PAX_EMUPLT + Enabling this option will have the kernel automatically detect + and emulate the Procedure Linkage Table entries in ELF files. + On some architectures such entries are in writable memory, and + become non-executable leading to task termination. Therefore + it is mandatory that you enable this option on alpha, sparc and + sparc64, otherwise your system would not even boot. + + NOTE: this feature *does* open up a loophole in the protection + provided by the non-executable pages, therefore the proper + solution is to modify the toolchain to produce a PLT that does + not need to be writable. + + +Randomize mmap() base +CONFIG_GRKERNSEC_PAX_RANDMMAP + By saying Y here the kernel will use a randomized base address for + mmap() requests that do not specify one themselves. 
As a result + all dynamically loaded libraries will appear at random addresses + and therefore be harder to exploit by a technique where an attacker + attempts to execute library code for his purposes (e.g. spawn a + shell from an exploited program that is running at an elevated + privilege level). + + Furthermore, if a program is relinked as a dynamic ELF file, its + base address will be randomized as well, completing the full + randomization of the address space layout. Attacking such programs + becomes a guess game. You can find an example of doing this at + and practical samples at + . + + NOTE: you can use the 'chpax' utility to control this feature + on a per file basis. + +Deny writing to /dev/kmem, /dev/mem, and /dev/port +CONFIG_GRKERNSEC_KMEM + If you say Y here, /dev/kmem and /dev/mem won't be allowed to + be written to via mmap or otherwise to modify the running kernel. + /dev/port will also not be allowed to be opened. If you have module + support disabled, enabling this will close up four ways that are + currently used to insert malicious code into the running kernel. + Even with all these features enabled, we still highly recommend that + you use the ACL system, as it is still possible for an attacker to + modify the running kernel through privileged I/O granted by ioperm/iopl. + If you are not using XFree86, you may be able to stop this additional + case by enabling the 'Disable privileged I/O' option. Though nothing + legitimately writes to /dev/kmem, XFree86 does need to write to /dev/mem, + but only to video memory, which is the only writing we allow in this + case. If /dev/kmem or /dev/mem are mmaped without PROT_WRITE, they will + not be allowed to mprotect it with PROT_WRITE later. + Enabling this feature could make certain apps like VMWare stop working, + as they need to write to other locations in /dev/mem. + There are a few video cards that require write access to the BIOS, + one of which is the Savage. 
If you have this video card, you must say + N here, or XFree86 will not function. + It is highly recommended that you say Y here if you meet all the + conditions above. + +Disable privileged I/O +CONFIG_GRKERNSEC_IO + If you say Y here, all ioperm and iopl calls will return an error. + Ioperm and iopl can be used to modify the running kernel. + Unfortunately, some programs need this access to operate properly, + the most notable of which are XFree86 and hwclock. hwclock can be + remedied by having RTC support in the kernel, so CONFIG_RTC is + enabled if this option is enabled, to ensure that hwclock operates + correctly. XFree86 still will not operate correctly with this option + enabled, so DO NOT CHOOSE Y IF YOU USE XFree86. If you use XFree86 + and you still want to protect your kernel against modification, + use the ACL system. + +Hide kernel symbols +CONFIG_GRKERNSEC_HIDESYM + If you say Y here, getting information on loaded modules, and + displaying all kernel symbols through a syscall will be restricted + to users with CAP_SYS_MODULE. This option is only effective + provided the following conditions are met: + 1) The kernel using grsecurity is not precompiled by some distribution + 2) You are using the ACL system and hiding other files such as your + kernel image and System.map + 3) You have the additional /proc restrictions enabled, which removes + /proc/kcore + If the above conditions are met, this option will help provide a + useful protection against local and remote kernel exploitation of + overflows and arbitrary read/write vulnerabilities. + +Proc Restrictions +CONFIG_GRKERNSEC_PROC + If you say Y here, the permissions of the /proc filesystem + will be altered to enhance system security and privacy. 
Depending + upon the options you choose, you can either restrict users to see + only the processes they themselves run, or choose a group that can + view all processes and files normally restricted to root if you choose + the "restrict to user only" option. NOTE: If you're running identd as + a non-root user, you will have to run it as the group you specify here. + +Restrict /proc to user only +CONFIG_GRKERNSEC_PROC_USER + If you say Y here, non-root users will only be able to view their own + processes, and will be restricted from viewing network-related information + and kernel symbol and module information. + +Restrict /proc to user and group +CONFIG_GRKERNSEC_PROC_USERGROUP + If you say Y here, you will be able to select a group that will be + able to view all processes, network-related information, and + kernel symbol and module information. This option is useful if you want + to run identd as a non-root user. + +Remove addresses from /proc/pid/maps +CONFIG_GRKERNSEC_PROC_MEMMAP + If you say Y here, the /proc/<pid>/maps file will + give no information about the addresses of its mappings if + PaX features that rely on random addresses are enabled on the task. + If you use PaX it is highly recommended that you say Y here as it + closes up a hole that makes the full ASLR useless for suid + binaries. + +Additional proc restrictions +CONFIG_GRKERNSEC_PROC_ADD + If you say Y here, additional restrictions will be placed on + /proc that keep normal users from viewing cpu and device information. + +Dmesg(8) Restriction +CONFIG_GRKERNSEC_DMESG + If you say Y here, non-root users will not be able to use dmesg(8) + to view up to the last 4kb of messages in the kernel's log buffer. + If the sysctl option is enabled, a sysctl option with name "dmesg" is + created. 
+ +Linking restrictions +CONFIG_GRKERNSEC_LINK + If you say Y here, /tmp race exploits will be prevented, since users + will no longer be able to follow symlinks owned by other users in + world-writeable +t directories (e.g. /tmp), unless the owner of the + symlink is the owner of the directory. Users will also not be + able to hardlink to files they do not own. If the sysctl option is + enabled, a sysctl option with name "linking_restrictions" is created. + +FIFO restrictions +CONFIG_GRKERNSEC_FIFO + If you say Y here, users will not be able to write to FIFOs they don't + own in world-writeable +t directories (e.g. /tmp), unless the owner of + the FIFO is also the owner of the directory it's held in. If the sysctl + option is enabled, a sysctl option with name "fifo_restrictions" is + created. + +Enforce RLIMIT_NPROC on execs +CONFIG_GRKERNSEC_EXECVE + If you say Y here, users with a resource limit on processes will + have the value checked during execve() calls. The current system + only checks the system limit during fork() calls. If the sysctl option + is enabled, a sysctl option with name "execve_limiting" is created. + +Single group for auditing +CONFIG_GRKERNSEC_AUDIT_GROUP + If you say Y here, the exec, chdir, (un)mount, and ipc logging features + will only operate on a group you specify. This option is recommended + if you only want to watch certain users instead of having a large + amount of logs from the entire system. If the sysctl option is enabled, + a sysctl option with name "audit_group" is created. + +GID for auditing +CONFIG_GRKERNSEC_AUDIT_GID + Here you can choose the GID that will be the target of kernel auditing. + Remember to add the users you want to log to the GID specified here. + If the sysctl option is enabled, whatever you choose here won't matter. + You'll have to specify the GID in your bootup script by echoing the GID + to the proper /proc entry. View the help on the sysctl option for more + information. 
If the sysctl option is enabled, a sysctl option with name + "audit_gid" is created. + +Chdir logging +CONFIG_GRKERNSEC_AUDIT_CHDIR + If you say Y here, all chdir() calls will be logged. If the sysctl + option is enabled, a sysctl option with name "audit_chdir" is created. + +(Un)Mount logging +CONFIG_GRKERNSEC_AUDIT_MOUNT + If you say Y here, all mounts and unmounts will be logged. If the + sysctl option is enabled, a sysctl option with name "audit_mount" is + created. + +IPC logging +CONFIG_GRKERNSEC_AUDIT_IPC + If you say Y here, creation and removal of message queues, semaphores, + and shared memory will be logged. If the sysctl option is enabled, a + sysctl option with name "audit_ipc" is created. + +Exec logging +CONFIG_GRKERNSEC_EXECLOG + If you say Y here, all execve() calls will be logged (since the + other exec*() calls are frontends to execve(), all execution + will be logged). Useful for shell-servers that like to keep track + of their users. If the sysctl option is enabled, a sysctl option with + name "exec_logging" is created. + WARNING: This option when enabled will produce a LOT of logs, especially + on an active system. + +Resource logging +CONFIG_GRKERNSEC_RESLOG + If you say Y here, all attempts to overstep resource limits will + be logged with the resource name, the requested size, and the current + limit. It is highly recommended that you say Y here. + +Signal logging +CONFIG_GRKERNSEC_SIGNAL + If you say Y here, certain important signals will be logged, such as + SIGSEGV, which will as a result inform you of when an error in a program + occurred, which in some cases could mean a possible exploit attempt. + If the sysctl option is enabled, a sysctl option with name + "signal_logging" is created. + +Fork failure logging +CONFIG_GRKERNSEC_FORKFAIL + If you say Y here, all failed fork() attempts will be logged. + This could suggest a fork bomb, or someone attempting to overstep + their process limit. 
If the sysctl option is enabled, a sysctl option + with name "forkfail_logging" is created. + +Time change logging +CONFIG_GRKERNSEC_TIME + If you say Y here, any changes of the system clock will be logged. + If the sysctl option is enabled, a sysctl option with name + "timechange_logging" is created. + +Chroot jail restrictions +CONFIG_GRKERNSEC_CHROOT + If you say Y here, you will be able to choose several options that will + make breaking out of a chrooted jail much more difficult. If you + encounter no software incompatibilities with the following options, it + is recommended that you enable each one. + +Deny access to abstract AF_UNIX sockets out of chroot +CONFIG_GRKERNSEC_CHROOT_UNIX + If you say Y here, processes inside a chroot will not be able to + connect to abstract (meaning not belonging to a filesystem) Unix + domain sockets that were bound outside of a chroot. It is recommended + that you say Y here. If the sysctl option is enabled, a sysctl option + with name "chroot_deny_unix" is created. + +Deny shmat() out of chroot +CONFIG_GRKERNSEC_CHROOT_SHMAT + If you say Y here, processes inside a chroot will not be able to attach + to shared memory segments that were created outside of the chroot jail. + It is recommended that you say Y here. If the sysctl option is enabled, + a sysctl option with name "chroot_deny_shmat" is created. + +Protect outside processes +CONFIG_GRKERNSEC_CHROOT_FINDTASK + If you say Y here, processes inside a chroot will not be able to + kill, send signals with fcntl, ptrace, capget, setpgid, getpgid, + getsid, or view any process outside of the chroot. If the sysctl + option is enabled, a sysctl option with name "chroot_findtask" is + created. + +Deny mounts in chroot +CONFIG_GRKERNSEC_CHROOT_MOUNT + If you say Y here, processes inside a chroot will not be able to + mount or remount filesystems. If the sysctl option is enabled, a + sysctl option with name "chroot_deny_mount" is created. 
+ +Deny pivot_root in chroot +CONFIG_GRKERNSEC_CHROOT_PIVOT + If you say Y here, processes inside a chroot will not be able to use + a function called pivot_root() that was introduced in Linux 2.3.41. It + works similarly to chroot in that it changes the root filesystem. This + function could be misused in a chrooted process to attempt to break out + of the chroot, and therefore should not be allowed. If the sysctl + option is enabled, a sysctl option with name "chroot_deny_pivot" is + created. + +Deny double-chroots +CONFIG_GRKERNSEC_CHROOT_DOUBLE + If you say Y here, processes inside a chroot will not be able to chroot + again. This is a widely used method of breaking out of a chroot jail + and should not be allowed. If the sysctl option is enabled, a sysctl + option with name "chroot_deny_chroot" is created. + +Deny fchdir outside of chroot +CONFIG_GRKERNSEC_CHROOT_FCHDIR + If you say Y here, a well-known method of breaking chroots by fchdir'ing + to a file descriptor of the chrooting process that points to a directory + outside the filesystem will be stopped. If the sysctl option + is enabled, a sysctl option with name "chroot_deny_fchdir" is created. + +Enforce chdir("/") on all chroots +CONFIG_GRKERNSEC_CHROOT_CHDIR + If you say Y here, the current working directory of all newly-chrooted + applications will be set to the root directory of the chroot. + The man page on chroot(2) states: + Note that this call does not change the current working + directory, so that `.' can be outside the tree rooted at + `/'. In particular, the super-user can escape from a + `chroot jail' by doing `mkdir foo; chroot foo; cd ..'. + + It is recommended that you say Y here, since it's not known to break + any software. If the sysctl option is enabled, a sysctl option with + name "chroot_enforce_chdir" is created. 
+ +Deny (f)chmod +s in chroot +CONFIG_GRKERNSEC_CHROOT_CHMOD + If you say Y here, processes inside a chroot will not be able to chmod + or fchmod files to make them have suid or sgid bits. This protects + against another published method of breaking a chroot. If the sysctl + option is enabled, a sysctl option with name "chroot_deny_chmod" is + created. + +Deny mknod in chroot +CONFIG_GRKERNSEC_CHROOT_MKNOD + If you say Y here, processes inside a chroot will not be allowed to + mknod. The problem with using mknod inside a chroot is that it + would allow an attacker to create a device entry that is the same + as one on the physical root of your system, which could be + anything from the console device to a device for your harddrive (which + they could then use to wipe the drive or steal data). It is recommended + that you say Y here, unless you run into software incompatibilities. + If the sysctl option is enabled, a sysctl option with name + "chroot_deny_mknod" is created. + +Restrict priority changes in chroot +CONFIG_GRKERNSEC_CHROOT_NICE + If you say Y here, processes inside a chroot will not be able to raise + the priority of processes in the chroot, or alter the priority of + processes outside the chroot. This provides more security than simply + removing CAP_SYS_NICE from the process' capability set. If the + sysctl option is enabled, a sysctl option with name "chroot_restrict_nice" + is created. + +Log all execs within chroot +CONFIG_GRKERNSEC_CHROOT_EXECLOG + If you say Y here, all executions inside a chroot jail will be logged + to syslog. This can cause a large amount of logs if certain + applications (e.g. djb's daemontools) are installed on the system, and + is therefore left as an option. If the sysctl option is enabled, a + sysctl option with name "chroot_execlog" is created. 
+ +Deny sysctl writes in chroot +CONFIG_GRKERNSEC_CHROOT_SYSCTL + If you say Y here, an attacker in a chroot will not be able to + write to sysctl entries, either by sysctl(2) or through a /proc + interface. It is strongly recommended that you say Y here. If the + sysctl option is enabled, a sysctl option with name + "chroot_deny_sysctl" is created. + +Chroot jail capability restrictions +CONFIG_GRKERNSEC_CHROOT_CAPS + If you say Y here, the capabilities on all root processes within a + chroot jail will be lowered to stop module insertion, raw i/o, + system and net admin tasks, rebooting the system, modifying immutable + files, modifying IPC owned by another, and changing the system time. + This is left an option because it can break some apps. Disable this + if your chrooted apps are having problems performing those kinds of + tasks. If the sysctl option is enabled, a sysctl option with + name "chroot_caps" is created. + +Trusted path execution +CONFIG_GRKERNSEC_TPE + If you say Y here, you will be able to choose a gid to add to the + supplementary groups of users you want to mark as "untrusted." + These users will not be able to execute any files that are not in + root-owned directories writeable only by root. If the sysctl option + is enabled, a sysctl option with name "tpe" is created. + +Group for trusted path execution +CONFIG_GRKERNSEC_TPE_GID + Here you can choose the GID to enable trusted path protection for. + Remember to add the users you want protection enabled for to the GID + specified here. If the sysctl option is enabled, whatever you choose + here won't matter. You'll have to specify the GID in your bootup + script by echoing the GID to the proper /proc entry. View the help + on the sysctl option for more information. If the sysctl option is + enabled, a sysctl option with name "tpe_gid" is created. 
+ +Partially restrict non-root users +CONFIG_GRKERNSEC_TPE_ALL + If you say Y here, all non-root users other than the ones in the + group specified in the main TPE option will only be allowed to + execute files in directories they own that are not group or + world-writeable, or in directories owned by root and writeable only by + root. If the sysctl option is enabled, a sysctl option with name + "tpe_restrict_all" is created. + +Randomized PIDs +CONFIG_GRKERNSEC_RANDPID + If you say Y here, all PIDs created on the system will be + pseudo-randomly generated. This is extremely effective along + with the /proc restrictions to disallow an attacker from guessing + pids of daemons, etc. PIDs are also used in some cases as part + of a naming system for temporary files, so this option would keep + those filenames from being predicted as well. We also use code + to make sure that PID numbers aren't reused too soon. If the sysctl + option is enabled, a sysctl option with name "rand_pids" is created. + +Larger entropy pools +CONFIG_GRKERNSEC_RANDNET + If you say Y here, the entropy pools used for many features of Linux + and grsecurity will be doubled in size. Since several grsecurity + features use additional randomness, it is recommended that you say Y + here. Saying Y here has a similar effect as modifying + /proc/sys/kernel/random/poolsize. + +Truly random TCP ISN selection +CONFIG_GRKERNSEC_RANDISN + If you say Y here, Linux's default selection of TCP Initial Sequence + Numbers (ISNs) will be replaced with that of OpenBSD. Linux uses + an MD4 hash based on the connection plus a time value to create the + ISN, while OpenBSD's selection is random. If the sysctl option is + enabled, a sysctl option with name "rand_isns" is created. + +Randomized IP IDs +CONFIG_GRKERNSEC_RANDID + If you say Y here, the id field on all outgoing packets + will be randomized. This hinders OS fingerprinters and + keeps your machine from being used as a bounce for an untraceable + portscan.
Ids are used for fragmented packets, fragments belonging + to the same packet have the same id. By default linux only + increments the id value on each packet sent to an individual host. + We use a port of the OpenBSD random ip id code to achieve the + randomness, while keeping the possibility of id duplicates to + near none. If the sysctl option is enabled, a sysctl option with name + "rand_ip_ids" is created. + +Randomized TCP source ports +CONFIG_GRKERNSEC_RANDSRC + If you say Y here, situations where a source port is generated on the + fly for the TCP protocol (ie. with connect() ) will be altered so that + the source port is generated at random, instead of a simple incrementing + algorithm. If the sysctl option is enabled, a sysctl option with name + "rand_tcp_src_ports" is created. + +Randomized RPC XIDs +CONFIG_GRKERNSEC_RANDRPC + If you say Y here, the method of determining XIDs for RPC requests will + be randomized, instead of using linux's default behavior of simply + incrementing the XID. If you want your RPC connections to be more + secure, say Y here. If the sysctl option is enabled, a sysctl option + with name "rand_rpc" is created. + +Altered Ping IDs +CONFIG_GRKERNSEC_RANDPING + If you say Y here, the way Linux handles echo replies will be changed + so that the reply uses an ID equal to the ID of the echo request. + This will help in confusing OS detection. If the sysctl option is + enabled, a sysctl option with name "altered_pings" is created. + +Socket restrictions +CONFIG_GRKERNSEC_SOCKET + If you say Y here, you will be able to choose from several options. + If you assign a GID on your system and add it to the supplementary + groups of users you want to restrict socket access to, this patch + will perform up to three things, based on the option(s) you choose. 
+ +Deny all socket access +CONFIG_GRKERNSEC_SOCKET_ALL + If you say Y here, you will be able to choose a GID whose users will + be unable to connect to other hosts from your machine or run server + applications from your machine. If the sysctl option is enabled, a + sysctl option with name "socket_all" is created. + +Group for disabled socket access +CONFIG_GRKERNSEC_SOCKET_ALL_GID + Here you can choose the GID to disable socket access for. Remember to + add the users you want socket access disabled for to the GID + specified here. If the sysctl option is enabled, whatever you choose + here won't matter. You'll have to specify the GID in your bootup + script by echoing the GID to the proper /proc entry. View the help + on the sysctl option for more information. If the sysctl option is + enabled, a sysctl option with name "socket_all_gid" is created. + +Deny all client socket access +CONFIG_GRKERNSEC_SOCKET_CLIENT + If you say Y here, you will be able to choose a GID whose users will + be unable to connect to other hosts from your machine, but will be + able to run servers. If this option is enabled, all users in the group + you specify will have to use passive mode when initiating ftp transfers + from the shell on your machine. If the sysctl option is enabled, a + sysctl option with name "socket_client" is created. + +Group for disabled client socket access +CONFIG_GRKERNSEC_SOCKET_CLIENT_GID + Here you can choose the GID to disable client socket access for. + Remember to add the users you want client socket access disabled for to + the GID specified here. If the sysctl option is enabled, whatever you + choose here won't matter. You'll have to specify the GID in your bootup + script by echoing the GID to the proper /proc entry. View the help + on the sysctl option for more information. If the sysctl option is + enabled, a sysctl option with name "socket_client_gid" is created.
+ +Deny all server socket access +CONFIG_GRKERNSEC_SOCKET_SERVER + If you say Y here, you will be able to choose a GID whose users will + be unable to run server applications from your machine. If the sysctl + option is enabled, a sysctl option with name "socket_server" is created. + +Group for disabled server socket access +CONFIG_GRKERNSEC_SOCKET_SERVER_GID + Here you can choose the GID to disable server socket access for. + Remember to add the users you want server socket access disabled for to + the GID specified here. If the sysctl option is enabled, whatever you + choose here won't matter. You'll have to specify the GID in your bootup + script by echoing the GID to the proper /proc entry. View the help + on the sysctl option for more information. If the sysctl option is + enabled, a sysctl option with name "socket_server_gid" is created. + +Sysctl support +CONFIG_GRKERNSEC_SYSCTL + If you say Y here, you will be able to change the options that + grsecurity runs with at bootup, without having to recompile your + kernel. You can echo values to files in /proc/sys/kernel/grsecurity + to enable (1) or disable (0) various features. All the sysctl entries + are mutable until the "grsec_lock" entry is set to a non-zero value. + All features are disabled by default. Please note that this option could + reduce the effectiveness of the added security of this patch if an ACL + system is not put in place. Your init scripts should be read-only, and + root should not have access to adding modules or performing raw i/o + operations. All options should be set at startup, and the grsec_lock + entry should be set to a non-zero value after all the options are set. + *THIS IS EXTREMELY IMPORTANT* + +Number of burst messages +CONFIG_GRKERNSEC_FLOODBURST + This option allows you to choose the maximum number of messages allowed + within the flood time interval you chose in a separate option.
The + default should be suitable for most people, however if you find that + many of your logs are being interpreted as flooding, you may want to + raise this value. + +Seconds in between log messages +CONFIG_GRKERNSEC_FLOODTIME + This option allows you to enforce the number of seconds between + grsecurity log messages. The default should be suitable for most + people, however, if you choose to change it, choose a value small enough + to allow informative logs to be produced, but large enough to + prevent flooding. + +Hide kernel processes +CONFIG_GRKERNSEC_ACL_HIDEKERN + If you say Y here, when the ACL system is enabled via gradm -E, + an additional ACL will be passed to the kernel that hides all kernel + processes. These processes will only be viewable by the authenticated + admin, or processes that have viewing access set. + +Maximum tries before password lockout +CONFIG_GRKERNSEC_ACL_MAXTRIES + This option enforces the maximum number of times a user can attempt + to authorize themselves with the grsecurity ACL system before being + denied the ability to attempt authorization again for a specified time. + The lower the number, the harder it will be to brute-force a password. + +Time to wait after max password tries, in seconds +CONFIG_GRKERNSEC_ACL_TIMEOUT + This option specifies the time the user must wait after attempting to + authorize to the ACL system with the maximum number of invalid + passwords. The higher the number, the harder it will be to brute-force + a password. + Disable data cache CONFIG_DCACHE_DISABLE This option allows you to run the kernel with data cache disabled. 
diff -urN linux-2.4.20/Makefile linux-2.4.20/Makefile --- linux-2.4.20/Makefile 2002-11-28 18:53:16.000000000 -0500 +++ linux-2.4.20/Makefile 2003-04-06 15:55:38.000000000 -0400 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 SUBLEVEL = 20 -EXTRAVERSION = +EXTRAVERSION = -grsec KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) @@ -123,9 +123,10 @@ CORE_FILES =kernel/kernel.o mm/mm.o fs/fs.o ipc/ipc.o NETWORKS =net/network.o +GRSECURITY =grsecurity/grsec.o LIBS =$(TOPDIR)/lib/lib.a -SUBDIRS =kernel drivers mm fs net ipc lib +SUBDIRS =kernel drivers mm fs net ipc lib grsecurity DRIVERS-n := DRIVERS-y := @@ -265,7 +266,7 @@ export CPPFLAGS CFLAGS CFLAGS_KERNEL AFLAGS AFLAGS_KERNEL -export NETWORKS DRIVERS LIBS HEAD LDFLAGS LINKFLAGS MAKEBOOT ASFLAGS +export NETWORKS DRIVERS LIBS HEAD LDFLAGS LINKFLAGS MAKEBOOT ASFLAGS GRSECURITY .S.s: $(CPP) $(AFLAGS) $(AFLAGS_KERNEL) -traditional -o $*.s $< @@ -284,6 +285,7 @@ $(CORE_FILES) \ $(DRIVERS) \ $(NETWORKS) \ + $(GRSECURITY) \ $(LIBS) \ --end-group \ -o vmlinux diff -urN linux-2.4.20/arch/alpha/config.in linux-2.4.20/arch/alpha/config.in --- linux-2.4.20/arch/alpha/config.in 2002-11-28 18:53:08.000000000 -0500 +++ linux-2.4.20/arch/alpha/config.in 2003-04-06 15:55:38.000000000 -0400 @@ -425,3 +425,12 @@ endmenu source lib/Config.in + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity' CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = "y" ]; then + source grsecurity/Config.in +fi +endmenu + diff -urN linux-2.4.20/arch/alpha/kernel/entry.S linux-2.4.20/arch/alpha/kernel/entry.S --- linux-2.4.20/arch/alpha/kernel/entry.S 2002-08-02 20:39:42.000000000 -0400 +++ linux-2.4.20/arch/alpha/kernel/entry.S 2003-04-06 15:55:38.000000000 -0400 @@ -231,12 +231,12 @@ .end kernel_clone /* - * kernel_thread(fn, arg, clone_flags) + * arch_kernel_thread(fn, arg, clone_flags) */ .align 3 -.globl kernel_thread -.ent kernel_thread -kernel_thread: +.globl arch_kernel_thread +.ent arch_kernel_thread +arch_kernel_thread: 
ldgp $29,0($27) /* we can be called from a module */ .frame $30, 4*8, $26 subq $30,4*8,$30 diff -urN linux-2.4.20/arch/alpha/kernel/osf_sys.c linux-2.4.20/arch/alpha/kernel/osf_sys.c --- linux-2.4.20/arch/alpha/kernel/osf_sys.c 2002-08-02 20:39:42.000000000 -0400 +++ linux-2.4.20/arch/alpha/kernel/osf_sys.c 2003-04-06 15:55:38.000000000 -0400 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -230,6 +231,11 @@ struct file *file = NULL; unsigned long ret = -EBADF; +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + #if 0 if (flags & (_MAP_HASSEMAPHORE | _MAP_INHERIT | _MAP_UNALIGNED)) printk("%s: unimplemented OSF mmap flags %04lx\n", @@ -240,6 +246,13 @@ if (!file) goto out; } + + if(gr_handle_mmap(file, prot)) { + fput(file); + ret = -EACCES; + goto out; + } + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); down_write(¤t->mm->mmap_sem); ret = do_mmap(file, addr, len, prot, flags, off); @@ -1367,6 +1380,10 @@ merely specific addresses, but regions of memory -- perhaps this feature should be incorporated into all ports? */ +#ifdef CONFIG_GRKERNSEC_PAX_RANDMMAP + if (!(current->flags & PF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { addr = arch_get_unmapped_area_1 (PAGE_ALIGN(addr), len, limit); if (addr != -ENOMEM) @@ -1374,8 +1391,15 @@ } /* Next, try allocating at TASK_UNMAPPED_BASE. 
*/ - addr = arch_get_unmapped_area_1 (PAGE_ALIGN(TASK_UNMAPPED_BASE), - len, limit); + + addr = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_GRKERNSEC_PAX_RANDMMAP + if (current->flags & PF_PAX_RANDMMAP) + addr += current->mm->delta_mmap; +#endif + + addr = arch_get_unmapped_area_1 (PAGE_ALIGN(addr), len, limit); if (addr != -ENOMEM) return addr; diff -urN linux-2.4.20/arch/alpha/kernel/ptrace.c linux-2.4.20/arch/alpha/kernel/ptrace.c --- linux-2.4.20/arch/alpha/kernel/ptrace.c 2001-09-18 20:03:51.000000000 -0400 +++ linux-2.4.20/arch/alpha/kernel/ptrace.c 2003-04-06 15:55:38.000000000 -0400 @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -273,6 +274,10 @@ read_unlock(&tasklist_lock); if (!child) goto out_notsk; + + if(gr_handle_ptrace(child, request)) + goto out; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out; diff -urN linux-2.4.20/arch/alpha/mm/fault.c linux-2.4.20/arch/alpha/mm/fault.c --- linux-2.4.20/arch/alpha/mm/fault.c 2002-11-28 18:53:08.000000000 -0500 +++ linux-2.4.20/arch/alpha/mm/fault.c 2003-04-06 15:55:38.000000000 -0400 @@ -53,6 +53,169 @@ __reload_thread(¤t->thread); } +/* + * PaX: decide what to do with offenders (regs->pc = fault address) + * + * returns 1 when task should be killed + * 2 when patched PLT trampoline was detected + * 3 when unpatched PLT trampoline was detected + * 4 when legitimate ET_EXEC was detected + */ +#ifdef CONFIG_GRKERNSEC_PAX_PAGEEXEC +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + int err; + +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + if (current->flags & PF_PAX_RANDEXEC) { + if (regs->pc >= current->mm->start_code && + regs->pc < current->mm->end_code) + { + if (regs->r26 == regs->pc) + return 1; + regs->pc += current->mm->delta_exec; + return 4; + } + } +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_EMUPLT + do { /* PaX: patched PLT emulation #1 */ + unsigned int ldah, ldq, jmp; + + err = get_user(ldah, (unsigned int *)regs->pc); + err |= get_user(ldq, (unsigned int 
*)(regs->pc+4)); + err |= get_user(jmp, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((ldah & 0xFFFF0000U)== 0x277B0000U && + (ldq & 0xFFFF0000U) == 0xA77B0000U && + jmp == 0x6BFB0000U) + { + unsigned long r27, addr; + unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; + unsigned long addrl = ldq | 0xFFFFFFFFFFFF0000UL; + + addr = regs->r27 + ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); + err = get_user(r27, (unsigned long*)addr); + if (err) + break; + + regs->r27 = r27; + regs->pc = r27; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #2 */ + unsigned int ldah, lda, br; + + err = get_user(ldah, (unsigned int *)regs->pc); + err |= get_user(lda, (unsigned int *)(regs->pc+4)); + err |= get_user(br, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((ldah & 0xFFFF0000U)== 0x277B0000U && + (lda & 0xFFFF0000U) == 0xA77B0000U && + (br & 0xFFE00000U) == 0xC3E00000U) + { + unsigned long addr = br | 0xFFFFFFFFFFE00000UL; + unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; + unsigned long addrl = lda | 0xFFFFFFFFFFFF0000UL; + + regs->r27 += ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); + regs->pc += 12 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); + return 2; + } + } while (0); + + do { /* PaX: unpatched PLT emulation */ + unsigned int br; + + err = get_user(br, (unsigned int *)regs->pc); + + if (!err && (br & 0xFFE00000U) == 0xC3800000U) { + unsigned int br2, ldq, nop, jmp; + unsigned long addr = br | 0xFFFFFFFFFFE00000UL, resolver; + + addr = regs->pc + 4 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); + err = get_user(br2, (unsigned int *)addr); + err |= get_user(ldq, (unsigned int *)(addr+4)); + err |= get_user(nop, (unsigned int *)(addr+8)); + err |= get_user(jmp, (unsigned int *)(addr+12)); + err |= get_user(resolver, (unsigned long *)(addr+16)); + + if (err) + break; + + if (br2 == 0xC3600000U && + ldq == 0xA77B000CU && + nop == 0x47FF041FU && 
+ jmp == 0x6B7B0000U) + { + regs->r28 = regs->pc+4; + regs->r27 = addr+16; + regs->pc = resolver; + return 3; + } + } + } while (0); +#endif + + return 1; +} + +static void pax_report_fault(struct pt_regs *regs) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = current->mm; + char* buffer = (char*)__get_free_page(GFP_ATOMIC); + char* path=NULL; + unsigned long i; + + if (buffer) { + struct vm_area_struct* vma; + + down_read(&mm->mmap_sem); + vma = mm->mmap; + while (vma) { + if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) { + break; + } + vma = vma->vm_next; + } + if (vma) + path = d_path(vma->vm_file->f_dentry, vma->vm_file->f_vfsmnt, buffer, PAGE_SIZE); + up_read(&mm->mmap_sem); + } + if (tsk->curr_ip) + printk(KERN_ERR "PAX: From %u.%u.%u.%u: terminating task: %.930s(%s):%d, uid/euid: %u/%u, " + "PC: %016lX, SP: %016lX\n", NIPQUAD(tsk->curr_ip), + path, tsk->comm, tsk->pid, tsk->uid, tsk->euid, + regs->pc, rdusp()); + else + printk(KERN_ERR "PAX: terminating task: %.930s(%s):%d, uid/euid: %u/%u, " + "PC: %016lX, SP: %016lX\n", path, tsk->comm, + tsk->pid, tsk->uid, tsk->euid, regs->pc, rdusp()); + if (buffer) free_page((unsigned long)buffer); + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)(regs->pc+(i*4)))) { + printk("."); + break; + } + printk("%08x ", c); + } + printk("\n"); + do_coredump(SIGKILL, regs); +} +#endif + /* * This routine handles page faults. 
It determines the address, @@ -133,8 +296,32 @@ good_area: info.si_code = SEGV_ACCERR; if (cause < 0) { - if (!(vma->vm_flags & VM_EXEC)) + if (!(vma->vm_flags & VM_EXEC)) { + +#ifdef CONFIG_GRKERNSEC_PAX_PAGEEXEC + if (!(current->flags & PF_PAX_PAGEEXEC) || address != regs->pc) + goto survive; + + up_read(&mm->mmap_sem); + switch(pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_GRKERNSEC_PAX_EMUPLT + case 2: + case 3: + return; +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + case 4: + return; +#endif + } + pax_report_fault(regs); + do_exit(SIGKILL); +#else goto bad_area; +#endif + } } else if (!cause) { /* Allow reads even for write-only mappings */ if (!(vma->vm_flags & (VM_READ | VM_WRITE))) diff -urN linux-2.4.20/arch/arm/config.in linux-2.4.20/arch/arm/config.in --- linux-2.4.20/arch/arm/config.in 2002-11-28 18:53:09.000000000 -0500 +++ linux-2.4.20/arch/arm/config.in 2003-04-06 15:55:38.000000000 -0400 @@ -657,3 +657,11 @@ endmenu source lib/Config.in + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity' CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = "y" ]; then + source grsecurity/Config.in +fi +endmenu diff -urN linux-2.4.20/arch/arm/kernel/process.c linux-2.4.20/arch/arm/kernel/process.c --- linux-2.4.20/arch/arm/kernel/process.c 2002-08-02 20:39:42.000000000 -0400 +++ linux-2.4.20/arch/arm/kernel/process.c 2003-04-06 15:55:38.000000000 -0400 @@ -366,7 +366,7 @@ * a system call from a "real" process, but the process memory space will * not be free'd until both the parent and the child have exited. 
*/ -pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) +pid_t arch_kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) { pid_t __ret; diff -urN linux-2.4.20/arch/cris/config.in linux-2.4.20/arch/cris/config.in --- linux-2.4.20/arch/cris/config.in 2002-11-28 18:53:09.000000000 -0500 +++ linux-2.4.20/arch/cris/config.in 2003-04-06 15:55:38.000000000 -0400 @@ -258,3 +258,12 @@ source lib/Config.in endmenu + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity' CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = "y" ]; then + source grsecurity/Config.in +fi +endmenu + diff -urN linux-2.4.20/arch/cris/kernel/entry.S linux-2.4.20/arch/cris/kernel/entry.S --- linux-2.4.20/arch/cris/kernel/entry.S 2002-08-02 20:39:42.000000000 -0400 +++ linux-2.4.20/arch/cris/kernel/entry.S 2003-04-06 15:55:38.000000000 -0400 @@ -736,12 +736,12 @@ * the grosser the code, at least with the gcc version in cris-dist-1.13. */ -/* int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) */ +/* int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) */ /* r10 r11 r12 */ .text - .global kernel_thread -kernel_thread: + .global arch_kernel_thread +arch_kernel_thread: /* Save ARG for later. 
*/ move.d $r11, $r13 diff -urN linux-2.4.20/arch/i386/Makefile linux-2.4.20/arch/i386/Makefile --- linux-2.4.20/arch/i386/Makefile 2002-11-28 18:53:09.000000000 -0500 +++ linux-2.4.20/arch/i386/Makefile 2003-04-06 15:55:38.000000000 -0400 @@ -106,6 +106,9 @@ MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot +arch/i386/vmlinux.lds: arch/i386/vmlinux.lds.S FORCE + $(CPP) -C -P -I$(HPATH) -imacros $(HPATH)/asm-i386/page_offset.h -Ui386 arch/i386/vmlinux.lds.S >arch/i386/vmlinux.lds + vmlinux: arch/i386/vmlinux.lds FORCE: ; @@ -142,6 +145,7 @@ @$(MAKEBOOT) clean archmrproper: + rm -f arch/i386/vmlinux.lds archdep: @$(MAKEBOOT) dep diff -urN linux-2.4.20/arch/i386/boot/compressed/head.S linux-2.4.20/arch/i386/boot/compressed/head.S --- linux-2.4.20/arch/i386/boot/compressed/head.S 2000-07-05 15:03:12.000000000 -0400 +++ linux-2.4.20/arch/i386/boot/compressed/head.S 2003-04-06 15:55:38.000000000 -0400 @@ -74,7 +74,7 @@ popl %esi # discard address popl %esi # real mode pointer xorl %ebx,%ebx - ljmp $(__KERNEL_CS), $0x100000 + ljmp *0x100000 /* * We come here, if we were loaded high. 
@@ -124,5 +124,5 @@ movsl movl %ebx,%esi # Restore setup pointer xorl %ebx,%ebx - ljmp $(__KERNEL_CS), $0x100000 + ljmp *0x100000 move_routine_end: diff -urN linux-2.4.20/arch/i386/config.in linux-2.4.20/arch/i386/config.in --- linux-2.4.20/arch/i386/config.in 2002-11-28 18:53:09.000000000 -0500 +++ linux-2.4.20/arch/i386/config.in 2003-04-06 15:55:38.000000000 -0400 @@ -456,3 +456,11 @@ endmenu source lib/Config.in + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity' CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = "y" ]; then + source grsecurity/Config.in +fi +endmenu diff -urN linux-2.4.20/arch/i386/kernel/apm.c linux-2.4.20/arch/i386/kernel/apm.c --- linux-2.4.20/arch/i386/kernel/apm.c 2002-11-28 18:53:09.000000000 -0500 +++ linux-2.4.20/arch/i386/kernel/apm.c 2003-04-06 15:55:38.000000000 -0400 @@ -1923,6 +1923,12 @@ __va((unsigned long)0x40 << 4)); _set_limit((char *)&gdt[APM_40 >> 3], 4095 - (0x40 << 4)); +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + set_base(gdt2[APM_40 >> 3], + __va((unsigned long)0x40 << 4)); + _set_limit((char *)&gdt2[APM_40 >> 3], 4095 - (0x40 << 4)); +#endif + apm_bios_entry.offset = apm_info.bios.offset; apm_bios_entry.segment = APM_CS; set_base(gdt[APM_CS >> 3], @@ -1931,6 +1937,16 @@ __va((unsigned long)apm_info.bios.cseg_16 << 4)); set_base(gdt[APM_DS >> 3], __va((unsigned long)apm_info.bios.dseg << 4)); + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + set_base(gdt2[APM_CS >> 3], + __va((unsigned long)apm_info.bios.cseg << 4)); + set_base(gdt2[APM_CS_16 >> 3], + __va((unsigned long)apm_info.bios.cseg_16 << 4)); + set_base(gdt2[APM_DS >> 3], + __va((unsigned long)apm_info.bios.dseg << 4)); +#endif + #ifndef APM_RELAX_SEGMENTS if (apm_info.bios.version == 0x100) { #endif @@ -1940,6 +1956,13 @@ _set_limit((char *)&gdt[APM_CS_16 >> 3], 64 * 1024 - 1); /* For the DEC Hinote Ultra CT475 (and others?) 
*/ _set_limit((char *)&gdt[APM_DS >> 3], 64 * 1024 - 1); + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + _set_limit((char *)&gdt2[APM_CS >> 3], 64 * 1024 - 1); + _set_limit((char *)&gdt2[APM_CS_16 >> 3], 64 * 1024 - 1); + _set_limit((char *)&gdt2[APM_DS >> 3], 64 * 1024 - 1); +#endif + #ifndef APM_RELAX_SEGMENTS } else { _set_limit((char *)&gdt[APM_CS >> 3], @@ -1948,6 +1971,16 @@ (apm_info.bios.cseg_16_len - 1) & 0xffff); _set_limit((char *)&gdt[APM_DS >> 3], (apm_info.bios.dseg_len - 1) & 0xffff); + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + _set_limit((char *)&gdt2[APM_CS >> 3], + (apm_info.bios.cseg_len - 1) & 0xffff); + _set_limit((char *)&gdt2[APM_CS_16 >> 3], + (apm_info.bios.cseg_16_len - 1) & 0xffff); + _set_limit((char *)&gdt2[APM_DS >> 3], + (apm_info.bios.dseg_len - 1) & 0xffff); +#endif + } #endif diff -urN linux-2.4.20/arch/i386/kernel/entry.S linux-2.4.20/arch/i386/kernel/entry.S --- linux-2.4.20/arch/i386/kernel/entry.S 2002-11-28 18:53:09.000000000 -0500 +++ linux-2.4.20/arch/i386/kernel/entry.S 2003-04-06 15:55:38.000000000 -0400 @@ -45,6 +45,7 @@ #include #include #include +#include EBX = 0x00 ECX = 0x04 @@ -209,6 +210,17 @@ jae badsys call *SYMBOL_NAME(sys_call_table)(,%eax,4) movl %eax,EAX(%esp) # save the return value + +#ifdef CONFIG_GRKERNSEC_PAX_RANDKSTACK + cli # need_resched and signals atomic test + cmpl $0,need_resched(%ebx) + jne reschedule + cmpl $0,sigpending(%ebx) + jne signal_return + call SYMBOL_NAME(pax_randomize_kstack) + jmp restore_all +#endif + ENTRY(ret_from_sys_call) cli # need_resched and signals atomic test cmpl $0,need_resched(%ebx) @@ -389,8 +401,56 @@ jmp error_code ENTRY(page_fault) +#ifdef CONFIG_GRKERNSEC_PAX_PAGEEXEC + ALIGN + pushl $ SYMBOL_NAME(pax_do_page_fault) +#else pushl $ SYMBOL_NAME(do_page_fault) +#endif + +#ifndef CONFIG_GRKERNSEC_PAX_EMUTRAMP jmp error_code +#else + pushl %ds + pushl %eax + xorl %eax,%eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + decl %eax # eax = -1 + pushl %ecx + pushl %ebx + cld + 
movl %es,%ecx + movl ORIG_EAX(%esp), %esi # get the error code + movl ES(%esp), %edi # get the function address + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) + movl %esp,%edx + pushl %esi # push the error code + pushl %edx # push the pt_regs pointer + movl $(__KERNEL_DS),%edx + movl %edx,%ds + movl %edx,%es + GET_CURRENT(%ebx) + call *%edi + addl $8,%esp + decl %eax + jnz ret_from_exception + + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + popl %ds + popl %es + addl $4,%esp + jmp system_call +#endif ENTRY(machine_check) pushl $0 @@ -402,7 +462,7 @@ pushl $ SYMBOL_NAME(do_spurious_interrupt_bug) jmp error_code -.data +.section .rodata, "a" ENTRY(sys_call_table) .long SYMBOL_NAME(sys_ni_syscall) /* 0 - old "setup()" system call*/ .long SYMBOL_NAME(sys_exit) diff -urN linux-2.4.20/arch/i386/kernel/head.S linux-2.4.20/arch/i386/kernel/head.S --- linux-2.4.20/arch/i386/kernel/head.S 2002-08-02 20:39:42.000000000 -0400 +++ linux-2.4.20/arch/i386/kernel/head.S 2003-04-06 15:55:38.000000000 -0400 @@ -41,6 +41,7 @@ * * On entry, %esi points to the real-mode code as a 32-bit pointer. */ +.global startup_32 startup_32: /* * Set segments to known values @@ -86,7 +87,7 @@ PRESENT+RW+USER */ 2: stosl add $0x1000,%eax - cmp $empty_zero_page-__PAGE_OFFSET,%edi + cmp $0x00c00007,%eax jne 2b /* @@ -100,9 +101,19 @@ movl %eax,%cr0 /* ..and set paging (PG) bit */ jmp 1f /* flush the prefetch-queue */ 1: + +#if !defined(CONFIG_GRKERNSEC_PAX_KERNEXEC) || defined(CONFIG_SMP) + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + orw %bx,%bx + jz 1f +#endif + movl $1f,%eax jmp *%eax /* make sure eip is relocated */ 1: +#endif + /* Set up the stack pointer */ lss stack_start,%esp @@ -121,7 +132,7 @@ */ xorl %eax,%eax movl $ SYMBOL_NAME(__bss_start),%edi - movl $ SYMBOL_NAME(_end),%ecx + movl $ SYMBOL_NAME(__bss_end),%ecx subl %edi,%ecx rep stosb @@ -272,8 +283,6 @@ jmp L6 # main should never return here, but # just in case, we know what happens. 
-ready: .byte 0 - /* * We depend on ET to be correct. This checks for 287/387. */ @@ -319,13 +328,6 @@ jne rp_sidt ret -ENTRY(stack_start) - .long SYMBOL_NAME(init_task_union)+8192 - .long __KERNEL_DS - -/* This is the default interrupt "handler" :-) */ -int_msg: - .asciz "Unknown interrupt\n" ALIGN ignore_int: cld @@ -347,6 +349,18 @@ popl %eax iret +.data +ready: .byte 0 + +ENTRY(stack_start) + .long SYMBOL_NAME(init_task_union)+8192 + .long __KERNEL_DS + +.section .rodata,"a" +/* This is the default interrupt "handler" :-) */ +int_msg: + .asciz "Unknown interrupt\n" + /* * The interrupt descriptor table has room for 256 idt's, * the global descriptor table is dependent on the number @@ -372,41 +386,58 @@ SYMBOL_NAME(gdt): .long SYMBOL_NAME(gdt_table) +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC +.globl SYMBOL_NAME(gdt2) + .word 0 +gdt_descr2: + .word GDT_ENTRIES*8-1 +SYMBOL_NAME(gdt2): + .long SYMBOL_NAME(gdt_table2) +#endif + /* * This is initialized to create an identity-mapping at 0-8M (for bootup * purposes) and another mapping of the 0-8M area at virtual address * PAGE_OFFSET. */ -.org 0x1000 +.section .data.swapper_pg_dir,"a" ENTRY(swapper_pg_dir) - .long 0x00102007 - .long 0x00103007 - .fill BOOT_USER_PGD_PTRS-2,4,0 + .long pg0-__PAGE_OFFSET+7 + .long pg1-__PAGE_OFFSET+7 + .long pg2-__PAGE_OFFSET+7 + .fill BOOT_USER_PGD_PTRS-3,4,0 /* default: 766 entries */ - .long 0x00102007 - .long 0x00103007 + .long pg0-__PAGE_OFFSET+7 + .long pg1-__PAGE_OFFSET+7 + .long pg2-__PAGE_OFFSET+7 /* default: 254 entries */ - .fill BOOT_KERNEL_PGD_PTRS-2,4,0 + .fill BOOT_KERNEL_PGD_PTRS-3,4,0 /* * The page tables are initialized to only 8MB here - the final page * tables are set up later depending on memory size. */ -.org 0x2000 +.section .data.pg0,"a" ENTRY(pg0) + .fill 1024,4,0 -.org 0x3000 +.section .data.pg1,"a" ENTRY(pg1) + .fill 1024,4,0 + +.section .data.pg2,"a" +ENTRY(pg2) + .fill 1024,4,0 /* * empty_zero_page must immediately follow the page tables ! 
(The * initialization loop counts until empty_zero_page) */ - -.org 0x4000 +.section .data.empty_zero_page,"a" ENTRY(empty_zero_page) + .fill 1024,4,0 -.org 0x5000 +.text /* * Real beginning of normal "text" segment @@ -419,7 +450,7 @@ * in the text section because it has alignment requirements * that we cannot fulfill any other way. */ -.data +.section .rodata,"a" ALIGN /* @@ -430,19 +461,55 @@ */ ENTRY(gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x0000000000000000 /* not used */ - .quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */ - .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */ - .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */ + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + .quad 0x00cf9b000000ffff /* 0x08 kernel 4GB code at 0x00000000 */ + .quad 0xc0cf9b400000ffff /* 0x10 kernel 4GB code at 0xc0400000 */ +#else + .quad 0x0000000000000000 /* not used */ + .quad 0x00cf9b000000ffff /* 0x10 kernel 4GB code at 0x00000000 */ +#endif + + .quad 0x00cf93000000ffff /* 0x18 kernel 4GB data at 0x00000000 */ + .quad 0x00cffb000000ffff /* 0x23 user 4GB code at 0x00000000 */ + .quad 0x00cff3000000ffff /* 0x2b user 4GB data at 0x00000000 */ + .quad 0x0000000000000000 /* not used */ + .quad 0x0000000000000000 /* not used */ + /* + * The APM segments have byte granularity and their bases + * and limits are set at run time. 
+ */ + .quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */ + .quad 0x00409a0000000000 /* 0x48 APM CS code */ + .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */ + .quad 0x0040920000000000 /* 0x58 APM DS data */ + .fill NR_CPUS*4,8,0 /* space for TSS's and LDT's */ + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC +ENTRY(gdt_table2) + .quad 0x0000000000000000 /* NULL descriptor */ + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + .quad 0x00cf9b000000ffff /* 0x08 kernel 4GB code at 0x00000000 */ + .quad 0xc0cf9b400000ffff /* 0x10 kernel 4GB code at 0xc0400000 */ +#else + .quad 0x0000000000000000 /* not used */ + .quad 0x00cf9b000000ffff /* 0x10 kernel 4GB code at 0x00000000 */ +#endif + + .quad 0x00cf93000000ffff /* 0x18 kernel 4GB data at 0x00000000 */ + .quad 0x60c5fb000000ffff /* 0x23 user 1.5GB code at 0x60000000 */ + .quad 0x00c5f3000000ffff /* 0x2b user 1.5GB data at 0x00000000 */ + .quad 0x0000000000000000 /* not used */ .quad 0x0000000000000000 /* not used */ /* * The APM segments have byte granularity and their bases * and limits are set at run time. 
*/ - .quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */ - .quad 0x00409a0000000000 /* 0x48 APM CS code */ - .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */ - .quad 0x0040920000000000 /* 0x58 APM DS data */ + .quad 0x0040930000000000 /* 0x40 APM set up for bad BIOS's */ + .quad 0x00409b0000000000 /* 0x48 APM CS code */ + .quad 0x00009b0000000000 /* 0x50 APM CS 16 code (16 bit) */ + .quad 0x0040930000000000 /* 0x58 APM DS data */ .fill NR_CPUS*4,8,0 /* space for TSS's and LDT's */ +#endif diff -urN linux-2.4.20/arch/i386/kernel/i386_ksyms.c linux-2.4.20/arch/i386/kernel/i386_ksyms.c --- linux-2.4.20/arch/i386/kernel/i386_ksyms.c 2002-08-02 20:39:42.000000000 -0400 +++ linux-2.4.20/arch/i386/kernel/i386_ksyms.c 2003-04-06 15:55:38.000000000 -0400 @@ -73,6 +73,9 @@ EXPORT_SYMBOL(get_cmos_time); EXPORT_SYMBOL(apm_info); EXPORT_SYMBOL(gdt); +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC +EXPORT_SYMBOL(gdt2); +#endif EXPORT_SYMBOL(empty_zero_page); #ifdef CONFIG_DEBUG_IOVIRT diff -urN linux-2.4.20/arch/i386/kernel/ioport.c linux-2.4.20/arch/i386/kernel/ioport.c --- linux-2.4.20/arch/i386/kernel/ioport.c 1999-07-19 18:22:48.000000000 -0400 +++ linux-2.4.20/arch/i386/kernel/ioport.c 2003-04-06 15:55:38.000000000 -0400 @@ -14,6 +14,7 @@ #include #include #include +#include /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ static void set_bitmap(unsigned long *bitmap, short base, short extent, int new_value) @@ -59,8 +60,16 @@ if ((from + num <= from) || (from + num > IO_BITMAP_SIZE*32)) return -EINVAL; +#ifdef CONFIG_GRKERNSEC_IO + if (turn_on) { + gr_handle_ioperm(); +#else if (turn_on && !capable(CAP_SYS_RAWIO)) +#endif return -EPERM; +#ifdef CONFIG_GRKERNSEC_IO + } +#endif /* * If it's the first ioperm() call in this thread's lifetime, set the * IO bitmap up. ioperm() is much less timing critical than clone(), @@ -108,8 +117,13 @@ return -EINVAL; /* Trying to gain more privileges? 
*/ if (level > old) { +#ifdef CONFIG_GRKERNSEC_IO + gr_handle_iopl(); + return -EPERM; +#else if (!capable(CAP_SYS_RAWIO)) return -EPERM; +#endif } regs->eflags = (regs->eflags & 0xffffcfff) | (level << 12); return 0; diff -urN linux-2.4.20/arch/i386/kernel/ldt.c linux-2.4.20/arch/i386/kernel/ldt.c --- linux-2.4.20/arch/i386/kernel/ldt.c 2001-10-17 17:46:29.000000000 -0400 +++ linux-2.4.20/arch/i386/kernel/ldt.c 2003-04-06 15:55:38.000000000 -0400 @@ -122,6 +122,13 @@ } } +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + if (current->flags & PF_PAX_SEGMEXEC && (ldt_info.contents & 2)) { + error = -EINVAL; + goto out_unlock; + } +#endif + entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) | (ldt_info.limit & 0x0ffff); entry_2 = (ldt_info.base_addr & 0xff000000) | diff -urN linux-2.4.20/arch/i386/kernel/pci-pc.c linux-2.4.20/arch/i386/kernel/pci-pc.c --- linux-2.4.20/arch/i386/kernel/pci-pc.c 2002-11-28 18:53:09.000000000 -0500 +++ linux-2.4.20/arch/i386/kernel/pci-pc.c 2003-04-06 15:55:38.000000000 -0400 @@ -16,6 +16,7 @@ #include #include #include +#include #include "pci-i386.h" @@ -572,10 +573,16 @@ * the array there. 
*/ +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC +#define __FLAT_KERNEL_CS 0x08 +#else +#define __FLAT_KERNEL_CS __KERNEL_CS +#endif + static struct { unsigned long address; unsigned short segment; -} bios32_indirect = { 0, __KERNEL_CS }; +} bios32_indirect = { 0, __FLAT_KERNEL_CS }; /* * Returns the entry point for the given service, NULL on error @@ -616,7 +623,9 @@ static struct { unsigned long address; unsigned short segment; -} pci_indirect = { 0, __KERNEL_CS }; +} pci_indirect = { 0, __FLAT_KERNEL_CS }; + +#undef __FLAT_KERNEL_CS static int pci_bios_present; @@ -1425,6 +1434,19 @@ if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT)) pcibios_sort(); #endif + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + /* PaX: nuke __FLAT_KERNEL_CS, no longer needed */ + gdt_table[1].a = 0UL; + gdt_table[1].b = 0UL; + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + gdt_table2[1].a = 0UL; + gdt_table2[1].b = 0UL; +#endif + +#endif + } char * __devinit pcibios_setup(char *str) diff -urN linux-2.4.20/arch/i386/kernel/process.c linux-2.4.20/arch/i386/kernel/process.c --- linux-2.4.20/arch/i386/kernel/process.c 2002-08-02 20:39:42.000000000 -0400 +++ linux-2.4.20/arch/i386/kernel/process.c 2003-04-06 15:55:38.000000000 -0400 @@ -485,7 +485,7 @@ /* * Create a kernel thread */ -int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) { long retval, d0; @@ -508,6 +508,7 @@ "r" (arg), "r" (fn), "b" (flags | CLONE_VM) : "memory"); + return retval; } @@ -584,7 +585,11 @@ { struct pt_regs * childregs; +#ifdef CONFIG_GRKERNSEC_PAX_RANDKSTACK + childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p - sizeof(unsigned long))) - 1; +#else childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1; +#endif struct_cpy(childregs, regs); childregs->eax = 0; childregs->esp = esp; @@ -645,6 +650,16 @@ dump->u_fpvalid = dump_fpu (regs, &dump->i387); } +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC +void 
pax_switch_segments(struct task_struct * tsk) +{ + if (tsk->flags & PF_PAX_SEGMEXEC) + __asm__ __volatile__("lgdt %0": "=m" (gdt_descr2)); + else + __asm__ __volatile__("lgdt %0": "=m" (gdt_descr)); +} +#endif + /* * This special macro can be used to load a debugging register */ @@ -684,6 +699,10 @@ unlazy_fpu(prev_p); +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + pax_switch_segments(next_p); +#endif + /* * Reload esp0, LDT and the page table pointer: */ @@ -824,3 +843,25 @@ } #undef last_sched #undef first_sched + +#ifdef CONFIG_GRKERNSEC_PAX_RANDKSTACK +asmlinkage void pax_randomize_kstack(void) +{ + struct tss_struct *tss = init_tss + smp_processor_id(); + unsigned long time; + + rdtscl(time); + + /* P4 seems to return a 0 LSB, ignore it */ +#ifdef CONFIG_MPENTIUM4 + time &= 0x3EUL; + time <<= 1; +#else + time &= 0x1FUL; + time <<= 2; +#endif + + current->thread.esp0 ^= time; + tss->esp0 = current->thread.esp0; +} +#endif diff -urN linux-2.4.20/arch/i386/kernel/ptrace.c linux-2.4.20/arch/i386/kernel/ptrace.c --- linux-2.4.20/arch/i386/kernel/ptrace.c 2002-08-02 20:39:42.000000000 -0400 +++ linux-2.4.20/arch/i386/kernel/ptrace.c 2003-04-06 15:55:38.000000000 -0400 @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -177,6 +178,9 @@ if (pid == 1) /* you may not mess with init */ goto out_tsk; + if(gr_handle_ptrace(child, request)) + goto out_tsk; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out_tsk; @@ -256,6 +260,17 @@ if(addr < (long) &dummy->u_debugreg[4] && ((unsigned long) data) >= TASK_SIZE-3) break; +#ifdef CONFIG_GRKERNSEC + if(addr >= (long) &dummy->u_debugreg[0] && + addr <= (long) &dummy->u_debugreg[3]){ + long reg = (addr - (long) &dummy->u_debugreg[0]) >> 2; + long type = (child->thread.debugreg[7] >> (DR_CONTROL_SHIFT + 4*reg)) & 3; + long align = (child->thread.debugreg[7] >> (DR_CONTROL_SHIFT + 2 + 4*reg)) & 3; + if((type & 1) && (data & align)) + break; + } +#endif + if(addr == (long) &dummy->u_debugreg[7]) 
{ data &= ~DR_CONTROL_RESERVED; for(i=0; i<4; i++) diff -urN linux-2.4.20/arch/i386/kernel/setup.c linux-2.4.20/arch/i386/kernel/setup.c --- linux-2.4.20/arch/i386/kernel/setup.c 2002-11-28 18:53:09.000000000 -0500 +++ linux-2.4.20/arch/i386/kernel/setup.c 2003-04-06 15:55:38.000000000 -0400 @@ -3044,7 +3044,7 @@ set_tss_desc(nr,t); gdt_table[__TSS(nr)].b &= 0xfffffdff; load_TR(nr); - load_LDT(&init_mm); + _load_LDT(&init_mm); /* * Clear all 6 debug registers: diff -urN linux-2.4.20/arch/i386/kernel/sys_i386.c linux-2.4.20/arch/i386/kernel/sys_i386.c --- linux-2.4.20/arch/i386/kernel/sys_i386.c 2001-03-19 15:35:09.000000000 -0500 +++ linux-2.4.20/arch/i386/kernel/sys_i386.c 2003-04-06 15:55:39.000000000 -0400 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -48,6 +49,11 @@ int error = -EBADF; struct file * file = NULL; +#if defined(CONFIG_GRKERNSEC_PAX_SEGMEXEC) || defined(CONFIG_GRKERNSEC_PAX_RANDEXEC) + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); if (!(flags & MAP_ANONYMOUS)) { file = fget(fd); @@ -55,8 +61,14 @@ goto out; } + if(gr_handle_mmap(file, prot)) { + fput(file); + error = -EACCES; + goto out; + } + down_write(&current->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + error = do_mmap(file, addr, len, prot, flags, pgoff << PAGE_SHIFT); up_write(&current->mm->mmap_sem); if (file) diff -urN linux-2.4.20/arch/i386/kernel/trampoline.S linux-2.4.20/arch/i386/kernel/trampoline.S --- linux-2.4.20/arch/i386/kernel/trampoline.S 2002-11-28 18:53:09.000000000 -0500 +++ linux-2.4.20/arch/i386/kernel/trampoline.S 2003-04-06 15:55:39.000000000 -0400 @@ -54,7 +54,7 @@ lmsw %ax # into protected mode jmp flush_instr flush_instr: - ljmpl $__KERNEL_CS, $0x00100000 + ljmpl $__KERNEL_CS, $SYMBOL_NAME(startup_32)-__PAGE_OFFSET # jump to startup_32 in arch/i386/kernel/head.S idt_48: diff -urN linux-2.4.20/arch/i386/kernel/traps.c linux-2.4.20/arch/i386/kernel/traps.c --- 
linux-2.4.20/arch/i386/kernel/traps.c 2002-11-28 18:53:09.000000000 -0500 +++ linux-2.4.20/arch/i386/kernel/traps.c 2003-04-06 15:55:39.000000000 -0400 @@ -228,14 +228,23 @@ show_stack((unsigned long*)esp); printk("\nCode: "); + +#ifndef CONFIG_GRKERNSEC_PAX_KERNEXEC if(regs->eip < PAGE_OFFSET) goto bad; +#endif for(i=0;i<20;i++) { unsigned char c; + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + if(__get_user(c, &((unsigned char*)regs->eip)[i+__KERNEL_TEXT_OFFSET])) { +#else if(__get_user(c, &((unsigned char*)regs->eip)[i])) { bad: +#endif + printk(" Bad EIP value."); break; } @@ -258,8 +267,13 @@ eip = regs->eip; +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + eip += __KERNEL_TEXT_OFFSET; +#else if (eip < PAGE_OFFSET) goto no_bug; +#endif + if (__get_user(ud2, (unsigned short *)eip)) goto no_bug; if (ud2 != 0x0b0f) @@ -267,7 +281,13 @@ if (__get_user(line, (unsigned short *)(eip + 2))) goto bug; if (__get_user(file, (char **)(eip + 4)) || + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + __get_user(c, file + __KERNEL_TEXT_OFFSET)) +#else (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) +#endif + file = ""; printk("kernel BUG at %s:%d!\n", file, line); @@ -417,6 +437,18 @@ gp_in_kernel: { unsigned long fixup; + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + if ((regs->xcs & 0xFFFF) == __KERNEL_CS) { + if (current->curr_ip) + printk(KERN_ERR "PAX: From %u.%u.%u.%u: task %s:%d, uid/euid: %u/%u, may have attempted to execute invalid code at %08lx\n", + NIPQUAD(current->curr_ip), current->comm, current->pid, current->uid, current->euid, regs->eip); + else + printk(KERN_ERR "PAX: task %s:%d, uid/euid: %u/%u, may have attempted to execute invalid code at %08lx\n", + current->comm, current->pid, current->uid, current->euid, regs->eip); + } +#endif + fixup = search_exception_table(regs->eip); if (fixup) { regs->eip = fixup; @@ -527,13 +559,12 @@ { unsigned int condition; struct task_struct *tsk = current; - unsigned long eip = regs->eip; siginfo_t info; __asm__ __volatile__("movl %%db6,%0" : 
"=r" (condition)); /* If the user set TF, it's simplest to clear it right away. */ - if ((eip >=PAGE_OFFSET) && (regs->eflags & TF_MASK)) + if (!(regs->xcs & 3) && (regs->eflags & TF_MASK) && !(regs->eflags & VM_MASK)) goto clear_TF; /* Mask out spurious debug traps due to lazy DR7 setting */ @@ -855,11 +886,63 @@ void set_tss_desc(unsigned int n, void *addr) { _set_tssldt_desc(gdt_table+__TSS(n), (int)addr, 235, 0x89); + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + _set_tssldt_desc(gdt_table2+__TSS(n), (int)addr, 235, 0x89); +#endif + +} + +void __set_ldt_desc(unsigned int n, void *addr, unsigned int size) +{ + _set_tssldt_desc(gdt_table+__LDT(n), (int)addr, ((size << 3)-1), 0x82); + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + _set_tssldt_desc(gdt_table2+__LDT(n), (int)addr, ((size << 3)-1), 0x82); +#endif + } void set_ldt_desc(unsigned int n, void *addr, unsigned int size) { + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + pgd_t* pgd; + pmd_t* pmd; + + unsigned long __pe = _KERNPG_TABLE + __pa(__KERNEL_TEXT_OFFSET); + + asm("movl %%cr3,%0":"=r" (pgd)); + pgd = (pgd_t *)__va(pgd) + __pgd_offset(__KERNEL_TEXT_OFFSET); + pmd = pmd_offset(pgd, __KERNEL_TEXT_OFFSET); + if (cpu_has_pse) { + __pe += _PAGE_PSE; + if (cpu_has_pge) + __pe += _PAGE_GLOBAL; + } else + __pe += __pa(__KERNEL_TEXT_OFFSET); + set_pmd(pmd, __pmd(__pe)); + __flush_tlb_all(); +#endif + _set_tssldt_desc(gdt_table+__LDT(n), (int)addr, ((size << 3)-1), 0x82); + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + _set_tssldt_desc(gdt_table2+__LDT(n), (int)addr, ((size << 3)-1), 0x82); +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + __pe = __PAGE_KERNEL_RO + __pa(__KERNEL_TEXT_OFFSET); + if (cpu_has_pse) { + __pe += _PAGE_PSE; + if (cpu_has_pge) + __pe += _PAGE_GLOBAL; + } else + __pe += __pa(__KERNEL_TEXT_OFFSET); + set_pmd(pmd, __pmd(__pe)); + flush_tlb_all(); +#endif + } #ifdef CONFIG_X86_VISWS_APIC diff -urN linux-2.4.20/arch/i386/mm/fault.c linux-2.4.20/arch/i386/mm/fault.c --- linux-2.4.20/arch/i386/mm/fault.c 2002-11-28 
18:53:09.000000000 -0500 +++ linux-2.4.20/arch/i386/mm/fault.c 2003-04-06 15:55:39.000000000 -0400 @@ -4,6 +4,7 @@ * Copyright (C) 1995 Linus Torvalds */ +#include #include #include #include @@ -19,6 +20,8 @@ #include #include #include /* For unblank_screen() */ +#include +#include #include #include @@ -127,6 +130,11 @@ asmlinkage void do_invalid_op(struct pt_regs *, unsigned long); extern unsigned long idt; +#if defined(CONFIG_GRKERNSEC_PAX_PAGEEXEC) || defined(CONFIG_GRKERNSEC_PAX_SEGMEXEC) +static void pax_report_fault(struct pt_regs *regs); +static int pax_handle_fetch_fault(struct pt_regs *regs); +#endif + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -137,23 +145,31 @@ * bit 1 == 0 means read, 1 means write * bit 2 == 0 means kernel, 1 means user-mode */ -asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) +#ifdef CONFIG_GRKERNSEC_PAX_PAGEEXEC +static int do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) +#else +asmlinkage int do_page_fault(struct pt_regs *regs, unsigned long error_code) +#endif { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct * vma; +#ifndef CONFIG_GRKERNSEC_PAX_PAGEEXEC unsigned long address; +#endif unsigned long page; unsigned long fixup; int write; siginfo_t info; +#ifndef CONFIG_GRKERNSEC_PAX_PAGEEXEC /* get the address */ __asm__("movl %%cr2,%0":"=r" (address)); /* It's safe to allow irq's after cr2 has been saved */ - if (regs->eflags & X86_EFLAGS_IF) + if (likely(regs->eflags & X86_EFLAGS_IF)) local_irq_enable(); +#endif tsk = current; @@ -258,7 +274,7 @@ tsk->thread.screen_bitmap |= 1 << bit; } up_read(&mm->mmap_sem); - return; + return 0; /* * Something tried to access memory that isn't in our memory map.. 
@@ -269,6 +285,39 @@ /* User mode accesses just cause a SIGSEGV */ if (error_code & 4) { +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + if (current->flags & PF_PAX_SEGMEXEC) { + +#if defined(CONFIG_GRKERNSEC_PAX_EMUTRAMP) || defined(CONFIG_GRKERNSEC_PAX_RANDEXEC) + if ((error_code == 4) && (regs->eip + SEGMEXEC_TASK_SIZE == address)) { + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + case 5: + return 0; +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_EMUTRAMP + case 4: + return 0; + case 3: + case 2: + return 1; +#endif + + case 1: + default: + } + } +#endif + + if (address >= SEGMEXEC_TASK_SIZE) { + pax_report_fault(regs); + do_exit(SIGKILL); + } + } +#endif + tsk->thread.cr2 = address; tsk->thread.error_code = error_code; tsk->thread.trap_no = 14; @@ -277,7 +326,7 @@ /* info.si_code has been set above */ info.si_addr = (void *)address; force_sig_info(SIGSEGV, &info, tsk); - return; + return 0; } /* @@ -290,7 +339,7 @@ if (nr == 6) { do_invalid_op(regs, 0); - return; + return 0; } } @@ -298,7 +347,7 @@ /* Are we prepared to handle this kernel fault? */ if ((fixup = search_exception_table(regs->eip)) != 0) { regs->eip = fixup; - return; + return 0; } /* @@ -310,6 +359,18 @@ if (address < PAGE_SIZE) printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + else if (init_mm.start_code + __KERNEL_TEXT_OFFSET <= address && address < init_mm.end_code + __KERNEL_TEXT_OFFSET) { + if (tsk->curr_ip) + printk(KERN_ERR "PAX: From %u.%u.%u.%u: %s:%d, uid/euid: %u/%u, attempted to modify kernel code", + NIPQUAD(tsk->curr_ip), tsk->comm, tsk->pid, tsk->uid, tsk->euid); + else + printk(KERN_ERR "PAX: %s:%d, uid/euid: %u/%u, attempted to modify kernel code", + tsk->comm, tsk->pid, tsk->uid, tsk->euid); + } +#endif + else printk(KERN_ALERT "Unable to handle kernel paging request"); printk(" at virtual address %08lx\n",address); @@ -362,7 +423,7 @@ /* Kernel mode? 
Handle exceptions or die */ if (!(error_code & 4)) goto no_context; - return; + return 0; vmalloc_fault: { @@ -395,6 +456,369 @@ pte_k = pte_offset(pmd_k, address); if (!pte_present(*pte_k)) goto no_context; - return; + return 0; + } +} +#ifdef CONFIG_GRKERNSEC_PAX_PAGEEXEC +/* PaX: called with the page_table_lock spinlock held */ +static inline pte_t * pax_get_pte(struct mm_struct *mm, unsigned long address) +{ + pgd_t *pgd; + pmd_t *pmd; + + pgd = pgd_offset(mm, address); + if (!pgd || !pgd_present(*pgd)) + return 0; + pmd = pmd_offset(pgd, address); + if (!pmd || !pmd_present(*pmd)) + return 0; + return pte_offset(pmd, address); +} +#endif + +/* + * PaX: decide what to do with offenders (regs->eip = fault address) + * + * returns 1 when task should be killed + * 2 when sigreturn trampoline was detected + * 3 when rt_sigreturn trampoline was detected + * 4 when gcc trampoline was detected + * 5 when legitimate ET_EXEC was detected + */ +#if defined(CONFIG_GRKERNSEC_PAX_PAGEEXEC) || defined(CONFIG_GRKERNSEC_PAX_SEGMEXEC) +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ +#ifdef CONFIG_GRKERNSEC_PAX_EMUTRAMP + static const unsigned char trans[8] = {6, 1, 2, 0, 13, 5, 3, 4}; +#endif + int err; + +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + if (current->flags & PF_PAX_RANDEXEC) { + unsigned long esp_4; + if (regs->eip >= current->mm->start_code && + regs->eip < current->mm->end_code) + { + err = get_user(esp_4, (unsigned long*)(regs->esp-4UL)); + if (!err && esp_4 != regs->eip) { + regs->eip += current->mm->delta_exec; + return 5; + } + return 1; + } + } +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_EMUTRAMP + +#ifndef CONFIG_GRKERNSEC_PAX_EMUSIGRT + if (!(current->flags & PF_PAX_EMUTRAMP)) + return 1; +#endif + + do { /* PaX: sigreturn emulation */ + unsigned char pop, mov; + unsigned short sys; + unsigned long nr; + + err = get_user(pop, (unsigned char *)(regs->eip)); + err |= get_user(mov, (unsigned char *)(regs->eip + 1)); + err |= get_user(nr, (unsigned long 
*)(regs->eip + 2)); + err |= get_user(sys, (unsigned short *)(regs->eip + 6)); + + if (err) + break; + + if (pop == 0x58 && + mov == 0xb8 && + nr == __NR_sigreturn && + sys == 0x80cd) + { + +#ifdef CONFIG_GRKERNSEC_PAX_EMUSIGRT + int sig; + struct k_sigaction *ka; + __sighandler_t handler; + + if (get_user(sig, (int *)regs->esp)) + return 1; + if (sig < 1 || sig > _NSIG || sig == SIGKILL || sig == SIGSTOP) + return 1; + ka = &current->sig->action[sig-1]; + handler = ka->sa.sa_handler; + if (handler == SIG_DFL || handler == SIG_IGN) { + if (!(current->flags & PF_PAX_EMUTRAMP)) + return 1; + } else if (ka->sa.sa_flags & SA_SIGINFO) + return 1; +#endif + + regs->esp += 4; + regs->eax = nr; + regs->eip += 8; + return 2; + } + } while (0); + + do { /* PaX: rt_sigreturn emulation */ + unsigned char mov; + unsigned short sys; + unsigned long nr; + + err = get_user(mov, (unsigned char *)(regs->eip)); + err |= get_user(nr, (unsigned long *)(regs->eip + 1)); + err |= get_user(sys, (unsigned short *)(regs->eip + 5)); + + if (err) + break; + + if (mov == 0xb8 && + nr == __NR_rt_sigreturn && + sys == 0x80cd) + { + +#ifdef CONFIG_GRKERNSEC_PAX_EMUSIGRT + int sig; + struct k_sigaction *ka; + __sighandler_t handler; + + if (get_user(sig, (int *)regs->esp)) + return 1; + if (sig < 1 || sig > _NSIG || sig == SIGKILL || sig == SIGSTOP) + return 1; + ka = &current->sig->action[sig-1]; + handler = ka->sa.sa_handler; + if (handler == SIG_DFL || handler == SIG_IGN) { + if (!(current->flags & PF_PAX_EMUTRAMP)) + return 1; + } else if (ka->sa.sa_flags & SA_SIGINFO) + return 1; +#endif + + regs->eax = nr; + regs->eip += 7; + return 3; + } + } while (0); + +#ifdef CONFIG_GRKERNSEC_PAX_EMUSIGRT + if (!(current->flags & PF_PAX_EMUTRAMP)) + return 1; +#endif + + do { /* PaX: gcc trampoline emulation #1 */ + unsigned char mov1, mov2; + unsigned short jmp; + unsigned long addr1, addr2, ret; + unsigned short call; + + err = get_user(mov1, (unsigned char *)regs->eip); + err |= get_user(addr1, (unsigned long 
*)(regs->eip + 1)); + err |= get_user(mov2, (unsigned char *)(regs->eip + 5)); + err |= get_user(addr2, (unsigned long *)(regs->eip + 6)); + err |= get_user(jmp, (unsigned short *)(regs->eip + 10)); + err |= get_user(ret, (unsigned long *)regs->esp); + + if (err) + break; + + err = get_user(call, (unsigned short *)(ret-2)); + if (err) + break; + + if ((mov1 & 0xF8) == 0xB8 && + (mov2 & 0xF8) == 0xB8 && + (mov1 & 0x07) != (mov2 & 0x07) && + (jmp & 0xF8FF) == 0xE0FF && + (mov2 & 0x07) == ((jmp>>8) & 0x07) && + (call & 0xF8FF) == 0xD0FF && + (regs->eip == ((unsigned long*)regs)[trans[(call>>8) & 0x07]])) + { + ((unsigned long *)regs)[trans[mov1 & 0x07]] = addr1; + ((unsigned long *)regs)[trans[mov2 & 0x07]] = addr2; + regs->eip = addr2; + return 4; + } + } while (0); + + do { /* PaX: gcc trampoline emulation #2 */ + unsigned char mov, jmp; + unsigned long addr1, addr2, ret; + unsigned short call; + + err = get_user(mov, (unsigned char *)regs->eip); + err |= get_user(addr1, (unsigned long *)(regs->eip + 1)); + err |= get_user(jmp, (unsigned char *)(regs->eip + 5)); + err |= get_user(addr2, (unsigned long *)(regs->eip + 6)); + err |= get_user(ret, (unsigned long *)regs->esp); + + if (err) + break; + + err = get_user(call, (unsigned short *)(ret-2)); + if (err) + break; + + if ((mov & 0xF8) == 0xB8 && + jmp == 0xE9 && + (call & 0xF8FF) == 0xD0FF && + (regs->eip == ((unsigned long*)regs)[trans[(call>>8) & 0x07]])) + { + ((unsigned long *)regs)[trans[mov & 0x07]] = addr1; + regs->eip += addr2 + 10; + return 4; + } + } while (0); +#endif + + return 1; /* PaX in action */ +} + +static void pax_report_fault(struct pt_regs *regs) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = current->mm; + char* buffer = (char*)__get_free_page(GFP_ATOMIC); + char* path=NULL; + unsigned long i; + + if (buffer) { + struct vm_area_struct* vma; + + down_read(&mm->mmap_sem); + vma = mm->mmap; + while (vma) { + if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) { + break; + } + 
vma = vma->vm_next; + } + if (vma) + path = d_path(vma->vm_file->f_dentry, vma->vm_file->f_vfsmnt, buffer, PAGE_SIZE); + up_read(&mm->mmap_sem); + } + if (tsk->curr_ip) + printk(KERN_ERR "PAX: From %u.%u.%u.%u: terminating task: %.930s(%.16s):%d, uid/euid: %u/%u, " + "EIP: %08lX, ESP: %08lX\n", NIPQUAD(tsk->curr_ip), + path, tsk->comm, tsk->pid, tsk->uid, tsk->euid, + regs->eip, regs->esp); + else + printk(KERN_ERR "PAX: terminating task: %.930s(%.16s):%d, uid/euid: %u/%u, " + "EIP: %08lX, ESP: %08lX\n", path, tsk->comm, tsk->pid, + tsk->uid, tsk->euid, regs->eip, regs->esp); + + if (buffer) free_page((unsigned long)buffer); + printk(KERN_ERR "PAX: bytes at EIP: "); + for (i = 0; i < 20; i++) { + unsigned char c; + if (get_user(c, (unsigned char*)(regs->eip+i))) { + printk("."); + break; + } + printk("%02x ", c); + } + printk("\n"); + do_coredump(SIGKILL, regs); +} +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_PAGEEXEC +/* + * PaX: handle the extra page faults or pass it down to the original handler + * + * returns 0 when nothing special was detected + * 1 when sigreturn trampoline (syscall) has to be emulated + */ +asmlinkage int pax_do_page_fault(struct pt_regs *regs, unsigned long error_code) +{ + struct mm_struct *mm = current->mm; + unsigned long address; + pte_t *pte; + unsigned char pte_mask; + int ret; + + __asm__("movl %%cr2,%0":"=r" (address)); + + /* It's safe to allow irq's after cr2 has been saved */ + if (regs->eflags & X86_EFLAGS_IF) + local_irq_enable(); + + if (unlikely((error_code & 5) != 5 || + address >= TASK_SIZE || + !(current->flags & PF_PAX_PAGEEXEC))) + return do_page_fault(regs, error_code, address); + + /* PaX: it's our fault, let's handle it if we can */ + + /* PaX: take a look at read faults before acquiring any locks */ + if (unlikely((error_code == 5) && (regs->eip == address))) { + /* instruction fetch attempt from a protected page in user mode */ + ret = pax_handle_fetch_fault(regs); + switch (ret) { +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + 
case 5: + return 0; +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_EMUTRAMP + + case 4: + return 0; + case 3: + case 2: return 1; +#endif + case 1: + default: + pax_report_fault(regs); + do_exit(SIGKILL); + } + } + + pte_mask = _PAGE_ACCESSED | _PAGE_USER | ((error_code & 2) << (_PAGE_BIT_DIRTY-1)); + + spin_lock(&mm->page_table_lock); + pte = pax_get_pte(mm, address); + if (unlikely(!pte || !(pte_val(*pte) & _PAGE_PRESENT) || pte_exec(*pte))) { + spin_unlock(&mm->page_table_lock); + do_page_fault(regs, error_code, address); + return 0; + } + + if (unlikely((error_code == 7) && !pte_write(*pte))) { + /* write attempt to a protected page in user mode */ + spin_unlock(&mm->page_table_lock); + do_page_fault(regs, error_code, address); + return 0; } + + /* + * PaX: fill DTLB with user rights and retry + */ + __asm__ __volatile__ ( + "orb %2,%1\n" +#if defined(CONFIG_M586) || defined(CONFIG_M586TSC) +/* + * PaX: let this uncommented 'invlpg' remind us on the behaviour of Intel's + * (and AMD's) TLBs. namely, they do not cache PTEs that would raise *any* + * page fault when examined during a TLB load attempt. this is true not only + * for PTEs holding a non-present entry but also present entries that will + * raise a page fault (such as those set up by PaX, or the copy-on-write + * mechanism). in effect it means that we do *not* need to flush the TLBs + * for our target pages since their PTEs are simply not in the TLBs at all. + * the best thing in omitting it is that we gain around 15-20% speed in + * fast path of the page fault handler and can get rid of tracing since we + * can no longer flush unintended entries. 
+ */ + + "invlpg %0\n" +#endif + + "testb $0,%0\n" + "xorb %3,%1\n" + : + : "m" (*(char*)address), "m" (*(char*)pte) , "q" (pte_mask) , "i" (_PAGE_USER) + : "memory", "cc"); + spin_unlock(&mm->page_table_lock); + return 0; } +#endif diff -urN linux-2.4.20/arch/i386/mm/init.c linux-2.4.20/arch/i386/mm/init.c --- linux-2.4.20/arch/i386/mm/init.c 2002-11-28 18:53:09.000000000 -0500 +++ linux-2.4.20/arch/i386/mm/init.c 2003-04-06 15:55:39.000000000 -0400 @@ -37,6 +37,8 @@ #include #include #include +#include +#include mmu_gather_t mmu_gathers[NR_CPUS]; unsigned long highstart_pfn, highend_pfn; @@ -589,6 +591,34 @@ totalram_pages++; } printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10); + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + /* PaX: limit KERNEL_CS to actual size */ + { + unsigned long limit; + + limit = ((unsigned long)&_etext + PAGE_SIZE - 1) >> PAGE_SHIFT; + gdt_table[2].a = (gdt_table[2].a & 0xFFFF0000UL) | (limit & 0x0FFFFUL); + gdt_table[2].b = (gdt_table[2].b & 0xFFF0FFFFUL) | (limit & 0xF0000UL); + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + gdt_table2[2].a = (gdt_table2[2].a & 0xFFFF0000UL) | (limit & 0x0FFFFUL); + gdt_table2[2].b = (gdt_table2[2].b & 0xFFF0FFFFUL) | (limit & 0xF0000UL); +#endif + + /* PaX: make KERNEL_CS read-only */ + for (addr = __KERNEL_TEXT_OFFSET; addr < __KERNEL_TEXT_OFFSET + 0x00400000UL; addr += (1UL << PMD_SHIFT)) { + pgd_t *pgd; + pmd_t *pmd; + + pgd = pgd_offset_k(addr); + pmd = pmd_offset(pgd, addr); + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW)); + } + flush_tlb_all(); + } +#endif + } #ifdef CONFIG_BLK_DEV_INITRD diff -urN linux-2.4.20/arch/i386/vmlinux.lds linux-2.4.20/arch/i386/vmlinux.lds --- linux-2.4.20/arch/i386/vmlinux.lds 2002-02-25 14:37:53.000000000 -0500 +++ linux-2.4.20/arch/i386/vmlinux.lds 1969-12-31 19:00:00.000000000 -0500 @@ -1,82 +0,0 @@ -/* ld script to make i386 Linux kernel - * Written by Martin Mares ; - */ -OUTPUT_FORMAT("elf32-i386", "elf32-i386", 
"elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(_start) -SECTIONS -{ - . = 0xC0000000 + 0x100000; - _text = .; /* Text and read-only data */ - .text : { - *(.text) - *(.fixup) - *(.gnu.warning) - } = 0x9090 - - _etext = .; /* End of text section */ - - .rodata : { *(.rodata) *(.rodata.*) } - .kstrtab : { *(.kstrtab) } - - . = ALIGN(16); /* Exception table */ - __start___ex_table = .; - __ex_table : { *(__ex_table) } - __stop___ex_table = .; - - __start___ksymtab = .; /* Kernel symbol table */ - __ksymtab : { *(__ksymtab) } - __stop___ksymtab = .; - - .data : { /* Data */ - *(.data) - CONSTRUCTORS - } - - _edata = .; /* End of data section */ - - . = ALIGN(8192); /* init_task */ - .data.init_task : { *(.data.init_task) } - - . = ALIGN(4096); /* Init code and data */ - __init_begin = .; - .text.init : { *(.text.init) } - .data.init : { *(.data.init) } - . = ALIGN(16); - __setup_start = .; - .setup.init : { *(.setup.init) } - __setup_end = .; - __initcall_start = .; - .initcall.init : { *(.initcall.init) } - __initcall_end = .; - . = ALIGN(4096); - __init_end = .; - - . = ALIGN(4096); - .data.page_aligned : { *(.data.idt) } - - . = ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } - - __bss_start = .; /* BSS */ - .bss : { - *(.bss) - } - _end = . ; - - /* Sections to be discarded */ - /DISCARD/ : { - *(.text.exit) - *(.data.exit) - *(.exitcall.exit) - } - - /* Stabs debugging sections. 
*/ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } -} diff -urN linux-2.4.20/arch/i386/vmlinux.lds.S linux-2.4.20/arch/i386/vmlinux.lds.S --- linux-2.4.20/arch/i386/vmlinux.lds.S 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.4.20/arch/i386/vmlinux.lds.S 2003-04-06 15:55:38.000000000 -0400 @@ -0,0 +1,141 @@ +/* ld script to make i386 Linux kernel + * Written by Martin Mares ; + */ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) +SECTIONS +{ + . = __PAGE_OFFSET + 0x100000; + .data.startup : { +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + LONG(startup_32 + __KERNEL_TEXT_OFFSET - __PAGE_OFFSET) +#else + LONG(startup_32 - __PAGE_OFFSET) +#endif + + SHORT(__KERNEL_CS) + SHORT(0) + } + + . = ALIGN(16); /* Exception table */ + __start___ex_table = .; + __ex_table : { *(__ex_table) } + __stop___ex_table = .; + + __start___ksymtab = .; /* Kernel symbol table */ + __ksymtab : { *(__ksymtab) } + __stop___ksymtab = .; + + .data : { /* Data */ + *(.data) + CONSTRUCTORS + } + + . = ALIGN(32); + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + + .data.init_task : { + . = ALIGN(8192); + *(.data.init_task) + } + + .data.page_aligned : { + . = ALIGN(4096); + *(.data.swapper_pg_dir) + *(.data.pg0) + *(.data.pg1) + *(.data.pg2) + *(.data.empty_zero_page) + } + + _edata = .; /* End of data section */ + + __bss_start = .; /* BSS */ + .bss : { + *(.bss) + LONG(0) + } + __bss_end = . ; + + . = ALIGN(4096); /* Init code and data */ + __init_begin = .; + + .data.init : { *(.data.init) } + . = ALIGN(16); + __setup_start = .; + .setup.init : { *(.setup.init) } + __setup_end = .; + __initcall_start = .; + .initcall.init : { *(.initcall.init) } + __initcall_end = .; + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + __text_init_start = .; + .text.init (. 
- __KERNEL_TEXT_OFFSET) : AT (__text_init_start) { + *(.text.init) + . = ALIGN(4*1024*1024) - 1; + BYTE(0) + } + . += __KERNEL_TEXT_OFFSET; +#else + .text.init : { *(.text.init) } + . = ALIGN(4096); +#endif + + __init_end = .; + + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC +/* + * PaX: this must be kept in synch with the KERNEL_CS base + * in the GDTs in arch/i386/kernel/head.S + */ + _text = . - __KERNEL_TEXT_OFFSET; /* Text and read-only data */ + .text (. - __KERNEL_TEXT_OFFSET) : AT (_text + __KERNEL_TEXT_OFFSET) { +#else + _text = .; /* Text and read-only data */ + .text : { +#endif + + *(.text) + *(.fixup) + *(.gnu.warning) + . = ALIGN(4096); + } = 0x9090 + + _etext = .; /* End of text section */ + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + . += __KERNEL_TEXT_OFFSET; +#endif + + .rodata : { *(.rodata) *(.rodata.*) } + .rodata.page_aligned : { + . = ALIGN(4096); + *(.data.idt) + } + .kstrtab : { *(.kstrtab) } + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + _end = ALIGN(4*1024*1024); +#else + _end = .; +#endif + + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + *(.exitcall.exit) + } + + /* Stabs debugging sections. 
*/ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} diff -urN linux-2.4.20/arch/ia64/config.in linux-2.4.20/arch/ia64/config.in --- linux-2.4.20/arch/ia64/config.in 2002-11-28 18:53:09.000000000 -0500 +++ linux-2.4.20/arch/ia64/config.in 2003-04-06 15:55:39.000000000 -0400 @@ -293,3 +293,12 @@ fi endmenu + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity' CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = "y" ]; then + source grsecurity/Config.in +fi +endmenu + diff -urN linux-2.4.20/arch/ia64/kernel/process.c linux-2.4.20/arch/ia64/kernel/process.c --- linux-2.4.20/arch/ia64/kernel/process.c 2002-11-28 18:53:09.000000000 -0500 +++ linux-2.4.20/arch/ia64/kernel/process.c 2003-04-06 15:55:39.000000000 -0400 @@ -224,7 +224,7 @@ * | | <-- sp (lowest addr) * +---------------------+ * - * Note: if we get called through kernel_thread() then the memory + * Note: if we get called through arch_kernel_thread() then the memory * above "(highest addr)" is valid kernel stack memory that needs to * be copied as well. * @@ -479,7 +479,7 @@ } pid_t -kernel_thread (int (*fn)(void *), void *arg, unsigned long flags) +arch_kernel_thread (int (*fn)(void *), void *arg, unsigned long flags) { struct task_struct *parent = current; int result, tid; diff -urN linux-2.4.20/arch/ia64/kernel/ptrace.c linux-2.4.20/arch/ia64/kernel/ptrace.c --- linux-2.4.20/arch/ia64/kernel/ptrace.c 2002-08-02 20:39:42.000000000 -0400 +++ linux-2.4.20/arch/ia64/kernel/ptrace.c 2003-04-06 15:55:39.000000000 -0400 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -1118,6 +1119,9 @@ if (pid == 1) /* no messing around with init! 
*/ goto out_tsk; + if (gr_handle_ptrace(child, request)) + goto out_tsk; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out_tsk; diff -urN linux-2.4.20/arch/ia64/kernel/sys_ia64.c linux-2.4.20/arch/ia64/kernel/sys_ia64.c --- linux-2.4.20/arch/ia64/kernel/sys_ia64.c 2002-11-28 18:53:09.000000000 -0500 +++ linux-2.4.20/arch/ia64/kernel/sys_ia64.c 2003-04-06 15:55:39.000000000 -0400 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -212,6 +213,11 @@ goto out; } + if (gr_handle_mmap(file, prot)) { + addr = -EACCES; + goto out; + } + down_write(&current->mm->mmap_sem); addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); up_write(&current->mm->mmap_sem); diff -urN linux-2.4.20/arch/m68k/config.in linux-2.4.20/arch/m68k/config.in --- linux-2.4.20/arch/m68k/config.in 2002-11-28 18:53:09.000000000 -0500 +++ linux-2.4.20/arch/m68k/config.in 2003-04-06 15:55:39.000000000 -0400 @@ -558,3 +558,11 @@ endmenu source lib/Config.in + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity' CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = "y" ]; then + source grsecurity/Config.in +fi +endmenu diff -urN linux-2.4.20/arch/m68k/kernel/process.c linux-2.4.20/arch/m68k/kernel/process.c --- linux-2.4.20/arch/m68k/kernel/process.c 2002-08-02 20:39:43.000000000 -0400 +++ linux-2.4.20/arch/m68k/kernel/process.c 2003-04-06 15:55:39.000000000 -0400 @@ -124,7 +124,7 @@ /* * Create a kernel thread */ -int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) { int pid; mm_segment_t fs; diff -urN linux-2.4.20/arch/mips/config.in linux-2.4.20/arch/mips/config.in --- linux-2.4.20/arch/mips/config.in 2002-11-28 18:53:09.000000000 -0500 +++ linux-2.4.20/arch/mips/config.in 2003-04-06 15:55:39.000000000 -0400 @@ -7,3 +7,11 @@ define_bool CONFIG_MIPS64 n source arch/mips/config-shared.in + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity'
CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = "y" ]; then + source grsecurity/Config.in +fi +endmenu diff -urN linux-2.4.20/arch/mips/kernel/process.c linux-2.4.20/arch/mips/kernel/process.c --- linux-2.4.20/arch/mips/kernel/process.c 2002-11-28 18:53:10.000000000 -0500 +++ linux-2.4.20/arch/mips/kernel/process.c 2003-04-06 15:55:39.000000000 -0400 @@ -152,7 +152,7 @@ /* * Create a kernel thread */ -int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) { long retval; diff -urN linux-2.4.20/arch/mips64/config.in linux-2.4.20/arch/mips64/config.in --- linux-2.4.20/arch/mips64/config.in 2002-11-28 18:53:10.000000000 -0500 +++ linux-2.4.20/arch/mips64/config.in 2003-04-06 15:55:39.000000000 -0400 @@ -7,3 +7,11 @@ define_bool CONFIG_MIPS64 y source arch/mips/config-shared.in + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity' CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = "y" ]; then + source grsecurity/Config.in +fi +endmenu diff -urN linux-2.4.20/arch/mips64/kernel/process.c linux-2.4.20/arch/mips64/kernel/process.c --- linux-2.4.20/arch/mips64/kernel/process.c 2002-11-28 18:53:10.000000000 -0500 +++ linux-2.4.20/arch/mips64/kernel/process.c 2003-04-06 15:55:39.000000000 -0400 @@ -151,7 +151,7 @@ /* * Create a kernel thread */ -int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) +int arch_kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) { int retval; diff -urN linux-2.4.20/arch/parisc/config.in linux-2.4.20/arch/parisc/config.in --- linux-2.4.20/arch/parisc/config.in 2002-11-28 18:53:10.000000000 -0500 +++ linux-2.4.20/arch/parisc/config.in 2003-04-06 15:55:39.000000000 -0400 @@ -197,3 +197,11 @@ endmenu source lib/Config.in + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity' CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = "y" ]; then + source grsecurity/Config.in +fi +endmenu diff -urN 
linux-2.4.20/arch/parisc/kernel/ioctl32.c linux-2.4.20/arch/parisc/kernel/ioctl32.c --- linux-2.4.20/arch/parisc/kernel/ioctl32.c 2002-11-28 18:53:10.000000000 -0500 +++ linux-2.4.20/arch/parisc/kernel/ioctl32.c 2003-04-06 15:55:39.000000000 -0400 @@ -1434,7 +1434,11 @@ * To have permissions to do most of the vt ioctls, we either have * to be the owner of the tty, or super-user. */ +#ifdef CONFIG_GRKERNSEC + if (current->tty == tty || capable(CAP_SYS_TTY_CONFIG)) +#else if (current->tty == tty || suser()) +#endif return 1; return 0; } diff -urN linux-2.4.20/arch/parisc/kernel/process.c linux-2.4.20/arch/parisc/kernel/process.c --- linux-2.4.20/arch/parisc/kernel/process.c 2002-11-28 18:53:10.000000000 -0500 +++ linux-2.4.20/arch/parisc/kernel/process.c 2003-04-06 15:55:39.000000000 -0400 @@ -163,7 +163,7 @@ */ extern pid_t __kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); -pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) +pid_t arch_kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) { /* diff -urN linux-2.4.20/arch/parisc/kernel/ptrace.c linux-2.4.20/arch/parisc/kernel/ptrace.c --- linux-2.4.20/arch/parisc/kernel/ptrace.c 2002-11-28 18:53:10.000000000 -0500 +++ linux-2.4.20/arch/parisc/kernel/ptrace.c 2003-04-06 15:55:39.000000000 -0400 @@ -15,7 +15,7 @@ #include #include #include - +#include #include #include #include @@ -119,6 +119,9 @@ if (pid == 1) /* no messing around with init! 
*/ goto out_tsk; + if (gr_handle_ptrace(child, request)) + goto out_tsk; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out_tsk; diff -urN linux-2.4.20/arch/parisc/kernel/sys_parisc.c linux-2.4.20/arch/parisc/kernel/sys_parisc.c --- linux-2.4.20/arch/parisc/kernel/sys_parisc.c 2002-11-28 18:53:10.000000000 -0500 +++ linux-2.4.20/arch/parisc/kernel/sys_parisc.c 2003-04-06 15:55:39.000000000 -0400 @@ -12,6 +12,7 @@ #include #include #include +#include int sys_pipe(int *fildes) { @@ -90,6 +91,11 @@ inode = filp->f_dentry->d_inode; } +#ifdef CONFIG_GRKERNSEC_PAX_RANDMMAP + if ((current->flags & PF_PAX_RANDMMAP) && (!addr || filp)) + addr = TASK_UNMAPPED_BASE + current->mm->delta_mmap; +#endif + if (inode && (flags & MAP_SHARED) && (inode->i_mapping->i_mmap_shared)) { addr = get_shared_area(inode, addr, len, pgoff); } else { @@ -104,12 +110,24 @@ { struct file * file = NULL; unsigned long error = -EBADF; + +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + if (!(flags & MAP_ANONYMOUS)) { file = fget(fd); if (!file) goto out; } + if (gr_handle_mmap(file, prot)) { + if (file) /* file stays NULL for MAP_ANONYMOUS; fput(NULL) would dereference it */ + fput(file); + return -EACCES; + } + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); down_write(&current->mm->mmap_sem); diff -urN linux-2.4.20/arch/parisc/kernel/sys_parisc32.c linux-2.4.20/arch/parisc/kernel/sys_parisc32.c --- linux-2.4.20/arch/parisc/kernel/sys_parisc32.c 2002-11-28 18:53:10.000000000 -0500 +++ linux-2.4.20/arch/parisc/kernel/sys_parisc32.c 2003-04-06 15:55:39.000000000 -0400 @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -184,6 +185,20 @@ if (IS_ERR(file)) return retval; + gr_learn_resource(current, RLIMIT_NPROC, atomic_read(&current->user->processes)); + + if (gr_handle_nproc()) { + allow_write_access(file); + fput(file); + return -EAGAIN; + } + + if (!gr_acl_handle_execve(file->f_dentry, file->f_vfsmnt)) { + allow_write_access(file); + fput(file); + return -EACCES; + } + bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void
*); memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); @@ -209,11 +224,26 @@ if (retval < 0) goto out; + if (!gr_tpe_allow(file)) { + retval = -EACCES; + goto out; + } + + if (gr_check_crash_exec(file)) { + retval = -EACCES; + goto out; + } + retval = copy_strings_kernel(1, &bprm.filename, &bprm); if (retval < 0) goto out; bprm.exec = bprm.p; + + gr_set_proc_label(file->f_dentry, file->f_vfsmnt); + + gr_log_chroot_exec(file->f_dentry, file->f_vfsmnt); + retval = copy_strings32(bprm.envc, envp, &bprm); if (retval < 0) goto out; diff -urN linux-2.4.20/arch/parisc/mm/fault.c linux-2.4.20/arch/parisc/mm/fault.c --- linux-2.4.20/arch/parisc/mm/fault.c 2002-11-28 18:53:10.000000000 -0500 +++ linux-2.4.20/arch/parisc/mm/fault.c 2003-04-06 15:55:39.000000000 -0400 @@ -139,6 +139,83 @@ } #endif +/* + * PaX: decide what to do with offenders (instruction_pointer(regs) = fault address) + * + * returns 1 when task should be killed + * 2 when legitimate ET_EXEC was detected + */ +#ifdef CONFIG_GRKERNSEC_PAX_PAGEEXEC +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + int err; + +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + if (current->flags & PF_PAX_RANDEXEC) { + if (instruction_pointer(regs) >= current->mm->start_code && + instruction_pointer(regs) < current->mm->end_code) + { + if (regs->gr[22] == instruction_pointer(regs)) + return 1; + + regs->iaoq[0] += current->mm->delta_exec; + if ((regs->iaoq[1] & ~3UL) >= current->mm->start_code && + (regs->iaoq[1] & ~3UL) < current->mm->end_code) + regs->iaoq[1] += current->mm->delta_exec; + return 2; + } + } +#endif + + return 1; +} + +static void pax_report_fault(struct pt_regs *regs) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = current->mm; + char* buffer = (char*)__get_free_page(GFP_ATOMIC); + char* path=NULL; + unsigned long i; + + if (buffer) { + struct vm_area_struct* vma; + + down_read(&mm->mmap_sem); + vma = mm->mmap; + while (vma) { + if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) { + 
break; + } + vma = vma->vm_next; + } + if (vma) + path = d_path(vma->vm_file->f_dentry, vma->vm_file->f_vfsmnt, buffer, PAGE_SIZE); + up_read(&mm->mmap_sem); + } + if (tsk->curr_ip) + printk(KERN_ERR "PAX: From %u.%u.%u.%u: terminating task: %s(%s):%d, uid/euid: %u/%u, " + "PC: %016lX, SP: %016lX\n", NIPQUAD(tsk->curr_ip), path, tsk->comm, tsk->pid, + tsk->uid, tsk->euid, instruction_pointer(regs), regs->gr[30]); + else + printk(KERN_ERR "PAX: terminating task: %s(%s):%d, uid/euid: %u/%u, " + "PC: %016lX, SP: %016lX\n", path, tsk->comm, tsk->pid, + tsk->uid, tsk->euid, instruction_pointer(regs), regs->gr[30]); + if (buffer) free_page((unsigned long)buffer); + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)(instruction_pointer(regs)+(i*4)))) { + printk("."); + break; + } + printk("%08x ", c); + } + printk("\n"); + do_coredump(SIGKILL, regs); +} +#endif + void do_page_fault(struct pt_regs *regs, unsigned long code, unsigned long address) { @@ -164,8 +241,27 @@ acc_type = parisc_acctyp(code,regs->iir); - if ((vma->vm_flags & acc_type) != acc_type) + if ((vma->vm_flags & acc_type) != acc_type) { + +#ifdef CONFIG_GRKERNSEC_PAX_PAGEEXEC + if ((current->flags & PF_PAX_PAGEEXEC) && (acc_type & VM_EXEC) && + address == instruction_pointer(regs)) { + up_read(&mm->mmap_sem); + switch(pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + case 2: + return; +#endif + + } + pax_report_fault(regs); + do_exit(SIGKILL); + } +#endif + goto bad_area; + } /* * If for any reason at all we couldn't handle the fault, make diff -urN linux-2.4.20/arch/ppc/config.in linux-2.4.20/arch/ppc/config.in --- linux-2.4.20/arch/ppc/config.in 2002-11-28 18:53:11.000000000 -0500 +++ linux-2.4.20/arch/ppc/config.in 2003-04-06 15:55:39.000000000 -0400 @@ -425,3 +425,12 @@ fi fi endmenu + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity' CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = "y" ]; then + 
source grsecurity/Config.in +fi +endmenu + diff -urN linux-2.4.20/arch/ppc/kernel/misc.S linux-2.4.20/arch/ppc/kernel/misc.S --- linux-2.4.20/arch/ppc/kernel/misc.S 2002-11-28 18:53:11.000000000 -0500 +++ linux-2.4.20/arch/ppc/kernel/misc.S 2003-04-06 15:55:39.000000000 -0400 @@ -898,9 +898,9 @@ /* * Create a kernel thread - * kernel_thread(fn, arg, flags) + * arch_kernel_thread(fn, arg, flags) */ -_GLOBAL(kernel_thread) +_GLOBAL(arch_kernel_thread) mr r6,r3 /* function */ ori r3,r5,CLONE_VM /* flags */ li r0,__NR_clone diff -urN linux-2.4.20/arch/ppc/kernel/process.c linux-2.4.20/arch/ppc/kernel/process.c --- linux-2.4.20/arch/ppc/kernel/process.c 2001-11-26 08:29:17.000000000 -0500 +++ linux-2.4.20/arch/ppc/kernel/process.c 2003-04-06 15:55:39.000000000 -0400 @@ -185,6 +185,26 @@ return 1; } +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC +/* NOTE(review): the body below places bare PowerPC mnemonics and the x86-only lgdt instruction directly inside a C function, so it cannot compile as written - confirm the intended ppc implementation before this option is ever enabled here. */ +void pax_switch_segments(struct task_struct * tsk) +{ + if (tsk->flags & PF_PAX_SEGMEXEC) { + li r0,16 /* load up segment register values */ + mtctr r0 /* for context 0 */ + lis r3,0x2000 /* Ku = 1, VSID = 0 */ + li r4,0 +3: mtsrin r3,r4 + addi r3,r3,0x111 /* increment VSID */ + addis r4,r4,0x1000 /* address of next segment */ + bdnz 3b + __asm__ __volatile__("lgdt %0": "=m" (gdt_descr2)); + } else { + __asm__ __volatile__("lgdt %0": "=m" (gdt_descr)); + } +} +#endif + void _switch_to(struct task_struct *prev, struct task_struct *new, struct task_struct **last) @@ -228,6 +248,10 @@ #endif /* CONFIG_ALTIVEC */ #endif /* CONFIG_SMP */ +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + pax_switch_segments(new); +#endif + current_set[smp_processor_id()] = new; /* Avoid the trap.
On smp this this never happens since diff -urN linux-2.4.20/arch/ppc/kernel/ptrace.c linux-2.4.20/arch/ppc/kernel/ptrace.c --- linux-2.4.20/arch/ppc/kernel/ptrace.c 2002-11-28 18:53:11.000000000 -0500 +++ linux-2.4.20/arch/ppc/kernel/ptrace.c 2003-04-06 15:55:39.000000000 -0400 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -185,6 +186,9 @@ if (pid == 1) /* you may not mess with init */ goto out_tsk; + if (gr_handle_ptrace(child, request)) + goto out_tsk; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out_tsk; diff -urN linux-2.4.20/arch/ppc/kernel/syscalls.c linux-2.4.20/arch/ppc/kernel/syscalls.c --- linux-2.4.20/arch/ppc/kernel/syscalls.c 2002-08-02 20:39:43.000000000 -0400 +++ linux-2.4.20/arch/ppc/kernel/syscalls.c 2003-04-06 15:55:39.000000000 -0400 @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -196,14 +197,26 @@ struct file * file = NULL; int ret = -EBADF; +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); if (!(flags & MAP_ANONYMOUS)) { if (!(file = fget(fd))) goto out; } + if (gr_handle_mmap(file, prot)) { + if (file) /* file stays NULL for MAP_ANONYMOUS; fput(NULL) would dereference it */ + fput(file); + ret = -EACCES; + goto out; + } + down_write(&current->mm->mmap_sem); ret = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); up_write(&current->mm->mmap_sem); if (file) fput(file); diff -urN linux-2.4.20/arch/ppc64/kernel/ioctl32.c linux-2.4.20/arch/ppc64/kernel/ioctl32.c --- linux-2.4.20/arch/ppc64/kernel/ioctl32.c 2002-11-28 18:53:11.000000000 -0500 +++ linux-2.4.20/arch/ppc64/kernel/ioctl32.c 2003-04-06 15:55:39.000000000 -0400 @@ -1761,7 +1761,11 @@ * To have permissions to do most of the vt ioctls, we either have * to be the owner of the tty, or super-user.
*/ +#ifdef CONFIG_GRKERNSEC + if (current->tty == tty || capable(CAP_SYS_TTY_CONFIG)) +#else if (current->tty == tty || suser()) +#endif return 1; return 0; } diff -urN linux-2.4.20/arch/ppc64/kernel/misc.S linux-2.4.20/arch/ppc64/kernel/misc.S --- linux-2.4.20/arch/ppc64/kernel/misc.S 2002-11-28 18:53:11.000000000 -0500 +++ linux-2.4.20/arch/ppc64/kernel/misc.S 2003-04-06 15:55:39.000000000 -0400 @@ -493,9 +493,9 @@ /* * Create a kernel thread - * kernel_thread(fn, arg, flags) + * arch_kernel_thread(fn, arg, flags) */ -_GLOBAL(kernel_thread) +_GLOBAL(arch_kernel_thread) mr r6,r3 /* function */ ori r3,r5,CLONE_VM /* flags */ li r0,__NR_clone diff -urN linux-2.4.20/arch/s390/config.in linux-2.4.20/arch/s390/config.in --- linux-2.4.20/arch/s390/config.in 2002-11-28 18:53:11.000000000 -0500 +++ linux-2.4.20/arch/s390/config.in 2003-04-06 15:55:39.000000000 -0400 @@ -76,3 +76,11 @@ endmenu source lib/Config.in + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity' CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = "y" ]; then + source grsecurity/Config.in +fi +endmenu diff -urN linux-2.4.20/arch/s390/kernel/process.c linux-2.4.20/arch/s390/kernel/process.c --- linux-2.4.20/arch/s390/kernel/process.c 2002-08-02 20:39:43.000000000 -0400 +++ linux-2.4.20/arch/s390/kernel/process.c 2003-04-06 15:55:39.000000000 -0400 @@ -105,7 +105,7 @@ show_trace((unsigned long *) regs->gprs[15]); } -int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) { int clone_arg = flags | CLONE_VM; int retval; diff -urN linux-2.4.20/arch/s390x/config.in linux-2.4.20/arch/s390x/config.in --- linux-2.4.20/arch/s390x/config.in 2002-11-28 18:53:11.000000000 -0500 +++ linux-2.4.20/arch/s390x/config.in 2003-04-06 15:55:39.000000000 -0400 @@ -80,3 +80,11 @@ endmenu source lib/Config.in + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity' CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = 
"y" ]; then + source grsecurity/Config.in +fi +endmenu diff -urN linux-2.4.20/arch/s390x/kernel/process.c linux-2.4.20/arch/s390x/kernel/process.c --- linux-2.4.20/arch/s390x/kernel/process.c 2002-11-28 18:53:11.000000000 -0500 +++ linux-2.4.20/arch/s390x/kernel/process.c 2003-04-06 15:55:39.000000000 -0400 @@ -102,7 +102,7 @@ show_trace((unsigned long *) regs->gprs[15]); } -int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) { int clone_arg = flags | CLONE_VM; int retval; diff -urN linux-2.4.20/arch/sh/config.in linux-2.4.20/arch/sh/config.in --- linux-2.4.20/arch/sh/config.in 2002-11-28 18:53:11.000000000 -0500 +++ linux-2.4.20/arch/sh/config.in 2003-04-06 15:55:39.000000000 -0400 @@ -388,3 +388,11 @@ endmenu source lib/Config.in + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity' CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = "y" ]; then + source grsecurity/Config.in +fi +endmenu diff -urN linux-2.4.20/arch/sh/kernel/process.c linux-2.4.20/arch/sh/kernel/process.c --- linux-2.4.20/arch/sh/kernel/process.c 2001-10-15 16:36:48.000000000 -0400 +++ linux-2.4.20/arch/sh/kernel/process.c 2003-04-06 15:55:39.000000000 -0400 @@ -118,7 +118,7 @@ * This is the mechanism for creating a new kernel thread. * */ -int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) { /* Don't use this in BL=1(cli). Or else, CPU resets! 
*/ register unsigned long __sc0 __asm__ ("r0"); register unsigned long __sc3 __asm__ ("r3") = __NR_clone; diff -urN linux-2.4.20/arch/sparc/config.in linux-2.4.20/arch/sparc/config.in --- linux-2.4.20/arch/sparc/config.in 2002-11-28 18:53:12.000000000 -0500 +++ linux-2.4.20/arch/sparc/config.in 2003-04-06 15:55:39.000000000 -0400 @@ -268,3 +268,11 @@ endmenu source lib/Config.in + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity' CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = "y" ]; then + source grsecurity/Config.in +fi +endmenu diff -urN linux-2.4.20/arch/sparc/kernel/process.c linux-2.4.20/arch/sparc/kernel/process.c --- linux-2.4.20/arch/sparc/kernel/process.c 2002-08-02 20:39:43.000000000 -0400 +++ linux-2.4.20/arch/sparc/kernel/process.c 2003-04-06 15:55:39.000000000 -0400 @@ -676,7 +676,7 @@ * a system call from a "real" process, but the process memory space will * not be free'd until both the parent and the child have exited. */ -pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +pid_t arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) { long retval; diff -urN linux-2.4.20/arch/sparc/kernel/ptrace.c linux-2.4.20/arch/sparc/kernel/ptrace.c --- linux-2.4.20/arch/sparc/kernel/ptrace.c 2002-08-02 20:39:43.000000000 -0400 +++ linux-2.4.20/arch/sparc/kernel/ptrace.c 2003-04-06 15:55:39.000000000 -0400 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -310,6 +311,9 @@ goto out; } + if(gr_handle_ptrace(child, request)) + goto out_tsk; + if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH) || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) { if (ptrace_attach(child)) { diff -urN linux-2.4.20/arch/sparc/kernel/sys_sparc.c linux-2.4.20/arch/sparc/kernel/sys_sparc.c --- linux-2.4.20/arch/sparc/kernel/sys_sparc.c 2001-04-13 23:15:55.000000000 -0400 +++ linux-2.4.20/arch/sparc/kernel/sys_sparc.c 2003-04-06 15:55:39.000000000 -0400 @@ -20,6 +20,7 @@ 
#include #include #include +#include #include #include @@ -54,6 +55,13 @@ return -ENOMEM; if (ARCH_SUN4C_SUN4 && len > 0x20000000) return -ENOMEM; + +#ifdef CONFIG_GRKERNSEC_PAX_RANDMMAP + if ((current->flags & PF_PAX_RANDMMAP) && (!addr || filp)) + addr = TASK_UNMAPPED_BASE + current->mm->delta_mmap; + else +#endif + if (!addr) addr = TASK_UNMAPPED_BASE; @@ -222,6 +230,11 @@ struct file * file = NULL; unsigned long retval = -EBADF; +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + if (!(flags & MAP_ANONYMOUS)) { file = fget(fd); if (!file) @@ -240,6 +253,11 @@ if (len > TASK_SIZE - PAGE_SIZE || addr + len > TASK_SIZE - PAGE_SIZE) goto out_putf; + if (gr_handle_mmap(file, prot)) { + retval = -EACCES; + goto out_putf; /* same exit as the length check above; file may be NULL for MAP_ANONYMOUS, so do not fput() it here */ + } + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); down_write(&current->mm->mmap_sem); diff -urN linux-2.4.20/arch/sparc/kernel/sys_sunos.c linux-2.4.20/arch/sparc/kernel/sys_sunos.c --- linux-2.4.20/arch/sparc/kernel/sys_sunos.c 2002-11-28 18:53:12.000000000 -0500 +++ linux-2.4.20/arch/sparc/kernel/sys_sunos.c 2003-04-06 15:55:39.000000000 -0400 @@ -68,6 +68,11 @@ struct file * file = NULL; unsigned long retval, ret_type; +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + if(flags & MAP_NORESERVE) { static int cnt; if (cnt++ < 10) diff -urN linux-2.4.20/arch/sparc/mm/fault.c linux-2.4.20/arch/sparc/mm/fault.c --- linux-2.4.20/arch/sparc/mm/fault.c 2001-12-21 12:41:53.000000000 -0500 +++ linux-2.4.20/arch/sparc/mm/fault.c 2003-04-06 15:55:39.000000000 -0400 @@ -19,6 +19,9 @@ #include #include #include +#include +#include +#include #include #include @@ -200,6 +203,291 @@ return 0; } +#ifdef CONFIG_GRKERNSEC_PAX_PAGEEXEC +void pax_emuplt_close(struct vm_area_struct * vma) +{ + vma->vm_mm->call_dl_resolve = 0UL; +} + +static struct page* pax_emuplt_nopage(struct vm_area_struct *vma, unsigned long address, int write_access) +{ + struct page* page; + unsigned int *kaddr; +
+ page = alloc_page(GFP_HIGHUSER); + if (!page) + return page; + + kaddr = kmap(page); + memset(kaddr, 0, PAGE_SIZE); + kaddr[0] = 0x9DE3BFA8U; /* save */ + flush_dcache_page(page); + kunmap(page); + return page; +} + +static struct vm_operations_struct pax_vm_ops = { + close: pax_emuplt_close, + nopage: pax_emuplt_nopage, +}; + +static void pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) +{ + vma->vm_mm = current->mm; + vma->vm_start = addr; + vma->vm_end = addr + PAGE_SIZE; + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; + vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f]; + vma->vm_ops = &pax_vm_ops; + vma->vm_pgoff = 0UL; + vma->vm_file = NULL; + vma->vm_private_data = NULL; + insert_vm_struct(current->mm, vma); + ++current->mm->total_vm; +} + +/* + * PaX: decide what to do with offenders (regs->pc = fault address) + * + * returns 1 when task should be killed + * 2 when patched PLT trampoline was detected + * 3 when unpatched PLT trampoline was detected + * 4 when legitimate ET_EXEC was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + int err; + +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + if (current->flags & PF_PAX_RANDEXEC) { + if (regs->pc >= current->mm->start_code && + regs->pc < current->mm->end_code) + { + if (regs->u_regs[UREG_RETPC] + 8UL == regs->pc) + return 1; + + regs->pc += current->mm->delta_exec; + if (regs->npc >= current->mm->start_code && + regs->npc < current->mm->end_code) + regs->npc += current->mm->delta_exec; + return 4; + } + if (regs->pc >= current->mm->start_code + current->mm->delta_exec && + regs->pc < current->mm->end_code + current->mm->delta_exec) + { + regs->pc -= current->mm->delta_exec; + if (regs->npc >= current->mm->start_code + current->mm->delta_exec && + regs->npc < current->mm->end_code + current->mm->delta_exec) + regs->npc -= current->mm->delta_exec; + } + } +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_EMUPLT + do { /* PaX: patched PLT emulation #1 */ + unsigned int 
sethi1, sethi2, jmpl; + + err = get_user(sethi1, (unsigned i