diff -urNp linux-2.4.29/Documentation/Configure.help linux-2.4.29/Documentation/Configure.help --- linux-2.4.29/Documentation/Configure.help 2005-01-16 23:04:12.607971384 -0500 +++ linux-2.4.29/Documentation/Configure.help 2005-01-16 23:05:32.286858352 -0500 @@ -2899,6 +2899,20 @@ CONFIG_IP_NF_MATCH_PKTTYPE If you want to compile it as a module, say M here and read Documentation/modules.txt. If unsure, say `N'. +stealth networking support +CONFIG_IP_NF_MATCH_STEALTH + Enabling this option will drop all syn packets coming to unserved tcp + ports as well as all packets coming to unserved udp ports. If you + are using your system to route any type of packets (ie. via NAT) + you should put this module at the end of your ruleset, since it will + drop packets that aren't going to ports that are listening on your + machine itself, it doesn't take into account that the packet might be + destined for someone on your internal network if you're using NAT for + instance. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + MAC address match support CONFIG_IP_NF_MATCH_MAC MAC matching allows you to match packets based on the source @@ -23427,6 +23441,960 @@ CONFIG_CF_AREA5 "Area6" will work for most boards. For ADX, select "Area5". +Grsecurity +CONFIG_GRKERNSEC + If you say Y here, you will be able to configure many features that + will enhance the security of your system. It is highly recommended + that you say Y here and read through the help for each option so + you fully understand the features and can evaluate their usefulness + for your machine. + +Additional security levels +CONFIG_GRKERNSEC_LOW + + Low additional security + ----------------------------------------------------------------------- + If you choose this option, several of the grsecurity options will + be enabled that will give you greater protection against a number + of attacks, while assuring that none of your software will have any + conflicts with the additional security measures. If you run a lot of + unusual software, or you are having problems with the higher security + levels, you should say Y here. With this option, the following features + are enabled: + + linking restrictions + fifo restrictions + random pids + enforcing nproc on execve() + restricted dmesg + random ip ids + enforced chdir("/") on chroot + + Medium additional security + ----------------------------------------------------------------------- + If you say Y here, several features in addition to those included in the + low additional security level will be enabled. These features provide + even more security to your system, though in rare cases they may + be incompatible with very old or poorly written software. If you + enable this option, make sure that your auth service (identd) is + running as gid 10 (usually group wheel). With this option the following + features (in addition to those provided in the low additional security + level) will be enabled: + + random tcp source ports + failed fork logging + time change logging + signal logging + deny mounts in chroot + deny double chrooting + deny sysctl writes in chroot + deny mknod in chroot + deny access to abstract AF_UNIX sockets out of chroot + deny pivot_root in chroot + denied writes of /dev/kmem, /dev/mem, and /dev/port + /proc restrictions with special gid set to 10 (usually wheel) + address space layout randomization + removal of addresses from /proc//[maps|stat] + + High additional security + ---------------------------------------------------------------------- + If you say Y here, many of the features of grsecurity will be enabled, + that will protect you against many kinds of attacks against + your system. The heightened security comes at a cost of an + increased chance of incompatibilities with rare software on your + machine. Since this security level enables PaX, you should view + and read about the PaX project. While + you are there, download chpax and run it on binaries that cause + problems with PaX. Also remember that since the /proc restrictions are + enabled, you must run your identd as group wheel (gid 10). + This security level enables the following features in addition to those + listed in the low and medium security levels: + + additional /proc restrictions + chmod restrictions in chroot + no signals, ptrace, or viewing processes outside of chroot + capability restrictions in chroot + deny fchdir out of chroot + priority restrictions in chroot + segmentation-based implementation of PaX + mprotect restrictions + kernel stack randomization + mount/unmount/remount logging + kernel symbol hiding + destroy unused shared memory + +Customized additional security +CONFIG_GRKERNSEC_CUSTOM + If you say Y here, you will be able to configure every grsecurity + option, which allows you to enable many more features that aren't + covered in the basic security levels. These additional features include + TPE, socket restrictions, and the sysctl system for grsecurity. It is + advised that you read through the help for each option to determine its + usefulness in your situation. + +Support soft mode +CONFIG_GRKERNSEC_PAX_SOFTMODE + Enabling this option will allow you to run PaX in soft mode, that + is, PaX features will not be enforced by default, only on executables + marked explicitly. You must also enable PT_PAX_FLAGS support as it + is the only way to mark executables for soft mode use. + + Soft mode can be activated by using the "pax_softmode=1" kernel command + line option on boot. Furthermore you can control various PaX features + at runtime via the entries in /proc/sys/kernel/pax. + +Use legacy ELF header marking +CONFIG_GRKERNSEC_PAX_EI_PAX + Enabling this option will allow you to control PaX features on + a per executable basis via the 'chpax' utility available at + http://pax.grsecurity.net/. The control flags will be read from + an otherwise reserved part of the ELF header. This marking has + numerous drawbacks (no support for soft-mode, toolchain does not + know about the non-standard use of the ELF header) therefore it + has been deprecated in favour of PT_PAX_FLAGS support. + + If you have applications not marked by the PT_PAX_FLAGS ELF + program header then you MUST enable this option otherwise they + will not get any protection. + + Note that if you enable PT_PAX_FLAGS marking support as well, + the PT_PAX_FLAG marks will override the legacy EI_PAX marks. + +Use ELF program header marking +CONFIG_GRKERNSEC_PAX_PT_PAX_FLAGS + Enabling this option will allow you to control PaX features on + a per executable basis via the 'paxctl' utility available at + http://pax.grsecurity.net/. The control flags will be read from + a PaX specific ELF program header (PT_PAX_FLAGS). This marking + has the benefits of supporting both soft mode and being fully + integrated into the toolchain (the binutils patch is available + from http://pax.grsecurity.net). + + If you have applications not marked by the PT_PAX_FLAGS ELF + program header then you MUST enable the EI_PAX marking support + otherwise they will not get any protection. + + Note that if you enable the legacy EI_PAX marking support as well, + the EI_PAX marks will be overridden by the PT_PAX_FLAGS marks. + +MAC system integration +CONFIG_GRKERNSEC_PAX_NO_ACL_FLAGS + Mandatory Access Control systems have the option of controlling + PaX flags on a per executable basis, choose the method supported + by your particular system. + + - "none": if your MAC system does not interact with PaX, + - "direct": if your MAC system defines pax_set_flags() itself, + - "hook": if your MAC system uses the pax_set_flags_func callback. + + NOTE: this option is for developers/integrators only. + +Enforce non-executable pages +CONFIG_GRKERNSEC_PAX_NOEXEC + By design some architectures do not allow for protecting memory + pages against execution or even if they do, Linux does not make + use of this feature. In practice this means that if a page is + readable (such as the stack or heap) it is also executable. + + There is a well known exploit technique that makes use of this + fact and a common programming mistake where an attacker can + introduce code of his choice somewhere in the attacked program's + memory (typically the stack or the heap) and then execute it. + + If the attacked program was running with different (typically + higher) privileges than that of the attacker, then he can elevate + his own privilege level (e.g. get a root shell, write to files for + which he does not have write access to, etc). + + Enabling this option will let you choose from various features + that prevent the injection and execution of 'foreign' code in + a program. + + This will also break programs that rely on the old behaviour and + expect that dynamically allocated memory via the malloc() family + of functions is executable (which it is not). Notable examples + are the XFree86 4.x server, the java runtime and wine. + +Paging based non-executable pages +CONFIG_GRKERNSEC_PAX_PAGEEXEC + This implementation is based on the paging feature of the CPU. + On i386 it has a variable performance impact on applications + depending on their memory usage pattern. You should carefully + test your applications before using this feature in production. + On alpha, parisc, sparc and sparc64 there is no performance + impact. On ppc there is a slight performance impact. + +Segmentation based non-executable pages +CONFIG_GRKERNSEC_PAX_SEGMEXEC + This implementation is based on the segmentation feature of the + CPU and has little performance impact, however applications will + be limited to a 1.5 GB address space instead of the normal 3 GB. + +Emulate trampolines +CONFIG_GRKERNSEC_PAX_EMUTRAMP + There are some programs and libraries that for one reason or + another attempt to execute special small code snippets from + non-executable memory pages. Most notable examples are the + signal handler return code generated by the kernel itself and + the GCC trampolines. + + If you enabled CONFIG_GRKERNSEC_PAX_PAGEEXEC or + CONFIG_GRKERNSEC_PAX_SEGMEXEC then such programs will no longer + work under your kernel. + + As a remedy you can say Y here and use the 'chpax' or 'paxctl' + utilities to enable trampoline emulation for the affected programs + yet still have the protection provided by the non-executable pages. + + On parisc and ppc you MUST enable this option and EMUSIGRT as + well, otherwise your system will not even boot. + + Alternatively you can say N here and use the 'chpax' or 'paxctl' + utilities to disable CONFIG_GRKERNSEC_PAX_PAGEEXEC and + CONFIG_GRKERNSEC_PAX_SEGMEXEC for the affected files. + + NOTE: enabling this feature *may* open up a loophole in the + protection provided by non-executable pages that an attacker + could abuse. Therefore the best solution is to not have any + files on your system that would require this option. This can + be achieved by not using libc5 (which relies on the kernel + signal handler return code) and not using or rewriting programs + that make use of the nested function implementation of GCC. + Skilled users can just fix GCC itself so that it implements + nested function calls in a way that does not interfere with PaX. + +Automatically emulate sigreturn trampolines +CONFIG_GRKERNSEC_PAX_EMUSIGRT + Enabling this option will have the kernel automatically detect + and emulate signal return trampolines executing on the stack + that would otherwise lead to task termination. + + This solution is intended as a temporary one for users with + legacy versions of libc (libc5, glibc 2.0, uClibc before 0.9.17, + Modula-3 runtime, etc) or executables linked to such, basically + everything that does not specify its own SA_RESTORER function in + normal executable memory like glibc 2.1+ does. + + On parisc and ppc you MUST enable this option, otherwise your + system will not even boot. + + NOTE: this feature cannot be disabled on a per executable basis + and since it *does* open up a loophole in the protection provided + by non-executable pages, the best solution is to not have any + files on your system that would require this option. + +Restrict mprotect() +CONFIG_GRKERNSEC_PAX_MPROTECT + Enabling this option will prevent programs from + - changing the executable status of memory pages that were + not originally created as executable, + - making read-only executable pages writable again, + - creating executable pages from anonymous memory. + + You should say Y here to complete the protection provided by + the enforcement of non-executable pages. + + NOTE: you can use the 'chpax' utility to control this + feature on a per file basis. chpax is available at + + +Disallow ELF text relocations +CONFIG_GRKERNSEC_PAX_NOELFRELOCS + Non-executable pages and mprotect() restrictions are effective + in preventing the introduction of new executable code into an + attacked task's address space. There remain only two venues + for this kind of attack: if the attacker can execute already + existing code in the attacked task then he can either have it + create and mmap() a file containing his code or have it mmap() + an already existing ELF library that does not have position + independent code in it and use mprotect() on it to make it + writable and copy his code there. While protecting against + the former approach is beyond PaX, the latter can be prevented + by having only PIC ELF libraries on one's system (which do not + need to relocate their code). If you are sure this is your case, + then enable this option otherwise be careful as you may not even + be able to boot or log on your system (for example, some PAM + modules are erroneously compiled as non-PIC by default). + + NOTE: if you are using dynamic ELF executables (as suggested + when using ASLR) then you must have made sure that you linked + your files using the PIC version of crt1 (the et_dyn.zip package + referenced there has already been updated to support this). + +Enforce non-executable kernel pages +CONFIG_GRKERNSEC_PAX_KERNEXEC + This is the kernel land equivalent of PAGEEXEC and MPROTECT, + that is, enabling this option will make it harder to inject + and execute 'foreign' code in kernel memory itself. + +Address Space Layout Randomization +CONFIG_GRKERNSEC_PAX_ASLR + Many if not most exploit techniques rely on the knowledge of + certain addresses in the attacked program. The following options + will allow the kernel to apply a certain amount of randomization + to specific parts of the program thereby forcing an attacker to + guess them in most cases. Any failed guess will most likely crash + the attacked program which allows the kernel to detect such attempts + and react on them. PaX itself provides no reaction mechanisms, + instead it is strongly encouraged that you make use of grsecurity's + built-in crash detection features or develop one yourself. + + By saying Y here you can choose to randomize the following areas: + - top of the task's kernel stack + - top of the task's userland stack + - base address for mmap() requests that do not specify one + (this includes all libraries) + - base address of the main executable + + It is strongly recommended to say Y here as address space layout + randomization has negligible impact on performance yet it provides + a very effective protection. + + NOTE: you can use the 'chpax' or 'paxctl' utilities to control most + of these features on a per file basis. + +Randomize kernel stack base +CONFIG_GRKERNSEC_PAX_RANDKSTACK + By saying Y here the kernel will randomize every task's kernel + stack on every system call. This will not only force an attacker + to guess it but also prevent him from making use of possible + leaked information about it. + + Since the kernel stack is a rather scarce resource, randomization + may cause unexpected stack overflows, therefore you should very + carefully test your system. Note that once enabled in the kernel + configuration, this feature cannot be disabled on a per file basis. + +Randomize user stack base +CONFIG_GRKERNSEC_PAX_RANDUSTACK + By saying Y here the kernel will randomize every task's userland + stack. The randomization is done in two steps where the second + one may apply a big amount of shift to the top of the stack and + cause problems for programs that want to use lots of memory (more + than 2.5 GB if SEGMEXEC is not active, or 1.25 GB when it is). + For this reason the second step can be controlled by 'chpax' or + 'paxctl' on a per file basis. + +Randomize ET_EXEC base +CONFIG_GRKERNSEC_PAX_RANDEXEC + By saying Y here the kernel will randomize the base address of normal + ET_EXEC ELF executables as well. This is accomplished by mapping the + executable in memory in a special way which also allows for detecting + attackers who attempt to execute its code for their purposes. Since + this special mapping causes performance degradation and the attack + detection may create false alarms as well, you should carefully test + your executables when this feature is enabled. + + This solution is intended only as a temporary one until you relink + your programs as a dynamic ELF file. + + NOTE: you can use the 'chpax' or 'paxctl' utilities to control + this feature on a per file basis. + +Allow ELF ET_EXEC text relocations +CONFIG_GRKERNSEC_PAX_ETEXECRELOCS + On some architectures like the alpha there are incorrectly + created applications that require text relocations and would + not work without enabling this option. If you are an alpha + user, you should enable this option and disable it once you + have made sure that none of your applications need it. + +Automatically emulate ELF PLT +CONFIG_GRKERNSEC_PAX_EMUPLT + Enabling this option will have the kernel automatically detect + and emulate the Procedure Linkage Table entries in ELF files. + On some architectures such entries are in writable memory, and + become non-executable leading to task termination. Therefore + it is mandatory that you enable this option on alpha, parisc, ppc, + sparc and sparc64, otherwise your system would not even boot. + + NOTE: this feature *does* open up a loophole in the protection + provided by the non-executable pages, therefore the proper + solution is to modify the toolchain to produce a PLT that does + not need to be writable. + +Randomize mmap() base +CONFIG_GRKERNSEC_PAX_RANDMMAP + By saying Y here the kernel will use a randomized base address for + mmap() requests that do not specify one themselves. As a result + all dynamically loaded libraries will appear at random addresses + and therefore be harder to exploit by a technique where an attacker + attempts to execute library code for his purposes (e.g. spawn a + shell from an exploited program that is running at an elevated + privilege level). + + Furthermore, if a program is relinked as a dynamic ELF file, its + base address will be randomized as well, completing the full + randomization of the address space layout. Attacking such programs + becomes a guess game. You can find an example of doing this at + and practical samples at + . + + NOTE: you can use the 'chpax' or 'paxctl' utilities to control this + feature on a per file basis. + +Deny writing to /dev/kmem, /dev/mem, and /dev/port +CONFIG_GRKERNSEC_KMEM + If you say Y here, /dev/kmem and /dev/mem won't be allowed to + be written to via mmap or otherwise to modify the running kernel. + /dev/port will also not be allowed to be opened. If you have module + support disabled, enabling this will close up four ways that are + currently used to insert malicious code into the running kernel. + Even with all these features enabled, we still highly recommend that + you use the RBAC system, as it is still possible for an attacker to + modify the running kernel through privileged I/O granted by ioperm/iopl. + If you are not using XFree86, you may be able to stop this additional + case by enabling the 'Disable privileged I/O' option. Though nothing + legitimately writes to /dev/kmem, XFree86 does need to write to /dev/mem, + but only to video memory, which is the only writing we allow in this + case. If /dev/kmem or /dev/mem are mmaped without PROT_WRITE, they will + not be allowed to mprotect it with PROT_WRITE later. + It is highly recommended that you say Y here if you meet all the + conditions above. + +Disable privileged I/O +CONFIG_GRKERNSEC_IO + If you say Y here, all ioperm and iopl calls will return an error. + Ioperm and iopl can be used to modify the running kernel. + Unfortunately, some programs need this access to operate properly, + the most notable of which are XFree86 and hwclock. hwclock can be + remedied by having RTC support in the kernel, so CONFIG_RTC is + enabled if this option is enabled, to ensure that hwclock operates + correctly. XFree86 still will not operate correctly with this option + enabled, so DO NOT CHOOSE Y IF YOU USE XFree86. If you use XFree86 + and you still want to protect your kernel against modification, + use the RBAC system. + +Hide kernel symbols +CONFIG_GRKERNSEC_HIDESYM + If you say Y here, getting information on loaded modules, and + displaying all kernel symbols through a syscall will be restricted + to users with CAP_SYS_MODULE. This option is only effective + provided the following conditions are met: + 1) The kernel using grsecurity is not precompiled by some distribution + 2) You are using the RBAC system and hiding other files such as your + kernel image and System.map + 3) You have the additional /proc restrictions enabled, which removes + /proc/kcore + If the above conditions are met, this option will aid to provide a + useful protection against local and remote kernel exploitation of + overflows and arbitrary read/write vulnerabilities. + +Deter exploit bruteforcing +CONFIG_GRKERNSEC_BRUTE + If you say Y here, attempts to bruteforce exploits against forking + daemons such as apache or sshd will be deterred. When a child of a + forking daemon is killed by PaX or crashes due to an illegal + instruction, the parent process will be delayed 30 seconds upon every + subsequent fork until the administrator is able to assess the + situation and restart the daemon. It is recommended that you also + enable signal logging in the auditing section so that logs are + generated when a process performs an illegal instruction. + +/proc//ipaddr support +CONFIG_GRKERNSEC_PROC_IPADDR + If you say Y here, a new entry will be added to each /proc/ + directory that contains the IP address of the person using the task. + The IP is carried across local TCP and AF_UNIX stream sockets. + This information can be useful for IDS/IPSes to perform remote response + to a local attack. The entry is readable by only the owner of the + process (and root if he has CAP_DAC_OVERRIDE, which can be removed via + the RBAC system), and thus does not create privacy concerns. + +Proc Restrictions +CONFIG_GRKERNSEC_PROC + If you say Y here, the permissions of the /proc filesystem + will be altered to enhance system security and privacy. You MUST + choose either a user only restriction or a user and group restriction. + Depending upon the option you choose, you can either restrict users to + see only the processes they themselves run, or choose a group that can + view all processes and files normally restricted to root if you choose + the "restrict to user only" option. NOTE: If you're running identd as + a non-root user, you will have to run it as the group you specify here. + +Restrict /proc to user only +CONFIG_GRKERNSEC_PROC_USER + If you say Y here, non-root users will only be able to view their own + processes, and restricts them from viewing network-related information, + and viewing kernel symbol and module information. + +Restrict /proc to user and group +CONFIG_GRKERNSEC_PROC_USERGROUP + If you say Y here, you will be able to select a group that will be + able to view all processes, network-related information, and + kernel and symbol information. This option is useful if you want + to run identd as a non-root user. + +Remove addresses from /proc/pid/[maps|stat] +CONFIG_GRKERNSEC_PROC_MEMMAP + If you say Y here, the /proc//maps and /proc//stat files will + give no information about the addresses of its mappings if + PaX features that rely on random addresses are enabled on the task. + If you use PaX it is greatly recommended that you say Y here as it + closes up a hole that makes the full ASLR useless for suid + binaries. + +Additional proc restrictions +CONFIG_GRKERNSEC_PROC_ADD + If you say Y here, additional restrictions will be placed on + /proc that keep normal users from viewing device information and + slabinfo information that could be useful for exploits. + +Dmesg(8) Restriction +CONFIG_GRKERNSEC_DMESG + If you say Y here, non-root users will not be able to use dmesg(8) + to view up to the last 4kb of messages in the kernel's log buffer. + If the sysctl option is enabled, a sysctl option with name "dmesg" is + created. + +Destroy unused shared memory +CONFIG_GRKERNSEC_SHM + If you say Y here, shared memory will be destroyed when no one is + attached to it. Otherwise, resources involved with the shared + memory can be used up and not be associated with any process (as the + shared memory still exists, and the creating process has exited). If + the sysctl option is enabled, a sysctl option with name + "destroy_unused_shm" is created. + +Linking restrictions +CONFIG_GRKERNSEC_LINK + If you say Y here, /tmp race exploits will be prevented, since users + will no longer be able to follow symlinks owned by other users in + world-writable +t directories (i.e. /tmp), unless the owner of the + symlink is the owner of the directory. users will also not be + able to hardlink to files they do not own. If the sysctl option is + enabled, a sysctl option with name "linking_restrictions" is created. + +FIFO restrictions +CONFIG_GRKERNSEC_FIFO + If you say Y here, users will not be able to write to FIFOs they don't + own in world-writable +t directories (i.e. /tmp), unless the owner of + the FIFO is the same owner of the directory it's held in. If the sysctl + option is enabled, a sysctl option with name "fifo_restrictions" is + created. + +Enforce RLIMIT_NPROC on execs +CONFIG_GRKERNSEC_EXECVE + If you say Y here, users with a resource limit on processes will + have the value checked during execve() calls. The current system + only checks the system limit during fork() calls. If the sysctl option + is enabled, a sysctl option with name "execve_limiting" is created. + +Single group for auditing +CONFIG_GRKERNSEC_AUDIT_GROUP + If you say Y here, the exec, chdir, (un)mount, and ipc logging features + will only operate on a group you specify. This option is recommended + if you only want to watch certain users instead of having a large + amount of logs from the entire system. If the sysctl option is enabled, + a sysctl option with name "audit_group" is created. + +GID for auditing +CONFIG_GRKERNSEC_AUDIT_GID + Here you can choose the GID that will be the target of kernel auditing. + Remember to add the users you want to log to the GID specified here. + If the sysctl option is enabled, whatever you choose here won't matter. + You'll have to specify the GID in your bootup script by echoing the GID + to the proper /proc entry. View the help on the sysctl option for more + information. If the sysctl option is enabled, a sysctl option with name + "audit_gid" is created. + +Chdir logging +CONFIG_GRKERNSEC_AUDIT_CHDIR + If you say Y here, all chdir() calls will be logged. If the sysctl + option is enabled, a sysctl option with name "audit_chdir" is created. + +(Un)Mount logging +CONFIG_GRKERNSEC_AUDIT_MOUNT + If you say Y here, all mounts and unmounts will be logged. If the + sysctl option is enabled, a sysctl option with name "audit_mount" is + created. + +IPC logging +CONFIG_GRKERNSEC_AUDIT_IPC + If you say Y here, creation and removal of message queues, semaphores, + and shared memory will be logged. If the sysctl option is enabled, a + sysctl option with name "audit_ipc" is created. + +Exec logging +CONFIG_GRKERNSEC_EXECLOG + If you say Y here, all execve() calls will be logged (since the + other exec*() calls are frontends to execve(), all execution + will be logged). Useful for shell-servers that like to keep track + of their users. If the sysctl option is enabled, a sysctl option with + name "exec_logging" is created. + WARNING: This option when enabled will produce a LOT of logs, especially + on an active system. + +Resource logging +CONFIG_GRKERNSEC_RESLOG + If you say Y here, all attempts to overstep resource limits will + be logged with the resource name, the requested size, and the current + limit. It is highly recommended that you say Y here. + +Signal logging +CONFIG_GRKERNSEC_SIGNAL + If you say Y here, certain important signals will be logged, such as + SIGSEGV, which will as a result inform you of when a error in a program + occurred, which in some cases could mean a possible exploit attempt. + If the sysctl option is enabled, a sysctl option with name + "signal_logging" is created. + +Fork failure logging +CONFIG_GRKERNSEC_FORKFAIL + If you say Y here, all failed fork() attempts will be logged. + This could suggest a fork bomb, or someone attempting to overstep + their process limit. If the sysctl option is enabled, a sysctl option + with name "forkfail_logging" is created. + +Time change logging +CONFIG_GRKERNSEC_TIME + If you say Y here, any changes of the system clock will be logged. + If the sysctl option is enabled, a sysctl option with name + "timechange_logging" is created. + +ELF text relocations logging +CONFIG_GRKERNSEC_AUDIT_TEXTREL + If you say Y here, text relocations will be logged with the filename + of the offending library or binary. The purpose of the feature is + to help Linux distribution developers get rid of libraries and + binaries that need text relocations which hinder the future progress + of PaX. Only Linux distribution developers should say Y here, and + never on a production machine, as this option creates an information + leak that could aid an attacker in defeating the randomization of + a single memory region. If the sysctl option is enabled, a sysctl + option with name "audit_textrel" is created. + +Chroot jail restrictions +CONFIG_GRKERNSEC_CHROOT + If you say Y here, you will be able to choose several options that will + make breaking out of a chrooted jail much more difficult. If you + encounter no software incompatibilities with the following options, it + is recommended that you enable each one. + +Deny access to abstract AF_UNIX sockets out of chroot +CONFIG_GRKERNSEC_CHROOT_UNIX + If you say Y here, processes inside a chroot will not be able to + connect to abstract (meaning not belonging to a filesystem) Unix + domain sockets that were bound outside of a chroot. It is recommended + that you say Y here. If the sysctl option is enabled, a sysctl option + with name "chroot_deny_unix" is created. + +Deny shmat() out of chroot +CONFIG_GRKERNSEC_CHROOT_SHMAT + If you say Y here, processes inside a chroot will not be able to attach + to shared memory segments that were created outside of the chroot jail. + It is recommended that you say Y here. If the sysctl option is enabled, + a sysctl option with name "chroot_deny_shmat" is created. + +Protect outside processes +CONFIG_GRKERNSEC_CHROOT_FINDTASK + If you say Y here, processes inside a chroot will not be able to + kill, send signals with fcntl, ptrace, capget, setpgid, getpgid, + getsid, or view any process outside of the chroot. If the sysctl + option is enabled, a sysctl option with name "chroot_findtask" is + created. + +Deny mounts in chroot +CONFIG_GRKERNSEC_CHROOT_MOUNT + If you say Y here, processes inside a chroot will not be able to + mount or remount filesystems. If the sysctl option is enabled, a + sysctl option with name "chroot_deny_mount" is created. + +Deny pivot_root in chroot +CONFIG_GRKERNSEC_CHROOT_PIVOT + If you say Y here, processes inside a chroot will not be able to use + a function called pivot_root() that was introduced in Linux 2.3.41. It + works similar to chroot in that it changes the root filesystem. This + function could be misused in a chrooted process to attempt to break out + of the chroot, and therefore should not be allowed. If the sysctl + option is enabled, a sysctl option with name "chroot_deny_pivot" is + created. + +Deny double-chroots +CONFIG_GRKERNSEC_CHROOT_DOUBLE + If you say Y here, processes inside a chroot will not be able to chroot + again outside of the chroot. This is a widely used method of breaking + out of a chroot jail and should not be allowed. If the sysctl option + is enabled, a sysctl option with name "chroot_deny_chroot" is created. + +Deny fchdir outside of chroot +CONFIG_GRKERNSEC_CHROOT_FCHDIR + If you say Y here, a well-known method of breaking chroots by fchdir'ing + to a file descriptor of the chrooting process that points to a directory + outside the filesystem will be stopped. If the sysctl option + is enabled, a sysctl option with name "chroot_deny_fchdir" is created. + +Enforce chdir("/") on all chroots +CONFIG_GRKERNSEC_CHROOT_CHDIR + If you say Y here, the current working directory of all newly-chrooted + applications will be set to the the root directory of the chroot. + The man page on chroot(2) states: + Note that this call does not change the current working + directory, so that `.' can be outside the tree rooted at + `/'. In particular, the super-user can escape from a + `chroot jail' by doing `mkdir foo; chroot foo; cd ..'. + + It is recommended that you say Y here, since it's not known to break + any software. If the sysctl option is enabled, a sysctl option with + name "chroot_enforce_chdir" is created. + +Deny (f)chmod +s in chroot +CONFIG_GRKERNSEC_CHROOT_CHMOD + If you say Y here, processes inside a chroot will not be able to chmod + or fchmod files to make them have suid or sgid bits. This protects + against another published method of breaking a chroot. If the sysctl + option is enabled, a sysctl option with name "chroot_deny_chmod" is + created. + +Deny mknod in chroot +CONFIG_GRKERNSEC_CHROOT_MKNOD + If you say Y here, processes inside a chroot will not be allowed to + mknod. The problem with using mknod inside a chroot is that it + would allow an attacker to create a device entry that is the same + as one on the physical root of your system, which could range from + anything from the console device to a device for your harddrive (which + they could then use to wipe the drive or steal data). It is recommended + that you say Y here, unless you run into software incompatibilities. + If the sysctl option is enabled, a sysctl option with name + "chroot_deny_mknod" is created. + +Restrict priority changes in chroot +CONFIG_GRKERNSEC_CHROOT_NICE + If you say Y here, processes inside a chroot will not be able to raise + the priority of processes in the chroot, or alter the priority of + processes outside the chroot. This provides more security than simply + removing CAP_SYS_NICE from the process' capability set. If the + sysctl option is enabled, a sysctl option with name "chroot_restrict_nice" + is created. + +Log all execs within chroot +CONFIG_GRKERNSEC_CHROOT_EXECLOG + If you say Y here, all executions inside a chroot jail will be logged + to syslog. This can cause a large amount of logs if certain + applications (eg. djb's daemontools) are installed on the system, and + is therefore left as an option. If the sysctl option is enabled, a + sysctl option with name "chroot_execlog" is created. + +Deny sysctl writes in chroot +CONFIG_GRKERNSEC_CHROOT_SYSCTL + If you say Y here, an attacker in a chroot will not be able to + write to sysctl entries, either by sysctl(2) or through a /proc + interface. It is strongly recommended that you say Y here. If the + sysctl option is enabled, a sysctl option with name + "chroot_deny_sysctl" is created. + +Chroot jail capability restrictions +CONFIG_GRKERNSEC_CHROOT_CAPS + If you say Y here, the capabilities on all root processes within a + chroot jail will be lowered to stop module insertion, raw i/o, + system and net admin tasks, rebooting the system, modifying immutable + files, modifying IPC owned by another, and changing the system time. + This is left an option because it can break some apps. Disable this + if your chrooted apps are having problems performing those kinds of + tasks. If the sysctl option is enabled, a sysctl option with + name "chroot_caps" is created. + +Trusted path execution +CONFIG_GRKERNSEC_TPE + If you say Y here, you will be able to choose a gid to add to the + supplementary groups of users you want to mark as "untrusted." + These users will not be able to execute any files that are not in + root-owned directories writable only by root. If the sysctl option + is enabled, a sysctl option with name "tpe" is created. + +Group for trusted path execution +CONFIG_GRKERNSEC_TPE_GID + Here you can choose the GID to enable trusted path protection for. + Remember to add the users you want protection enabled for to the GID + specified here. If the sysctl option is enabled, whatever you choose + here won't matter. You'll have to specify the GID in your bootup + script by echoing the GID to the proper /proc entry. View the help + on the sysctl option for more information. If the sysctl option is + enabled, a sysctl option with name "tpe_gid" is created. + +Partially restrict non-root users +CONFIG_GRKERNSEC_TPE_ALL + If you say Y here, All non-root users other than the ones in the + group specified in the main TPE option will only be allowed to + execute files in directories they own that are not group or + world-writable, or in directories owned by root and writable only by + root. If the sysctl option is enabled, a sysctl option with name + "tpe_restrict_all" is created. + +Randomized PIDs +CONFIG_GRKERNSEC_RANDPID + If you say Y here, all PIDs created on the system will be + pseudo-randomly generated. This is extremely effective along + with the /proc restrictions to disallow an attacker from guessing + pids of daemons, etc. PIDs are also used in some cases as part + of a naming system for temporary files, so this option would keep + those filenames from being predicted as well. We also use code + to make sure that PID numbers aren't reused too soon. If the sysctl + option is enabled, a sysctl option with name "rand_pids" is created. + +Larger entropy pools +CONFIG_GRKERNSEC_RANDNET + If you say Y here, the entropy pools used for many features of Linux + and grsecurity will be doubled in size. Since several grsecurity + features use additional randomness, it is recommended that you say Y + here. Saying Y here has a similar effect as modifying + /proc/sys/kernel/random/poolsize. + +Truly random TCP ISN selection +CONFIG_GRKERNSEC_RANDISN + If you say Y here, Linux's default selection of TCP Initial Sequence + Numbers (ISNs) will be replaced with that of OpenBSD. Linux uses + an MD4 hash based on the connection plus a time value to create the + ISN, while OpenBSD's selection is random. If the sysctl option is + enabled, a sysctl option with name "rand_isns" is created. + +Randomized IP IDs +CONFIG_GRKERNSEC_RANDID + If you say Y here, all the id field on all outgoing packets + will be randomized. This hinders os fingerprinters and + keeps your machine from being used as a bounce for an untraceable + portscan. Ids are used for fragmented packets, fragments belonging + to the same packet have the same id. By default linux only + increments the id value on each packet sent to an individual host. + We use a port of the OpenBSD random ip id code to achieve the + randomness, while keeping the possibility of id duplicates to + near none. If the sysctl option is enabled, a sysctl option with name + "rand_ip_ids" is created. + +Randomized TCP source ports +CONFIG_GRKERNSEC_RANDSRC + If you say Y here, situations where a source port is generated on the + fly for the TCP protocol (ie. with connect() ) will be altered so that + the source port is generated at random, instead of a simple incrementing + algorithm. If the sysctl option is enabled, a sysctl option with name + "rand_tcp_src_ports" is created. + +Randomized RPC XIDs +CONFIG_GRKERNSEC_RANDRPC + If you say Y here, the method of determining XIDs for RPC requests will + be randomized, instead of using linux's default behavior of simply + incrementing the XID. If you want your RPC connections to be more + secure, say Y here. If the sysctl option is enabled, a sysctl option + with name "rand_rpc" is created. + +Socket restrictions +CONFIG_GRKERNSEC_SOCKET + If you say Y here, you will be able to choose from several options. + If you assign a GID on your system and add it to the supplementary + groups of users you want to restrict socket access to, this patch + will perform up to three things, based on the option(s) you choose. + +Deny all socket access +CONFIG_GRKERNSEC_SOCKET_ALL + If you say Y here, you will be able to choose a GID of whose users will + be unable to connect to other hosts from your machine or run server + applications from your machine. If the sysctl option is enabled, a + sysctl option with name "socket_all" is created. + +Group for disabled socket access +CONFIG_GRKERNSEC_SOCKET_ALL_GID + Here you can choose the GID to disable socket access for. Remember to + add the users you want socket access disabled for to the GID + specified here. If the sysctl option is enabled, whatever you choose + here won't matter. You'll have to specify the GID in your bootup + script by echoing the GID to the proper /proc entry. View the help + on the sysctl option for more information. If the sysctl option is + enabled, a sysctl option with name "socket_all_gid" is created. + +Deny all client socket access +CONFIG_GRKERNSEC_SOCKET_CLIENT + If you say Y here, you will be able to choose a GID of whose users will + be unable to connect to other hosts from your machine, but will be + able to run servers. If this option is enabled, all users in the group + you specify will have to use passive mode when initiating ftp transfers + from the shell on your machine. If the sysctl option is enabled, a + sysctl option with name "socket_client" is created. + +Group for disabled client socket access +CONFIG_GRKERNSEC_SOCKET_CLIENT_GID + Here you can choose the GID to disable client socket access for. + Remember to add the users you want client socket access disabled for to + the GID specified here. If the sysctl option is enabled, whatever you + choose here won't matter. You'll have to specify the GID in your bootup + script by echoing the GID to the proper /proc entry. View the help + on the sysctl option for more information. If the sysctl option is + enabled, a sysctl option with name "socket_client_gid" is created. + +Deny all server socket access +CONFIG_GRKERNSEC_SOCKET_SERVER + If you say Y here, you will be able to choose a GID of whose users will + be unable to run server applications from your machine. If the sysctl + option is enabled, a sysctl option with name "socket_server" is created. + +Group for disabled server socket access +CONFIG_GRKERNSEC_SOCKET_SERVER_GID + Here you can choose the GID to disable server socket access for. + Remember to add the users you want server socket access disabled for to + the GID specified here. If the sysctl option is enabled, whatever you + choose here won't matter. You'll have to specify the GID in your bootup + script by echoing the GID to the proper /proc entry. View the help + on the sysctl option for more information. If the sysctl option is + enabled, a sysctl option with name "socket_server_gid" is created. + +Sysctl support +CONFIG_GRKERNSEC_SYSCTL + If you say Y here, you will be able to change the options that + grsecurity runs with at bootup, without having to recompile your + kernel. You can echo values to files in /proc/sys/kernel/grsecurity + to enable (1) or disable (0) various features. All the sysctl entries + are mutable until the "grsec_lock" entry is set to a non-zero value. + All features enabled in the kernel configuration are disabled at boot + if you do not say Y to the "Turn on features by default" option. + All options should be set at startup, and the grsec_lock entry should + be set to a non-zero value after all the options are set. + *THIS IS EXTREMELY IMPORTANT* + +Turn on features by default +CONFIG_GRKERNSEC_SYSCTL_ON + If you say Y here, instead of having all features enabled in the + kernel configuration disabled at boot time, the features will be + enabled at boot time. It is recommended you say Y here unless + there is some reason you would want all sysctl-tunable features to + be disabled by default. As mentioned elsewhere, it is important + to enable the grsec_lock entry once you have finished modifying + the sysctl entries. + +Number of burst messages +CONFIG_GRKERNSEC_FLOODBURST + This option allows you to choose the maximum number of messages allowed + within the flood time interval you chose in a separate option. The + default should be suitable for most people, however if you find that + many of your logs are being interpreted as flooding, you may want to + raise this value. + +Seconds in between log messages +CONFIG_GRKERNSEC_FLOODTIME + This option allows you to enforce the number of seconds between + grsecurity log messages. The default should be suitable for most + people, however, if you choose to change it, choose a value small enough + to allow informative logs to be produced, but large enough to + prevent flooding. + +Hide kernel processes +CONFIG_GRKERNSEC_ACL_HIDEKERN + If you say Y here, all kernel threads will be hidden to all + processes but those whose subject has the "view hidden processes" + flag. + +Maximum tries before password lockout +CONFIG_GRKERNSEC_ACL_MAXTRIES + This option enforces the maximum number of times a user can attempt + to authorize themselves with the grsecurity RBAC system before being + denied the ability to attempt authorization again for a specified time. + The lower the number, the harder it will be to brute-force a password. + +Time to wait after max password tries, in seconds +CONFIG_GRKERNSEC_ACL_TIMEOUT + This option specifies the time the user must wait after attempting to + authorize to the RBAC system with the maximum number of invalid + passwords. The higher the number, the harder it will be to brute-force + a password. + Disable data cache CONFIG_DCACHE_DISABLE This option allows you to run the kernel with data cache disabled. diff -urNp linux-2.4.29/Makefile linux-2.4.29/Makefile --- linux-2.4.29/Makefile 2005-01-16 23:04:12.627968344 -0500 +++ linux-2.4.29/Makefile 2005-01-16 23:05:46.091759688 -0500 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 SUBLEVEL = 29 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc3-grsec KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) @@ -126,9 +126,10 @@ export SVGA_MODE = -DSVGA_MODE=NORMAL_VG CORE_FILES =kernel/kernel.o mm/mm.o fs/fs.o ipc/ipc.o NETWORKS =net/network.o +GRSECURITY =grsecurity/grsec.o LIBS =$(TOPDIR)/lib/lib.a -SUBDIRS =kernel drivers mm fs net ipc lib crypto +SUBDIRS =kernel drivers mm fs net ipc lib crypto grsecurity DRIVERS-n := DRIVERS-y := @@ -272,7 +273,7 @@ export kbuild_2_4_nostdinc export CPPFLAGS CFLAGS CFLAGS_KERNEL AFLAGS AFLAGS_KERNEL -export NETWORKS DRIVERS LIBS HEAD LDFLAGS LINKFLAGS MAKEBOOT ASFLAGS +export NETWORKS DRIVERS LIBS HEAD LDFLAGS LINKFLAGS MAKEBOOT ASFLAGS GRSECURITY .S.s: $(CPP) $(AFLAGS) $(AFLAGS_KERNEL) -traditional -o $*.s $< @@ -291,6 +292,7 @@ vmlinux: include/linux/version.h $(CONFI $(CORE_FILES) \ $(DRIVERS) \ $(NETWORKS) \ + $(GRSECURITY) \ $(LIBS) \ --end-group \ -o vmlinux @@ -375,6 +377,11 @@ init/do_mounts.o: init/do_mounts.c inclu fs lib mm ipc kernel drivers net: dummy $(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" $(subst $@, _dir_$@, $@) +cscope: + find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print > cscope.files + find $(SUBDIRS) init include/asm-$(ARCH) include/asm-generic -name '*.[ch]' >> cscope.files + cscope -k -b -q < cscope.files + TAGS: dummy { find include/asm-${ARCH} -name '*.h' -print ; \ find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print ; \ diff -urNp linux-2.4.29/arch/alpha/config.in linux-2.4.29/arch/alpha/config.in --- linux-2.4.29/arch/alpha/config.in 2004-11-17 06:54:21.000000000 -0500 +++ linux-2.4.29/arch/alpha/config.in 2005-01-16 23:05:32.295856984 -0500 @@ -468,3 +468,12 @@ endmenu source crypto/Config.in source lib/Config.in + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity' CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = "y" ]; then + source grsecurity/Config.in +fi +endmenu + diff -urNp linux-2.4.29/arch/alpha/kernel/osf_sys.c linux-2.4.29/arch/alpha/kernel/osf_sys.c --- linux-2.4.29/arch/alpha/kernel/osf_sys.c 2003-06-13 10:51:29.000000000 -0400 +++ linux-2.4.29/arch/alpha/kernel/osf_sys.c 2005-01-16 23:05:32.297856680 -0500 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -230,6 +231,11 @@ asmlinkage unsigned long osf_mmap(unsign struct file *file = NULL; unsigned long ret = -EBADF; +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + #if 0 if (flags & (_MAP_HASSEMAPHORE | _MAP_INHERIT | _MAP_UNALIGNED)) printk("%s: unimplemented OSF mmap flags %04lx\n", @@ -240,6 +246,13 @@ asmlinkage unsigned long osf_mmap(unsign if (!file) goto out; } + + if(gr_handle_mmap(file, prot)) { + fput(file); + ret = -EACCES; + goto out; + } + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); down_write(¤t->mm->mmap_sem); ret = do_mmap(file, addr, len, prot, flags, off); @@ -1357,6 +1370,10 @@ arch_get_unmapped_area(struct file *filp merely specific addresses, but regions of memory -- perhaps this feature should be incorporated into all ports? */ +#ifdef CONFIG_GRKERNSEC_PAX_RANDMMAP + if (!(current->flags & PF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { addr = arch_get_unmapped_area_1 (PAGE_ALIGN(addr), len, limit); if (addr != -ENOMEM) @@ -1364,8 +1381,15 @@ arch_get_unmapped_area(struct file *filp } /* Next, try allocating at TASK_UNMAPPED_BASE. */ - addr = arch_get_unmapped_area_1 (PAGE_ALIGN(TASK_UNMAPPED_BASE), - len, limit); + + addr = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_GRKERNSEC_PAX_RANDMMAP + if (current->flags & PF_PAX_RANDMMAP) + addr += current->mm->delta_mmap; +#endif + + addr = arch_get_unmapped_area_1 (PAGE_ALIGN(addr), len, limit); if (addr != -ENOMEM) return addr; diff -urNp linux-2.4.29/arch/alpha/kernel/ptrace.c linux-2.4.29/arch/alpha/kernel/ptrace.c --- linux-2.4.29/arch/alpha/kernel/ptrace.c 2003-06-13 10:51:29.000000000 -0400 +++ linux-2.4.29/arch/alpha/kernel/ptrace.c 2005-01-16 23:05:32.298856528 -0500 @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -275,6 +276,10 @@ sys_ptrace(long request, long pid, long read_unlock(&tasklist_lock); if (!child) goto out_notsk; + + if(gr_handle_ptrace(child, request)) + goto out; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out; diff -urNp linux-2.4.29/arch/alpha/mm/fault.c linux-2.4.29/arch/alpha/mm/fault.c --- linux-2.4.29/arch/alpha/mm/fault.c 2002-11-28 18:53:08.000000000 -0500 +++ linux-2.4.29/arch/alpha/mm/fault.c 2005-01-16 23:05:32.299856376 -0500 @@ -53,6 +53,139 @@ __load_new_mm_context(struct mm_struct * __reload_thread(¤t->thread); } +/* + * PaX: decide what to do with offenders (regs->pc = fault address) + * + * returns 1 when task should be killed + * 2 when patched PLT trampoline was detected + * 3 when unpatched PLT trampoline was detected + * 4 when legitimate ET_EXEC was detected + */ +#ifdef CONFIG_GRKERNSEC_PAX_PAGEEXEC +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + int err; + +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + if (current->flags & PF_PAX_RANDEXEC) { + if (regs->pc >= current->mm->start_code && + regs->pc < current->mm->end_code) + { + if (regs->r26 == regs->pc) + return 1; + regs->pc += current->mm->delta_exec; + return 4; + } + } +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_EMUPLT + do { /* PaX: patched PLT emulation #1 */ + unsigned int ldah, ldq, jmp; + + err = get_user(ldah, (unsigned int *)regs->pc); + err |= get_user(ldq, (unsigned int *)(regs->pc+4)); + err |= get_user(jmp, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((ldah & 0xFFFF0000U)== 0x277B0000U && + (ldq & 0xFFFF0000U) == 0xA77B0000U && + jmp == 0x6BFB0000U) + { + unsigned long r27, addr; + unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; + unsigned long addrl = ldq | 0xFFFFFFFFFFFF0000UL; + + addr = regs->r27 + ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); + err = get_user(r27, (unsigned long*)addr); + if (err) + break; + + regs->r27 = r27; + regs->pc = r27; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #2 */ + unsigned int ldah, lda, br; + + err = get_user(ldah, (unsigned int *)regs->pc); + err |= get_user(lda, (unsigned int *)(regs->pc+4)); + err |= get_user(br, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((ldah & 0xFFFF0000U)== 0x277B0000U && + (lda & 0xFFFF0000U) == 0xA77B0000U && + (br & 0xFFE00000U) == 0xC3E00000U) + { + unsigned long addr = br | 0xFFFFFFFFFFE00000UL; + unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; + unsigned long addrl = lda | 0xFFFFFFFFFFFF0000UL; + + regs->r27 += ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); + regs->pc += 12 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); + return 2; + } + } while (0); + + do { /* PaX: unpatched PLT emulation */ + unsigned int br; + + err = get_user(br, (unsigned int *)regs->pc); + + if (!err && (br & 0xFFE00000U) == 0xC3800000U) { + unsigned int br2, ldq, nop, jmp; + unsigned long addr = br | 0xFFFFFFFFFFE00000UL, resolver; + + addr = regs->pc + 4 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); + err = get_user(br2, (unsigned int *)addr); + err |= get_user(ldq, (unsigned int *)(addr+4)); + err |= get_user(nop, (unsigned int *)(addr+8)); + err |= get_user(jmp, (unsigned int *)(addr+12)); + err |= get_user(resolver, (unsigned long *)(addr+16)); + + if (err) + break; + + if (br2 == 0xC3600000U && + ldq == 0xA77B000CU && + nop == 0x47FF041FU && + jmp == 0x6B7B0000U) + { + regs->r28 = regs->pc+4; + regs->r27 = addr+16; + regs->pc = resolver; + return 3; + } + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(void *pc) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) { + printk("."); + break; + } + printk("%08x ", c); + } + printk("\n"); +} +#endif + /* * This routine handles page faults. It determines the address, @@ -133,8 +266,32 @@ do_page_fault(unsigned long address, uns good_area: info.si_code = SEGV_ACCERR; if (cause < 0) { - if (!(vma->vm_flags & VM_EXEC)) + if (!(vma->vm_flags & VM_EXEC)) { + +#ifdef CONFIG_GRKERNSEC_PAX_PAGEEXEC + if (!(current->flags & PF_PAX_PAGEEXEC) || address != regs->pc) + goto bad_area; + + up_read(&mm->mmap_sem); + switch(pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_GRKERNSEC_PAX_EMUPLT + case 2: + case 3: + return; +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + case 4: + return; +#endif + } + pax_report_fault(regs, (void*)regs->pc, (void*)rdusp()); + do_exit(SIGKILL); +#else goto bad_area; +#endif + } } else if (!cause) { /* Allow reads even for write-only mappings */ if (!(vma->vm_flags & (VM_READ | VM_WRITE))) diff -urNp linux-2.4.29/arch/arm/config.in linux-2.4.29/arch/arm/config.in --- linux-2.4.29/arch/arm/config.in 2004-11-17 06:54:21.000000000 -0500 +++ linux-2.4.29/arch/arm/config.in 2005-01-16 23:05:32.300856224 -0500 @@ -736,3 +736,11 @@ endmenu source crypto/Config.in source lib/Config.in + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity' CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = "y" ]; then + source grsecurity/Config.in +fi +endmenu diff -urNp linux-2.4.29/arch/cris/config.in linux-2.4.29/arch/cris/config.in --- linux-2.4.29/arch/cris/config.in 2004-11-17 06:54:21.000000000 -0500 +++ linux-2.4.29/arch/cris/config.in 2005-01-16 23:05:32.301856072 -0500 @@ -276,3 +276,12 @@ int 'Kernel messages buffer length shift source crypto/Config.in source lib/Config.in endmenu + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity' CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = "y" ]; then + source grsecurity/Config.in +fi +endmenu + diff -urNp linux-2.4.29/arch/i386/Makefile linux-2.4.29/arch/i386/Makefile --- linux-2.4.29/arch/i386/Makefile 2004-11-17 06:54:21.000000000 -0500 +++ linux-2.4.29/arch/i386/Makefile 2005-01-16 23:05:32.302855920 -0500 @@ -118,6 +118,9 @@ arch/i386/mm: dummy MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot +arch/i386/vmlinux.lds: arch/i386/vmlinux.lds.S FORCE + $(CPP) -C -P -I$(HPATH) -D__KERNEL__ -imacros $(HPATH)/linux/config.h -imacros $(HPATH)/asm-i386/segment.h -imacros $(HPATH)/asm-i386/page.h -Ui386 arch/i386/vmlinux.lds.S >arch/i386/vmlinux.lds + vmlinux: arch/i386/vmlinux.lds FORCE: ; @@ -154,6 +157,7 @@ archclean: @$(MAKEBOOT) clean archmrproper: + rm -f arch/i386/vmlinux.lds archdep: @$(MAKEBOOT) dep diff -urNp linux-2.4.29/arch/i386/boot/bootsect.S linux-2.4.29/arch/i386/boot/bootsect.S --- linux-2.4.29/arch/i386/boot/bootsect.S 2003-08-25 07:44:39.000000000 -0400 +++ linux-2.4.29/arch/i386/boot/bootsect.S 2005-01-16 23:05:32.303855768 -0500 @@ -237,7 +237,7 @@ rp_read: #ifdef __BIG_KERNEL__ # look in setup.S for bootsect_kludge bootsect_kludge = 0x220 # 0x200 + 0x20 which is the size of the - lcall bootsect_kludge # bootsector + bootsect_kludge offset + lcall *bootsect_kludge # bootsector + bootsect_kludge offset #else movw %es, %ax subw $SYSSEG, %ax diff -urNp linux-2.4.29/arch/i386/boot/setup.S linux-2.4.29/arch/i386/boot/setup.S --- linux-2.4.29/arch/i386/boot/setup.S 2004-02-18 08:36:30.000000000 -0500 +++ linux-2.4.29/arch/i386/boot/setup.S 2005-01-16 23:05:32.304855616 -0500 @@ -637,7 +637,7 @@ edd_done: cmpw $0, %cs:realmode_swtch jz rmodeswtch_normal - lcall %cs:realmode_swtch + lcall *%cs:realmode_swtch jmp rmodeswtch_end diff -urNp linux-2.4.29/arch/i386/config.in linux-2.4.29/arch/i386/config.in --- linux-2.4.29/arch/i386/config.in 2004-11-17 06:54:21.000000000 -0500 +++ linux-2.4.29/arch/i386/config.in 2005-01-16 23:05:32.306855312 -0500 @@ -99,6 +99,7 @@ if [ "$CONFIG_M586MMX" = "y" ]; then fi if [ "$CONFIG_M686" = "y" ]; then define_int CONFIG_X86_L1_CACHE_SHIFT 5 + define_bool CONFIG_X86_ALIGNMENT_16 y define_bool CONFIG_X86_HAS_TSC y define_bool CONFIG_X86_GOOD_APIC y bool 'PGE extensions (not for Cyrix/Transmeta)' CONFIG_X86_PGE @@ -108,6 +109,7 @@ if [ "$CONFIG_M686" = "y" ]; then fi if [ "$CONFIG_MPENTIUMIII" = "y" ]; then define_int CONFIG_X86_L1_CACHE_SHIFT 5 + define_bool CONFIG_X86_ALIGNMENT_16 y define_bool CONFIG_X86_HAS_TSC y define_bool CONFIG_X86_GOOD_APIC y define_bool CONFIG_X86_PGE y @@ -116,6 +118,7 @@ if [ "$CONFIG_MPENTIUMIII" = "y" ]; then fi if [ "$CONFIG_MPENTIUM4" = "y" ]; then define_int CONFIG_X86_L1_CACHE_SHIFT 7 + define_bool CONFIG_X86_ALIGNMENT_16 y define_bool CONFIG_X86_HAS_TSC y define_bool CONFIG_X86_GOOD_APIC y define_bool CONFIG_X86_PGE y @@ -135,6 +138,7 @@ if [ "$CONFIG_MK8" = "y" ]; then fi if [ "$CONFIG_MK7" = "y" ]; then define_int CONFIG_X86_L1_CACHE_SHIFT 6 + define_bool CONFIG_X86_ALIGNMENT_16 y define_bool CONFIG_X86_HAS_TSC y define_bool CONFIG_X86_GOOD_APIC y define_bool CONFIG_X86_USE_3DNOW y @@ -487,3 +491,11 @@ endmenu source crypto/Config.in source lib/Config.in + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity' CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = "y" ]; then + source grsecurity/Config.in +fi +endmenu diff -urNp linux-2.4.29/arch/i386/kernel/apm.c linux-2.4.29/arch/i386/kernel/apm.c --- linux-2.4.29/arch/i386/kernel/apm.c 2003-08-25 07:44:39.000000000 -0400 +++ linux-2.4.29/arch/i386/kernel/apm.c 2005-01-16 23:05:32.308855008 -0500 @@ -614,7 +614,7 @@ static u8 apm_bios_call(u32 func, u32 eb __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" - "lcall %%cs:" SYMBOL_NAME_STR(apm_bios_entry) "\n\t" + "lcall *%%ss:" SYMBOL_NAME_STR(apm_bios_entry) "\n\t" "setc %%al\n\t" "popl %%ebp\n\t" "popl %%edi\n\t" @@ -666,7 +666,7 @@ static u8 apm_bios_call_simple(u32 func, __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" - "lcall %%cs:" SYMBOL_NAME_STR(apm_bios_entry) "\n\t" + "lcall *%%ss:" SYMBOL_NAME_STR(apm_bios_entry) "\n\t" "setc %%bl\n\t" "popl %%ebp\n\t" "popl %%edi\n\t" @@ -1985,6 +1985,12 @@ static int __init apm_init(void) __va((unsigned long)0x40 << 4)); _set_limit((char *)&gdt[APM_40 >> 3], 4095 - (0x40 << 4)); +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + set_base(gdt2[APM_40 >> 3], + __va((unsigned long)0x40 << 4)); + _set_limit((char *)&gdt2[APM_40 >> 3], 4095 - (0x40 << 4)); +#endif + apm_bios_entry.offset = apm_info.bios.offset; apm_bios_entry.segment = APM_CS; set_base(gdt[APM_CS >> 3], @@ -1993,6 +1999,16 @@ static int __init apm_init(void) __va((unsigned long)apm_info.bios.cseg_16 << 4)); set_base(gdt[APM_DS >> 3], __va((unsigned long)apm_info.bios.dseg << 4)); + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + set_base(gdt2[APM_CS >> 3], + __va((unsigned long)apm_info.bios.cseg << 4)); + set_base(gdt2[APM_CS_16 >> 3], + __va((unsigned long)apm_info.bios.cseg_16 << 4)); + set_base(gdt2[APM_DS >> 3], + __va((unsigned long)apm_info.bios.dseg << 4)); +#endif + #ifndef APM_RELAX_SEGMENTS if (apm_info.bios.version == 0x100) { #endif @@ -2002,6 +2018,13 @@ static int __init apm_init(void) _set_limit((char *)&gdt[APM_CS_16 >> 3], 64 * 1024 - 1); /* For the DEC Hinote Ultra CT475 (and others?) */ _set_limit((char *)&gdt[APM_DS >> 3], 64 * 1024 - 1); + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + _set_limit((char *)&gdt2[APM_CS >> 3], 64 * 1024 - 1); + _set_limit((char *)&gdt2[APM_CS_16 >> 3], 64 * 1024 - 1); + _set_limit((char *)&gdt2[APM_DS >> 3], 64 * 1024 - 1); +#endif + #ifndef APM_RELAX_SEGMENTS } else { _set_limit((char *)&gdt[APM_CS >> 3], @@ -2010,6 +2033,16 @@ static int __init apm_init(void) (apm_info.bios.cseg_16_len - 1) & 0xffff); _set_limit((char *)&gdt[APM_DS >> 3], (apm_info.bios.dseg_len - 1) & 0xffff); + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + _set_limit((char *)&gdt2[APM_CS >> 3], + (apm_info.bios.cseg_len - 1) & 0xffff); + _set_limit((char *)&gdt2[APM_CS_16 >> 3], + (apm_info.bios.cseg_16_len - 1) & 0xffff); + _set_limit((char *)&gdt2[APM_DS >> 3], + (apm_info.bios.dseg_len - 1) & 0xffff); +#endif + } #endif diff -urNp linux-2.4.29/arch/i386/kernel/entry.S linux-2.4.29/arch/i386/kernel/entry.S --- linux-2.4.29/arch/i386/kernel/entry.S 2003-06-13 10:51:29.000000000 -0400 +++ linux-2.4.29/arch/i386/kernel/entry.S 2005-01-16 23:05:32.310854704 -0500 @@ -209,6 +209,17 @@ ENTRY(system_call) jae badsys call *SYMBOL_NAME(sys_call_table)(,%eax,4) movl %eax,EAX(%esp) # save the return value + +#ifdef CONFIG_GRKERNSEC_PAX_RANDKSTACK + cli # need_resched and signals atomic test + cmpl $0,need_resched(%ebx) + jne reschedule + cmpl $0,sigpending(%ebx) + jne signal_return + call SYMBOL_NAME(pax_randomize_kstack) + jmp restore_all +#endif + ENTRY(ret_from_sys_call) cli # need_resched and signals atomic test cmpl $0,need_resched(%ebx) @@ -389,8 +400,56 @@ ENTRY(alignment_check) jmp error_code ENTRY(page_fault) +#ifdef CONFIG_GRKERNSEC_PAX_PAGEEXEC + ALIGN + pushl $ SYMBOL_NAME(pax_do_page_fault) +#else pushl $ SYMBOL_NAME(do_page_fault) +#endif + +#ifndef CONFIG_GRKERNSEC_PAX_EMUTRAMP jmp error_code +#else + pushl %ds + pushl %eax + xorl %eax,%eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + decl %eax # eax = -1 + pushl %ecx + pushl %ebx + cld + movl %es,%ecx + movl ORIG_EAX(%esp), %esi # get the error code + movl ES(%esp), %edi # get the function address + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) + movl %esp,%edx + pushl %esi # push the error code + pushl %edx # push the pt_regs pointer + movl $(__KERNEL_DS),%edx + movl %edx,%ds + movl %edx,%es + GET_CURRENT(%ebx) + call *%edi + addl $8,%esp + decl %eax + jnz ret_from_exception + + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + popl %ds + popl %es + addl $4,%esp + jmp system_call +#endif ENTRY(machine_check) pushl $0 @@ -402,7 +461,7 @@ ENTRY(spurious_interrupt_bug) pushl $ SYMBOL_NAME(do_spurious_interrupt_bug) jmp error_code -.data +.section .rodata, "a",@progbits ENTRY(sys_call_table) .long SYMBOL_NAME(sys_ni_syscall) /* 0 - old "setup()" system call*/ .long SYMBOL_NAME(sys_exit) diff -urNp linux-2.4.29/arch/i386/kernel/head.S linux-2.4.29/arch/i386/kernel/head.S --- linux-2.4.29/arch/i386/kernel/head.S 2003-11-28 13:26:19.000000000 -0500 +++ linux-2.4.29/arch/i386/kernel/head.S 2005-01-16 23:05:32.311854552 -0500 @@ -37,10 +37,17 @@ #define X86_VENDOR_ID CPU_PARAMS+36 /* tied to NCAPINTS in cpufeature.h */ /* + * Real beginning of normal "text" segment + */ +ENTRY(stext) +ENTRY(_stext) + +/* * swapper_pg_dir is the main page directory, address 0x00101000 * * On entry, %esi points to the real-mode code as a 32-bit pointer. */ +.global startup_32 startup_32: /* * Set segments to known values @@ -51,9 +58,23 @@ startup_32: movl %eax,%es movl %eax,%fs movl %eax,%gs + #ifdef CONFIG_SMP - orw %bx,%bx - jz 1f + orw %bx,%bx + jnz 1f +#endif + +/* + * Clear BSS first so that there are no surprises... + * No need to cld as DF is already clear from cld above... + */ + xorl %eax,%eax + movl $ SYMBOL_NAME(__bss_start) - __PAGE_OFFSET,%edi + movl $ SYMBOL_NAME(__bss_end) - __PAGE_OFFSET,%ecx + subl %edi,%ecx + rep + stosb +1: /* * New page tables may be in 4Mbyte page mode and may @@ -71,22 +92,28 @@ startup_32: */ #define cr4_bits mmu_cr4_features-__PAGE_OFFSET cmpl $0,cr4_bits - je 3f + je 1f movl %cr4,%eax # Turn on paging options (PSE,PAE,..) orl cr4_bits,%eax movl %eax,%cr4 - jmp 3f 1: + +#ifdef CONFIG_SMP + orw %bx,%bx + jnz 3f #endif + /* * Initialize page tables */ movl $pg0-__PAGE_OFFSET,%edi /* initialize page tables */ - movl $007,%eax /* "007" doesn't mean with right to kill, but - PRESENT+RW+USER */ + movl $0x63,%eax /* "0x63" is PRESENT+RW+ACCESSED+DIRTY */ 2: stosl +#ifdef CONFIG_X86_PAE + addl $4,%edi +#endif add $0x1000,%eax - cmp $empty_zero_page-__PAGE_OFFSET,%edi + cmp $0x01000063,%eax jne 2b /* @@ -100,9 +127,19 @@ startup_32: movl %eax,%cr0 /* ..and set paging (PG) bit */ jmp 1f /* flush the prefetch-queue */ 1: + +#if !defined(CONFIG_GRKERNSEC_PAX_KERNEXEC) || defined(CONFIG_SMP) + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + orw %bx,%bx + jz 1f +#endif + movl $1f,%eax jmp *%eax /* make sure eip is relocated */ 1: +#endif + /* Set up the stack pointer */ lss stack_start,%esp @@ -115,16 +152,21 @@ startup_32: 1: #endif /* CONFIG_SMP */ -/* - * Clear BSS first so that there are no surprises... - * No need to cld as DF is already clear from cld above... - */ - xorl %eax,%eax - movl $ SYMBOL_NAME(__bss_start),%edi - movl $ SYMBOL_NAME(_end),%ecx - subl %edi,%ecx - rep - stosb +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + movl $ __KERNEL_TEXT_OFFSET,%eax + movw %ax,(SYMBOL_NAME(gdt_table) + 18) + rorl $16,%eax + movb %al,(SYMBOL_NAME(gdt_table) + 20) + movb %ah,(SYMBOL_NAME(gdt_table) + 23) + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + movb %al,(SYMBOL_NAME(gdt_table2) + 20) + movb %ah,(SYMBOL_NAME(gdt_table2) + 23) + rorl $16,%eax + movw %ax,(SYMBOL_NAME(gdt_table2) + 18) +#endif + +#endif /* * start system 32-bit setup. We need to re-do some of the things done @@ -272,8 +314,6 @@ L6: jmp L6 # main should never return here, but # just in case, we know what happens. -ready: .byte 0 - /* * We depend on ET to be correct. This checks for 287/387. */ @@ -319,13 +359,6 @@ rp_sidt: jne rp_sidt ret -ENTRY(stack_start) - .long SYMBOL_NAME(init_task_union)+8192 - .long __KERNEL_DS - -/* This is the default interrupt "handler" :-) */ -int_msg: - .asciz "Unknown interrupt\n" ALIGN ignore_int: cld @@ -347,6 +380,17 @@ ignore_int: popl %eax iret +.section .rodata,"a" +ready: .byte 0 + +ENTRY(stack_start) + .long SYMBOL_NAME(init_task_union)+8192 + .long __KERNEL_DS + +/* This is the default interrupt "handler" :-) */ +int_msg: + .asciz "Unknown interrupt\n" + /* * The interrupt descriptor table has room for 256 idt's, * the global descriptor table is dependent on the number @@ -372,54 +416,146 @@ gdt_descr: SYMBOL_NAME(gdt): .long SYMBOL_NAME(gdt_table) +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC +.globl SYMBOL_NAME(gdt2) + .word 0 +gdt_descr2: + .word GDT_ENTRIES*8-1 +SYMBOL_NAME(gdt2): + .long SYMBOL_NAME(gdt_table2) +#endif + /* - * This is initialized to create an identity-mapping at 0-8M (for bootup - * purposes) and another mapping of the 0-8M area at virtual address + * This is initialized to create an identity-mapping at 0-16M (for bootup + * purposes) and another mapping of the 0-16M area at virtual address * PAGE_OFFSET. */ -.org 0x1000 +.section .data.swapper_pg_dir,"a",@progbits ENTRY(swapper_pg_dir) - .long 0x00102007 - .long 0x00103007 - .fill BOOT_USER_PGD_PTRS-2,4,0 - /* default: 766 entries */ - .long 0x00102007 - .long 0x00103007 - /* default: 254 entries */ - .fill BOOT_KERNEL_PGD_PTRS-2,4,0 +#ifdef CONFIG_X86_PAE + .long swapper_pm_dir-__PAGE_OFFSET+1 + .long 0 + .long swapper_pm_dir+512*8-__PAGE_OFFSET+1 + .long 0 + .long swapper_pm_dir+512*16-__PAGE_OFFSET+1 + .long 0 + .long swapper_pm_dir+512*24-__PAGE_OFFSET+1 + .long 0 +#else + .long pg0-__PAGE_OFFSET+63 + .long pg0+1024*4-__PAGE_OFFSET+63 + .long pg0+1024*8-__PAGE_OFFSET+63 + .long pg0+1024*12-__PAGE_OFFSET+63 + .fill BOOT_USER_PGD_PTRS-4,4,0 + /* default: 764 entries */ + .long pg0-__PAGE_OFFSET+67 + .long pg0+1024*4-__PAGE_OFFSET+63 + .long pg0+1024*8-__PAGE_OFFSET+63 + .long pg0+1024*12-__PAGE_OFFSET+63 + /* default: 252 entries */ + .fill BOOT_KERNEL_PGD_PTRS-4,4,0 +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC +ENTRY(kernexec_pg_dir) +#ifdef CONFIG_X86_PAE + .long kernexec_pm_dir-__PAGE_OFFSET+1 + .long 0 + .long kernexec_pm_dir+512*8-__PAGE_OFFSET+1 + .long 0 + .long kernexec_pm_dir+512*16-__PAGE_OFFSET+1 + .long 0 + .long kernexec_pm_dir+512*24-__PAGE_OFFSET+1 + .long 0 +#else + .fill 1024,4,0 +#endif +#endif + +#if CONFIG_X86_PAE +.section .data.swapper_pm_dir,"a",@progbits +ENTRY(swapper_pm_dir) + .long pg0-__PAGE_OFFSET+63 + .long 0 + .long pg0+512*8-__PAGE_OFFSET+63 + .long 0 + .long pg0+512*16-__PAGE_OFFSET+63 + .long 0 + .long pg0+512*24-__PAGE_OFFSET+63 + .long 0 + .long pg0+512*32-__PAGE_OFFSET+63 + .long 0 + .long pg0+512*40-__PAGE_OFFSET+63 + .long 0 + .long pg0+512*48-__PAGE_OFFSET+63 + .long 0 + .long pg0+512*56-__PAGE_OFFSET+63 + .long 0 + .fill BOOT_USER_PMD_PTRS-8,8,0 + /* default: 1024+512-4 entries */ + .long pg0-__PAGE_OFFSET+67 + .long 0 + .long pg0+512*8-__PAGE_OFFSET+63 + .long 0 + .long pg0+512*16-__PAGE_OFFSET+63 + .long 0 + .long pg0+512*24-__PAGE_OFFSET+63 + .long 0 + .long pg0+512*32-__PAGE_OFFSET+63 + .long 0 + .long pg0+512*40-__PAGE_OFFSET+63 + .long 0 + .long pg0+512*48-__PAGE_OFFSET+63 + .long 0 + .long pg0+512*56-__PAGE_OFFSET+63 + .long 0 + /* default: 512-4 entries */ + .fill BOOT_KERNEL_PMD_PTRS-8,8,0 + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC +ENTRY(kernexec_pm_dir) + .fill 512,8,0 + .fill 512,8,0 + .fill 512,8,0 + .fill 512,8,0 +#endif +#endif /* - * The page tables are initialized to only 8MB here - the final page + * The page tables are initialized to only 16MB here - the final page * tables are set up later depending on memory size. */ -.org 0x2000 +.section .data.pg0,"a",@progbits ENTRY(pg0) + .fill 1024*4,4,0 -.org 0x3000 -ENTRY(pg1) +#if CONFIG_X86_PAE + .fill 1024*4,4,0 +#endif /* * empty_zero_page must immediately follow the page tables ! (The * initialization loop counts until empty_zero_page) */ - -.org 0x4000 +.section .rodata.empty_zero_page,"a",@progbits ENTRY(empty_zero_page) - -.org 0x5000 + .fill 1024,4,0 /* - * Real beginning of normal "text" segment + * The IDT has to be page-aligned to simplify the Pentium + * F0 0F bug workaround. We have a special link segment + * for this. */ -ENTRY(stext) -ENTRY(_stext) +.section .rodata.idt,"a",@progbits +ENTRY(idt_table) + .fill 256,8,0 /* * This starts the data section. Note that the above is all * in the text section because it has alignment requirements * that we cannot fulfill any other way. */ -.data +.section .rodata,"a",@progbits ALIGN /* @@ -430,19 +566,41 @@ ALIGN */ ENTRY(gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x0000000000000000 /* not used */ - .quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */ - .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */ - .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */ + .quad 0x0000000000000000 /* not used */ + .quad 0x00cf9b000000ffff /* 0x10 kernel 4GB code at 0x00000000 */ + .quad 0x00cf93000000ffff /* 0x18 kernel 4GB data at 0x00000000 */ + .quad 0x00cffb000000ffff /* 0x23 user 4GB code at 0x00000000 */ + .quad 0x00cff3000000ffff /* 0x2b user 4GB data at 0x00000000 */ + .quad 0x0000000000000000 /* not used */ + .quad 0x0000000000000000 /* not used */ + /* + * The APM segments have byte granularity and their bases + * and limits are set at run time. + */ + .quad 0x0040930000000000 /* 0x40 APM set up for bad BIOS's */ + .quad 0x00409b0000000000 /* 0x48 APM CS code */ + .quad 0x00009b0000000000 /* 0x50 APM CS 16 code (16 bit) */ + .quad 0x0040930000000000 /* 0x58 APM DS data */ + .fill NR_CPUS*4,8,0 /* space for TSS's and LDT's */ + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC +ENTRY(gdt_table2) + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x0000000000000000 /* not used */ + .quad 0x00cf9b000000ffff /* 0x10 kernel 4GB code at 0x00000000 */ + .quad 0x00cf93000000ffff /* 0x18 kernel 4GB data at 0x00000000 */ + .quad 0x60c5fb000000ffff /* 0x23 user 1.5GB code at 0x60000000 */ + .quad 0x00c5f3000000ffff /* 0x2b user 1.5GB data at 0x00000000 */ + .quad 0x0000000000000000 /* not used */ .quad 0x0000000000000000 /* not used */ /* * The APM segments have byte granularity and their bases * and limits are set at run time. */ - .quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */ - .quad 0x00409a0000000000 /* 0x48 APM CS code */ - .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */ - .quad 0x0040920000000000 /* 0x58 APM DS data */ + .quad 0x0040930000000000 /* 0x40 APM set up for bad BIOS's */ + .quad 0x00409b0000000000 /* 0x48 APM CS code */ + .quad 0x00009b0000000000 /* 0x50 APM CS 16 code (16 bit) */ + .quad 0x0040930000000000 /* 0x58 APM DS data */ .fill NR_CPUS*4,8,0 /* space for TSS's and LDT's */ +#endif diff -urNp linux-2.4.29/arch/i386/kernel/i386_ksyms.c linux-2.4.29/arch/i386/kernel/i386_ksyms.c --- linux-2.4.29/arch/i386/kernel/i386_ksyms.c 2004-04-14 09:05:25.000000000 -0400 +++ linux-2.4.29/arch/i386/kernel/i386_ksyms.c 2005-01-16 23:05:32.312854400 -0500 @@ -74,6 +74,9 @@ EXPORT_SYMBOL(pm_power_off); EXPORT_SYMBOL(get_cmos_time); EXPORT_SYMBOL(apm_info); EXPORT_SYMBOL(gdt); +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC +EXPORT_SYMBOL(gdt2); +#endif EXPORT_SYMBOL(empty_zero_page); #ifdef CONFIG_DEBUG_IOVIRT diff -urNp linux-2.4.29/arch/i386/kernel/init_task.c linux-2.4.29/arch/i386/kernel/init_task.c --- linux-2.4.29/arch/i386/kernel/init_task.c 2001-09-17 18:29:09.000000000 -0400 +++ linux-2.4.29/arch/i386/kernel/init_task.c 2005-01-16 23:05:32.313854248 -0500 @@ -29,5 +29,9 @@ union task_union init_task_union * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS }; +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC +struct tss_struct init_tss[NR_CPUS] __attribute__((__aligned__(SMP_CACHE_BYTES), __section__(".rodata"))) = { [0 ... NR_CPUS-1] = INIT_TSS }; +#else +struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS }; +#endif diff -urNp linux-2.4.29/arch/i386/kernel/ioport.c linux-2.4.29/arch/i386/kernel/ioport.c --- linux-2.4.29/arch/i386/kernel/ioport.c 2003-06-13 10:51:29.000000000 -0400 +++ linux-2.4.29/arch/i386/kernel/ioport.c 2005-01-16 23:05:32.314854096 -0500 @@ -14,6 +14,8 @@ #include #include #include +#include +#include /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ static void set_bitmap(unsigned long *bitmap, short base, short extent, int new_value) @@ -57,10 +59,22 @@ asmlinkage int sys_ioperm(unsigned long struct thread_struct * t = ¤t->thread; struct tss_struct * tss = init_tss + smp_processor_id(); +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + unsigned long flags, cr3; +#endif + if ((from + num <= from) || (from + num > IO_BITMAP_SIZE*32)) return -EINVAL; +#ifdef CONFIG_GRKERNSEC_IO + if (turn_on) { + gr_handle_ioperm(); +#else if (turn_on && !capable(CAP_SYS_RAWIO)) +#endif return -EPERM; +#ifdef CONFIG_GRKERNSEC_IO + } +#endif /* * If it's the first ioperm() call in this thread's lifetime, set the * IO bitmap up. ioperm() is much less timing critical than clone(), @@ -78,6 +92,11 @@ asmlinkage int sys_ioperm(unsigned long * do it in the per-thread copy and in the TSS ... */ set_bitmap(t->io_bitmap, from, num, !turn_on); + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + pax_open_kernel(flags, cr3); +#endif + if (tss->bitmap == IO_BITMAP_OFFSET) { /* already active? */ set_bitmap(tss->io_bitmap, from, num, !turn_on); } else { @@ -85,6 +104,10 @@ asmlinkage int sys_ioperm(unsigned long tss->bitmap = IO_BITMAP_OFFSET; /* Activate it in the TSS */ } +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + pax_close_kernel(flags, cr3); +#endif + return 0; } @@ -109,8 +132,13 @@ asmlinkage int sys_iopl(unsigned long un return -EINVAL; /* Trying to gain more privileges? */ if (level > old) { +#ifdef CONFIG_GRKERNSEC_IO + gr_handle_iopl(); + return -EPERM; +#else if (!capable(CAP_SYS_RAWIO)) return -EPERM; +#endif } regs->eflags = (regs->eflags & 0xffffcfff) | (level << 12); return 0; diff -urNp linux-2.4.29/arch/i386/kernel/ldt.c linux-2.4.29/arch/i386/kernel/ldt.c --- linux-2.4.29/arch/i386/kernel/ldt.c 2004-02-18 08:36:30.000000000 -0500 +++ linux-2.4.29/arch/i386/kernel/ldt.c 2005-01-16 23:05:32.315853944 -0500 @@ -151,7 +151,7 @@ static int read_default_ldt(void * ptr, { int err; unsigned long size; - void *address; + const void *address; err = 0; address = &default_ldt[0]; @@ -214,6 +214,13 @@ static int write_ldt(void * ptr, unsigne } } +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + if ((current->flags & PF_PAX_SEGMEXEC) && (ldt_info.contents & 2)) { + error = -EINVAL; + goto out_unlock; + } +#endif + entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) | (ldt_info.limit & 0x0ffff); entry_2 = (ldt_info.base_addr & 0xff000000) | @@ -224,7 +231,7 @@ static int write_ldt(void * ptr, unsigne ((ldt_info.seg_not_present ^ 1) << 15) | (ldt_info.seg_32bit << 22) | (ldt_info.limit_in_pages << 23) | - 0x7000; + 0x7100; if (!oldmode) entry_2 |= (ldt_info.useable << 20); diff -urNp linux-2.4.29/arch/i386/kernel/pci-pc.c linux-2.4.29/arch/i386/kernel/pci-pc.c --- linux-2.4.29/arch/i386/kernel/pci-pc.c 2004-11-17 06:54:21.000000000 -0500 +++ linux-2.4.29/arch/i386/kernel/pci-pc.c 2005-01-16 23:05:32.316853792 -0500 @@ -17,6 +17,7 @@ #include #include #include +#include #include "pci-i386.h" @@ -575,7 +576,6 @@ union bios32 { * we'll make pcibios_present() take a memory start parameter and store * the array there. */ - static struct { unsigned long address; unsigned short segment; @@ -1493,6 +1493,7 @@ void __init pcibios_init(void) if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT)) pcibios_sort(); #endif + } char * __devinit pcibios_setup(char *str) diff -urNp linux-2.4.29/arch/i386/kernel/process.c linux-2.4.29/arch/i386/kernel/process.c --- linux-2.4.29/arch/i386/kernel/process.c 2004-11-17 06:54:21.000000000 -0500 +++ linux-2.4.29/arch/i386/kernel/process.c 2005-01-16 23:05:32.318853488 -0500 @@ -209,18 +209,18 @@ __setup("reboot=", reboot_setup); doesn't work with at least one type of 486 motherboard. It is easy to stop this code working; hence the copious comments. */ -static unsigned long long +static const unsigned long long real_mode_gdt_entries [3] = { 0x0000000000000000ULL, /* Null descriptor */ - 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ - 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ + 0x00009b000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ + 0x000093000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ }; static struct { unsigned short size __attribute__ ((packed)); - unsigned long long * base __attribute__ ((packed)); + const unsigned long long * base __attribute__ ((packed)); } real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries }, real_mode_idt = { 0x3ff, 0 }, @@ -552,7 +552,7 @@ int copy_thread(int nr, unsigned long cl { struct pt_regs * childregs; - childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1; + childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p - sizeof(unsigned long))) - 1; struct_cpy(childregs, regs); childregs->eax = 0; childregs->esp = esp; @@ -613,6 +613,16 @@ void dump_thread(struct pt_regs * regs, dump->u_fpvalid = dump_fpu (regs, &dump->i387); } +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC +void pax_switch_segments(struct task_struct * tsk) +{ + if (tsk->flags & PF_PAX_SEGMEXEC) + __asm__ __volatile__("lgdt %0": "=m" (gdt_descr2)); + else + __asm__ __volatile__("lgdt %0": "=m" (gdt_descr)); +} +#endif + /* * This special macro can be used to load a debugging register */ @@ -650,12 +660,15 @@ void fastcall __switch_to(struct task_st *next = &next_p->thread; struct tss_struct *tss = init_tss + smp_processor_id(); +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + unsigned long flags, cr3; +#endif + unlazy_fpu(prev_p); - /* - * Reload esp0, LDT and the page table pointer: - */ - tss->esp0 = next->esp0; +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + pax_switch_segments(next_p); +#endif /* * Save away %fs and %gs. No need to save %es and %ds, as @@ -683,6 +696,15 @@ void fastcall __switch_to(struct task_st loaddebug(next, 7); } +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + pax_open_kernel(flags, cr3); +#endif + + /* + * Reload esp0, LDT and the page table pointer: + */ + tss->esp0 = next->esp0; + if (prev->ioperm || next->ioperm) { if (next->ioperm) { /* @@ -705,6 +727,11 @@ void fastcall __switch_to(struct task_st */ tss->bitmap = INVALID_IO_BITMAP_OFFSET; } + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + pax_close_kernel(flags, cr3); +#endif + } asmlinkage int sys_fork(struct pt_regs regs) @@ -792,3 +819,43 @@ unsigned long get_wchan(struct task_stru } #undef last_sched #undef first_sched + +#ifdef CONFIG_GRKERNSEC_PAX_RANDKSTACK +asmlinkage void pax_randomize_kstack(void) +{ + struct tss_struct *tss = init_tss + smp_processor_id(); + unsigned long time; + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + unsigned long flags, cr3; +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_SOFTMODE + if (!pax_aslr) + return; +#endif + + rdtscl(time); + + /* P4 seems to return a 0 LSB, ignore it */ +#ifdef CONFIG_MPENTIUM4 + time &= 0x3EUL; + time <<= 1; +#else + time &= 0x1FUL; + time <<= 2; +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + pax_open_kernel(flags, cr3); +#endif + + tss->esp0 ^= time; + current->thread.esp0 = tss->esp0; + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + pax_close_kernel(flags, cr3); +#endif + +} +#endif diff -urNp linux-2.4.29/arch/i386/kernel/ptrace.c linux-2.4.29/arch/i386/kernel/ptrace.c --- linux-2.4.29/arch/i386/kernel/ptrace.c 2002-08-02 20:39:42.000000000 -0400 +++ linux-2.4.29/arch/i386/kernel/ptrace.c 2005-01-16 23:05:32.319853336 -0500 @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -177,6 +178,9 @@ asmlinkage int sys_ptrace(long request, if (pid == 1) /* you may not mess with init */ goto out_tsk; + if(gr_handle_ptrace(child, request)) + goto out_tsk; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out_tsk; @@ -256,6 +260,17 @@ asmlinkage int sys_ptrace(long request, if(addr < (long) &dummy->u_debugreg[4] && ((unsigned long) data) >= TASK_SIZE-3) break; +#ifdef CONFIG_GRKERNSEC + if(addr >= (long) &dummy->u_debugreg[0] && + addr <= (long) &dummy->u_debugreg[3]){ + long reg = (addr - (long) &dummy->u_debugreg[0]) >> 2; + long type = (child->thread.debugreg[7] >> (DR_CONTROL_SHIFT + 4*reg)) & 3; + long align = (child->thread.debugreg[7] >> (DR_CONTROL_SHIFT + 2 + 4*reg)) & 3; + if((type & 1) && (data & align)) + break; + } +#endif + if(addr == (long) &dummy->u_debugreg[7]) { data &= ~DR_CONTROL_RESERVED; for(i=0; i<4; i++) diff -urNp linux-2.4.29/arch/i386/kernel/setup.c linux-2.4.29/arch/i386/kernel/setup.c --- linux-2.4.29/arch/i386/kernel/setup.c 2004-08-07 19:26:04.000000000 -0400 +++ linux-2.4.29/arch/i386/kernel/setup.c 2005-01-16 23:05:32.323852728 -0500 @@ -129,7 +129,11 @@ char ignore_irq13; /* set if exception 16 works */ struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; +#ifdef CONFIG_X86_PAE +unsigned long mmu_cr4_features = X86_CR4_PAE; +#else unsigned long mmu_cr4_features; +#endif EXPORT_SYMBOL(mmu_cr4_features); /* @@ -170,7 +174,7 @@ unsigned char aux_device_present; extern void mcheck_init(struct cpuinfo_x86 *c); extern void dmi_scan_machine(void); extern int root_mountflags; -extern char _text, _etext, _edata, _end; +extern char _text, _etext, _data, _edata, _end; static int have_cpuid_p(void) __init; @@ -1215,7 +1219,7 @@ void __init setup_arch(char **cmdline_p) code_resource.start = virt_to_bus(&_text); code_resource.end = virt_to_bus(&_etext)-1; - data_resource.start = virt_to_bus(&_etext); + data_resource.start = virt_to_bus(&_data); data_resource.end = virt_to_bus(&_edata)-1; parse_cmdline_early(cmdline_p); @@ -3221,7 +3225,7 @@ void __init cpu_init (void) set_tss_desc(nr,t); gdt_table[__TSS(nr)].b &= 0xfffffdff; load_TR(nr); - load_LDT(&init_mm.context); + _load_LDT(&init_mm.context); /* * Clear all 6 debug registers: @@ -3287,7 +3291,16 @@ int __init ppro_with_ram_bug(void) printk(KERN_INFO "Your Pentium Pro seems ok.\n"); return 0; } - + +#ifdef CONFIG_GRKERNSEC_PAX_SOFTMODE +static int __init setup_pax_softmode(char *str) +{ + get_option (&str, &pax_softmode); + return 1; +} +__setup("pax_softmode=", setup_pax_softmode); +#endif + /* * Local Variables: * mode:c diff -urNp linux-2.4.29/arch/i386/kernel/sys_i386.c linux-2.4.29/arch/i386/kernel/sys_i386.c --- linux-2.4.29/arch/i386/kernel/sys_i386.c 2003-08-25 07:44:39.000000000 -0400 +++ linux-2.4.29/arch/i386/kernel/sys_i386.c 2005-01-16 23:05:32.324852576 -0500 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -48,6 +49,11 @@ static inline long do_mmap2( int error = -EBADF; struct file * file = NULL; +#if defined(CONFIG_GRKERNSEC_PAX_SEGMEXEC) || defined(CONFIG_GRKERNSEC_PAX_RANDEXEC) + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); if (!(flags & MAP_ANONYMOUS)) { file = fget(fd); @@ -55,6 +61,12 @@ static inline long do_mmap2( goto out; } + if(gr_handle_mmap(file, prot)) { + fput(file); + error = -EACCES; + goto out; + } + down_write(¤t->mm->mmap_sem); error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); up_write(¤t->mm->mmap_sem); diff -urNp linux-2.4.29/arch/i386/kernel/trampoline.S linux-2.4.29/arch/i386/kernel/trampoline.S --- linux-2.4.29/arch/i386/kernel/trampoline.S 2002-11-28 18:53:09.000000000 -0500 +++ linux-2.4.29/arch/i386/kernel/trampoline.S 2005-01-16 23:05:32.325852424 -0500 @@ -54,7 +54,7 @@ r_base = . lmsw %ax # into protected mode jmp flush_instr flush_instr: - ljmpl $__KERNEL_CS, $0x00100000 + ljmpl $__KERNEL_CS, $SYMBOL_NAME(startup_32)-__PAGE_OFFSET # jump to startup_32 in arch/i386/kernel/head.S idt_48: diff -urNp linux-2.4.29/arch/i386/kernel/traps.c linux-2.4.29/arch/i386/kernel/traps.c --- linux-2.4.29/arch/i386/kernel/traps.c 2002-11-28 18:53:09.000000000 -0500 +++ linux-2.4.29/arch/i386/kernel/traps.c 2005-01-16 23:05:32.326852272 -0500 @@ -54,15 +54,10 @@ asmlinkage int system_call(void); asmlinkage void lcall7(void); asmlinkage void lcall27(void); -struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, +const struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } }; -/* - * The IDT has to be page-aligned to simplify the Pentium - * F0 0F bug workaround.. We have a special link segment - * for this. - */ -struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, }; +extern struct desc_struct idt_table[256]; asmlinkage void divide_error(void); asmlinkage void debug(void); @@ -228,13 +223,13 @@ void show_registers(struct pt_regs *regs show_stack((unsigned long*)esp); printk("\nCode: "); - if(regs->eip < PAGE_OFFSET) + if(regs->eip + __KERNEL_TEXT_OFFSET < PAGE_OFFSET) goto bad; for(i=0;i<20;i++) { unsigned char c; - if(__get_user(c, &((unsigned char*)regs->eip)[i])) { + if(__get_user(c, &((unsigned char*)regs->eip)[i+__KERNEL_TEXT_OFFSET])) { bad: printk(" Bad EIP value."); break; @@ -256,7 +251,7 @@ static void handle_BUG(struct pt_regs *r if (regs->xcs & 3) goto no_bug; /* Not in kernel */ - eip = regs->eip; + eip = regs->eip + __KERNEL_TEXT_OFFSET; if (eip < PAGE_OFFSET) goto no_bug; @@ -422,6 +417,13 @@ gp_in_kernel: regs->eip = fixup; return; } + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + if ((regs->xcs & 0xFFFF) == __KERNEL_CS) + die("PAX: suspicious general protection fault", regs, error_code); + else +#endif + die("general protection fault", regs, error_code); } } @@ -527,13 +529,12 @@ asmlinkage void do_debug(struct pt_regs { unsigned int condition; struct task_struct *tsk = current; - unsigned long eip = regs->eip; siginfo_t info; __asm__ __volatile__("movl %%db6,%0" : "=r" (condition)); /* If the user set TF, it's simplest to clear it right away. */ - if ((eip >=PAGE_OFFSET) && (regs->eflags & TF_MASK)) + if (!(regs->xcs & 3) && (regs->eflags & TF_MASK) && !(regs->eflags & VM_MASK)) goto clear_TF; /* Mask out spurious debug traps due to lazy DR7 setting */ @@ -779,6 +780,8 @@ asmlinkage void math_emulate(long arg) #ifndef CONFIG_X86_F00F_WORKS_OK void __init trap_init_f00f_bug(void) { + +#ifndef CONFIG_GRKERNSEC_PAX_KERNEXEC /* * "idt" is magic - it overlaps the idt_descr * variable so that updating idt will automatically @@ -788,6 +791,8 @@ void __init trap_init_f00f_bug(void) idt = (struct desc_struct *)__fix_to_virt(FIX_F00F); __asm__ __volatile__("lidt %0": "=m" (idt_descr)); +#endif + } #endif @@ -826,7 +831,7 @@ static void __init set_system_gate(unsig _set_gate(idt_table+n,15,3,addr); } -static void __init set_call_gate(void *a, void *addr) +static void __init set_call_gate(const void *a, void *addr) { _set_gate(a,12,3,addr); } @@ -852,14 +857,45 @@ __asm__ __volatile__ ("movw %w3,0(%2)\n\ "rorl $16,%%eax" \ : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type)) -void set_tss_desc(unsigned int n, void *addr) +void set_tss_desc(unsigned int n, const void *addr) { _set_tssldt_desc(gdt_table+__TSS(n), (int)addr, 235, 0x89); + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + _set_tssldt_desc(gdt_table2+__TSS(n), (int)addr, 235, 0x89); +#endif + +} + +void __set_ldt_desc(unsigned int n, const void *addr, unsigned int size) +{ + _set_tssldt_desc(gdt_table+__LDT(n), (int)addr, ((size << 3)-1), 0x82); + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + _set_tssldt_desc(gdt_table2+__LDT(n), (int)addr, ((size << 3)-1), 0x82); +#endif + } -void set_ldt_desc(unsigned int n, void *addr, unsigned int size) +void set_ldt_desc(unsigned int n, const void *addr, unsigned int size) { + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + unsigned long flags, cr3; + + pax_open_kernel(flags, cr3); +#endif + _set_tssldt_desc(gdt_table+__LDT(n), (int)addr, ((size << 3)-1), 0x82); + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + _set_tssldt_desc(gdt_table2+__LDT(n), (int)addr, ((size << 3)-1), 0x82); +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + pax_close_kernel(flags, cr3); +#endif + } #ifdef CONFIG_X86_VISWS_APIC diff -urNp linux-2.4.29/arch/i386/kernel/vm86.c linux-2.4.29/arch/i386/kernel/vm86.c --- linux-2.4.29/arch/i386/kernel/vm86.c 2004-11-17 06:54:21.000000000 -0500 +++ linux-2.4.29/arch/i386/kernel/vm86.c 2005-01-16 23:05:32.328851968 -0500 @@ -44,6 +44,7 @@ #include #include #include +#include /* * Known problems: @@ -97,6 +98,10 @@ struct pt_regs * fastcall save_v86_state struct pt_regs *ret; unsigned long tmp; +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + unsigned long flags, cr3; +#endif + if (!current->thread.vm86_info) { printk("no vm86_info: BAD\n"); do_exit(SIGSEGV); @@ -111,7 +116,17 @@ struct pt_regs * fastcall save_v86_state do_exit(SIGSEGV); } tss = init_tss + smp_processor_id(); + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + pax_open_kernel(flags, cr3); +#endif + tss->esp0 = current->thread.esp0 = current->thread.saved_esp0; + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + pax_close_kernel(flags, cr3); +#endif + current->thread.saved_esp0 = 0; ret = KVM86->regs32; return ret; @@ -237,6 +252,11 @@ out: static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk) { struct tss_struct *tss; + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + unsigned long flags, cr3; +#endif + /* * make sure the vm86() system call doesn't try to do anything silly */ @@ -278,8 +298,17 @@ static void do_sys_vm86(struct kernel_vm info->regs32->eax = 0; tsk->thread.saved_esp0 = tsk->thread.esp0; tss = init_tss + smp_processor_id(); + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + pax_open_kernel(flags, cr3); +#endif + tss->esp0 = tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + pax_close_kernel(flags, cr3); +#endif + tsk->thread.screen_bitmap = info->screen_bitmap; if (info->flags & VM86_SCREEN_BITMAP) mark_screen_rdonly(tsk); diff -urNp linux-2.4.29/arch/i386/mm/fault.c linux-2.4.29/arch/i386/mm/fault.c --- linux-2.4.29/arch/i386/mm/fault.c 2004-08-07 19:26:04.000000000 -0400 +++ linux-2.4.29/arch/i386/mm/fault.c 2005-01-16 23:05:32.330851664 -0500 @@ -19,6 +19,8 @@ #include #include #include /* For unblank_screen() */ +#include +#include #include #include @@ -127,6 +129,10 @@ void bust_spinlocks(int yes) asmlinkage void do_invalid_op(struct pt_regs *, unsigned long); extern unsigned long idt; +#if defined(CONFIG_GRKERNSEC_PAX_PAGEEXEC) || defined(CONFIG_GRKERNSEC_PAX_EMUTRAMP) || defined(CONFIG_GRKERNSEC_PAX_RANDEXEC) +static int pax_handle_fetch_fault(struct pt_regs *regs); +#endif + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -137,23 +143,30 @@ extern unsigned long idt; * bit 1 == 0 means read, 1 means write * bit 2 == 0 means kernel, 1 means user-mode */ -asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) +#ifdef CONFIG_GRKERNSEC_PAX_PAGEEXEC +static int do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) +#else +asmlinkage int do_page_fault(struct pt_regs *regs, unsigned long error_code) +#endif { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct * vma; +#ifndef CONFIG_GRKERNSEC_PAX_PAGEEXEC unsigned long address; - unsigned long page; +#endif unsigned long fixup; int write; siginfo_t info; +#ifndef CONFIG_GRKERNSEC_PAX_PAGEEXEC /* get the address */ __asm__("movl %%cr2,%0":"=r" (address)); /* It's safe to allow irq's after cr2 has been saved */ if (regs->eflags & X86_EFLAGS_IF) local_irq_enable(); +#endif tsk = current; @@ -258,7 +271,7 @@ good_area: tsk->thread.screen_bitmap |= 1 << bit; } up_read(&mm->mmap_sem); - return; + return 0; /* * Something tried to access memory that isn't in our memory map.. @@ -267,6 +280,46 @@ good_area: bad_area: up_read(&mm->mmap_sem); +#if defined(CONFIG_GRKERNSEC_PAX_PAGEEXEC) || defined(CONFIG_GRKERNSEC_PAX_SEGMEXEC) + if ((error_code & 4) && !(regs->eflags & X86_EFLAGS_VM)) { + +#ifdef CONFIG_GRKERNSEC_PAX_PAGEEXEC + if ((tsk->flags & PF_PAX_PAGEEXEC) && !(error_code & 3) && (regs->eip == address)) { + pax_report_fault(regs, (void*)regs->eip, (void*)regs->esp); + do_exit(SIGKILL); + } +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + if ((tsk->flags & PF_PAX_SEGMEXEC) && !(error_code & 3) && (regs->eip + SEGMEXEC_TASK_SIZE == address)) { + +#if defined(CONFIG_GRKERNSEC_PAX_EMUTRAMP) || defined(CONFIG_GRKERNSEC_PAX_RANDEXEC) + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + case 5: + return 0; +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_EMUTRAMP + case 4: + return 0; + case 3: + case 2: + return 1; +#endif + + } +#endif + + pax_report_fault(regs, (void*)regs->eip, (void*)regs->esp); + do_exit(SIGKILL); + } +#endif + + } +#endif + /* User mode accesses just cause a SIGSEGV */ if (error_code & 4) { tsk->thread.cr2 = address; @@ -278,7 +331,7 @@ bad_area: /* info.si_code has been set above */ info.si_addr = (void *)address; force_sig_info(SIGSEGV, &info, tsk); - return; + return 0; } /* @@ -291,7 +344,7 @@ bad_area: if (nr == 6) { do_invalid_op(regs, 0); - return; + return 0; } } @@ -299,7 +352,7 @@ no_context: /* Are we prepared to handle this kernel fault? */ if ((fixup = search_exception_table(regs->eip)) != 0) { regs->eip = fixup; - return; + return 0; } /* @@ -311,19 +364,42 @@ no_context: if (address < PAGE_SIZE) printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + else if (init_mm.start_code + __KERNEL_TEXT_OFFSET <= address && + address < init_mm.end_code + __KERNEL_TEXT_OFFSET) { + if (tsk->curr_ip) + printk(KERN_ERR "PAX: From %u.%u.%u.%u: %s:%d, uid/euid: %u/%u, attempted to modify kernel code", + NIPQUAD(tsk->curr_ip), tsk->comm, tsk->pid, tsk->uid, tsk->euid); + else + printk(KERN_ERR "PAX: %s:%d, uid/euid: %u/%u, attempted to modify kernel code", + tsk->comm, tsk->pid, tsk->uid, tsk->euid); + } +#endif + else printk(KERN_ALERT "Unable to handle kernel paging request"); printk(" at virtual address %08lx\n",address); printk(" printing eip:\n"); printk("%08lx\n", regs->eip); - asm("movl %%cr3,%0":"=r" (page)); - page = ((unsigned long *) __va(page))[address >> 22]; - printk(KERN_ALERT "*pde = %08lx\n", page); - if (page & 1) { - page &= PAGE_MASK; - address &= 0x003ff000; - page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; - printk(KERN_ALERT "*pte = %08lx\n", page); + { + unsigned long index = pgd_index(address); + unsigned long pgd_paddr; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + + asm("movl %%cr3,%0":"=r" (pgd_paddr)); + pgd = index + (pgd_t *)__va(pgd_paddr); + printk(KERN_ALERT "*pgd = %016llx\n", (u64)pgd_val(*pgd)); + if (pgd_present(*pgd)) { + pmd = pmd_offset(pgd, address); + printk(KERN_ALERT "*pmd = %016llx\n", (u64)pmd_val(*pmd)); + if (pmd_present(*pmd) && !(pmd_val(*pmd) & _PAGE_PSE)) { + pte = pte_offset(pmd, address); + printk(KERN_ALERT "*pte = %016llx\n", (u64)pte_val(*pte)); + } + } } die("Oops", regs, error_code); bust_spinlocks(0); @@ -363,7 +439,7 @@ do_sigbus: /* Kernel mode? Handle exceptions or die */ if (!(error_code & 4)) goto no_context; - return; + return 0; vmalloc_fault: { @@ -396,6 +472,455 @@ vmalloc_fault: pte_k = pte_offset(pmd_k, address); if (!pte_present(*pte_k)) goto no_context; - return; + return 0; + } +} +#ifdef CONFIG_GRKERNSEC_PAX_PAGEEXEC +/* PaX: called with the page_table_lock spinlock held */ +static inline pte_t * pax_get_pte(struct mm_struct *mm, unsigned long address) +{ + pgd_t *pgd; + pmd_t *pmd; + + pgd = pgd_offset(mm, address); + if (!pgd || !pgd_present(*pgd)) + return 0; + pmd = pmd_offset(pgd, address); + if (!pmd || !pmd_present(*pmd)) + return 0; + return pte_offset(pmd, address); +} +#endif + +/* + * PaX: decide what to do with offenders (regs->eip = fault address) + * + * returns 1 when task should be killed + * 2 when sigreturn trampoline was detected + * 3 when rt_sigreturn trampoline was detected + * 4 when gcc trampoline was detected + * 5 when legitimate ET_EXEC was detected + */ +#if defined(CONFIG_GRKERNSEC_PAX_PAGEEXEC) || defined(CONFIG_GRKERNSEC_PAX_SEGMEXEC) +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ +#ifdef CONFIG_GRKERNSEC_PAX_EMUTRAMP + static const unsigned char trans[8] = {6, 1, 2, 0, 13, 5, 3, 4}; +#endif + +#if defined(CONFIG_GRKERNSEC_PAX_RANDEXEC) || defined(CONFIG_GRKERNSEC_PAX_EMUTRAMP) + int err; +#endif + + if (regs->eflags & X86_EFLAGS_VM) + return 1; + +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + if (current->flags & PF_PAX_RANDEXEC) { + unsigned long esp_4; + if (regs->eip >= current->mm->start_code && + regs->eip < current->mm->end_code) + { + err = get_user(esp_4, (unsigned long*)(regs->esp-4UL)); + if (err || esp_4 == regs->eip) + return 1; + regs->eip += current->mm->delta_exec; + return 5; + } + } +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_EMUTRAMP + +#ifndef CONFIG_GRKERNSEC_PAX_EMUSIGRT + if (!(current->flags & PF_PAX_EMUTRAMP)) + return 1; +#endif + + do { /* PaX: sigreturn emulation */ + unsigned char pop, mov; + unsigned short sys; + unsigned long nr; + + err = get_user(pop, (unsigned char *)(regs->eip)); + err |= get_user(mov, (unsigned char *)(regs->eip + 1)); + err |= get_user(nr, (unsigned long *)(regs->eip + 2)); + err |= get_user(sys, (unsigned short *)(regs->eip + 6)); + + if (err) + break; + + if (pop == 0x58 && + mov == 0xb8 && + nr == __NR_sigreturn && + sys == 0x80cd) + { + +#ifdef CONFIG_GRKERNSEC_PAX_EMUSIGRT + int sig; + struct k_sigaction *ka; + __sighandler_t handler; + + if (get_user(sig, (int *)regs->esp)) + return 1; + if (sig < 1 || sig > _NSIG || sig == SIGKILL || sig == SIGSTOP) + return 1; + ka = ¤t->sig->action[sig-1]; + handler = ka->sa.sa_handler; + if (handler == SIG_DFL || handler == SIG_IGN) { + if (!(current->flags & PF_PAX_EMUTRAMP)) + return 1; + } else if (ka->sa.sa_flags & SA_SIGINFO) + return 1; +#endif + + regs->esp += 4; + regs->eax = nr; + regs->eip += 8; + return 2; + } + } while (0); + + do { /* PaX: rt_sigreturn emulation */ + unsigned char mov; + unsigned short sys; + unsigned long nr; + + err = get_user(mov, (unsigned char *)(regs->eip)); + err |= get_user(nr, (unsigned long *)(regs->eip + 1)); + err |= get_user(sys, (unsigned short *)(regs->eip + 5)); + + if (err) + break; + + if (mov == 0xb8 && + nr == __NR_rt_sigreturn && + sys == 0x80cd) + { + +#ifdef CONFIG_GRKERNSEC_PAX_EMUSIGRT + int sig; + struct k_sigaction *ka; + __sighandler_t handler; + + if (get_user(sig, (int *)regs->esp)) + return 1; + if (sig < 1 || sig > _NSIG || sig == SIGKILL || sig == SIGSTOP) + return 1; + ka = ¤t->sig->action[sig-1]; + handler = ka->sa.sa_handler; + if (handler == SIG_DFL || handler == SIG_IGN) { + if (!(current->flags & PF_PAX_EMUTRAMP)) + return 1; + } else if (!(ka->sa.sa_flags & SA_SIGINFO)) + return 1; +#endif + + regs->eax = nr; + regs->eip += 7; + return 3; + } + } while (0); + +#ifdef CONFIG_GRKERNSEC_PAX_EMUSIGRT + if (!(current->flags & PF_PAX_EMUTRAMP)) + return 1; +#endif + + do { /* PaX: gcc trampoline emulation #1 */ + unsigned char mov1, mov2; + unsigned short jmp; + unsigned long addr1, addr2, ret; + unsigned short call; + + err = get_user(mov1, (unsigned char *)regs->eip); + err |= get_user(addr1, (unsigned long *)(regs->eip + 1)); + err |= get_user(mov2, (unsigned char *)(regs->eip + 5)); + err |= get_user(addr2, (unsigned long *)(regs->eip + 6)); + err |= get_user(jmp, (unsigned short *)(regs->eip + 10)); + err |= get_user(ret, (unsigned long *)regs->esp); + + if (err) + break; + + err = get_user(call, (unsigned short *)(ret-2)); + if (err) + break; + + if ((mov1 & 0xF8) == 0xB8 && + (mov2 & 0xF8) == 0xB8 && + (mov1 & 0x07) != (mov2 & 0x07) && + (jmp & 0xF8FF) == 0xE0FF && + (mov2 & 0x07) == ((jmp>>8) & 0x07) && + (call & 0xF8FF) == 0xD0FF && + regs->eip == ((unsigned long*)regs)[trans[(call>>8) & 0x07]]) + { + ((unsigned long *)regs)[trans[mov1 & 0x07]] = addr1; + ((unsigned long *)regs)[trans[mov2 & 0x07]] = addr2; + regs->eip = addr2; + return 4; + } + } while (0); + + do { /* PaX: gcc trampoline emulation #2 */ + unsigned char mov, jmp; + unsigned long addr1, addr2, ret; + unsigned short call; + + err = get_user(mov, (unsigned char *)regs->eip); + err |= get_user(addr1, (unsigned long *)(regs->eip + 1)); + err |= get_user(jmp, (unsigned char *)(regs->eip + 5)); + err |= get_user(addr2, (unsigned long *)(regs->eip + 6)); + err |= get_user(ret, (unsigned long *)regs->esp); + + if (err) + break; + + err = get_user(call, (unsigned short *)(ret-2)); + if (err) + break; + + if ((mov & 0xF8) == 0xB8 && + jmp == 0xE9 && + (call & 0xF8FF) == 0xD0FF && + regs->eip == ((unsigned long*)regs)[trans[(call>>8) & 0x07]]) + { + ((unsigned long *)regs)[trans[mov & 0x07]] = addr1; + regs->eip += addr2 + 10; + return 4; + } + } while (0); + + do { /* PaX: gcc trampoline emulation #3 */ + unsigned char mov, jmp; + char offset; + unsigned long addr1, addr2, ret; + unsigned short call; + + err = get_user(mov, (unsigned char *)regs->eip); + err |= get_user(addr1, (unsigned long *)(regs->eip + 1)); + err |= get_user(jmp, (unsigned char *)(regs->eip + 5)); + err |= get_user(addr2, (unsigned long *)(regs->eip + 6)); + err |= get_user(ret, (unsigned long *)regs->esp); + + if (err) + break; + + err = get_user(call, (unsigned short *)(ret-3)); + err |= get_user(offset, (char *)(ret-1)); + if (err) + break; + + if ((mov & 0xF8) == 0xB8 && + jmp == 0xE9 && + call == 0x55FF) + { + unsigned long addr; + + err = get_user(addr, (unsigned long*)(regs->ebp + (unsigned long)(long)offset)); + if (err || regs->eip != addr) + break; + + ((unsigned long *)regs)[trans[mov & 0x07]] = addr1; + regs->eip += addr2 + 10; + return 4; + } + } while (0); + + do { /* PaX: gcc trampoline emulation #4 */ + unsigned char mov, jmp, sib; + char offset; + unsigned long addr1, addr2, ret; + unsigned short call; + + err = get_user(mov, (unsigned char *)regs->eip); + err |= get_user(addr1, (unsigned long *)(regs->eip + 1)); + err |= get_user(jmp, (unsigned char *)(regs->eip + 5)); + err |= get_user(addr2, (unsigned long *)(regs->eip + 6)); + err |= get_user(ret, (unsigned long *)regs->esp); + + if (err) + break; + + err = get_user(call, (unsigned short *)(ret-4)); + err |= get_user(sib, (unsigned char *)(ret-2)); + err |= get_user(offset, (char *)(ret-1)); + if (err) + break; + + if ((mov & 0xF8) == 0xB8 && + jmp == 0xE9 && + call == 0x54FF && + sib == 0x24) + { + unsigned long addr; + + err = get_user(addr, (unsigned long*)(regs->esp + 4 + (unsigned long)(long)offset)); + if (err || regs->eip != addr) + break; + + ((unsigned long *)regs)[trans[mov & 0x07]] = addr1; + regs->eip += addr2 + 10; + return 4; + } + } while (0); + + do { /* PaX: gcc trampoline emulation #5 */ + unsigned char mov, jmp, sib; + unsigned long addr1, addr2, ret, offset; + unsigned short call; + + err = get_user(mov, (unsigned char *)regs->eip); + err |= get_user(addr1, (unsigned long *)(regs->eip + 1)); + err |= get_user(jmp, (unsigned char *)(regs->eip + 5)); + err |= get_user(addr2, (unsigned long *)(regs->eip + 6)); + err |= get_user(ret, (unsigned long *)regs->esp); + + if (err) + break; + + err = get_user(call, (unsigned short *)(ret-7)); + err |= get_user(sib, (unsigned char *)(ret-5)); + err |= get_user(offset, (unsigned long *)(ret-4)); + if (err) + break; + + if ((mov & 0xF8) == 0xB8 && + jmp == 0xE9 && + call == 0x94FF && + sib == 0x24) + { + unsigned long addr; + + err = get_user(addr, (unsigned long*)(regs->esp + 4 + offset)); + if (err || regs->eip != addr) + break; + + ((unsigned long *)regs)[trans[mov & 0x07]] = addr1; + regs->eip += addr2 + 10; + return 4; + } + } while (0); +#endif + + return 1; /* PaX in action */ +} +#endif + +#if defined(CONFIG_GRKERNSEC_PAX_PAGEEXEC) || defined(CONFIG_GRKERNSEC_PAX_SEGMEXEC) +void pax_report_insns(void *pc) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 20; i++) { + unsigned char c; + if (get_user(c, (unsigned char*)pc+i)) { + printk("."); + break; + } + printk("%02x ", c); } + printk("\n"); } +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_PAGEEXEC +/* + * PaX: handle the extra page faults or pass it down to the original handler + * + * returns 0 when nothing special was detected + * 1 when sigreturn trampoline (syscall) has to be emulated + */ +asmlinkage int pax_do_page_fault(struct pt_regs *regs, unsigned long error_code) +{ + struct mm_struct *mm = current->mm; + unsigned long address; + pte_t *pte; + unsigned char pte_mask; + int ret; + + __asm__("movl %%cr2,%0":"=r" (address)); + + /* It's safe to allow irq's after cr2 has been saved */ + if (likely(regs->eflags & X86_EFLAGS_IF)) + local_irq_enable(); + + if (unlikely((error_code & 5) != 5 || + address >= TASK_SIZE || + (regs->eflags & X86_EFLAGS_VM) || + !(current->flags & PF_PAX_PAGEEXEC))) + return do_page_fault(regs, error_code, address); + + /* PaX: it's our fault, let's handle it if we can */ + + /* PaX: take a look at read faults before acquiring any locks */ + if (unlikely(!(error_code & 2) && (regs->eip == address))) { + /* instruction fetch attempt from a protected page in user mode */ + ret = pax_handle_fetch_fault(regs); + switch (ret) { +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + case 5: + return 0; +#endif + +#ifdef CONFIG_GRKERNSEC_PAX_EMUTRAMP + case 4: + return 0; + case 3: + case 2: + return 1; +#endif + } + pax_report_fault(regs, (void*)regs->eip, (void*)regs->esp); + do_exit(SIGKILL); + } + + pte_mask = _PAGE_ACCESSED | _PAGE_USER | ((error_code & 2) << (_PAGE_BIT_DIRTY-1)); + + spin_lock(&mm->page_table_lock); + pte = pax_get_pte(mm, address); + if (unlikely(!pte || !(pte_val(*pte) & _PAGE_PRESENT) || pte_exec(*pte))) { + spin_unlock(&mm->page_table_lock); + do_page_fault(regs, error_code, address); + return 0; + } + + if (unlikely((error_code & 2) && !pte_write(*pte))) { + /* write attempt to a protected page in user mode */ + spin_unlock(&mm->page_table_lock); + do_page_fault(regs, error_code, address); + return 0; + } + + /* + * PaX: fill DTLB with user rights and retry + */ + __asm__ __volatile__ ( + "orb %2,%1\n" +#if defined(CONFIG_M586) || defined(CONFIG_M586TSC) +/* + * PaX: let this uncommented 'invlpg' remind us on the behaviour of Intel's + * (and AMD's) TLBs. namely, they do not cache PTEs that would raise *any* + * page fault when examined during a TLB load attempt. this is true not only + * for PTEs holding a non-present entry but also present entries that will + * raise a page fault (such as those set up by PaX, or the copy-on-write + * mechanism). in effect it means that we do *not* need to flush the TLBs + * for our target pages since their PTEs are simply not in the TLBs at all. + * the best thing in omitting it is that we gain around 15-20% speed in the + * fast path of the page fault handler and can get rid of tracing since we + * can no longer flush unintended entries. + */ + + "invlpg %0\n" +#endif + + "testb $0,%0\n" + "xorb %3,%1\n" + : + : "m" (*(char*)address), "m" (*(char*)pte) , "q" (pte_mask) , "i" (_PAGE_USER) + : "memory", "cc"); + spin_unlock(&mm->page_table_lock); + return 0; +} +#endif diff -urNp linux-2.4.29/arch/i386/mm/init.c linux-2.4.29/arch/i386/mm/init.c --- linux-2.4.29/arch/i386/mm/init.c 2004-04-14 09:05:25.000000000 -0400 +++ linux-2.4.29/arch/i386/mm/init.c 2005-01-16 23:05:32.332851360 -0500 @@ -37,6 +37,7 @@ #include #include #include +#include mmu_gather_t mmu_gathers[NR_CPUS]; unsigned long highstart_pfn, highend_pfn; @@ -122,7 +123,7 @@ void show_mem(void) /* References to section boundaries */ -extern char _text, _etext, _edata, __bss_start, _end; +extern char _text, _etext, _data, _edata, __bss_start, _end; extern char __init_begin, __init_end; static inline void set_pte_phys (unsigned long vaddr, @@ -178,17 +179,7 @@ static void __init fixrange_init (unsign pgd = pgd_base + i; for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { -#if CONFIG_X86_PAE - if (pgd_none(*pgd)) { - pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pgd(pgd, __pgd(__pa(pmd) + 0x1)); - if (pmd != pmd_offset(pgd, 0)) - printk("PAE BUG #02!\n"); - } pmd = pmd_offset(pgd, vaddr); -#else - pmd = (pmd_t *)pgd; -#endif for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { if (pmd_none(*pmd)) { pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); @@ -217,25 +208,22 @@ static void __init pagetable_init (void) end = (unsigned long)__va(max_low_pfn*PAGE_SIZE); pgd_base = swapper_pg_dir; -#if CONFIG_X86_PAE - for (i = 0; i < PTRS_PER_PGD; i++) - set_pgd(pgd_base + i, __pgd(1 + __pa(empty_zero_page))); -#endif i = __pgd_offset(PAGE_OFFSET); pgd = pgd_base + i; + if (cpu_has_pse) { + set_in_cr4(X86_CR4_PSE); + boot_cpu_data.wp_works_ok = 1; + + if (cpu_has_pge) + set_in_cr4(X86_CR4_PGE); + } + for (; i < PTRS_PER_PGD; pgd++, i++) { vaddr = i*PGDIR_SIZE; if (end && (vaddr >= end)) break; -#if CONFIG_X86_PAE - pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pgd(pgd, __pgd(__pa(pmd) + 0x1)); -#else - pmd = (pmd_t *)pgd; -#endif - if (pmd != pmd_offset(pgd, 0)) - BUG(); + pmd = pmd_offset(pgd, PAGE_OFFSET); for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { vaddr = i*PGDIR_SIZE + j*PMD_SIZE; if (end && (vaddr >= end)) @@ -243,14 +231,10 @@ static void __init pagetable_init (void) if (cpu_has_pse) { unsigned long __pe; - set_in_cr4(X86_CR4_PSE); - boot_cpu_data.wp_works_ok = 1; __pe = _KERNPG_TABLE + _PAGE_PSE + __pa(vaddr); /* Make it "global" too if supported */ - if (cpu_has_pge) { - set_in_cr4(X86_CR4_PGE); + if (cpu_has_pge) __pe += _PAGE_GLOBAL; - } set_pmd(pmd, __pmd(__pe)); continue; } @@ -289,17 +273,6 @@ static void __init pagetable_init (void) pte = pte_offset(pmd, vaddr); pkmap_page_table = pte; #endif - -#if CONFIG_X86_PAE - /* - * Add low memory identity-mappings - SMP needs it when - * starting up on an AP from real-mode. In the non-PAE - * case we already have these mappings through head.S. - * All user-space mappings are explicitly cleared after - * SMP startup. - */ - pgd_base[0] = pgd_base[USER_PTRS_PER_PGD]; -#endif } void __init zap_low_mappings (void) @@ -312,11 +285,7 @@ void __init zap_low_mappings (void) * us, because pgd_clear() is a no-op on i386. */ for (i = 0; i < USER_PTRS_PER_PGD; i++) -#if CONFIG_X86_PAE - set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); -#else set_pgd(swapper_pg_dir+i, __pgd(0)); -#endif flush_tlb_all(); } @@ -353,17 +322,17 @@ void __init paging_init(void) pagetable_init(); load_cr3(swapper_pg_dir); + __flush_tlb_all(); -#if CONFIG_X86_PAE - /* - * We will bail out later - printk doesn't work right now so - * the user would just see a hanging kernel. - */ - if (cpu_has_pae) - set_in_cr4(X86_CR4_PAE); +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + +#ifdef CONFIG_X86_PAE + memcpy(kernexec_pm_dir, swapper_pm_dir, sizeof(kernexec_pm_dir)); +#else + memcpy(kernexec_pg_dir, swapper_pg_dir, sizeof(kernexec_pg_dir)); #endif - __flush_tlb_all(); +#endif #ifdef CONFIG_HIGHMEM kmap_init(); @@ -529,7 +498,7 @@ void __init mem_init(void) reservedpages = free_pages_init(); codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; + datasize = (unsigned long) &_edata - (unsigned long) &_data; initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", @@ -542,10 +511,6 @@ void __init mem_init(void) (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) ); -#if CONFIG_X86_PAE - if (!cpu_has_pae) - panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); -#endif if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); @@ -589,6 +554,45 @@ void free_initmem(void) { unsigned long addr; +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + /* PaX: limit KERNEL_CS to actual size */ + { + unsigned long limit; + pgd_t *pgd; + pmd_t *pmd; + + limit = (unsigned long)&_etext >> PAGE_SHIFT; + gdt_table[2].a = (gdt_table[2].a & 0xFFFF0000UL) | (limit & 0x0FFFFUL); + gdt_table[2].b = (gdt_table[2].b & 0xFFF0FFFFUL) | (limit & 0xF0000UL); + +#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC + gdt_table2[2].a = (gdt_table2[2].a & 0xFFFF0000UL) | (limit & 0x0FFFFUL); + gdt_table2[2].b = (gdt_table2[2].b & 0xFFF0FFFFUL) | (limit & 0xF0000UL); +#endif + + /* PaX: make KERNEL_CS read-only */ + for (addr = __KERNEL_TEXT_OFFSET; addr < (unsigned long)&_data; addr += PMD_SIZE) { + pgd = pgd_offset_k(addr); + pmd = pmd_offset(pgd, addr); + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_GLOBAL)); + } + +#ifdef CONFIG_X86_PAE + memcpy(kernexec_pm_dir, swapper_pm_dir, sizeof(kernexec_pm_dir)); +#else + memcpy(kernexec_pg_dir, swapper_pg_dir, sizeof(kernexec_pg_dir)); +#endif + + for (addr = __KERNEL_TEXT_OFFSET; addr < (unsigned long)&_data; addr += PMD_SIZE) { + pgd = pgd_offset_k(addr); + pmd = pmd_offset(pgd, addr); + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW)); + } + flush_tlb_all(); + } +#endif + + memset(&__init_begin, 0, &__init_end - &__init_begin); addr = (unsigned long)(&__init_begin); for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); diff -urNp linux-2.4.29/arch/i386/mm/ioremap.c linux-2.4.29/arch/i386/mm/ioremap.c --- linux-2.4.29/arch/i386/mm/ioremap.c 2003-11-28 13:26:19.000000000 -0500 +++ linux-2.4.29/arch/i386/mm/ioremap.c 2005-01-16 23:05:32.332851360 -0500 @@ -49,7 +49,7 @@ static inline int remap_area_pmd(pmd_t * if (address >= end) BUG(); do { - pte_t * pte = pte_alloc(&init_mm, pmd, address); + pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); if (!pte) return -ENOMEM; remap_area_pte(pte, address, end - address, address + phys_addr, flags); diff -urNp linux-2.4.29/arch/i386/vmlinux.lds linux-2.4.29/arch/i386/vmlinux.lds --- linux-2.4.29/arch/i386/vmlinux.lds 2002-02-25 14:37:53.000000000 -0500 +++ linux-2.4.29/arch/i386/vmlinux.lds 1969-12-31 19:00:00.000000000 -0500 @@ -1,82 +0,0 @@ -/* ld script to make i386 Linux kernel - * Written by Martin Mares ; - */ -OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(_start) -SECTIONS -{ - . = 0xC0000000 + 0x100000; - _text = .; /* Text and read-only data */ - .text : { - *(.text) - *(.fixup) - *(.gnu.warning) - } = 0x9090 - - _etext = .; /* End of text section */ - - .rodata : { *(.rodata) *(.rodata.*) } - .kstrtab : { *(.kstrtab) } - - . = ALIGN(16); /* Exception table */ - __start___ex_table = .; - __ex_table : { *(__ex_table) } - __stop___ex_table = .; - - __start___ksymtab = .; /* Kernel symbol table */ - __ksymtab : { *(__ksymtab) } - __stop___ksymtab = .; - - .data : { /* Data */ - *(.data) - CONSTRUCTORS - } - - _edata = .; /* End of data section */ - - . = ALIGN(8192); /* init_task */ - .data.init_task : { *(.data.init_task) } - - . = ALIGN(4096); /* Init code and data */ - __init_begin = .; - .text.init : { *(.text.init) } - .data.init : { *(.data.init) } - . = ALIGN(16); - __setup_start = .; - .setup.init : { *(.setup.init) } - __setup_end = .; - __initcall_start = .; - .initcall.init : { *(.initcall.init) } - __initcall_end = .; - . = ALIGN(4096); - __init_end = .; - - . = ALIGN(4096); - .data.page_aligned : { *(.data.idt) } - - . = ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } - - __bss_start = .; /* BSS */ - .bss : { - *(.bss) - } - _end = . ; - - /* Sections to be discarded */ - /DISCARD/ : { - *(.text.exit) - *(.data.exit) - *(.exitcall.exit) - } - - /* Stabs debugging sections. */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } -} diff -urNp linux-2.4.29/arch/i386/vmlinux.lds.S linux-2.4.29/arch/i386/vmlinux.lds.S --- linux-2.4.29/arch/i386/vmlinux.lds.S 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.4.29/arch/i386/vmlinux.lds.S 2005-01-16 23:05:32.342849840 -0500 @@ -0,0 +1,129 @@ +/* ld script to make i386 Linux kernel + * Written by Martin Mares ; + */ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) +SECTIONS +{ + . = __PAGE_OFFSET + 0x100000; + .text.startup : { + BYTE(0xEA) /* jmp far */ + LONG(startup_32 + __KERNEL_TEXT_OFFSET - __PAGE_OFFSET) + SHORT(__KERNEL_CS) + } + + . = ALIGN(4096); /* Init code and data */ + __init_begin = .; + .data.init : { *(.data.init) } + . = ALIGN(16); + __setup_start = .; + .setup.init : { *(.setup.init) } + __setup_end = .; + __initcall_start = .; + .initcall.init : { *(.initcall.init) } + __initcall_end = .; + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + __text_init_start = .; + .text.init (. - __KERNEL_TEXT_OFFSET) : AT (__text_init_start) { + *(.text.init) + . = ALIGN(4*1024*1024) - 1; + BYTE(0) + } + __init_end = . + __KERNEL_TEXT_OFFSET; + +/* + * PaX: this must be kept in synch with the KERNEL_CS base + * in the GDTs in arch/i386/kernel/head.S + */ + _text = .; /* Text and read-only data */ + .text : AT (. + __KERNEL_TEXT_OFFSET) { +#else + .text.init : { *(.text.init) } + . = ALIGN(4096); + __init_end = .; + _text = .; /* Text and read-only data */ + .text : { +#endif + + *(.text) + *(.fixup) + *(.gnu.warning) + } = 0x9090 + + _etext = .; /* End of text section */ + + . = ALIGN(4096); + . += __KERNEL_TEXT_OFFSET; + .rodata.page_aligned : { + *(.rodata.empty_zero_page) + *(.rodata.idt) + } + .rodata : { *(.rodata) *(.rodata.*) } + .kstrtab : { *(.kstrtab) } + + . = ALIGN(16); /* Exception table */ + __start___ex_table = .; + __ex_table : { *(__ex_table) } + __stop___ex_table = .; + + __start___ksymtab = .; /* Kernel symbol table */ + __ksymtab : { *(__ksymtab) } + __stop___ksymtab = .; + +#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC + . = ALIGN(4*1024*1024); +#else + . = ALIGN(32); +#endif + + _data = .; + .data : { /* Data */ + *(.data) + CONSTRUCTORS + } + + . = ALIGN(32); + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + + . = ALIGN(8192); + .data.init_task : { *(.data.init_task) } + + . = ALIGN(4096); + .data.page_aligned : { + *(.data.pg0) + +#ifdef CONFIG_X86_PAE + *(.data.swapper_pm_dir) +#endif + + *(.data.swapper_pg_dir) + } + + _edata = .; /* End of data section */ + + __bss_start = .; /* BSS */ + .bss : { + *(.bss) + } + __bss_end = . ; + + _end = . ; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + *(.exitcall.exit) + } + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} diff -urNp linux-2.4.29/arch/ia64/config.in linux-2.4.29/arch/ia64/config.in --- linux-2.4.29/arch/ia64/config.in 2004-11-17 06:54:21.000000000 -0500 +++ linux-2.4.29/arch/ia64/config.in 2005-01-16 23:05:32.343849688 -0500 @@ -319,3 +319,12 @@ fi int 'Kernel messages buffer length shift (0 = default)' CONFIG_LOG_BUF_SHIFT 0 endmenu + +mainmenu_option next_comment +comment 'Grsecurity' +bool 'Grsecurity' CONFIG_GRKERNSEC +if [ "$CONFIG_GRKERNSEC" = "y" ]; then + source grsecurity/Config.in +fi +endmenu + diff -urNp linux-2.4.29/arch/ia64/ia32/binfmt_elf32.c linux-2.4.29/arch/ia64/ia32/binfmt_elf32.c --- linux-2.4.29/arch/ia64/ia32/binfmt_elf32.c 2005-01-16 23:04:12.640966368 -0500 +++ linux-2.4.29/arch/ia64/ia32/binfmt_elf32.c 2005-01-16 23:05:32.344849536 -0500 @@ -46,6 +46,16 @@ extern void put_dirty_page (struct task_ static void elf32_set_personality (void); +#ifdef CONFIG_GRKERNSEC_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) ((tsk)->personality == PER_LINUX32 ? 0x08048000UL : 0x4000000000000000UL) +#define PAX_DELTA_MMAP_LSB(tsk) IA32_PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) ((tsk)->personality == PER_LINUX32 ? 16 : 43 - IA32_PAGE_SHIFT) +#define PAX_DELTA_EXEC_LSB(tsk) IA32_PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) ((tsk)->personality == PER_LINUX32 ? 16 : 43 - IA32_PAGE_SHIFT) +#define PAX_DELTA_STACK_LSB(tsk) IA32_PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) ((tsk)->personality == PER_LINUX32 ? 16 : 43 - IA32_PAGE_SHIFT) +#endif + #define ELF_PLAT_INIT(_r, load_addr) ia64_elf32_init(_r) #define setup_arg_pages(bprm) ia32_setup_arg_pages(bprm) #define elf_map elf32_map @@ -190,8 +200,15 @@ ia32_setup_arg_pages (struct linux_binpr mpnt->vm_mm = current->mm; mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; mpnt->vm_end = IA32_STACK_TOP; - mpnt->vm_page_prot = PAGE_COPY; mpnt->vm_flags = VM_STACK_FLAGS; + +#ifdef CONFIG_GRKERNSEC_PAX_PAGEEXEC + if (!(current->flags & PF_PAX_PAGEEXEC)) + mpnt->vm_page_prot = protection_map[(VM_STACK_FLAGS | VM_EXEC) & 0x7]; + else +#endif + + mpnt->vm_page_prot = protection_map[VM_STACK_FLAGS & 0x7]; mpnt->vm_ops = NULL; mpnt->vm_pgoff = 0; mpnt->vm_file = NULL; diff -urNp linux-2.4.29/arch/ia64/ia32/sys_ia32.c linux-2.4.29/arch/ia64/ia32/sys_ia32.c --- linux-2.4.29/arch/ia64/ia32/sys_ia32.c 2005-01-16 23:04:12.645965608 -0500 +++ linux-2.4.29/arch/ia64/ia32/sys_ia32.c 2005-01-16 23:05:32.349848776 -0500 @@ -536,6 +536,11 @@ sys32_mmap (struct mmap_arg_struct *arg) flags = a.flags; +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); if (!(flags & MAP_ANONYMOUS)) { file = fget(a.fd); @@ -557,6 +562,11 @@ sys32_mmap2 (unsigned int addr, unsigned struct file *file = NULL; unsigned long retval; +#ifdef CONFIG_GRKERNSEC_PAX_RANDEXEC + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); if (!(flags & MAP_ANONYMOUS)) { file = fget(fd); diff -urNp linux-2.4.29/arch/ia64/kernel/ptrace.c linux-2.4.29/arch/ia64/kernel/ptrace.c --- linux-2.4.29/arch/ia64/kernel/ptrace.c 2004-04-14 09:05:26.000000000 -0400 +++ linux-2.4.29/arch/ia64/kernel/ptrace.c 2005-01-16 23:05:32.352848320 -0500 @@ -16,6 +16,7 @@ #include #inclu