diff --git a/defs.h b/defs.h index 0c4bda7..79544eb 100644 --- a/defs.h +++ b/defs.h @@ -75,7 +75,8 @@ void ioapicinit(void); // kalloc.c char* kalloc(void); void kfree(char*); -void kinit(void*, void*); +void kinit1(void*, void*); +void kinit2(void*, void*); // lapic.c int lapicid(void); @@ -160,6 +161,18 @@ void uartputc(int); // vm.c void seginit(void); void switchuvm(struct proc*); +pde_t* setupkvm(void); +void kvmalloc(void); +void switchkvm(void); +void inituvm(pde_t*, char*, uint); +int allocuvm(pde_t*, uint, uint); +int deallocuvm(pde_t*, uint, uint); +void freevm(pde_t*); +pde_t* copyuvm(pde_t*, uint); +int loaduvm(pde_t*, char*, struct inode*, uint, uint); +void clearpteu(pde_t*, char*); +char* uva2ka(pde_t*, char*); +int copyout(pde_t*, uint, void*, uint); // number of elements in fixed-size array #define NELEM(x) (sizeof(x)/sizeof((x)[0])) diff --git a/entry.S b/entry.S index 9422e26..a9cf503 100644 --- a/entry.S +++ b/entry.S @@ -20,6 +20,7 @@ #include "asm.h" #include "mmu.h" +#include "memlayout.h" #include "param.h" # Multiboot header. Data to direct multiboot loader. @@ -37,11 +38,25 @@ multiboot_header: # Since we haven't set up virtual memory yet, our entry point is # the physical address of 'entry'. .globl _start -_start = entry +_start = V2P_WO(entry) # Entering xv6 on boot processor, with paging off. .globl entry entry: + # Turn on page size extension for 4MB pages + movl %cr4, %eax + orl $(CR4_PSE), %eax + movl %eax, %cr4 + + # Set page directory + movl $(V2P_WO(entrypgdir)), %eax + movl %eax, %cr3 + + # Turn on paging. + movl %cr0, %eax + orl $(CR0_PG|CR0_WP), %eax + movl %eax, %cr0 + # Set up the stack pointer. 
movl $(stack + KSTACKSIZE), %esp diff --git a/exec.c b/exec.c index b756995..0b27d48 100644 --- a/exec.c +++ b/exec.c @@ -12,31 +12,22 @@ exec(char *path, char **argv) { char *s, *last; int i, off; - uint argc; + uint argc, sz, sp, ustack[3+MAXARG+1]; struct elfhdr elf; struct inode *ip; - struct proc *curproc = myproc(); struct proghdr ph; - uint sz = 0; - char *offset; - uint usp, ustack[3*MAXARG + 1]; - - // Prepare new address space - if((offset = kalloc()) == 0){ - return -1; - } - memset(offset, 0, PGSIZE); + pde_t *pgdir, *oldpgdir; + struct proc *curproc = myproc(); - // read path begin_op(); if((ip = namei(path)) == 0){ end_op(); - kfree(offset); // FIX: Prevent memory leak cprintf("exec: fail\n"); return -1; } ilock(ip); + pgdir = 0; // Check ELF header if(readi(ip, (char*)&elf, 0, sizeof(elf)) != sizeof(elf)) @@ -44,7 +35,11 @@ exec(char *path, char **argv) if(elf.magic != ELF_MAGIC) goto bad; + if((pgdir = setupkvm()) == 0) + goto bad; + // Load program into memory. + sz = 0; for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){ if(readi(ip, (char*)&ph, off, sizeof(ph)) != sizeof(ph)) goto bad; if(ph.type != ELF_PROG_LOAD) continue; if(ph.memsz < ph.filesz) goto bad; if(ph.vaddr + ph.memsz < ph.vaddr) goto bad; + if((sz = allocuvm(pgdir, sz, ph.vaddr + ph.memsz)) == 0) + goto bad; - if(ph.vaddr + ph.memsz > PGSIZE - KSTACKSIZE) + if(ph.vaddr % PGSIZE != 0) goto bad; - if(readi(ip, (char*)(offset + ph.vaddr), ph.off, ph.filesz) != ph.filesz) + if(loaduvm(pgdir, (char*)ph.vaddr, ip, ph.off, ph.filesz) < 0) goto bad; - if(ph.vaddr + ph.memsz > sz) - sz = ph.vaddr + ph.memsz; } iunlockput(ip); end_op(); ip = 0; - // Save program name for debugging. - for(last=s=path; *s; s++) - if(*s == '/') - last = s+1; - safestrcpy(curproc->name, last, sizeof(curproc->name)); + // Allocate two pages at the next page boundary. + // Make the first inaccessible (stack guard). Use the second as the + // user stack. + sz = PGROUNDUP(sz); + if((sz = allocuvm(pgdir, sz, sz + 2*PGSIZE)) == 0) + goto bad; + clearpteu(pgdir, (char*)(sz - 2*PGSIZE)); + sp = sz; // Push argument strings, prepare rest of stack in ustack. 
- usp = PGSIZE - KSTACKSIZE; for(argc = 0; argv[argc]; argc++) { if(argc >= MAXARG) goto bad; - usp = usp - (strlen(argv[argc]) + 1); - // cprintf("%s\n", argv[argc]); - memmove((uint*)(usp + offset), argv[argc], strlen(argv[argc]) + 1); - ustack[3+argc] = usp; // Add pointer to the string on the stack + sp = (sp - (strlen(argv[argc]) + 1)) & ~3; + if(copyout(pgdir, sp, argv[argc], strlen(argv[argc]) + 1) < 0) + goto bad; + ustack[3+argc] = sp; } - // cprintf("%d\n", argc); ustack[3+argc] = 0; ustack[0] = 0xffffffff; // fake return PC ustack[1] = argc; - ustack[2] = (usp - (argc+1)*4); // argv pointer - usp -= (3+argc+1)*4; - memmove(usp + offset, ustack, (3+argc+1)*4); + ustack[2] = sp - (argc+1)*4; // argv pointer - curproc->tf->eip = elf.entry; // main - curproc->tf->esp = usp; + sp -= (3+argc+1) * 4; + if(copyout(pgdir, sp, ustack, (3+argc+1)*4) < 0) + goto bad; + + // Save program name for debugging. + for(last=s=path; *s; s++) + if(*s == '/') + last = s+1; + safestrcpy(curproc->name, last, sizeof(curproc->name)); - // We free the old address space. It does not contain the kernel stack! kfree - // writes 1s to the entire PGSIZE. All the return addresses etc will get - // messed up, otherwise! - kfree(curproc->offset); + // Commit to the user image. + oldpgdir = curproc->pgdir; + curproc->pgdir = pgdir; curproc->sz = sz; - curproc->offset = offset; + curproc->tf->eip = elf.entry; // main + curproc->tf->esp = sp; switchuvm(curproc); + freevm(oldpgdir); return 0; bad: + if(pgdir) + freevm(pgdir); if(ip){ iunlockput(ip); end_op(); } - kfree(offset); // FIX: Prevent memory leak on ELF load failure return -1; } diff --git a/kalloc.c b/kalloc.c index 4495c33..05c846b 100644 --- a/kalloc.c +++ b/kalloc.c @@ -20,10 +20,23 @@ struct { struct run *freelist; } kmem; +// Initialization happens in two phases. +// 1. main() calls kinit1() while still using entrypgdir, to place just +// the pages mapped by entrypgdir on the free list. +// 2. 
main() calls kinit2() after kvmalloc() has installed kpgdir and +// switched to it, to place the remaining pages on the free list. +// The existing pushcli/popcli in kfree/kalloc is already safe before +// locks are initialized, so no use_lock flag is needed in this variant. void -kinit(void *vstart, void *vend) +kinit1(void *vstart, void *vend) { - freerange(vstart, vend); // (freerange calls kfree, so the lock must exist!) + freerange(vstart, vend); +} + +void +kinit2(void *vstart, void *vend) +{ + freerange(vstart, vend); } void diff --git a/kernel.ld b/kernel.ld index 0b70493..6f2f8b0 100644 --- a/kernel.ld +++ b/kernel.ld @@ -9,8 +9,11 @@ SECTIONS { /* Link the kernel at this address: "." means the current address */ /* Must be equal to KERNLINK */ - . = 0x00100000; + . = 0x80100000; + /* AT(0x100000) tells the bootloader to load the code into + * physical memory at 0x100000, even though the linker assigns + * high-half virtual addresses. */ .text : AT(0x100000) { *(.text .stub .text.* .gnu.linkonce.t.*) } diff --git a/main.c b/main.c index 599e3f5..488b619 100644 --- a/main.c +++ b/main.c @@ -12,18 +12,32 @@ extern char end[]; // first address after kernel loaded from ELF file +// Bootstrap page directory. Used only by entry.S before kvmalloc() runs. +// Maps VA [0, 4MB) -> PA [0, 4MB) so entry.S can keep fetching low-half +// instructions, plus VA [KERNBASE, KERNBASE+4MB) -> PA [0, 4MB) so the +// kernel can run at its linked high-half virtual addresses. Both 4MB PSE. +__attribute__((__aligned__(PGSIZE))) +pde_t entrypgdir[NPDENTRIES] = { + // Map VA's [0, 4MB) to PA's [0, 4MB) + [0] = (0) | PTE_P | PTE_W | PTE_PS, + // Map VA's [KERNBASE, KERNBASE+4MB) to PA's [0, 4MB) + [KERNBASE>>PDXSHIFT] = (0) | PTE_P | PTE_W | PTE_PS, +}; + // Bootstrap processor starts running C code here. 
int main(void) { - kinit(end, P2V(PHYSTOP)); // phys page allocator + kinit1(end, P2V(4*1024*1024)); // phys pages inside entrypgdir's window + consoleinit(); // (moved up so early panics are visible) + uartinit(); + kvmalloc(); // build kpgdir covering all PHYSTOP + devices + kinit2(P2V(4*1024*1024), P2V(PHYSTOP)); // now safe to freerange the rest mpinit(); // detect other processors lapicinit(); // interrupt controller picinit(); // disable pic ioapicinit(); // another interrupt controller - consoleinit(); // console hardware - uartinit(); // serial port - ideinit(); // disk + ideinit(); // disk tvinit(); // trap vectors binit(); // buffer cache fileinit(); // file table diff --git a/memlayout.h b/memlayout.h index 55a5224..96c6c80 100644 --- a/memlayout.h +++ b/memlayout.h @@ -6,7 +6,7 @@ #define DEVSPACE 0xFE000000 // Other devices are at high addresses // Key addresses for address space layout -#define KERNBASE 0x0 // First kernel virtual address +#define KERNBASE 0x80000000 // First kernel virtual address #define KERNLINK (KERNBASE+EXTMEM) // Address where kernel is linked #define PROCSIZE 0x100 // 1MB is the size of each process (in multiple of 4KB) diff --git a/mmu.h b/mmu.h index c829a22..14a25c7 100644 --- a/mmu.h +++ b/mmu.h @@ -6,6 +6,10 @@ // Control Register flags #define CR0_PE 0x00000001 // Protection Enable +#define CR0_WP 0x00010000 // Write Protect +#define CR0_PG 0x80000000 // Paging + +#define CR4_PSE 0x00000010 // Page size extension // various segment selectors. #define SEG_KCODE 1 // kernel code @@ -61,7 +65,7 @@ struct segdesc { #define STS_IG32 0xE // 32-bit Interrupt Gate #define STS_TG32 0xF // 32-bit Trap Gate -#define PGSIZE (PROCSIZE << 12) // bytes mapped by a page. This is kept same as PROCSIZE so that the process can be contiguously allocated. 
+#define PGSIZE 4096 // bytes mapped by a page #define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1)) @@ -143,3 +147,35 @@ struct gatedesc { } #endif + +// Page directory and page table constants. +#define NPDENTRIES 1024 // # directory entries per page directory +#define NPTENTRIES 1024 // # PTEs per page table + +#define PTXSHIFT 12 // offset of PTX in a linear address +#define PDXSHIFT 22 // offset of PDX in a linear address + +// page directory index +#define PDX(va) (((uint)(va) >> PDXSHIFT) & 0x3FF) + +// page table index +#define PTX(va) (((uint)(va) >> PTXSHIFT) & 0x3FF) + +// construct virtual address from indexes and offset +#define PGADDR(d, t, o) ((uint)((d) << PDXSHIFT | (t) << PTXSHIFT | (o))) + +#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1)) + +// Page table/directory entry flags. +#define PTE_P 0x001 // Present +#define PTE_W 0x002 // Writeable +#define PTE_U 0x004 // User +#define PTE_PS 0x080 // Page Size + +// Address in page table or page directory entry +#define PTE_ADDR(pte) ((uint)(pte) & ~0xFFF) +#define PTE_FLAGS(pte) ((uint)(pte) & 0xFFF) + +#ifndef __ASSEMBLER__ +typedef uint pte_t; +#endif diff --git a/mp.c b/mp.c index 25bcd5c..79bb0ad 100644 --- a/mp.c +++ b/mp.c @@ -5,7 +5,7 @@ #include "types.h" #include "defs.h" #include "param.h" -// #include "memlayout.h" +#include "memlayout.h" #include "mp.h" #include "x86.h" #include "mmu.h" @@ -32,8 +32,7 @@ mpsearch1(uint a, int len) { uchar *e, *p, *addr; - // addr = P2V(a); - addr = (uchar*) a; + addr = P2V(a); e = addr+len; for(p = addr; p < e; p += sizeof(struct mp)) if(memcmp(p, "_MP_", 4) == 0 && sum(p, sizeof(struct mp)) == 0) @@ -53,8 +52,7 @@ mpsearch(void) uint p; struct mp *mp; - // bda = (uchar *) P2V(0x400); - bda = (uchar *) 0x400; + bda = (uchar *) P2V(0x400); if((p = ((bda[0x0F]<<8)| bda[0x0E]) << 4)){ if((mp = mpsearch1(p, 1024))) return mp; @@ -79,8 +77,7 @@ mpconfig(struct mp **pmp) if((mp = mpsearch()) == 0 || mp->physaddr == 0) return 0; - // conf = (struct mpconf*) 
P2V((uint) mp->physaddr); - conf = (struct mpconf*) (uint) mp->physaddr; + conf = (struct mpconf*) P2V((uint) mp->physaddr); if(memcmp(conf, "PCMP", 4) != 0) return 0; if(conf->version != 1 && conf->version != 4) return 0; diff --git a/proc.c b/proc.c index aa468ee..b80ff65 100644 --- a/proc.c +++ b/proc.c @@ -68,16 +68,15 @@ allocproc(void) } p->sz = 0; - // kstack lives on a different segment + // kstack is its own kalloc'd page. Leave p->kstack as the base of the + // kalloc'd chunk so kfree(p->kstack) passes kalloc.c's alignment check. + // The first KSTACKSIZE bytes of the page serve as the kernel stack. + // NOTE(review): with PGSIZE now 4096 (see mmu.h), kalloc returns 4KB + // pages, not 1MB chunks; confirm KSTACKSIZE <= PGSIZE in param.h. if((p->kstack = kalloc()) == 0){ p->state = UNUSED; return 0; } - - sp = (char*)(p->kstack + PGSIZE); - - // Allocate kernel stack. - p->kstack = sp - KSTACKSIZE; + sp = p->kstack + KSTACKSIZE; // Leave room for trap frame. sp -= sizeof *p->tf; @@ -107,11 +106,18 @@ pinit(void) extern char _binary_initcode_start[], _binary_initcode_size[]; p = allocproc(); - + + // SHADOW PAGING: Build the initial page directory for initproc. + p->pgdir = setupkvm(); + inituvm(p->pgdir, _binary_initcode_start, (uint)_binary_initcode_size); + initproc = p; memmove(p->offset, _binary_initcode_start, (int)_binary_initcode_size); - p->sz = (uint)_binary_initcode_size; + // p->sz is the size of the user address space, not just the initcode + // bytes. inituvm mapped one page at virt 0, so argint/argptr must accept + // user addresses up to PGSIZE (the stack lives at the top of this page). 
+ p->sz = PGSIZE; memset(p->tf, 0, sizeof(*p->tf)); p->tf->cs = (SEG_UCODE << 3) | DPL_USER; @@ -120,8 +126,8 @@ pinit(void) p->tf->ss = p->tf->ds; p->tf->eflags = FL_IF; - p->tf->esp = PGSIZE - KSTACKSIZE; - p->tf->eip = 0; // beginning of initcode.S + p->tf->esp = PGSIZE; // stack sits at top of the single inituvm'd page + p->tf->eip = 0; // beginning of initcode.S safestrcpy(p->name, "initcode", sizeof(p->name)); // cwd is set in forkret, after iinit has initialized the inode cache. @@ -207,6 +213,14 @@ fork(void) return -1; } + // SHADOW PAGING: Clone the parent's page directory. + if((np->pgdir = copyuvm(curproc->pgdir, curproc->sz)) == 0){ + kfree(np->kstack); + np->kstack = 0; + np->state = UNUSED; + return -1; + } + // 2. Copy the entire user memory segment // In p23's segmentation model, the physical memory starts at np->offset // and the size of the user space is curproc->sz @@ -314,6 +328,11 @@ wait(void) pid = p->pid; kfree(p->offset); // Free the 1MB physical block p->offset = 0; + kfree(p->kstack); // Free the kernel stack chunk + p->kstack = 0; + // SHADOW PAGING: Free the page directory. + freevm(p->pgdir); + p->pgdir = 0; p->pid = 0; p->parent = 0; p->name[0] = 0; @@ -449,21 +468,23 @@ procdump(void) popcli(); } +// Grow current process's memory by n bytes. +// Return 0 on success, -1 on failure. 
int growproc(int n) { - struct proc *p = myproc(); - int newsz; - - newsz = (int)p->sz + n; - if(newsz < 0) - return -1; - if(newsz >= PGSIZE - KSTACKSIZE) - return -1; - - if(n > 0) - memset(p->offset + p->sz, 0, n); + uint sz; + struct proc *curproc = myproc(); - p->sz = newsz; + sz = curproc->sz; + if(n > 0){ + if((sz = allocuvm(curproc->pgdir, sz, sz + n)) == 0) + return -1; + } else if(n < 0){ + if((sz = deallocuvm(curproc->pgdir, sz, sz + n)) == 0) + return -1; + } + curproc->sz = sz; + switchuvm(curproc); // reload CR3 to flush stale TLB entries return 0; } \ No newline at end of file diff --git a/proc.h b/proc.h index 39c70d2..54b1386 100644 --- a/proc.h +++ b/proc.h @@ -36,6 +36,7 @@ enum procstate { UNUSED, EMBRYO, RUNNABLE, RUNNING, SLEEPING, ZOMBIE }; // Per-process state struct proc { uint sz; // Size of process memory (bytes) + pde_t *pgdir; // Page table (loaded into CR3 by switchuvm; cloned by copyuvm in fork, freed by freevm in wait) char *offset; // Where code is copied char *kstack; // Bottom of kernel stack for this process enum procstate state; // Process state diff --git a/syscall.c b/syscall.c index 71d797f..e768539 100644 --- a/syscall.c +++ b/syscall.c @@ -14,15 +14,18 @@ // to a saved program counter, and then the first argument. // Fetch the int at addr from the current process. +// Under paging, p->pgdir is loaded in CR3 during syscall handling, so +// kernel can dereference user virts directly (they're PTE_U, kernel-readable). +// The bound check is now against the process's mapped size, not the old +// segmentation fixed-offset window. 
int fetchint(uint addr, int *ip) { struct proc *curproc = myproc(); - if(addr >= PGSIZE - KSTACKSIZE || addr+4 > PGSIZE - KSTACKSIZE) { + if(addr >= curproc->sz || addr+4 > curproc->sz) return -1; - } - *ip = *(int*)(addr + curproc->offset); + *ip = *(int*)(addr); return 0; } @@ -35,10 +38,10 @@ fetchstr(uint addr, char **pp) char *s, *ep; struct proc *curproc = myproc(); - if(addr >= PGSIZE - KSTACKSIZE) + if(addr >= curproc->sz) return -1; - *pp = (char*)(addr + curproc->offset); - ep = (char*)(PGSIZE - KSTACKSIZE + curproc->offset); + *pp = (char*)addr; + ep = (char*)curproc->sz; for(s = *pp; s < ep; s++){ if(*s == 0) return s - *pp; @@ -64,11 +67,9 @@ argptr(int n, char **pp, int size) if(argint(n, &i) < 0) return -1; - if((uint)i >= PGSIZE - KSTACKSIZE || (uint)i+size > PGSIZE - KSTACKSIZE) + if(size < 0 || (uint)i >= curproc->sz || (uint)i+size > curproc->sz) return -1; - - // You must add curproc->offset here so it reads the correct memory! - *pp = (char*)(i + curproc->offset); + *pp = (char*)i; return 0; } diff --git a/vm.c b/vm.c index caf87ae..99e40aa 100644 --- a/vm.c +++ b/vm.c @@ -1,3 +1,4 @@ +#include "param.h" #include "types.h" #include "memlayout.h" #include "asm.h" @@ -5,6 +6,10 @@ #include "proc.h" #include "defs.h" #include "x86.h" +#include "elf.h" + +extern char data[]; // defined by kernel.ld +pde_t *kpgdir; // for use in scheduler() // Set up CPU's kernel segment descriptors. // Run once on entry on each CPU. 
@@ -30,12 +35,10 @@ switchuvm(struct proc *p) panic("switchuvm: no process"); if(p->kstack == 0) panic("switchuvm: no kstack"); + if(p->pgdir == 0) + panic("switchuvm: no pgdir"); pushcli(); - mycpu()->gdt[SEG_UCODE] = SEG(STA_X|STA_R, p->offset, PROCSIZE << 12, DPL_USER); - mycpu()->gdt[SEG_UDATA] = SEG(STA_W, p->offset, PROCSIZE << 12, DPL_USER); - lgdt(mycpu()->gdt, sizeof(mycpu()->gdt)); - mycpu()->gdt[SEG_TSS] = SEG16(STS_T32A, &mycpu()->ts, sizeof(mycpu()->ts)-1, 0); mycpu()->gdt[SEG_TSS].s = 0; @@ -44,6 +47,325 @@ switchuvm(struct proc *p) // setting IOPL=0 in eflags *and* iomb beyond the tss segment limit // forbids I/O instructions (e.g., inb and outb) from user space mycpu()->ts.iomb = (ushort) 0xFFFF; + + // Flat 0-base / 4GB-limit user segments. All isolation now flows + // through the MMU via p->pgdir, not segment base. + mycpu()->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, DPL_USER); + mycpu()->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER); + lgdt(mycpu()->gdt, sizeof(mycpu()->gdt)); + ltr(SEG_TSS << 3); + + // Switch h/w page table register to the process's page table. + lcr3(V2P(p->pgdir)); popcli(); +} + +// Return the address of the PTE in page table pgdir +// that corresponds to virtual address va. If alloc!=0, +// create any required page table pages. +static pte_t * +walkpgdir(pde_t *pgdir, const void *va, int alloc) +{ + pde_t *pde; + pte_t *pgtab; + + pde = &pgdir[PDX(va)]; + if(*pde & PTE_P){ + pgtab = (pte_t*)P2V(PTE_ADDR(*pde)); + } else { + if(!alloc || (pgtab = (pte_t*)kalloc()) == 0) + return 0; + // Make sure all those PTE_P bits are zero. + memset(pgtab, 0, PGSIZE); + // The permissions here are overly generous, but they can + // be further restricted by the permissions in the page table + // entries, if necessary. + *pde = V2P(pgtab) | PTE_P | PTE_W | PTE_U; + } + return &pgtab[PTX(va)]; +} + +// Create PTEs for virtual addresses starting at va that refer to +// physical addresses starting at pa. 
va and size might not +// be page-aligned. +static int +mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm) +{ + char *a, *last; + pte_t *pte; + + a = (char*)PGROUNDDOWN((uint)va); + last = (char*)PGROUNDDOWN(((uint)va) + size - 1); + for(;;){ + if((pte = walkpgdir(pgdir, a, 1)) == 0) + return -1; + if(*pte & PTE_P) + panic("remap"); + *pte = pa | perm | PTE_P; + if(a == last) + break; + a += PGSIZE; + pa += PGSIZE; + } + return 0; +} + +// This table defines the kernel's mappings, which are present in +// every process's page table. +static struct kmap { + void *virt; + uint phys_start; + uint phys_end; + int perm; +} kmap[] = { + { (void*)KERNBASE, 0, EXTMEM, PTE_W}, // I/O space + { (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0}, // kern text+rodata + { (void*)data, V2P(data), PHYSTOP, PTE_W}, // kern data+memory + { (void*)DEVSPACE, DEVSPACE, 0, PTE_W}, // more devices +}; + +// Set up kernel part of a page table. +pde_t* +setupkvm(void) +{ + pde_t *pgdir; + struct kmap *k; + + if((pgdir = (pde_t*)kalloc()) == 0) + return 0; + memset(pgdir, 0, PGSIZE); + if (P2V(PHYSTOP) > (void*)DEVSPACE) + panic("PHYSTOP too high"); + for(k = kmap; k < &kmap[NELEM(kmap)]; k++) + if(mappages(pgdir, k->virt, k->phys_end - k->phys_start, + (uint)k->phys_start, k->perm) < 0) { + freevm(pgdir); + return 0; + } + return pgdir; +} + +// Switch h/w page table register to the kernel-only page table, +// for when no process is running. +void +switchkvm(void) +{ + lcr3(V2P(kpgdir)); // switch to the kernel page table +} + +// Allocate one page table for the machine for the kernel address +// space for scheduler processes. +void +kvmalloc(void) +{ + kpgdir = setupkvm(); + switchkvm(); +} + +// Load the initcode into address 0 of pgdir. +// sz must be less than a page. 
+void +inituvm(pde_t *pgdir, char *init, uint sz) +{ + char *mem; + + if(sz >= PGSIZE) + panic("inituvm: more than a page"); + mem = kalloc(); + memset(mem, 0, PGSIZE); + mappages(pgdir, 0, PGSIZE, V2P(mem), PTE_W|PTE_U); + memmove(mem, init, sz); +} + +// Allocate page tables and physical memory to grow process from oldsz to +// newsz, which need not be page aligned. Returns new size or 0 on error. +int +allocuvm(pde_t *pgdir, uint oldsz, uint newsz) +{ + char *mem; + uint a; + + if(newsz >= KERNBASE) + return 0; + if(newsz < oldsz) + return oldsz; + + a = PGROUNDUP(oldsz); + for(; a < newsz; a += PGSIZE){ + mem = kalloc(); + if(mem == 0){ + cprintf("allocuvm out of memory\n"); + deallocuvm(pgdir, newsz, oldsz); + return 0; + } + memset(mem, 0, PGSIZE); + if(mappages(pgdir, (char*)a, PGSIZE, V2P(mem), PTE_W|PTE_U) < 0){ + cprintf("allocuvm out of memory (2)\n"); + deallocuvm(pgdir, newsz, oldsz); + kfree(mem); + return 0; + } + } + return newsz; +} + +// Deallocate user pages to bring the process size from oldsz to +// newsz. oldsz and newsz need not be page-aligned, nor does newsz +// need to be less than oldsz. oldsz can be larger than the actual +// process size. Returns the new process size. +int +deallocuvm(pde_t *pgdir, uint oldsz, uint newsz) +{ + pte_t *pte; + uint a, pa; + + if(newsz >= oldsz) + return oldsz; + + a = PGROUNDUP(newsz); + for(; a < oldsz; a += PGSIZE){ + pte = walkpgdir(pgdir, (char*)a, 0); + if(!pte) + a = PGADDR(PDX(a) + 1, 0, 0) - PGSIZE; + else if((*pte & PTE_P) != 0){ + pa = PTE_ADDR(*pte); + if(pa == 0) + panic("kfree"); + char *v = P2V(pa); + kfree(v); + *pte = 0; + } + } + return newsz; +} + +// Free a page table and all the physical memory pages +// in the user part. 
+void +freevm(pde_t *pgdir) +{ + uint i; + + if(pgdir == 0) + panic("freevm: no pgdir"); + deallocuvm(pgdir, KERNBASE, 0); + for(i = 0; i < NPDENTRIES; i++){ + if(pgdir[i] & PTE_P){ + char * v = P2V(PTE_ADDR(pgdir[i])); + kfree(v); + } + } + kfree((char*)pgdir); +} + +// Given a parent process's page table, create a copy +// of it for a child. +pde_t* +copyuvm(pde_t *pgdir, uint sz) +{ + pde_t *d; + pte_t *pte; + uint pa, i, flags; + char *mem; + + if((d = setupkvm()) == 0) + return 0; + for(i = 0; i < sz; i += PGSIZE){ + if((pte = walkpgdir(pgdir, (void *) i, 0)) == 0) + panic("copyuvm: pte should exist"); + if(!(*pte & PTE_P)) + panic("copyuvm: page not present"); + pa = PTE_ADDR(*pte); + flags = PTE_FLAGS(*pte); + if((mem = kalloc()) == 0) + goto bad; + memmove(mem, (char*)P2V(pa), PGSIZE); + if(mappages(d, (void*)i, PGSIZE, V2P(mem), flags) < 0) { + kfree(mem); + goto bad; + } + } + return d; + +bad: + freevm(d); + return 0; +} + +// Load a program segment into pgdir. addr must be page-aligned +// and the pages from addr to addr+sz must already be mapped. +int +loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz) +{ + uint i, pa, n; + pte_t *pte; + + if((uint) addr % PGSIZE != 0) + panic("loaduvm: addr must be page aligned"); + for(i = 0; i < sz; i += PGSIZE){ + if((pte = walkpgdir(pgdir, addr+i, 0)) == 0) + panic("loaduvm: address should exist"); + pa = PTE_ADDR(*pte); + if(sz - i < PGSIZE) + n = sz - i; + else + n = PGSIZE; + if(readi(ip, P2V(pa), offset+i, n) != n) + return -1; + } + return 0; +} + +// Clear PTE_U on a page. Used to create an inaccessible +// page beneath the user stack. +void +clearpteu(pde_t *pgdir, char *uva) +{ + pte_t *pte; + + pte = walkpgdir(pgdir, uva, 0); + if(pte == 0) + panic("clearpteu"); + *pte &= ~PTE_U; +} + +// Map user virtual address to kernel address. 
+char* +uva2ka(pde_t *pgdir, char *uva) +{ + pte_t *pte; + + pte = walkpgdir(pgdir, uva, 0); + if((*pte & PTE_P) == 0) + return 0; + if((*pte & PTE_U) == 0) + return 0; + return (char*)P2V(PTE_ADDR(*pte)); +} + +// Copy len bytes from p to user address va in page table pgdir. +// Most useful when pgdir is not the current page table. +// uva2ka ensures this only works for PTE_U pages. +int +copyout(pde_t *pgdir, uint va, void *p, uint len) +{ + char *buf, *pa0; + uint n, va0; + + buf = (char*)p; + while(len > 0){ + va0 = (uint)PGROUNDDOWN(va); + pa0 = uva2ka(pgdir, (char*)va0); + if(pa0 == 0) + return -1; + n = PGSIZE - (va - va0); + if(n > len) + n = len; + memmove(pa0 + (va - va0), buf, n); + len -= n; + buf += n; + va = va0 + PGSIZE; + } + return 0; } \ No newline at end of file diff --git a/x86.h b/x86.h index ac85f1e..d1e4517 100644 --- a/x86.h +++ b/x86.h @@ -126,6 +126,12 @@ rcr2(void) return val; } +static inline void +lcr3(uint val) +{ + asm volatile("movl %0,%%cr3" : : "r" (val)); +} + static inline void noop(void) {