Skip to content

Lab 2 system calls

In the last lab you used system calls to write a few utilities. In this lab you will add some new system calls to xv6, which will help you understand how they work and will expose you to some of the internals of the xv6 kernel. You will add more system calls in later labs.

Before you start coding, read Chapter 2 of the xv6 book, and Sections 4.3 and 4.4 of Chapter 4, and related source files:

  • The user-space "stubs" that route system calls into the kernel are in user/usys.S, which is generated by user/usys.pl when you run make. Declarations are in user/user.h

user/usys.S:

# generated by usys.pl - do not edit
#include "kernel/syscall.h"
.global fork
fork:
 li a7, SYS_fork
 ecall
 ret
.global exit
exit:
 li a7, SYS_exit
 ecall
 ret
.global wait
wait:
 li a7, SYS_wait
 ecall
 ret
.global pipe
pipe:
 li a7, SYS_pipe
 ecall
 ret
.global read
read:
 li a7, SYS_read
 ecall
 ret
.global write
write:
 li a7, SYS_write
 ecall
 ret
.global close
close:
 li a7, SYS_close
 ecall
 ret
.global kill
kill:
 li a7, SYS_kill
 ecall
 ret
.global exec
exec:
 li a7, SYS_exec
 ecall
 ret
.global open
open:
 li a7, SYS_open
 ecall
 ret
.global mknod
mknod:
 li a7, SYS_mknod
 ecall
 ret
.global unlink
unlink:
 li a7, SYS_unlink
 ecall
 ret
.global fstat
fstat:
 li a7, SYS_fstat
 ecall
 ret
.global link
link:
 li a7, SYS_link
 ecall
 ret
.global mkdir
mkdir:
 li a7, SYS_mkdir
 ecall
 ret
.global chdir
chdir:
 li a7, SYS_chdir
 ecall
 ret
.global dup
dup:
 li a7, SYS_dup
 ecall
 ret
.global getpid
getpid:
 li a7, SYS_getpid
 ecall
 ret
.global sbrk
sbrk:
 li a7, SYS_sbrk
 ecall
 ret
.global sleep
sleep:
 li a7, SYS_sleep
 ecall
 ret
.global uptime
uptime:
 li a7, SYS_uptime
 ecall
 ret

usys.pl:

#!/usr/bin/perl -w

# Generate usys.S, the stubs for syscalls.

print "# generated by usys.pl - do not edit\n";

print "#include \"kernel/syscall.h\"\n";

sub entry {
    my $name = shift;
    print ".global $name\n";
    print "${name}:\n";
    print " li a7, SYS_${name}\n";
    print " ecall\n";
    print " ret\n";
}

entry("fork");
entry("exit");
entry("wait");
entry("pipe");
entry("read");
entry("write");
entry("close");
entry("kill");
entry("exec");
entry("open");
entry("mknod");
entry("unlink");
entry("fstat");
entry("link");
entry("mkdir");
entry("chdir");
entry("dup");
entry("getpid");
entry("sbrk");
entry("sleep");
entry("uptime");
  • The kernel-space code that routes a system call to the kernel function that implements it is in kernel/syscall.c and kernel/syscall.h.

kerner/syscall.c:

#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "riscv.h"
#include "spinlock.h"
#include "proc.h"
#include "syscall.h"
#include "defs.h"

// Fetch the uint64 at addr from the current process.
int
fetchaddr(uint64 addr, uint64 *ip)
{
  struct proc *p = myproc();
  if(addr >= p->sz || addr+sizeof(uint64) > p->sz) // both tests needed, in case of overflow
    return -1;
  if(copyin(p->pagetable, (char *)ip, addr, sizeof(*ip)) != 0)
    return -1;
  return 0;
}

// Fetch the nul-terminated string at addr from the current process.
// Returns length of string, not including nul, or -1 for error.
int
fetchstr(uint64 addr, char *buf, int max)
{
  struct proc *p = myproc();
  if(copyinstr(p->pagetable, buf, addr, max) < 0)
    return -1;
  return strlen(buf);
}

static uint64
argraw(int n)
{
  struct proc *p = myproc();
  switch (n) {
  case 0:
    return p->trapframe->a0;
  case 1:
    return p->trapframe->a1;
  case 2:
    return p->trapframe->a2;
  case 3:
    return p->trapframe->a3;
  case 4:
    return p->trapframe->a4;
  case 5:
    return p->trapframe->a5;
  }
  panic("argraw");
  return -1;
}

// Fetch the nth 32-bit system call argument.
void
argint(int n, int *ip)
{
  *ip = argraw(n);
}

// Retrieve an argument as a pointer.
// Doesn't check for legality, since
// copyin/copyout will do that.
void
argaddr(int n, uint64 *ip)
{
  *ip = argraw(n);
}

// Fetch the nth word-sized system call argument as a null-terminated string.
// Copies into buf, at most max.
// Returns string length if OK (including nul), -1 if error.
int
argstr(int n, char *buf, int max)
{
  uint64 addr;
  argaddr(n, &addr);
  return fetchstr(addr, buf, max);
}

// Prototypes for the functions that handle system calls.
extern uint64 sys_fork(void);
extern uint64 sys_exit(void);
extern uint64 sys_wait(void);
extern uint64 sys_pipe(void);
extern uint64 sys_read(void);
extern uint64 sys_kill(void);
extern uint64 sys_exec(void);
extern uint64 sys_fstat(void);
extern uint64 sys_chdir(void);
extern uint64 sys_dup(void);
extern uint64 sys_getpid(void);
extern uint64 sys_sbrk(void);
extern uint64 sys_sleep(void);
extern uint64 sys_uptime(void);
extern uint64 sys_open(void);
extern uint64 sys_write(void);
extern uint64 sys_mknod(void);
extern uint64 sys_unlink(void);
extern uint64 sys_link(void);
extern uint64 sys_mkdir(void);
extern uint64 sys_close(void);

// An array mapping syscall numbers from syscall.h
// to the function that handles the system call.
static uint64 (*syscalls[])(void) = {
[SYS_fork]    sys_fork,
[SYS_exit]    sys_exit,
[SYS_wait]    sys_wait,
[SYS_pipe]    sys_pipe,
[SYS_read]    sys_read,
[SYS_kill]    sys_kill,
[SYS_exec]    sys_exec,
[SYS_fstat]   sys_fstat,
[SYS_chdir]   sys_chdir,
[SYS_dup]     sys_dup,
[SYS_getpid]  sys_getpid,
[SYS_sbrk]    sys_sbrk,
[SYS_sleep]   sys_sleep,
[SYS_uptime]  sys_uptime,
[SYS_open]    sys_open,
[SYS_write]   sys_write,
[SYS_mknod]   sys_mknod,
[SYS_unlink]  sys_unlink,
[SYS_link]    sys_link,
[SYS_mkdir]   sys_mkdir,
[SYS_close]   sys_close,
};

void
syscall(void)
{
  int num;
  struct proc *p = myproc();

  num = p->trapframe->a7;
  if(num > 0 && num < NELEM(syscalls) && syscalls[num]) {
    // Use num to lookup the system call function for num, call it,
    // and store its return value in p->trapframe->a0
    p->trapframe->a0 = syscalls[num]();
  } else {
    printf("%d %s: unknown sys call %d\n",
            p->pid, p->name, num);
    p->trapframe->a0 = -1;
  }
}

kernel/syscall.h:

// System call numbers
#define SYS_fork    1
#define SYS_exit    2
#define SYS_wait    3
#define SYS_pipe    4
#define SYS_read    5
#define SYS_kill    6
#define SYS_exec    7
#define SYS_fstat   8
#define SYS_chdir   9
#define SYS_dup    10
#define SYS_getpid 11
#define SYS_sbrk   12
#define SYS_sleep  13
#define SYS_uptime 14
#define SYS_open   15
#define SYS_write  16
#define SYS_mknod  17
#define SYS_unlink 18
#define SYS_link   19
#define SYS_mkdir  20
#define SYS_close  21
  • Process-related code is kernel/proc.h and kernel/proc.c.

kernel/proc.h:

// Saved registers for kernel context switches.
struct context {
  uint64 ra;
  uint64 sp;

  // callee-saved
  uint64 s0;
  uint64 s1;
  uint64 s2;
  uint64 s3;
  uint64 s4;
  uint64 s5;
  uint64 s6;
  uint64 s7;
  uint64 s8;
  uint64 s9;
  uint64 s10;
  uint64 s11;
};

// Per-CPU state.
struct cpu {
  struct proc *proc;          // The process running on this cpu, or null.
  struct context context;     // swtch() here to enter scheduler().
  int noff;                   // Depth of push_off() nesting.
  int intena;                 // Were interrupts enabled before push_off()?
};

extern struct cpu cpus[NCPU];

// per-process data for the trap handling code in trampoline.S.
// sits in a page by itself just under the trampoline page in the
// user page table. not specially mapped in the kernel page table.
// uservec in trampoline.S saves user registers in the trapframe,
// then initializes registers from the trapframe's
// kernel_sp, kernel_hartid, kernel_satp, and jumps to kernel_trap.
// usertrapret() and userret in trampoline.S set up
// the trapframe's kernel_*, restore user registers from the
// trapframe, switch to the user page table, and enter user space.
// the trapframe includes callee-saved user registers like s0-s11 because the
// return-to-user path via usertrapret() doesn't return through
// the entire kernel call stack.
struct trapframe {
  /*   0 */ uint64 kernel_satp;   // kernel page table
  /*   8 */ uint64 kernel_sp;     // top of process's kernel stack
  /*  16 */ uint64 kernel_trap;   // usertrap()
  /*  24 */ uint64 epc;           // saved user program counter
  /*  32 */ uint64 kernel_hartid; // saved kernel tp
  /*  40 */ uint64 ra;
  /*  48 */ uint64 sp;
  /*  56 */ uint64 gp;
  /*  64 */ uint64 tp;
  /*  72 */ uint64 t0;
  /*  80 */ uint64 t1;
  /*  88 */ uint64 t2;
  /*  96 */ uint64 s0;
  /* 104 */ uint64 s1;
  /* 112 */ uint64 a0;
  /* 120 */ uint64 a1;
  /* 128 */ uint64 a2;
  /* 136 */ uint64 a3;
  /* 144 */ uint64 a4;
  /* 152 */ uint64 a5;
  /* 160 */ uint64 a6;
  /* 168 */ uint64 a7;
  /* 176 */ uint64 s2;
  /* 184 */ uint64 s3;
  /* 192 */ uint64 s4;
  /* 200 */ uint64 s5;
  /* 208 */ uint64 s6;
  /* 216 */ uint64 s7;
  /* 224 */ uint64 s8;
  /* 232 */ uint64 s9;
  /* 240 */ uint64 s10;
  /* 248 */ uint64 s11;
  /* 256 */ uint64 t3;
  /* 264 */ uint64 t4;
  /* 272 */ uint64 t5;
  /* 280 */ uint64 t6;
};

enum procstate { UNUSED, USED, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };

// Per-process state
struct proc {
  struct spinlock lock;

  // p->lock must be held when using these:
  enum procstate state;        // Process state
  void *chan;                  // If non-zero, sleeping on chan
  int killed;                  // If non-zero, have been killed
  int xstate;                  // Exit status to be returned to parent's wait
  int pid;                     // Process ID

  // wait_lock must be held when using this:
  struct proc *parent;         // Parent process

  // these are private to the process, so p->lock need not be held.
  uint64 kstack;               // Virtual address of kernel stack
  uint64 sz;                   // Size of process memory (bytes)
  pagetable_t pagetable;       // User page table
  struct trapframe *trapframe; // data page for trampoline.S
  struct context context;      // swtch() here to run process
  struct file *ofile[NOFILE];  // Open files
  struct inode *cwd;           // Current directory
  char name[16];               // Process name (debugging)
};

kernel/proc.c:

#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "riscv.h"
#include "spinlock.h"
#include "proc.h"
#include "defs.h"

struct cpu cpus[NCPU];

struct proc proc[NPROC];

struct proc *initproc;

int nextpid = 1;
struct spinlock pid_lock;

extern void forkret(void);
static void freeproc(struct proc *p);

extern char trampoline[]; // trampoline.S

// helps ensure that wakeups of wait()ing
// parents are not lost. helps obey the
// memory model when using p->parent.
// must be acquired before any p->lock.
struct spinlock wait_lock;

// Allocate a page for each process's kernel stack.
// Map it high in memory, followed by an invalid
// guard page.
void
proc_mapstacks(pagetable_t kpgtbl)
{
  struct proc *p;

  for(p = proc; p < &proc[NPROC]; p++) {
    char *pa = kalloc();
    if(pa == 0)
      panic("kalloc");
    uint64 va = KSTACK((int) (p - proc));
    kvmmap(kpgtbl, va, (uint64)pa, PGSIZE, PTE_R | PTE_W);
  }
}

// initialize the proc table.
void
procinit(void)
{
  struct proc *p;

  initlock(&pid_lock, "nextpid");
  initlock(&wait_lock, "wait_lock");
  for(p = proc; p < &proc[NPROC]; p++) {
      initlock(&p->lock, "proc");
      p->state = UNUSED;
      p->kstack = KSTACK((int) (p - proc));
  }
}

// Must be called with interrupts disabled,
// to prevent race with process being moved
// to a different CPU.
int
cpuid()
{
  int id = r_tp();
  return id;
}

// Return this CPU's cpu struct.
// Interrupts must be disabled.
struct cpu*
mycpu(void)
{
  int id = cpuid();
  struct cpu *c = &cpus[id];
  return c;
}

// Return the current struct proc *, or zero if none.
struct proc*
myproc(void)
{
  push_off();
  struct cpu *c = mycpu();
  struct proc *p = c->proc;
  pop_off();
  return p;
}

int
allocpid()
{
  int pid;

  acquire(&pid_lock);
  pid = nextpid;
  nextpid = nextpid + 1;
  release(&pid_lock);

  return pid;
}

// Look in the process table for an UNUSED proc.
// If found, initialize state required to run in the kernel,
// and return with p->lock held.
// If there are no free procs, or a memory allocation fails, return 0.
static struct proc*
allocproc(void)
{
  struct proc *p;

  for(p = proc; p < &proc[NPROC]; p++) {
    acquire(&p->lock);
    if(p->state == UNUSED) {
      goto found;
    } else {
      release(&p->lock);
    }
  }
  return 0;

found:
  p->pid = allocpid();
  p->state = USED;

  // Allocate a trapframe page.
  if((p->trapframe = (struct trapframe *)kalloc()) == 0){
    freeproc(p);
    release(&p->lock);
    return 0;
  }

  // An empty user page table.
  p->pagetable = proc_pagetable(p);
  if(p->pagetable == 0){
    freeproc(p);
    release(&p->lock);
    return 0;
  }

  // Set up new context to start executing at forkret,
  // which returns to user space.
  memset(&p->context, 0, sizeof(p->context));
  p->context.ra = (uint64)forkret;
  p->context.sp = p->kstack + PGSIZE;

  return p;
}

// free a proc structure and the data hanging from it,
// including user pages.
// p->lock must be held.
static void
freeproc(struct proc *p)
{
  if(p->trapframe)
    kfree((void*)p->trapframe);
  p->trapframe = 0;
  if(p->pagetable)
    proc_freepagetable(p->pagetable, p->sz);
  p->pagetable = 0;
  p->sz = 0;
  p->pid = 0;
  p->parent = 0;
  p->name[0] = 0;
  p->chan = 0;
  p->killed = 0;
  p->xstate = 0;
  p->state = UNUSED;
}

// Create a user page table for a given process, with no user memory,
// but with trampoline and trapframe pages.
pagetable_t
proc_pagetable(struct proc *p)
{
  pagetable_t pagetable;

  // An empty page table.
  pagetable = uvmcreate();
  if(pagetable == 0)
    return 0;

  // map the trampoline code (for system call return)
  // at the highest user virtual address.
  // only the supervisor uses it, on the way
  // to/from user space, so not PTE_U.
  if(mappages(pagetable, TRAMPOLINE, PGSIZE,
              (uint64)trampoline, PTE_R | PTE_X) < 0){
    uvmfree(pagetable, 0);
    return 0;
  }

  // map the trapframe page just below the trampoline page, for
  // trampoline.S.
  if(mappages(pagetable, TRAPFRAME, PGSIZE,
              (uint64)(p->trapframe), PTE_R | PTE_W) < 0){
    uvmunmap(pagetable, TRAMPOLINE, 1, 0);
    uvmfree(pagetable, 0);
    return 0;
  }

  return pagetable;
}

// Free a process's page table, and free the
// physical memory it refers to.
void
proc_freepagetable(pagetable_t pagetable, uint64 sz)
{
  uvmunmap(pagetable, TRAMPOLINE, 1, 0);
  uvmunmap(pagetable, TRAPFRAME, 1, 0);
  uvmfree(pagetable, sz);
}

// a user program that calls exec("/init")
// assembled from ../user/initcode.S
// od -t xC ../user/initcode
uchar initcode[] = {
  0x17, 0x05, 0x00, 0x00, 0x13, 0x05, 0x45, 0x02,
  0x97, 0x05, 0x00, 0x00, 0x93, 0x85, 0x35, 0x02,
  0x93, 0x08, 0x70, 0x00, 0x73, 0x00, 0x00, 0x00,
  0x93, 0x08, 0x20, 0x00, 0x73, 0x00, 0x00, 0x00,
  0xef, 0xf0, 0x9f, 0xff, 0x2f, 0x69, 0x6e, 0x69,
  0x74, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00
};

// Set up first user process.
void
userinit(void)
{
  struct proc *p;

  p = allocproc();
  initproc = p;

  // allocate one user page and copy initcode's instructions
  // and data into it.
  uvmfirst(p->pagetable, initcode, sizeof(initcode));
  p->sz = PGSIZE;

  // prepare for the very first "return" from kernel to user.
  p->trapframe->epc = 0;      // user program counter
  p->trapframe->sp = PGSIZE;  // user stack pointer

  safestrcpy(p->name, "initcode", sizeof(p->name));
  p->cwd = namei("/");

  p->state = RUNNABLE;

  release(&p->lock);
}

// Grow or shrink user memory by n bytes.
// Return 0 on success, -1 on failure.
int
growproc(int n)
{
  uint64 sz;
  struct proc *p = myproc();

  sz = p->sz;
  if(n > 0){
    if((sz = uvmalloc(p->pagetable, sz, sz + n, PTE_W)) == 0) {
      return -1;
    }
  } else if(n < 0){
    sz = uvmdealloc(p->pagetable, sz, sz + n);
  }
  p->sz = sz;
  return 0;
}

// Create a new process, copying the parent.
// Sets up child kernel stack to return as if from fork() system call.
int
fork(void)
{
  int i, pid;
  struct proc *np;
  struct proc *p = myproc();

  // Allocate process.
  if((np = allocproc()) == 0){
    return -1;
  }

  // Copy user memory from parent to child.
  if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){
    freeproc(np);
    release(&np->lock);
    return -1;
  }
  np->sz = p->sz;

  // copy saved user registers.
  *(np->trapframe) = *(p->trapframe);

  // Cause fork to return 0 in the child.
  np->trapframe->a0 = 0;

  // increment reference counts on open file descriptors.
  for(i = 0; i < NOFILE; i++)
    if(p->ofile[i])
      np->ofile[i] = filedup(p->ofile[i]);
  np->cwd = idup(p->cwd);

  safestrcpy(np->name, p->name, sizeof(p->name));

  pid = np->pid;

  release(&np->lock);

  acquire(&wait_lock);
  np->parent = p;
  release(&wait_lock);

  acquire(&np->lock);
  np->state = RUNNABLE;
  release(&np->lock);

  return pid;
}

// Pass p's abandoned children to init.
// Caller must hold wait_lock.
void
reparent(struct proc *p)
{
  struct proc *pp;

  for(pp = proc; pp < &proc[NPROC]; pp++){
    if(pp->parent == p){
      pp->parent = initproc;
      wakeup(initproc);
    }
  }
}

// Exit the current process.  Does not return.
// An exited process remains in the zombie state
// until its parent calls wait().
void
exit(int status)
{
  struct proc *p = myproc();

  if(p == initproc)
    panic("init exiting");

  // Close all open files.
  for(int fd = 0; fd < NOFILE; fd++){
    if(p->ofile[fd]){
      struct file *f = p->ofile[fd];
      fileclose(f);
      p->ofile[fd] = 0;
    }
  }

  begin_op();
  iput(p->cwd);
  end_op();
  p->cwd = 0;

  acquire(&wait_lock);

  // Give any children to init.
  reparent(p);

  // Parent might be sleeping in wait().
  wakeup(p->parent);

  acquire(&p->lock);

  p->xstate = status;
  p->state = ZOMBIE;

  release(&wait_lock);

  // Jump into the scheduler, never to return.
  sched();
  panic("zombie exit");
}

// Wait for a child process to exit and return its pid.
// Return -1 if this process has no children.
int
wait(uint64 addr)
{
  struct proc *pp;
  int havekids, pid;
  struct proc *p = myproc();

  acquire(&wait_lock);

  for(;;){
    // Scan through table looking for exited children.
    havekids = 0;
    for(pp = proc; pp < &proc[NPROC]; pp++){
      if(pp->parent == p){
        // make sure the child isn't still in exit() or swtch().
        acquire(&pp->lock);

        havekids = 1;
        if(pp->state == ZOMBIE){
          // Found one.
          pid = pp->pid;
          if(addr != 0 && copyout(p->pagetable, addr, (char *)&pp->xstate,
                                  sizeof(pp->xstate)) < 0) {
            release(&pp->lock);
            release(&wait_lock);
            return -1;
          }
          freeproc(pp);
          release(&pp->lock);
          release(&wait_lock);
          return pid;
        }
        release(&pp->lock);
      }
    }

    // No point waiting if we don't have any children.
    if(!havekids || killed(p)){
      release(&wait_lock);
      return -1;
    }

    // Wait for a child to exit.
    sleep(p, &wait_lock);  //DOC: wait-sleep
  }
}

// Per-CPU process scheduler.
// Each CPU calls scheduler() after setting itself up.
// Scheduler never returns.  It loops, doing:
//  - choose a process to run.
//  - swtch to start running that process.
//  - eventually that process transfers control
//    via swtch back to the scheduler.
void
scheduler(void)
{
  struct proc *p;
  struct cpu *c = mycpu();

  c->proc = 0;
  for(;;){
    // The most recent process to run may have had interrupts
    // turned off; enable them to avoid a deadlock if all
    // processes are waiting.
    intr_on();

    for(p = proc; p < &proc[NPROC]; p++) {
      acquire(&p->lock);
      if(p->state == RUNNABLE) {
        // Switch to chosen process.  It is the process's job
        // to release its lock and then reacquire it
        // before jumping back to us.
        p->state = RUNNING;
        c->proc = p;
        swtch(&c->context, &p->context);

        // Process is done running for now.
        // It should have changed its p->state before coming back.
        c->proc = 0;
      }
      release(&p->lock);
    }
  }
}

// Switch to scheduler.  Must hold only p->lock
// and have changed proc->state. Saves and restores
// intena because intena is a property of this
// kernel thread, not this CPU. It should
// be proc->intena and proc->noff, but that would
// break in the few places where a lock is held but
// there's no process.
void
sched(void)
{
  int intena;
  struct proc *p = myproc();

  if(!holding(&p->lock))
    panic("sched p->lock");
  if(mycpu()->noff != 1)
    panic("sched locks");
  if(p->state == RUNNING)
    panic("sched running");
  if(intr_get())
    panic("sched interruptible");

  intena = mycpu()->intena;
  swtch(&p->context, &mycpu()->context);
  mycpu()->intena = intena;
}

// Give up the CPU for one scheduling round.
void
yield(void)
{
  struct proc *p = myproc();
  acquire(&p->lock);
  p->state = RUNNABLE;
  sched();
  release(&p->lock);
}

// A fork child's very first scheduling by scheduler()
// will swtch to forkret.
void
forkret(void)
{
  static int first = 1;

  // Still holding p->lock from scheduler.
  release(&myproc()->lock);

  if (first) {
    // File system initialization must be run in the context of a
    // regular process (e.g., because it calls sleep), and thus cannot
    // be run from main().
    fsinit(ROOTDEV);

    first = 0;
    // ensure other cores see first=0.
    __sync_synchronize();
  }

  usertrapret();
}

// Atomically release lock and sleep on chan.
// Reacquires lock when awakened.
void
sleep(void *chan, struct spinlock *lk)
{
  struct proc *p = myproc();

  // Must acquire p->lock in order to
  // change p->state and then call sched.
  // Once we hold p->lock, we can be
  // guaranteed that we won't miss any wakeup
  // (wakeup locks p->lock),
  // so it's okay to release lk.

  acquire(&p->lock);  //DOC: sleeplock1
  release(lk);

  // Go to sleep.
  p->chan = chan;
  p->state = SLEEPING;

  sched();

  // Tidy up.
  p->chan = 0;

  // Reacquire original lock.
  release(&p->lock);
  acquire(lk);
}

// Wake up all processes sleeping on chan.
// Must be called without any p->lock.
void
wakeup(void *chan)
{
  struct proc *p;

  for(p = proc; p < &proc[NPROC]; p++) {
    if(p != myproc()){
      acquire(&p->lock);
      if(p->state == SLEEPING && p->chan == chan) {
        p->state = RUNNABLE;
      }
      release(&p->lock);
    }
  }
}

// Kill the process with the given pid.
// The victim won't exit until it tries to return
// to user space (see usertrap() in trap.c).
int
kill(int pid)
{
  struct proc *p;

  for(p = proc; p < &proc[NPROC]; p++){
    acquire(&p->lock);
    if(p->pid == pid){
      p->killed = 1;
      if(p->state == SLEEPING){
        // Wake process from sleep().
        p->state = RUNNABLE;
      }
      release(&p->lock);
      return 0;
    }
    release(&p->lock);
  }
  return -1;
}

void
setkilled(struct proc *p)
{
  acquire(&p->lock);
  p->killed = 1;
  release(&p->lock);
}

int
killed(struct proc *p)
{
  int k;

  acquire(&p->lock);
  k = p->killed;
  release(&p->lock);
  return k;
}

// Copy to either a user address, or kernel address,
// depending on usr_dst.
// Returns 0 on success, -1 on error.
int
either_copyout(int user_dst, uint64 dst, void *src, uint64 len)
{
  struct proc *p = myproc();
  if(user_dst){
    return copyout(p->pagetable, dst, src, len);
  } else {
    memmove((char *)dst, src, len);
    return 0;
  }
}

// Copy from either a user address, or kernel address,
// depending on usr_src.
// Returns 0 on success, -1 on error.
int
either_copyin(void *dst, int user_src, uint64 src, uint64 len)
{
  struct proc *p = myproc();
  if(user_src){
    return copyin(p->pagetable, dst, src, len);
  } else {
    memmove(dst, (char*)src, len);
    return 0;
  }
}

// Print a process listing to console.  For debugging.
// Runs when user types ^P on console.
// No lock to avoid wedging a stuck machine further.
void
procdump(void)
{
  static char *states[] = {
  [UNUSED]    "unused",
  [USED]      "used",
  [SLEEPING]  "sleep ",
  [RUNNABLE]  "runble",
  [RUNNING]   "run   ",
  [ZOMBIE]    "zombie"
  };
  struct proc *p;
  char *state;

  printf("\n");
  for(p = proc; p < &proc[NPROC]; p++){
    if(p->state == UNUSED)
      continue;
    if(p->state >= 0 && p->state < NELEM(states) && states[p->state])
      state = states[p->state];
    else
      state = "???";
    printf("%d %s %s", p->pid, state, p->name);
    printf("\n");
  }
}

To start the lab, switch to the syscall branch:

$ git fetch
$ git checkout syscall
$ make clean 

If you run make grade you will see that the grading script cannot exec trace and sysinfotest. Your job is to add the necessary system calls and stubs to make them work.

Screenshot 2023-10-19 at 20.01.10

Screenshot 2023-10-19 at 20.01.21

Screenshot 2023-10-19 at 20.02.48

Screenshot 2023-10-19 at 20.09.52

Screenshot 2023-10-19 at 20.10.37

Using gdb (easy)

In many cases, print statements will be sufficient to debug your kernel, but sometimes being able to single step through some assembly code or inspecting the variables on the stack is helpful.

To learn more about how to run GDB and the common issues that can arise when using GDB, check out this page.

For M2 gdb is not allowed, so I use parallels with Ubuntu 22.04 ARM64 which can share the documents.

gdb: The x86_64 architecture is required for this software. Error: gdb: An unsatisfied requirement failed this build.

To help you become familiar with gdb, run make qemu-gdb and then fire up gdb in another window (see the gdb bullet on the guidance page). Once you have two windows open, type in the gdb window:

$ make qemu-gdb

Screenshot 2023-10-20 at 12.02.11

make qemu-gdb: 命令通常是用于启动QEMU虚拟机并与GDB (GNU Debugger) 连接以进行调试的命令. 以下是它的主要作用:

  • 启动QEMU虚拟机: make qemu-gdb 命令启动了一个QEMU虚拟机实例. QEMU是一个开源的虚拟化工具, 它可以模拟多种处理器体系结构, 并在模拟环境中运行操作系统和应用程序.
  • 启动GDB调试: 通过运行: make qemu-gdb, 将启用GDB调试模式, 使得QEMU虚拟机处于可调试的状态.
  • 建立GDB连接: 一旦QEMU虚拟机启动, 它将等待GDB连接. 可以使用GDB来连接到虚拟机, 以便执行调试操作, 如断点设置, 变量查看, 但不执行等.
  • 调试目标程序, 通常, make qemu-gdb 命令会与要调试的目标程序先关联.

这时候, 在另一个相同目录下的终端中, 输入 gdb-multiarch -x .gdbinit

gdb-multiarch -x .gdbinit: 这个命令的作用是使用多体系结构(multi-archi) 版本的GDB, 同时执行一个包含GDB命令的脚本文件.gdbinit.:

  • gdb-multiarch:这是GDB的一个命令,表示启动多体系结构版本的GDB。多体系结构版本的GDB支持在不同的CPU架构上进行调试。
  • -x .gdbinit:这是一个选项,指示GDB在启动时执行指定的脚本文件 .gdbinit。脚本文件通常包含GDB命令,这些命令在GDB启动时自动执行,用于配置调试环境、设置断点、查看变量等
$ ls -a
$ cat .gdbinit

.gdbinit: Screenshot 2023-10-20 at 12.59.30

这一系列命令是用于配置GDB(GNU Debugger)以进行RISC-V 64位架构(rv64)的目标程序调试的设置。以下是这些命令的含义:

set confirm off:这个命令将GDB的确认提示设置为关闭。通常,当你执行一些可能影响程序执行的命令时,GDB会要求你确认。通过将确认关闭,可以在不需要确认的情况下执行这些命令。

set architecture riscv:rv64:这个命令设置目标程序的体系结构为RISC-V 64位架构(rv64)。这告诉GDB,将要调试的程序是针对RISC-V 64位架构编译的。

target remote 127.0.0.1:25000:这个命令告诉GDB要连接到本地IP地址 127.0.0.1 和端口号 25000 上的远程调试目标。这通常用于与调试代理或硬件调试器建立连接。这个端口可能不是总是 25000,设置要视情况而定(一般都不需要设定,自动生成)

symbol-file kernel/kernel:这个命令指定了符号文件的路径,该符号文件与要调试的程序相关联。符号文件包含了程序的符号信息,如函数名称、变量名称等,可以帮助GDB更好地理解和调试程序。

set disassemble-next-line auto:这个命令设置GDB在逐行反汇编时自动显示反汇编代码。这使得在单步执行时,GDB会显示每一行的反汇编指令,以便更容易理解程序的执行过程。

set riscv use-compressed-breakpoints yes:这个命令启用了对RISC-V压缩指令集(Compressed Instructions)的断点支持。压缩指令集是RISC-V的一种特性,允许使用更紧凑的指令编码。启用这个选项后,GDB会使用压缩指令集来设置断点,以便与压缩指令集编译的程序一起工作。 https://blog.csdn.net/m0_73651896/article/details/133558742

Screenshot 2023-10-20 at 12.00.39

(gdb) b syscall
Breakpoint 1 at 0x80002142: file kernel/syscall.c, line 243.
(gdb) c
Continuing.
[Switching to Thread 1.2]

Thread 2 hit Breakpoint 1, syscall () at kernel/syscall.c:243
243     {
(gdb) layout src
(gdb) backtrace

Screenshot 2023-10-20 at 13.03.19

The layout command splits the window in two, showing where gdb is in the source code. The backtrace prints out the stack backtrace. See Using the GNU Debugger for helpful GDB commands.

Screenshot 2023-10-20 at 13.03.54

Answer the following questions in answers-syscall.txt.

Looking at the backtrace output, which function called syscall?

Solution: Base on the provided backtrace output, the function that called syscall is usertrap which is found in kernel/trap.c at line 67. The function that called syscall is always the one directly below it in the backtrace, which in this case is the entry at #1.

Type n a few times to step past struct proc *p = myproc(); Once past this statement, type p /x *p, which prints the current process's proc struct (see kernel/proc.h>) in hex.

Screenshot 2023-10-20 at 13.26.01

“value has been optimized out” 是一个在使用 GDB(GNU Debugger)时可能遇到的消息。当你试图在调试会话中检查某个变量的值时,如果收到这个消息,这意味着由于编译器的优化,该变量的值在当前的上下文中不可用.

What is the value of p->trapframe->a7 and what does that value represent? (Hint: look user/initcode.S, the first user program xv6 starts.)

Solution: 7

(gdb) p p->trapframe-a7

The p command in GDB stands for "print."

Screenshot 2023-10-20 at 13.32.27

$2: This represents a GDB internal variable. Each time you evaluate an expression in GDB, it assigns the result to a sequentially numbered internal variable prefixed with a $. In this case, it's the second variable (or the second evaluated expression) in your GDB session. You can refer back to this variable later in your session if you want to use or inspect its value again.

7: This is the value of the evaluated expression. In the context of your previous input, it indicates that the value stored in the memory location p->trapframe->a7 is 7.

So, $2 = 7 means that GDB evaluated the expression p->trapframe->a7, found its value to be 7, and stored this result in its internal variable $2

user/initcode.S:

# Initial process that execs /init.
# This code runs in user space.

#include "syscall.h"

# exec(init, argv) 
.globl start       // start入口点
start:                  // 这是代码的执行入口点。它设置了执行exec系统调用所需的寄存器
        la a0, init     // 将字符串"/init\0"的地址加载到寄存器a0
        la a1, argv     // 将argv数组的地址加载到寄存器a1
        li a7, SYS_exec // 将exec的系统调用号加载到寄存器a7
        ecall           // 触发系统调用

# for(;;) exit();     // exit循环
exit:
        li a7, SYS_exit
        ecall
        jal exit

# char init[] = "/init\0";      // init字符串
init:
  .string "/init\0"         // 这定义了一个空终止字符串"/init\0",代表OS应该运行的第一个程序的名称

# char *argv[] = { init, 0 };  // argv参数向量
.p2align 2
argv:
  .long init
  .long 0

// 这定义了要传递给exec系统调用的参数向量(argv)。它是一个指针数组。第一个元素指向字符串"/init\0",第二个元素是一个空指针,表示argv数组的结束

The processor is running in kernel mode, and we can print privileged registers such as sstatus (see RISC-V privileged instructions for a description):

(gdb) p /x $sstatus

What was the previous mode that the CPU was in?

Screenshot 2023-10-20 at 14.35.09

sstatus : 寄存器在 RISC-V 架构中用于跟踪系统的当前状态。特别地,此寄存器中的模式位可以告诉我们 CPU 的当前和之前的执行模式. 对于值 0x22,我们主要关注 SPP 位以确定进入监控器模式之前的 CPU 模式。

对于 RISC-V,SPP (监视器先前特权) 位是:

  • 如果先前的模式是用户模式 (U-mode),则为 0
  • 如果先前的模式是监视器模式 (S-mode),则为 1

要检查 SPP 位,我们查看 0x22 的二进制表示: 0x22 = 0010 0010 (二进制),SPP 位(即第8位)为 0 在进入当前的监控器模式之前,CPU 所处的模式是 用户模式 (U-mode)

Screenshot 2023-10-20 at 14.42.15

In the subsequent part of this lab (or in following labs), it may happen that you make a programming error that causes the xv6 kernel to panic. For example, replace the statement num = p->trapframe->a7; with num = * (int *) 0; at the beginning of syscall, run make qemu, and you will see something similar to:

xv6 kernel is booting

hart 2 starting
hart 1 starting
scause 0x000000000000000d
sepc=0x000000008000215a stval=0x0000000000000000
panic: kerneltrap

Screenshot 2023-10-20 at 14.49.32

Screenshot 2023-10-20 at 14.49.45

Quit out of qemu.

To track down the source of a kernel page-fault panic, search for the sepc value printed for the panic you just saw in the file kernel/kernel.asm, which contains the assembly for the compiled kernel.

Write down the assembly instruction the kernel is panicing at. Which register corresponds to the variable num?

Screenshot 2023-10-20 at 14.57.31

To inspect the state of the processor and the kernel at the faulting instruction, fire up gdb, and set a breakpoint at the faulting epc, like this:

(gdb) b *0x000000008000215a
Breakpoint 1 at 0x8000215a: file kernel/syscall.c, line 247.
(gdb) layout asm
(gdb) c
Continuing.
[Switching to Thread 1.3]

Thread 3 hit Breakpoint 1, syscall () at kernel/syscall.c:247

Confirm that the faulting assembly instruction is the same as the one you found above.

Why does the kernel crash? Hint: look at figure 3-3 in the text; is address 0 mapped in the kernel address space? Is that confirmed by the value in scause above? (See description of scause in RISC-V privileged instructions)

Screenshot 2023-10-20 at 16.27.03

From 8 we can see environment call from U-mode.

Note that scause was printed by the kernel panic above, but often you need to look at additional info to track down the problem that caused the panic. For example, to find out which user process was running when the kernel paniced, you can print out the process's name:

(gdb) p p->name

What is the name of the binary that was running when the kernel paniced? What is its process id (pid)?

Screenshot 2023-10-20 at 16.29.02

可以看出这个用户进程的名字是initcode,这是 xv6 中第一个运行的进程,

p *p: proc structure

从这个proc结构的信息中可以得出:

  • 进程状态:state字段的值为RUNNING,表示这个进程的状态为运行中,即它当前正在执行.
  • 进程标识:pid字段的值为1,表示这是一个进程的唯一标识符。通常情况下,进程的第一个进程(通常是init进程)的pid为1
  • 父进程:parent字段的值为0x0,表示这个进程没有父进程。这在操作系统中的进程树中通常是初始进程(init)的情况
  • 进程内存信息:sz字段的值为4096,表示这个进程的内存大小为4096字节。
  • 页表信息:pagetable字段的值为0x87f73000,表示这个进程的页表的地址。
  • 陷阱帧信息:trapframe字段的值为0x87f74000,表示这个进程的陷阱帧(trap frame)的地址。陷阱帧通常包含了在进程执行时发生陷阱或异常时的寄存器状态等信息。
  • 上下文信息:context字段包含了一系列寄存器的值,包括返回地址(ra)、栈指针(sp)、一般寄存器(s0到s11)等。这些寄存器状态可能用于在进程切换或陷阱处理中。
  • 当前工作目录:cwd字段的值为0x80016e80,表示当前工作目录的地址。这是进程在执行时的当前工作目录。
  • 进程名:name字段的值为 “initcode”,表示这个进程的名字是 “initcode”

This concludes a brief introduction to tracking down bugs with gdb; it is worth your time to revisit Using the GNU Debugger when tracking down kernel bugs. The guidance page also has some other other useful debugging tips.

Screenshot 2023-10-20 at 16.24.22

CSAPP中的lab可以更加熟悉gdb这个工具.

System call tracing (moderate)

In this assignment you will add a system call tracing feature that may help you when debugging later labs. You'll create a new trace system call that will control tracing. It should take one argument, an integer "mask", whose bits specify which system calls to trace. For example, to trace the fork system call, a program calls trace(1 << SYS_fork), where SYS_fork is a syscall number from kernel/syscall.h. You have to modify the xv6 kernel to print out a line when each system call is about to return, if the system call's number is set in the mask. The line should contain the process id, the name of the system call and the return value; you don't need to print the system call arguments. The trace system call should enable tracing for the process that calls it and any children that it subsequently forks, but should not affect other processes.

kernel/syscall.h:

// System call numbers
#define SYS_fork    1
#define SYS_exit    2
#define SYS_wait    3
#define SYS_pipe    4
#define SYS_read    5
#define SYS_kill    6
#define SYS_exec    7
#define SYS_fstat   8
#define SYS_chdir   9
#define SYS_dup    10
#define SYS_getpid 11
#define SYS_sbrk   12
#define SYS_sleep  13
#define SYS_uptime 14
#define SYS_open   15
#define SYS_write  16
#define SYS_mknod  17
#define SYS_unlink 18
#define SYS_link   19
#define SYS_mkdir  20
#define SYS_close  21

We provide a trace user-level program that runs another program with tracing enabled (see user/trace.c). When you're done, you should see output like this:

$ trace 32 grep hello README    // #define SYS_read    5    From kernel/syscall.h
3: syscall read -> 1023         // 1 往左移动 5  -> 10000 = 32
3: syscall read -> 966
3: syscall read -> 70
3: syscall read -> 0
$                                                                               // 2147483647 ->    
$ trace 2147483647 grep hello README     //二进制0111 1111 1111 1111 1111 1111 1111 1111       
4: syscall trace -> 0                     // 它的31位每个bit都是1
4: syscall exec -> 3                             // 从这个命令里面最终每一个系统调用
4: syscall open -> 3
4: syscall read -> 1023
4: syscall read -> 966
4: syscall read -> 70
4: syscall read -> 0
4: syscall close -> 0
$
$ grep hello README              // 不追踪 就不用返回什么了
$
$ trace 2 usertests forkforkfork
usertests starting
test forkforkfork: 407: syscall fork -> 408
408: syscall fork -> 409
409: syscall fork -> 410
410: syscall fork -> 411
409: syscall fork -> 412
410: syscall fork -> 413
409: syscall fork -> 414
411: syscall fork -> 415
...
$

In the first example above, trace invokes grep tracing just the read system call. The 32 is 1<<SYS_read. In the second example, trace runs grep while tracing all system calls; the 2147483647 has all 31 low bits set. In the third example, the program isn't traced, so no trace output is printed. In the fourth example, the fork system calls of all the descendants of the forkforkfork test in usertests are being traced. Your solution is correct if your program behaves as shown above (though the process IDs may be different).

Some hints:

  • Add $U/_trace to UPROGS in Makefile

Screenshot 2023-10-20 at 17.16.06

  • Run make qemu and you will see that the compiler cannot compile user/trace.c, because the user-space stubs for the system call don't exist yet: add a prototype for the system call to user/user.h, a stub to user/usys.pl, and a syscall number to kernel/syscall.h. The Makefile invokes the perl script user/usys.pl, which produces user/usys.S, the actual system call stubs, which use the RISC-V ecall instruction to transition to the kernel. Once you fix the compilation issues, run trace 32 grep hello README; it will fail because you haven't implemented the system call in the kernel yet.
$ make qemu

Screenshot 2023-10-20 at 17.20.18

Screenshot 2023-10-20 at 17.39.44

Screenshot 2023-10-20 at 17.34.19

Screenshot 2023-10-20 at 17.35.24

Screenshot 2023-10-20 at 17.41.00

  • Add a sys_trace() function in kernel/sysproc.c that implements the new system call by remembering its argument in a new variable in the proc structure (see kernel/proc.h). The functions to retrieve system call arguments from user space are in kernel/syscall.c, and you can see examples of their use in kernel/sysproc.c.
// kernel/sysproc.c
...
// Add a `sys_trace()` function in `kernel/sysproc.c` that
// implements the new system call by remembering its argument in a new variable in the `proc` structure
uint64
sys_trace(void){
//    printf("sys_trace: Hi!\n");
//    return 0;
    int mask;
    argint(0, &mask);
    myproc()->syscall_trace = mask;
    return 0;
}
// kernel/proc.h
...
struct proc {
  ...
  char name[16];               // Process name (debugging)

  uint64 syscall_trace;      // Lab2 system call trace
}
// kernel/syscall.h
...
#define SYS_close  21
#define SYS_trace  22
  • Modify fork() (see kernel/proc.c) to copy the trace mask from the parent to the child process.
// kernel/proc.c
...
int 
fork(void){
  safestrcpy(np->name, p->name, sizeof(p->name));

  np->syscall_trace = p->syscall_trace;    // add
  // Modify `fork()` (see `kernel/proc.c`) to copy the trace mask
  // from the parent to the child process
}
...
  • Modify the syscall() function in kernel/syscall.c to print the trace output. You will need to add an array of syscall names to index into.
// kernel/syscall.c
...
extern uint64 sys_close(void);
extern uint64 sys_trace(void); // Add
...

// An array mapping syscall numbers from syscall.h
// to the function that handles the system call.
static uint64 (*syscalls[])(void) = {
...
[SYS_close]   sys_close,
[SYS_trace]   sys_trace,              // Add
};

// 此处把系统调用的名字给记录下
static char *syscall_names[23] = {"","fork", "exit", "wait", "pipe", "read", "kill", "exec", "fstat",
        "chdir", "dup","getpid", "sbrk", "sleep", "uptime", "open", "write",
        "mknod", "unlink", "link", "mkdir", "close", "trace"
};

void
syscall(void)
{
  int num;
  struct proc *p = myproc();
  num = p->trapframe->a7;
//  num = * (int *) 0;
  if(num > 0 && num < NELEM(syscalls) && syscalls[num]) {
    // Use num to lookup the system call function for num, call it,
    // and store its return value in p->trapframe->a0
    p->trapframe->a0 = syscalls[num]();
    if (p->syscall_trace & (1 << num)) {
        // print trace info
        printf("%d: syscall %s -> %d\n",p->pid, syscall_names[num], p->trapframe->a0);
    }
  } else {
    printf("%d %s: unknown sys call %d\n",
            p->pid, p->name, num);
    p->trapframe->a0 = -1;
  }
}
  • If a test case passes when you run it inside qemu directly but you get a timeout when running the tests using make grade, try testing your implementation on Athena. Some of tests in this lab can be a bit too computationally intensive for your local machine (especially if you use WSL).

Screenshot 2023-10-21 at 12.13.38

Screenshot 2023-10-21 at 12.15.52

Q1: usys.pl是什么文件? usys.pl 是一个Perl脚本文件,用于生成名为 usys.S 的汇编语言源代码文件。这个脚本的目的是生成系统调用的存根(stubs),以便操作系统内核能够处理用户空间程序的系统调用请求。

具体来说,这个Perl脚本执行以下操作: 定义一个名为 entry 的Perl子例程,该子例程接受系统调用的名称作为参数,并生成与该系统调用相关的汇编代码。使用 print 命令输出汇编代码,包括 .global 声明和汇编指令,以将系统调用名称与系统调用号码进行映射,并触发 ecall 指令以执行系统调用。

为一系列系统调用(如 fork、exit、read 等等)调用 entry 子例程,以生成相应的汇编代码。 输出注释,指出该文件是由 usys.pl 自动生成的,不应手动编辑。 生成的 usys.S 文件通常将作为操作系统内核的一部分,用于处理用户空间程序发起的系统调用请求。这种自动生成的方式可以确保系统调用存根的正确性,并简化了内核的开发和维护过程

usys.S:

.global trace
trace:
 li a7, SYS_trace
 ecall
 ret

li a7, SYS_trace:这行代码将立即数 SYS_trace 加载到寄存器 a7 中。a7 寄存器通常用于存储系统调用号码,而 SYS_trace 是一个代表系统调用的符号常量,通常在C代码中定义,用于标识系统调用的类型。

ecall:这是一个特权指令,用于触发系统调用。当执行 ecall 指令时,处理器将进入特权模式(通常是监管模式),并跳转到操作系统内核中的相应系统调用处理程序。系统调用号码 a7 指示了要执行的系统调用类型。

ret:这是一个返回指令,用于返回到调用者的代码。在系统调用完成后,处理器会执行 ret 指令,将控制权返回给调用 SYS_trace 系统调用的代码。

Q2: 从用户态到内核态运行一个系统调用的过程大致是怎么样的?

用户态调用:在用户态运行的程序需要执行一个系统调用时,它会调用一个包含系统调用号码和参数的库函数,例如C库中的syscall函数。

void
syscall(void)
{
  int num;
  struct proc *p = myproc();
  num = p->trapframe->a7;
//  num = * (int *) 0;
  if(num > 0 && num < NELEM(syscalls) && syscalls[num]) {
    // Use num to lookup the system call function for num, call it,
    // and store its return value in p->trapframe->a0
    p->trapframe->a0 = syscalls[num]();
    if (p->syscall_trace & (1 << num)) {
        // print trace info
        printf("%d: syscall %s -> %d\n",p->pid, syscall_names[num], p->trapframe->a0);
      // 将 1 左移 num 位,然后与p->syscall_trace 进行与运算,判断是否满足某个条件
    }
  } else {
    printf("%d %s: unknown sys call %d\n",
            p->pid, p->name, num);
    p->trapframe->a0 = -1;
  }
}

系统调用号码:库函数会将系统调用号码和参数加载到寄存器中,通常使用a7寄存器(或者其他体系结构中的类似寄存器),其中a7用于存储系统调用号码。

触发系统调用:用户态程序执行特殊的指令,例如ecall(在RISC-V架构中)或int 0x80(在x86架构中)。这些指令触发了从用户态到内核态的切换,将控制权传递给操作系统内核。

切换到内核态:当发生系统调用指令时,处理器将当前的用户态上下文保存到特定的寄存器或内存区域中,包括程序计数器(PC)和用户态栈指针(SP)。然后,处理器切换到内核态,切换到操作系统内核的地址空间。

系统调用处理:操作系统内核根据系统调用号码确定要执行的系统调用,并使用传递的参数执行相应的操作。这可能涉及到访问硬件资源、文件系统、网络等等。内核执行完成后,将结果存储在适当的寄存器或内存位置。

返回用户态:内核执行完系统调用后,将保存的用户态上下文恢复回处理器,包括将用户态栈指针(SP)和程序计数器(PC)还原到之前保存的值。然后,处理器切换回用户态,并继续执行用户程序,返回到调用系统调用的下一条指令

xv6 的系统调用过程大致如下所示:

Screenshot 2023-10-21 at 12.46.29

Sysinfo (moderate)

In this assignment you will add a system call, sysinfo, that collects information about the running system. The system call takes one argument: a pointer to a struct sysinfo (see kernel/sysinfo.h). The kernel should fill out the fields of this struct: the freemem field should be set to the number of bytes of free memory, and the nproc field should be set to the number of processes whose state is not UNUSED. We provide a test program sysinfotest; you pass this assignment if it prints "sysinfotest: OK".

Some hints:

  • Add $U/_sysinfotest to UPROGS in Makefile
// Makefile
...
$U/_zombie\
$U/_trace\
$U/_sysinfotest\
  • Run make qemu; user/sysinfotest.c will fail to compile.

Screenshot 2023-10-21 at 12.54.09

Add the system call sysinfo, following the same steps as in the previous assignment. To declare the prototype for sysinfo() in user/user.h you need predeclare the existence of struct sysinfo:

From previous assignment: add a prototype for the system call to user/user.h, a stub to user/usys.pl, and a syscall number to kernel/syscall.h.

struct sysinfo;
int sysinfo(struct sysinfo *);  
// user/user.h
// system calls
...
int trace(int);
struct sysinfo;
int sysinfo(struct sysinfo *);
...
// user/usys.pl
...
entry("trace");
entry("sysinfo");
// kernel/syscall.h
// System call numbers
...
#define SYS_trace  22
#define SYS_sysinfo 23

Add a sys_sysinfo() function in kernel/sysproc.c that implements the new system call by remembering its argument in a new variable in the proc structure (see kernel/proc.h). The functions to retrieve system call arguments from user space are in kernel/syscall.c, and you can see examples of their use in kernel/sysproc.c.

// kernel/sysproc.c
... 
#include "proc.h"
#include "sysinfo.h"     // add
... 
uint64
sys_info(void){
    uint64 addr;
    argaddr(0, &addr);

    struct sysinfo sinfo;
    sinfo.freemem = freemen_size();
    sinfo.nproc = count_proc();
    if (copyout(myproc()->pagetable, addr, (char *)&sinfo, sizeof(sinfo)) < 0)
        return -1;
    return 0;
}  
// kernel/syscall.c
...
extern uint64 sys_close(void);
extern uint64 sys_trace(void); // Add
extern uint64 sys_info(void);

// An array mapping syscall numbers from syscall.h
// to the function that handles the system call.
static uint64 (*syscalls[])(void) = {
...
[SYS_trace]   sys_trace,              // Add
[SYS_sysinfo] sys_info,
};


// 此处把系统调用的名字给记录下
static char *syscall_names[] = {"","fork", "exit", "wait", "pipe", "read", "kill", "exec", "fstat",
        "chdir", "dup","getpid", "sbrk", "sleep", "uptime", "open", "write",
        "mknod", "unlink", "link", "mkdir", "close", "trace", "sysinfo",
};

Once you fix the compilation issues, run sysinfotest; it will fail because you haven't implemented the system call in the kernel yet.

  • sysinfo needs to copy a struct sysinfo back to user space; see sys_fstat() (kernel/sysfile.c) and filestat() (kernel/file.c) for examples of how to do that using copyout().
// `sys_fstat()` (`kernel/sysfile.c`)
uint64
sys_fstat(void)
{
  struct file *f;
  uint64 st; // user pointer to struct stat

  argaddr(1, &st);
  if(argfd(0, 0, &f) < 0)
    return -1;
  return filestat(f, st);
}
// `filestat()` (`kernel/file.c`
// Get metadata about file f.
// addr is a user virtual address, pointing to a struct stat.
int
filestat(struct file *f, uint64 addr)
{
  struct proc *p = myproc();
  struct stat st;
  
  if(f->type == FD_INODE || f->type == FD_DEVICE){
    ilock(f->ip);
    stati(f->ip, &st);
    iunlock(f->ip);
    if(copyout(p->pagetable, addr, (char *)&st, sizeof(st)) < 0)
      return -1;
    return 0;
  }
  return -1;
}
  • To collect the amount of free memory, add a function to kernel/kalloc.c
//kernel/kalloc.c
...
unint64
freemem_size(void){
    acquire(&kmen.lock); // prevent race condition

    uint64 size = 0;
    struct run *r = kmen.freelist;
    while(r){
        size++;
        r = r->next;
    }

    release(&kmen.lock);
    return size * PGSIZE;
}
  • To collect the number of processes, add a function to kernel/proc.c
// kernel/proc.c
unit64
count_proc(void){
    uint64 cnt = 0;
    for (int i = 0; i < NPROC; i++){
        if (proc[i].state != UNUSED){
            cnt++;
        }
    }
    return cnt;
}

在内核的头文件中声明计算空闲内存的函数,因为是内存相关的,所以放在 kalloc、kfree 等函数的的声明之后.

//kernel/defs.h
...
// kalloc.c
void            kinit(void);
uint64          freemem_size(void);
... 
// proc.c
void            procdump(void);
uint64          count_proc(void); // here

Screenshot 2023-10-21 at 15.24.31

Submit the lab

Time spent

Create a new file, time.txt, and put in a single integer, the number of hours you spent on the lab. git add and git commit the file.

Answers

If this lab had questions, write up your answers in answers-*.txt. git add and git commit these files.

Submit

Assignment submissions are handled by Gradescope. You will need an MIT gradescope account. See Piazza for the entry code to join the class. Use this link if you need more help joining.

When you're ready to submit, run make zipball, which will generate lab.zip. Upload this zip file to the corresponding Gradescope assignment.

If you run make zipball and you have either uncomitted changes or untracked files, you will see output similar to the following:

M hello.c
?? bar.c
?? foo.pyc
Untracked files will not be handed in.  Continue? [y/N]

Inspect the above lines and make sure all files that your lab solution needs are tracked, i.e., not listed in a line that begins with ??. You can cause git to track a new file that you create using git add {filename}.

  • Please run make grade to ensure that your code passes all of the tests. The Gradescope autograder will use the same grading program to assign your submission a grade.
  • Commit any modified source code before running make zipball.
  • You can inspect the status of your submission and download the submitted code at Gradescope. The Gradescope lab grade is your final lab grade.

make grade:

Screenshot 2023-10-21 at 15.30.15

Optional challenge exercises

  • Print the system call arguments for traced system calls (easy).
  • Compute the load average and export it through sysinfo(moderate).

https://juejin.cn/post/7276354829043154998

Comments