Skip to content

Lab 1: Xv6 and Unix utilities

sleep (easy)

Implement a user-level sleep program for xv6, along the lines of the UNIX sleep command. Your sleep should pause for a user-specified number of ticks. A tick is a notion of time defined by the xv6 kernel, namely the time between two interrupts from the timer chip. Your solution should be in the file user/sleep.c.

Some hints:

  • Before you start coding, read Chapter 1 of the xv6 book.

  • Put your code in user/sleep.c. Look at some of the other programs in user/ (e.g., user/echo.c, user/grep.c, and user/rm.c) to see how command-line arguments are passed to a program.

user/echo.c:

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"

int
main(int argc, char *argv[])
{
  int i;

  for(i = 1; i < argc; i++){
    write(1, argv[i], strlen(argv[i]));
    if(i + 1 < argc){
      write(1, " ", 1);
    } else {
      write(1, "\n", 1);
    }
  }
  exit(0);
}

user/grep.c:

// Simple grep.  Only supports ^ . * $ operators.

#include "kernel/types.h"
#include "kernel/stat.h"
#include "kernel/fcntl.h"
#include "user/user.h"

char buf[1024];
int match(char*, char*);

void
grep(char *pattern, int fd)
{
  int n, m;
  char *p, *q;

  m = 0;
  while((n = read(fd, buf+m, sizeof(buf)-m-1)) > 0){
    m += n;
    buf[m] = '\0';
    p = buf;
    while((q = strchr(p, '\n')) != 0){
      *q = 0;
      if(match(pattern, p)){
        *q = '\n';
        write(1, p, q+1 - p);
      }
      p = q+1;
    }
    if(m > 0){
      m -= p - buf;
      memmove(buf, p, m);
    }
  }
}

int
main(int argc, char *argv[])
{
  int fd, i;
  char *pattern;

  if(argc <= 1){
    fprintf(2, "usage: grep pattern [file ...]\n");
    exit(1);
  }
  pattern = argv[1];

  if(argc <= 2){
    grep(pattern, 0);
    exit(0);
  }

  for(i = 2; i < argc; i++){
    if((fd = open(argv[i], O_RDONLY)) < 0){
      printf("grep: cannot open %s\n", argv[i]);
      exit(1);
    }
    grep(pattern, fd);
    close(fd);
  }
  exit(0);
}

// Regexp matcher from Kernighan & Pike,
// The Practice of Programming, Chapter 9, or
// https://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html

int matchhere(char*, char*);
int matchstar(int, char*, char*);

int
match(char *re, char *text)
{
  if(re[0] == '^')
    return matchhere(re+1, text);
  do{  // must look at empty string
    if(matchhere(re, text))
      return 1;
  }while(*text++ != '\0');
  return 0;
}

// matchhere: search for re at beginning of text
int matchhere(char *re, char *text)
{
  if(re[0] == '\0')
    return 1;
  if(re[1] == '*')
    return matchstar(re[0], re+2, text);
  if(re[0] == '$' && re[1] == '\0')
    return *text == '\0';
  if(*text!='\0' && (re[0]=='.' || re[0]==*text))
    return matchhere(re+1, text+1);
  return 0;
}

// matchstar: search for c*re at beginning of text
int matchstar(int c, char *re, char *text)
{
  do{  // a * matches zero or more instances
    if(matchhere(re, text))
      return 1;
  }while(*text!='\0' && (*text++==c || c=='.'));
  return 0;
}

user/rm.c:

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"

int
main(int argc, char *argv[])
{
  int i;

  if(argc < 2){
    fprintf(2, "Usage: rm files...\n");
    exit(1);
  }

  for(i = 1; i < argc; i++){
    if(unlink(argv[i]) < 0){
      fprintf(2, "rm: %s failed to delete\n", argv[i]);
      break;
    }
  }

  exit(0);
}
  • Add your sleep program to UPROGS in Makefile; once you've done that, make qemu will compile your program and you'll be able to run it from the xv6 shell.

Screenshot 2023-10-19 at 02.55.53

  • If the user forgets to pass an argument, sleep should print an error message.

  • The command-line argument is passed as a string; you can convert it to an integer using atoi (see user/ulib.c).

  • Use the system call sleep.

  • See kernel/sysproc.c for the xv6 kernel code that implements the sleep system call (look for sys_sleep), user/user.h for the C definition of sleep callable from a user program, and user/usys.S for the assembler code that jumps from user code into the kernel for sleep.

kernel/sysproc.c:

#include "types.h"
#include "riscv.h"
#include "defs.h"
#include "param.h"
#include "memlayout.h"
#include "spinlock.h"
#include "proc.h"

uint64
sys_exit(void)
{
  int n;
  argint(0, &n);
  exit(n);
  return 0;  // not reached
}

uint64
sys_getpid(void)
{
  return myproc()->pid;
}

uint64
sys_fork(void)
{
  return fork();
}

uint64
sys_wait(void)
{
  uint64 p;
  argaddr(0, &p);
  return wait(p);
}

uint64
sys_sbrk(void)
{
  uint64 addr;
  int n;

  argint(0, &n);
  addr = myproc()->sz;
  if(growproc(n) < 0)
    return -1;
  return addr;
}

uint64
sys_sleep(void)
{
  int n;
  uint ticks0;

  argint(0, &n);
  if(n < 0)
    n = 0;
  acquire(&tickslock);
  ticks0 = ticks;
  while(ticks - ticks0 < n){
    if(killed(myproc())){
      release(&tickslock);
      return -1;
    }
    sleep(&ticks, &tickslock);
  }
  release(&tickslock);
  return 0;
}

uint64
sys_kill(void)
{
  int pid;

  argint(0, &pid);
  return kill(pid);
}

// return how many clock tick interrupts have occurred
// since start.
uint64
sys_uptime(void)
{
  uint xticks;

  acquire(&tickslock);
  xticks = ticks;
  release(&tickslock);
  return xticks;
}

user/user.h:

struct stat;

// system calls
int fork(void);
int exit(int) __attribute__((noreturn));
int wait(int*);
int pipe(int*);
int write(int, const void*, int);
int read(int, void*, int);
int close(int);
int kill(int);
int exec(const char*, char**);
int open(const char*, int);
int mknod(const char*, short, short);
int unlink(const char*);
int fstat(int fd, struct stat*);
int link(const char*, const char*);
int mkdir(const char*);
int chdir(const char*);
int dup(int);
int getpid(void);
char* sbrk(int);
int sleep(int);
int uptime(void);

// ulib.c
int stat(const char*, struct stat*);
char* strcpy(char*, const char*);
void *memmove(void*, const void*, int);
char* strchr(const char*, char c);
int strcmp(const char*, const char*);
void fprintf(int, const char*, ...);
void printf(const char*, ...);
char* gets(char*, int max);
uint strlen(const char*);
void* memset(void*, int, uint);
void* malloc(uint);
void free(void*);
int atoi(const char*);
int memcmp(const void *, const void *, uint);
void *memcpy(void *, const void *, uint);
  • sleep's main should call exit(0) when it is done.

  • Look at Kernighan and Ritchie's book The C programming language (second edition) (K&R) to learn about C.

Solution:

sleep.c:

#include "kernel/types.h"
//  引入`types.h`, 这包含了xv6内核所使用的基本数据类型的定义
#include "kernel/stat.h"
//  引入`stat.h`, 这包含了关于文件状态的数据结构的定义
#include "user/user.h"
// 引入`user.h`, 这包含了用户级别的函数和定义, 例如`sleep`和`exit`

// 主函数的开始。argc代表命令行参数的数量,而argv是一个指向字符串数组的指针,其中包含了这些参数
int
main(int argc, char *argv[]){
    // 如果用户没有提供足够的命令行参数,即只有程序名称而没有额外的参数,这个条件成立。
    if (argc <= 1){
        // 打印错误消息到标准错误输出(文件描述符2代表标准错误)说明如何正确使用这个程序
        fprintf(2, "Error! Usage: sleep seconds\n");
        exit(1); // 结束程序并返回错误代码1
    }

    sleep(atoi(argv[1])); // 调用sleep函数,使进程暂停执行指定的秒数。
    // atoi函数将argv[1](第一个命令行参数)从字符串转换为整数
    exit(0);
    // 正常结束程序并返回代码0
}

Screenshot 2023-10-19 at 02.43.42

Run the program from the xv6 shell:

$ make qemu
...
init: starting sh
$ sleep 10
(nothing happens for a little while)
$    

Screenshot 2023-10-19 at 02.46.37

Screenshot 2023-10-19 at 02.47.07

Your solution is correct if your program pauses when run as shown above. Run make grade to see if you indeed pass the sleep tests.

Note that make grade runs all tests, including the ones for the assignments below. If you want to run the grade tests for one assignment, type:

$ ./grade-lab-util sleep

This will run the grade tests that match "sleep". Or, you can type:

$ make GRADEFLAGS=sleep grade   

which does the same.

Screenshot 2023-10-19 at 02.48.55

pingpong (easy)

Write a user-level program that uses xv6 system calls to ''ping-pong'' a byte between two processes over a pair of pipes, one for each direction. The parent should send a byte to the child; the child should print "<pid>: received ping", where <pid> is its process ID, write the byte on the pipe to the parent, and exit; the parent should read the byte from the child, print "<pid>: received pong", and exit. Your solution should be in the file user/pingpong.c.

Some hints:

  • Add the program to UPROGS in Makefile.

Screenshot 2023-10-19 at 03.25.25

  • Use pipe to create a pipe.

  • Use fork to create a child.

  • Use read to read from a pipe, and write to write to a pipe.

  • Use getpid to find the process ID of the calling process.

  • User programs on xv6 have a limited set of library functions available to them. You can see the list in user/user.h; the source (other than for system calls) is in user/ulib.c, user/printf.c, and user/umalloc.c.

参考一下的图, 更容易理解pingpong和pipe的运作方式.

JPEG image-451E-89D5-AE-0

https://blog.csdn.net/u013577996/article/details/108680888

#include "kernel/types.h"
#include "user/user.h"

int main(int argc, char *argv[]){
    // 定义了两个整数数组 pipechild 和 pipeparent,每个数组有两个整数元素。
    // 这些数组将用于存储管道的文件描述符。pipechild 用于子进程和父进程之间的通信,pipeparent 用于父进程和子进程之间的通信
    int pipechild[2]; // 写端子进程, 读端父进程
    int pipeparent[2]; // 写端父进程, 读端子进程
    // 使用 pipe 函数创建了两个管道,一个是 pipechild 管道,另一个是 pipeparent 管道。
    // 管道用于实现进程之间的通信,pipe 函数会创建一对文件描述符,一个用于读取,另一个用于写入
    pipe(pipechild);   //use pipe to create a pipe
    pipe(pipeparent);

    // 定义了一个字符数组 buf,并初始化为包含单个字符 'X'。length 变量存储了 buf 数组的长度
    char buf[] = {'X'}; // 用来接受
    long length = sizeof(buf); // 传输字符组的长度

    // parent send in pipeparent[1], child receives in pipeparent[0]
    // child send in pipechild[1], parent receives in pipechild[0]

    // fork 函数会复制当前进程,创建一个新的进程,使其成为当前进程的子进程
    if (fork() == 0){ // use fork to create a child
        close(pipeparent[1]); // close parent write
        close(pipechild[0]); // close parent read

        // 在子进程中,它尝试从父进程管道 pipeparent[0] 读取数据,
        // 并将数据存储在 buf 中。如果读取失败,它将输出错误消息并退出
        if (read(pipeparent[0], buf, length) != length){
            fprintf(2, "failed to read in child\n");
            exit(1);
        }
        close(pipeparent[0]);
        printf("%d: received ping\n", getpid());
        if (write(pipechild[1], buf, length) != length){
            fprintf(2, "failed to write in child\n");
            exit(1);
        }
        close(pipechild[1]);
        exit(0);
    } else{ // parent
        close(pipechild[1]); // close child write
        close(pipeparent[0]); // close child read
        if (write(pipeparent[1], buf, length) != length ){
            fprintf(2, "failed to write in parent\n");
            exit(1);
        }
        close(pipeparent[1]);
        wait(0);
        if (read(pipechild[0], buf, length) != length){
            fprintf(2, "failed to read in parent\n");
            exit(1);
        }
        close(pipechild[0]);
        printf("%d: received pong\n", getpid());
        exit(0);
    }
    return 0; // 退出
}

pingpong是利用系统调用函数forkpipe在父进程和子进程前交换一个字节:

  1. 可以向fd[1]写, 并从fd[0]中读出来
int fd[2];
pipe(fd); 
  1. fork创建完子进程后, 在父进程中打开的file descripto会被复制保持打开状态到子进程中, 所以可以利用先开好pipe再fork, 完成父子进程的communication.

Run the program from the xv6 shell and it should produce the following output:

$ make qemu
...
init: starting sh
$ pingpong
4: received ping
3: received pong
$  

Your solution is correct if your program exchanges a byte between two processes and produces output as shown above.

Screenshot 2023-10-19 at 13.29.25

这里应该只需要用一个pipe就可以实现, 先挖个坑回头再补上, 连个pipe逻辑会更清晰点.

primes (moderate)/(hard)

Write a concurrent prime sieve program for xv6 using pipes and the design illustrated in the picture halfway down this page

sieve

and the surrounding text. This idea is due to Doug McIlroy, inventor of Unix pipes. Your solution should be in the file user/primes.c.

Your goal is to use pipe and fork to set up the pipeline. The first process feeds the numbers 2 through 35 into the pipeline. For each prime number, you will arrange to create one process that reads from its left neighbor over a pipe and writes to its right neighbor over another pipe. Since xv6 has limited number of file descriptors and processes, the first process can stop at 35.

将2-35中的素数打印出来,要求利用管道理念

Some hints:

  • Be careful to close file descriptors that a process doesn't need, because otherwise your program will run xv6 out of resources before the first process reaches 35.

  • Once the first process reaches 35, it should wait until the entire pipeline terminates, including all children, grandchildren, &c. Thus the main primes process should only exit after all the output has been printed, and after all the other primes processes have exited.

  • Hint: read returns zero when the write-side of a pipe is closed.

  • It's simplest to directly write 32-bit (4-byte) ints to the pipes, rather than using formatted ASCII I/O.

  • You should create the processes in the pipeline only as they are needed.

  • Add the program to UPROGS in Makefile.

Screenshot 2023-10-19 at 14.46.41

Solution: user/primes.c

#include "kernel/types.h"
#include "user/user.h"

void helper(int *input, int num);

int main(){
    // 定义一个包含34个整数的数组,这34个整数是从2开始的连续整数,也就是2到35
    int input[34];
    for (int i = 0; i < 34; i++){
        input[i] = i + 2;
    }
    // 调用helper函数,传入初始数组和数组的大小。这个函数的工作是识别并打印出所有的素数
    helper(input, 34);
    exit(0);
}

void helper(int *input, int num){
    // 一旦num(当前数组的大小)为1,我们知道数组里只有一个数字,直接打印它作为一个素数
    if (num == 1){
        printf("prime %d\n", *input);
        return;
    }

    // 设置管道
    // 定义了一个包含两个整数的数组(用于管道的两个文件描述符),
    // 一个名为prime的变量来存储当前识别的素数(数组的第一个数字),以及一个临时变量temp
    int p[2];
    int prime = *input;
    int temp;

    printf("prime %d\n", prime);  // 打印当前识别的素数

    pipe(p); // 使用pipe系统调用创建一个新的管道,并将其文件描述符存储在数组p中

    // fork系统调用创建一个子进程。子进程的代码在这个if块中执行
    if (fork() == 0){
        // 子进程将数组中的所有数字写入到管道的写端。
        // 每个数字都转换为其对应的字节表示形式并写入管道
        for (int i = 0; i < num; i++){
            temp = * (input + i); // 正在访问数组input的第i个元素
            // 数组名本质上是一个指针,指向其第一个元素的地址。所以,input是指向数组的第一个元素的指针
            // 这个操作将指针input移动到其第i个元素
            // 这等同于input[i]
            write(p[1], (char *) (&temp), 4);
            // 这是一个系统调用,用于写入数据到文件或其他I/O流
            // p是一个整数数组,大小为2。当我们创建一个管道时,p[0]是管道的读端,p[1]是写端。
            // 这里,我们将数据写入管道的写端
        }
        exit(0);
    }
    close(p[1]);

    // 从管道读取数据, 再次使用fork创建第二个子进程
    // fork()是一个系统调用,用于创建一个新的进程。
    // 这个新进程是当前进程的副本,并称为子进程。fork()在父进程中返回子进程的进程ID,在子进程中返回0
    if (fork() == 0){
        // 定义一个计数器变量和一个大小为4字节的缓冲区
        int counter = 0; // 这个变量用于计数那些不是已识别的素数的倍数的数字
        char buffer[4];

        // 持续从管道的读端读取数据,直到管道为空
        // 它使用read函数从管道p的读端读取数据
        // read函数会尝试读取4字节的数据并将其放入buffer中。
        // 如果read函数返回0,这意味着管道为空
        while(read(p[0], buffer, 4) != 0){
            // 将从管道读取的数据转换回整数,并检查它是否是已识别的素数的倍数。
            // 如果不是,则将其写回到数组,并移动数组的指针
            temp = *((int *) buffer);
            // 这是一个类型转换。
            // 我们将buffer的地址(这是一个char类型的指针)转换为int类型的指针,
            // 然后解引用它来获取buffer中的整数值
            // 这是因为我们之前将整数写入管道,并且现在我们想将其转换回整数。

            // 这个条件检查temp是否不是已识别的素数prime的倍数.
            // 如果不是,那么以下的代码块将被执行。
            if (temp % prime != 0){
                // 将temp的值赋给input所指向的位置.
                // 这实际上是将从管道读取的数字写回到数组的当前位置
                *input = temp;
                input += 1;
                // 移动input指针到数组的下一个位置
                // 这是为了准备在下一个迭代中存储另一个数字
                counter++;
                // 递增counter变量
                // 这是为了跟踪那些不是已识别的素数的倍数的数字的数量
            }
        }

        // 递归地调用helper函数处理筛选后的数字
        helper(input - counter, counter);
        exit(0);
    }
    // 主进程等待两个子进程完成
    wait(0);
    wait(0);
}
// 这个程序的核心逻辑是,每次识别一个新的素数时,
// 都会创建一个新的子进程来筛选并去除该素数的所有倍数,从而确保数组中剩下的都是素数

Your solution is correct if it implements a pipe-based sieve and produces the following output:

$ make qemu
...
init: starting sh
$ primes
prime 2
prime 3
prime 5
prime 7
prime 11
prime 13
prime 17
prime 19
prime 23
prime 29
prime 31
$  

Screenshot 2023-10-19 at 14.48.54

find (moderate)

Write a simple version of the UNIX find program for xv6: find all the files in a directory tree with a specific name. Your solution should be in the file user/find.c.

Some hints:

  • Look at user/ls.c to see how to read directories.

user/ls.c:

#include "kernel/types.h"  // 导入内核定义的类型
#include "kernel/stat.h"   // 为了访问stat结构体,它包含了文件或目录的信息
#include "user/user.h"     // 导入用户模式函数,如printf和open
#include "kernel/fs.h"      // 导入文件系统相关定义,如DIRSIZ和dirent结构体
#include "kernel/fcntl.h"    // 导入文件控制常量,如O_RDONLY

// 用于从路径中提取并格式化文件名
char*
fmtname(char *path)
{
  static char buf[DIRSIZ+1]; // buf用于存储格式化后的文件名
  char *p;    // 定义一个指针,初始化为路径的末尾

  // Find first character after last slash.
  // 从路径末尾开始找到最后一个/字符后的第一个字符
  for(p=path+strlen(path); p >= path && *p != '/'; p--)
    ;
  p++;     // 指针p现在指向文件名

  // Return blank-padded name.
  // 如果文件名长度大于或等于DIRSIZ,则直接返回该名称
  if(strlen(p) >= DIRSIZ)
    return p;
  //否则,将文件名复制到buf中,并在其后添加空格,使其达到DIRSIZ长度
  memmove(buf, p, strlen(p));
  memset(buf+strlen(p), ' ', DIRSIZ-strlen(p));
  return buf; // 返回格式化后的文件名
}

// 列出指定路径的文件或目录
void
ls(char *path)
{
  // 定义缓冲区和其他所需变量
  char buf[512], *p; 
  int fd; 
  struct dirent de;
  struct stat st;

  // 尝试打开给定路径
  if((fd = open(path, O_RDONLY)) < 0){
    fprintf(2, "ls: cannot open %s\n", path);
    return;
  }

  // 获取文件或目录的信息
  if(fstat(fd, &st) < 0){
    fprintf(2, "ls: cannot stat %s\n", path);
    close(fd);
    return;
  }

  // 根据文件类型执行操作
  switch(st.type){
  //如果是设备或文件,打印其信息
  case T_DEVICE:
  case T_FILE:
    printf("%s %d %d %l\n", fmtname(path), st.type, st.ino, st.size);
    break;

  // 如果是目录, 检查路径长度是否过长
  case T_DIR:
    if(strlen(path) + 1 + DIRSIZ + 1 > sizeof buf){
      printf("ls: path too long\n");
      break;
    }
    // 将路径复制到buf
    strcpy(buf, path);
    p = buf+strlen(buf);
    *p++ = '/';
    // 读取目录的每个条目
    while(read(fd, &de, sizeof(de)) == sizeof(de)){
      // 忽略空条目
      if(de.inum == 0)    
        continue;
      // 复制文件名到buf,然后获取并打印其信息
      memmove(p, de.name, DIRSIZ);
      p[DIRSIZ] = 0;
      if(stat(buf, &st) < 0){
        printf("ls: cannot stat %s\n", buf);
        continue;
      }
      printf("%s %d %d %d\n", fmtname(buf), st.type, st.ino, st.size);
    }
    break;
  }
  close(fd); // 关闭文件描述符
}

// 程序入口
int
main(int argc, char *argv[])
{
  int i;
  // 如果没有提供命令行参数,则列出当前目录
  if(argc < 2){
    ls(".");
    exit(0);
  }
  //历并列出每个命令行参数指定的路径
  for(i=1; i<argc; i++)
    ls(argv[i]);
  exit(0);
}

从上述代码中, 主要可以领悟到4个点:

  1. 如何知道一个路径是文件还是目录?

    int fd = open(curr_path, 0);
    fstat(fd, &st);
    st.type 是T_DIR 或者 T_FILE
    
  2. 如何递归地扫描一个目录?

    如果已知一个fd对应的是一个目录, 可以不断读取DIRSIZ大小bytes来迭代每个子文件/子文件目录

    struct dirent de;
    while(read(fd, &de, sizeof(de)) == sizeof(de)){
    // de.name 得到文件名
    ...
    }
    
  3. 我们需要一路上自己不断构造完整路径, 需要保存一个buffer, 并在合适的时候加上‘/[subdirectory]‘ 进入递归

  4. 记得及时关闭不再使用的file descriptor.

  • Use recursion to allow find to descend into sub-directories.

  • Don't recurse into "." and "..".

  • Changes to the file system persist across runs of qemu; to get a clean file system run make clean and then make qemu.

  • You'll need to use C strings. Have a look at K&R (the C book), for example Section 5.5.

  • Note that == does not compare strings like in Python. Use strcmp() instead.

  • Add the program to UPROGS in Makefile.

Screenshot 2023-10-19 at 16.01.58

给定一个初始路径和目标文件名, 要不断递归的扫描找到所有子目录前匹配的文件全路径. 主要熟悉学习下文件系统的操作.

Solution: find.c:

#include "kernel/types.h"  // 导入内核定义的类型
#include "kernel/stat.h"   // 为了访问stat结构体,它包含了文件或目录的信息
#include "user/user.h"     // 导入用户模式函数,如printf和open
#include "kernel/fs.h"      // 导入文件系统相关定义,如DIRSIZ和dirent结构体
#include "kernel/fcntl.h"    // 导入文件控制常量,如O_RDONLY

/*
 * find files in path
 */
void find(char *path, char *findName);
void eq_print(char *fileName, char *findName);
char* fmtname(char *path);
int main(int argc, char *argv[]){
    if (argc < 3){
        fprintf(2, "usage: find <path> <filename> \n");
        exit(0);
    }
    find(argv[1], argv[2]);
    exit(0);
}

void find(char *path, char *findName){
    int fd;
    struct stat st;
    if((fd = open(path, O_RDONLY)) < 0){
        fprintf(2, "find: cannot open %s\n", path);
        close(fd);
        return;
    }

    if(fstat(fd, &st) < 0){
        fprintf(2, "find cannot stat %s\n", path);
        close(fd);
        return;
    }

    char buf[512], *p;
    struct dirent de;
    switch (st.type) {
        case T_FILE:
            eq_print(path, findName);
            break;
        case T_DIR:
            if (strlen(path) + 1 + DIRSIZ + 1 > sizeof buf){
                printf("find: path too long\n");
                break;
            }
            strcpy(buf, path);
            p = buf+strlen(buf);
            *p++ = '/';
            while(read(fd, &de, sizeof(de)) == sizeof(de)){
                if (de.inum == 0 || de.inum == 1 || strcmp(de.name, ".") == 0 || strcmp(de.name, "..") == 0){
                    continue;
                }
                memmove(p, de.name, strlen(de.name));
                p[strlen(de.name)] = 0;
                find(buf, findName);
            }
            break;
    }
    close(fd);
}

void eq_print(char *fileName, char *findName){
    if (strcmp(fmtname(fileName), findName) == 0){
        printf("%s\n", fileName);
    }
}


// 用于从路径中提取并格式化文件名
char*
fmtname(char *path)
{
    static char buf[DIRSIZ+1]; // buf用于存储格式化后的文件名
    char *p;    // 定义一个指针,初始化为路径的末尾

    // Find first character after last slash.
    // 从路径末尾开始找到最后一个/字符后的第一个字符
    for(p=path+strlen(path); p >= path && *p != '/'; p--)
        ;
    p++;     // 指针p现在指向文件名

    // Return blank-padded name.
    // 如果文件名长度大于或等于DIRSIZ,则直接返回该名称
//    if(strlen(p) >= DIRSIZ)
//        return p;
//    //否则,将文件名复制到buf中,并在其后添加空格,使其达到DIRSIZ长度
    memmove(buf, p, strlen(p)+1);
//    memset(buf+strlen(p), ' ', DIRSIZ-strlen(p));
    return buf; // 返回格式化后的文件名
}

/*
 * ls代码中的fmtname函数:

提取的文件名会被格式化为固定长度(DIRSIZ),多余的空间会用空格填充。
这是因为传统的ls输出,尤其是当它以列的形式显示时,希望文件名能够整齐地对齐,所以需要固定长度。

find代码中的fmtname函数:
它只是简单地提取并返回文件名,没有任何额外的格式化。
这对find的输出来说是有意义的,因为find通常会输出完整的路径,而不仅仅是格式化的文件名。当用户想要知道完整路径时,这是非常有用的。
因此,尽管这两个函数的核心功能是类似的(从路径中提取文件名),但由于它们在不同的上下文中使用,所以它们的行为和输出略有不同
 */

Your solution is correct if produces the following output (when the file system contains the files b, a/b and a/aa/b):

$ make qemu
...
init: starting sh
$ echo > b
$ mkdir a
$ echo > a/b
$ mkdir a/aa
$ echo > a/aa/b
$ find . b
./b
./a/b
./a/aa/b
$

Screenshot 2023-10-19 at 17.09.46

xargs (moderate)

Write a simple version of the UNIX xargs program for xv6: its arguments describe a command to run, it reads lines from the standard input, and it runs the command for each line, appending the line to the command's arguments. Your solution should be in the file user/xargs.c.

The following example illustrates xarg's behavior:

$ echo hello too | xargs echo bye
bye hello too
$  

Note that the command here is "echo bye" and the additional arguments are "hello too", making the command "echo bye hello too", which outputs "bye hello too".

Note that the command here is "echo bye" and the additional arguments are "hello too", making the command "echo bye hello too", which outputs "bye hello too".

Please note that xargs on UNIX makes an optimization where it will feed more than argument to the command at a time. We don't expect you to make this optimization. To make xargs on UNIX behave the way we want it to for this lab, please run it with the -n option set to 1. For instance

$ (echo 1 ; echo 2) | xargs -n 1 echo
1
2
$  

Some hints:

  • Use fork and exec to invoke the command on each line of input. Use wait in the parent to wait for the child to complete the command.

  • To read individual lines of input, read a character at a time until a newline ('\n') appears.

  • kernel/param.h declares MAXARG, which may be useful if you need to declare an argv array.

#define MAXARG       32  // max exec arguments
  • Add the program to UPROGS in Makefile.

Screenshot 2023-10-19 at 17.24.35

  • Changes to the file system persist across runs of qemu; to get a clean file system run make clean and then make qemu.

Screenshot 2023-10-19 at 17.27.30

xargs, find, and grep combine well:

$ find . b | xargs grep hello

will run "grep hello" on each file named b in the directories below ".".

Solution xargs.c:

#include "kernel/types.h"
#include "user/user.h"

int main(int argc, char *argv[]){
    // 定义字符数组来存储从标准输入读取的数据块和缓冲区
    char block[32];
    char buf[32];
    char *p = buf; //p是一个指针,指向buf,它将用于填充缓冲区
    char *lineSplit[32];//定义一个指针数组来存储命令及其参数

    //这个循环将命令行参数(即要执行的命令及其参数)复制到lineSplit数组中
    int j = 0;
    for(int i = 1; i < argc; i++){
        lineSplit[j++] = argv[i];
    }

    // 开始从标准输入读取数据,每次读取block数组大小的数据
    int m = 0;
    int k;
    while((k = read(0, block, sizeof(block))) > 0){
        for(int l = 0; l < k; l++){ // 遍历刚读取的数据块
            if(block[l] == '\n'){ // 当检测到换行符时,结束当前行的读取
                buf[m] = 0; // 在缓冲区中的当前位置添加字符串结束符
                // 更新指针和索引,准备执行命令
                m = 0;
                lineSplit[j++] = p;
                p = buf;
                lineSplit[j] = 0;

                // 重置j,以便再次开始读取参数
                j = argc - 1;

                // 通过fork创建一个新进程,然后在子进程中使用exec执行命令
                if(fork() == 0){
                    exec(argv[1], lineSplit);
                }
                wait(0); // 父进程等待子进程完成
            }else if(block[l] == ' ') { //如果检测到空格,视为参数的分隔符,并在buf中存储新参数
                buf[m++] = 0;
                lineSplit[j++] = p;
                p = &buf[m];
            }else { // 将当前字符添加到buf
                buf[m++] = block[l];
            }
        }
    }
    exit(0);
}

To test your solution for xargs, run the shell script xargstest.sh. Your solution is correct if it produces the following output:

$ make qemu
...
init: starting sh
$ sh < xargstest.sh
$ $ $ $ $ $ hello
hello
hello
$ $

You may have to go back and fix bugs in your find program. The output has many $ because the xv6 shell doesn't realize it is processing commands from a file instead of from the console, and prints a $ for each command in the file.

Screenshot 2023-10-19 at 17.27.50

Submit the lab

Time spent

Create a new file, time.txt, and put in a single integer, the number of hours you spent on the lab. git addand git commit the file.

Answers

If this lab had questions, write up your answers in answers-*.txt. git add and git commit these files.

Submit

Assignment submissions are handled by Gradescope. You will need an MIT gradescope account. See Piazza for the entry code to join the class. Use this link if you need more help joining.

When you're ready to submit, run make zipball, which will generate lab.zip. Upload this zip file to the corresponding Gradescope assignment.

If you run make zipball and you have either uncomitted changes or untracked files, you will see output similar to the following:

 M hello.c
?? bar.c
?? foo.pyc
Untracked files will not be handed in.  Continue? [y/N]

Inspect the above lines and make sure all files that your lab solution needs are tracked, i.e., not listed in a line that begins with ??. You can cause git to track a new file that you create using git add {filename}.

  • Please run make grade to ensure that your code passes all of the tests. The Gradescope autograder will use the same grading program to assign your submission a grade.
  • Commit any modified source code before running make zipball.
  • You can inspect the status of your submission and download the submitted code at Gradescope. The Gradescope lab grade is your final lab grade.

Optional challenge exercises

  • Write an uptime program that prints the uptime in terms of ticks using the uptime system call. (easy)
  • Support regular expressions in name matching for find. grep.c has some primitive support for regular expressions. (easy)
  • The xv6 shell (user/sh.c) is just another user program and you can improve it. It is a minimal shell and lacks many features found in real shell. For example, modify the shell to not print a $ when processing shell commands from a file (moderate), modify the shell to support wait (easy), modify the shell to support lists of commands, separated by ";" (moderate), modify the shell to support sub-shells by implementing "(" and ")" (moderate), modify the shell to support tab completion (easy), modify the shell to keep a history of passed shell commands (moderate), or anything else you would like your shell to do. (If you are very ambitious, you may have to modify the kernel to support the kernel features you need; xv6 doesn't support much.)

Comments