Talk: Kernel and Userspace frontier, V4L subsystem

The first thing to do is to enable the VIDEO_VIVID feature on the kernel.

How to enable VIDEO_VIVID on your kernel through menuconfig

To do that, inside the kernel directory, type make menuconfig. Then with the menu open, type / to begin searching and search for VIDEO_VIVID. A screen should appear with the results, with the first one being for the VIDEO_VIVID symbol. There, you should take note of the line starting with Depends on:. The symbols shown in that line need to be enabled before you can enable VIDEO_VIVID. From that line, you should see that you need to enable MEDIA_SUPPORT, V4L_TEST_DRIVERS, VIDEO_DEV and VIDEO_V4L2.

Now that you know the dependencies that you need to enable first, you can exit the results screen by pressing Enter and search for the first dependency: MEDIA_SUPPORT. Note that its only dependency (HAS_IOMEM) is already satisfied (indicated by the [=y] next to it), so you can enable it directly. To do so, first type 1, which is the number in parentheses in front of it. This will take you to the corresponding menu entry, Multimedia support; then just type y to enable it (you should see a star in front of it, meaning it's now enabled). Now repeat this process for the remaining dependencies of VIDEO_VIVID and then enable VIDEO_VIVID itself.

With vivid enabled, recompile your kernel with make -j$(nproc).

Now copy the following code into some file and compile it:

/* V4L2 video picture grabber
   Copyright (C) 2009 Mauro Carvalho Chehab <mchehab@kernel.org>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/select.h>
#include <sys/mman.h>
#include <linux/videodev2.h>

/* Zero every byte of the object x; x must be an lvalue because its address is taken. */
#define CLEAR(x) memset(&(x), 0, sizeof(x))

/* Address and size of one memory-mapped capture buffer. */
struct buffer {
        void   *start;  /* address returned by mmap() for this buffer */
        size_t length;  /* size of the mapping in bytes, needed for munmap() */
};

/*
 * Issue an ioctl on fh, retrying for as long as the call fails with EINTR
 * or EAGAIN.
 *
 * fh:      open file descriptor to operate on
 * request: ioctl request code. It is unsigned long to match the ioctl()
 *          prototype; request codes that carry the "read" direction bit
 *          have bit 31 set and do not fit in a signed int.
 * arg:     request-specific argument, passed through to ioctl() unchanged
 *
 * Any other failure prints errno and its description to stderr and
 * terminates the process with EXIT_FAILURE, so this function only returns
 * on success.
 */
static void xioctl(int fh, unsigned long request, void *arg)
{
        int r;

        do {
                r = ioctl(fh, request, arg);
        } while (r == -1 && (errno == EINTR || errno == EAGAIN));

        if (r == -1) {
                fprintf(stderr, "error %d, %s\n", errno, strerror(errno));
                exit(EXIT_FAILURE);
        }
}

/*
 * Grab 20 frames from /dev/video0 using memory-mapped streaming I/O and
 * store each one as a binary PPM file (out000.ppm ... out019.ppm) in the
 * current directory.
 *
 * argc and argv are unused: the device name and frame count are fixed.
 *
 * Returns 0 on success. A select() failure returns the errno value of that
 * failure; every other failure prints a message and exits with
 * EXIT_FAILURE.
 */
int main(int argc, char **argv)
{
        struct v4l2_format              fmt;
        struct v4l2_buffer              buf;
        struct v4l2_requestbuffers      req;
        enum v4l2_buf_type              type;
        fd_set                          fds;
        struct timeval                  tv;
        int                             r, fd = -1;
        unsigned int                    i, n_buffers;
        const char                      *dev_name = "/dev/video0";
        char                            out_name[256];
        FILE                            *fout;
        struct buffer                   *buffers;

        (void)argc;
        (void)argv;

        fd = open(dev_name, O_RDWR | O_NONBLOCK, 0);
        if (fd < 0) {
                perror("Cannot open device");
                exit(EXIT_FAILURE);
        }

        /* Ask for 640x480 RGB24; the driver may adjust the request. */
        CLEAR(fmt);
        fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        fmt.fmt.pix.width       = 640;
        fmt.fmt.pix.height      = 480;
        fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_RGB24;
        fmt.fmt.pix.field       = V4L2_FIELD_INTERLACED;
        xioctl(fd, VIDIOC_S_FMT, &fmt);
        if (fmt.fmt.pix.pixelformat != V4L2_PIX_FMT_RGB24) {
                printf("Device didn't accept RGB24 format (got %x). Can't proceed.\n", fmt.fmt.pix.pixelformat);
                exit(EXIT_FAILURE);
        }
        if ((fmt.fmt.pix.width != 640) || (fmt.fmt.pix.height != 480)) {
                printf("Warning: driver is sending image at %ux%u\n",
                        fmt.fmt.pix.width, fmt.fmt.pix.height);
        }

        CLEAR(req);
        req.count = 2;
        req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        req.memory = V4L2_MEMORY_MMAP;
        xioctl(fd, VIDIOC_REQBUFS, &req);

        /* req.count now holds the number of buffers the driver granted. */
        buffers = calloc(req.count, sizeof(*buffers));
        if (buffers == NULL) {
                fprintf(stderr, "Cannot allocate buffer table\n");
                exit(EXIT_FAILURE);
        }

        /* Map every driver buffer into this process. */
        for (n_buffers = 0; n_buffers < req.count; ++n_buffers) {
                CLEAR(buf);

                buf.type        = V4L2_BUF_TYPE_VIDEO_CAPTURE;
                buf.memory      = V4L2_MEMORY_MMAP;
                buf.index       = n_buffers;

                xioctl(fd, VIDIOC_QUERYBUF, &buf);

                buffers[n_buffers].length = buf.length;
                buffers[n_buffers].start = mmap(NULL, buf.length,
                              PROT_READ | PROT_WRITE, MAP_SHARED,
                              fd, buf.m.offset);

                if (MAP_FAILED == buffers[n_buffers].start) {
                        perror("mmap");
                        exit(EXIT_FAILURE);
                }
        }

        /* Queue all buffers so the driver can fill them once streaming starts. */
        for (i = 0; i < n_buffers; ++i) {
                CLEAR(buf);
                buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
                buf.memory = V4L2_MEMORY_MMAP;
                buf.index = i;
                xioctl(fd, VIDIOC_QBUF, &buf);
        }
        type = V4L2_BUF_TYPE_VIDEO_CAPTURE;

        xioctl(fd, VIDIOC_STREAMON, &type);
        for (i = 0; i < 20; i++) {
                /* Wait up to 2 seconds for a frame, restarting if interrupted. */
                do {
                        FD_ZERO(&fds);
                        FD_SET(fd, &fds);

                        /* Timeout. */
                        tv.tv_sec = 2;
                        tv.tv_usec = 0;

                        r = select(fd + 1, &fds, NULL, NULL, &tv);
                } while (r == -1 && errno == EINTR);
                if (r == -1) {
                        /* perror() may modify errno, so save it first. */
                        int saved_errno = errno;

                        perror("select");
                        return saved_errno;
                }

                CLEAR(buf);
                buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
                buf.memory = V4L2_MEMORY_MMAP;
                xioctl(fd, VIDIOC_DQBUF, &buf);

                snprintf(out_name, sizeof(out_name), "out%03u.ppm", i);
                fout = fopen(out_name, "wb");
                if (!fout) {
                        perror("Cannot open image");
                        exit(EXIT_FAILURE);
                }
                fprintf(fout, "P6\n%u %u 255\n",
                        fmt.fmt.pix.width, fmt.fmt.pix.height);
                /* Element size 1 so that an empty frame is not reported as an error. */
                if (fwrite(buffers[buf.index].start, 1, buf.bytesused, fout)
                    != buf.bytesused) {
                        perror("Cannot write image");
                        exit(EXIT_FAILURE);
                }
                if (fclose(fout) != 0) {
                        perror("Cannot write image");
                        exit(EXIT_FAILURE);
                }

                /* Hand the buffer back to the driver for the next frame. */
                xioctl(fd, VIDIOC_QBUF, &buf);
        }

        type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        xioctl(fd, VIDIOC_STREAMOFF, &type);
        for (i = 0; i < n_buffers; ++i)
                munmap(buffers[i].start, buffers[i].length);
        free(buffers);
        close(fd);

        return 0;
}

Run virtme with read-write permissions on the folder containing the compiled code:

virtme-run --kdir=/path/to/kernel --rwdir=/path/to/folder/containing/code

Inside virtme, execute the code you compiled.

Let it run for some seconds and cancel it with Ctrl-c.

Finally, you should open the folder containing the code you compiled on your host machine and check the files generated by the code!

Read the LWN series of articles: https://lwn.net/Articles/203924/

Review the things from previous meetings, ask questions, and check if other students need some help.

Recommended reading: Linux Device Drivers, Chapter 3: Char Drivers and Chapter 6: Advanced Char Driver Operations.

Don’t forget to update the spreadsheet for tracking our progress.


Tutorial: adding a syscall

Linux has a lot of syscalls, but we are going to add one more, in the name of science: a memory copy. We usually don't need the kernel to copy memory from one place to another in userspace, but we are adding this for learning purposes.

First, this is what the interface will look like:

sys_memcpy(void *src, void *dst, unsigned long int size);

When it succeeds, it will return 0. Otherwise, it will return an error code.

If you don't like my interface, feel free to be creative and try cool ways to do a memcpy.

Note

Creating a syscall is very architecture dependent, since each arch has its own calling convention, different registers to use, and different syscall tables. In this tutorial, I'm going to add a syscall to the x86-64 ABI. Keep in mind that we will not cover the i386 or x32 ABIs in this tutorial, but this document provides useful insights for solving compatibility issues: Adding a New System Call - kernel.org

Registering our new syscall

We need to register our syscall in some places, so the kernel knows what to do when userspace asks for it. The first file is arch/x86/entry/syscalls/syscall_64.tbl: add a new line after the last entry in the first table (this is not at the end of the file!). For Linux v5.6, this is after pidfd_getfd:

 437    common  openat2                 __x64_sys_openat2
 438    common  pidfd_getfd             __x64_sys_pidfd_getfd
+439    common  memcpy                  __x64_sys_memcpy

 #
 # x32-specific system call numbers start at 512 to avoid cache impact

Note

A correct multiplatform implementation would require the syscall to be added to the syscall_32.tbl as well. The second table entry is only required when there is a need to treat x32 syscall differently.

439 will be our syscall number. Take note, since we are going to use it in other places as well.

Add a function signature at include/linux/syscalls.h:

 asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg);

+/* kernel/memcpy.c */
+asmlinkage long sys_memcpy(void __user *src, void __user *dst,
+                        unsigned long len);

 /*
  * Not a real system call, but a placeholder for syscalls which are

I added it after the last syscall signature. Now the kernel knows the number and the signature, let's glue things together at include/uapi/asm-generic/unistd.h:

 #define __NR_pidfd_getfd 438
 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)

+#define __NR_memcpy 439
+__SYSCALL(__NR_memcpy, sys_memcpy)
+
 #undef __NR_syscalls
-#define __NR_syscalls 439
+#define __NR_syscalls 440

The last place to register our syscall is at kernel/sys_ni.c:

 COND_SYSCALL(setuid16);

+COND_SYSCALL(memcpy);
+
 /* restartable sequence */

This is required to provide fallback stub implementation of our syscall, that returns -ENOSYS.

Writing some code

Finally, let's add the code of our syscall. I created a file kernel/memcpy.c, but you can choose wherever you want, just make sure it gets compiled. I also added obj-y += memcpy.o to kernel/Makefile. Let's see, step by step, what our code needs to do. For now, the only include we need is <linux/syscalls.h>.

To declare the syscall, we are going to use a macro SYSCALL_DEFINE3, that will do some magic for us. Note that the type and the name of each variable are separated by a comma:

SYSCALL_DEFINE3(memcpy, void __user *, src, void __user *, dst,
        unsigned long, len)

3 is for syscalls with three arguments. The maximum number of arguments that a syscall can have is six. Some architectures don't have enough registers to deal with a 7th argument. If you need to pass more than six variables, you need to use a pointer to a struct. When this macro is expanded, we will get the same function signature that we declared in include/linux/syscalls.h.

The __user is an attribute not used by compilers, but used by static analyzers (like sparse) to see if you are not misusing user data.

Note

What kind of misuse is possible to do with user memory? Hint: check out the difference in how kernel memory and user memory are mapped and about memory management unit

Now, we move along defining our function. First, we will need a kernel buffer to temporarily store data:

{
    void *buf;

    buf = kmalloc(len, GFP_KERNEL);
    if (!buf)
        return -ENOMEM;

Note

Exercise: how about defining a maximum value for len and returning an error code if it's bigger than we support?

Now, we need to store the data from the user in the kernel. For that, we are going to use the function copy_from_user(). It's imperative to use this function to copy data from userspace to the kernel, since it checks if the pointer and the size are valid. If you want to have some fun seeing some errors, use the internal memcpy() implementation.

    if (copy_from_user(buf, src, len)) {
        /* buf was allocated above; release it before bailing out */
        kfree(buf);
        return -EFAULT;
    }

EFAULT is used for invalid memory access. We are almost done! We just need to copy the data back to the user and finish the syscall. To do that, the kernel also provides a copy_to_user():

    if (copy_to_user(dst, buf, len)) {
        /* the kernel buffer must be released on the error path too */
        kfree(buf);
        return -EFAULT;
    }

    kfree(buf);
    return 0;
}

In the end, my kernel/memcpy.c file looks like this:

#include <linux/syscalls.h>

/*
 * sys_memcpy - copy len bytes from one userspace buffer to another
 * @src: userspace address to read from
 * @dst: userspace address to write to
 * @len: number of bytes to copy
 *
 * The data is staged in a temporary kernel buffer of len bytes, which is
 * released on every exit path.
 *
 * Return: 0 on success, -ENOMEM if the staging buffer cannot be allocated,
 * -EFAULT if src cannot be fully read or dst cannot be fully written.
 */
SYSCALL_DEFINE3(memcpy, void __user *, src, void __user *, dst,
        unsigned long, len)
{
    void *buf;

    buf = kmalloc(len, GFP_KERNEL);
    if (!buf)
        return -ENOMEM;

    /* copy_from_user() returns the number of bytes it could NOT copy */
    if (copy_from_user(buf, src, len)) {
        kfree(buf);
        return -EFAULT;
    }

    if (copy_to_user(dst, buf, len)) {
        kfree(buf);
        return -EFAULT;
    }

    kfree(buf);
    return 0;
}

The kernel side is ready. Now it is time to use our syscall from userspace.

The user side

Let's test our syscall from userspace! Remember to recompile your kernel before testing.

Glibc provides a wrapper for calling syscalls, conveniently called syscall(). All we need to do is use the first argument as the syscall number, and the following ones as the syscall's arguments.

Let's include headers to have access to syscall(), printf() and errno:

#include <stdio.h>
#include <unistd.h>
#include <errno.h>

Define the number of our system call, and the size of the test array:

#define __NR_memcpy 439
#define ARR_LEN 10

Note

If you enable CONFIG_HEADERS_INSTALL and run make headers_install with INSTALL_HDR_PATH set to the path of the rootfs of your test environment, you don't need to define __NR_memcpy; you can just include <linux/unistd.h>.

Create some variables, and a test array:

int ret, i;

int a[] = {1, 2, 3, 777, '5', 'a', 0x0800, -15500, 42, 1337};
int b[ARR_LEN];

Finally, call the syscall to do the magic:

ret = syscall(__NR_memcpy, a, b, ARR_LEN * sizeof(int)); 
if (ret == -1)
    printf("error: %d\n", errno);

And check if both arrays are identical:

for (i = 0; i < ARR_LEN; i++) {
    if (a[i] != b[i])
        printf("error, value %d is different\n", i);
}

And if nothing was printed, our syscall worked! This is the complete user code:

#include <stdio.h>
#include <unistd.h>
#include <errno.h>

#define __NR_memcpy 439
#define ARR_LEN 10

/*
 * Exercise the memcpy syscall: copy array a into array b through the kernel
 * and check that both arrays hold the same values.
 *
 * Returns 0 when the syscall succeeds and every element matches, 1 otherwise.
 */
int main(void)
{
    /* syscall() returns long; -1 signals failure with the cause in errno */
    long ret;
    int i, status = 0;

    int a[] = {1, 2, 3, 777, '5', 'a', 0x0800, -15500, 42, 1337};
    /* zero-initialized so b never holds indeterminate values */
    int b[ARR_LEN] = {0};

    ret = syscall(__NR_memcpy, a, b, ARR_LEN * sizeof(int));
    if (ret == -1) {
        printf("error: %d\n", errno);
        /* nothing was copied, so comparing the arrays would be meaningless */
        return 1;
    }

    for (i = 0; i < ARR_LEN; i++) {
        if (a[i] != b[i]) {
            printf("error, value %d is different\n", i);
            status = 1;
        }
    }

    return status;
}

Exercise

  1. Use strace to see your syscall in action. Use gdb and catch syscall <syscall_number> to stop the program when your syscall is called. Use info reg when the program stops to see the value of the register.

  2. Let's create a syscall that not only copies the data, but also modifies it. Have you ever heard of the Caesar cipher?

    Implement a syscall that, for a given string, rotation count and operation, encrypts/decrypts the string.

    #define OP_ENCRYPT 0
    #define OP_DECRYPT 1
    
    sys_caesar(char *in_str, char *out_str, unsigned int op, unsigned int rot);
    
  3. Join all data of our syscall inside a struct, and send this struct to the kernel. Such struct can look like this:

    struct memcpy_data {
        void *src;
        void *dst;
        unsigned long int size;
    };
    

    And modify the syscall signature to be like this:

    sys_memcpy(struct memcpy_data *data);
    

    Check the size of this struct. Will this size be the same in all architectures? Compile a userspace code for i386 ABI (with gcc -m32) and try to use the syscall as is. Do you think your implementation will work? Check the compatibility documentation and try to fix your code.