Skip to content

VFS File 操作

1. 模块架构

1.1 功能概述

file 结构是进程与文件之间的抽象接口。每次成功的 open() 都会创建一个 file 结构,文件描述符指向它(经 dup() 或 fork() 之后,多个描述符可以共享同一个 file 结构)。file 结构记录了文件的打开模式、当前读写位置和操作函数指针。

1.2 关键源文件

| 文件 | 作用 |
| --- | --- |
| fs/file.c | 文件操作实现 |
| fs/open.c | 文件打开 |
| fs/read_write.c | 读写实现 |
| include/linux/fs.h | file 结构定义 |

2. 核心数据结构

2.1 struct file

c
// include/linux/fs.h:1259
/*
 * struct file - state kept for one open file (abridged listing).
 *
 * Only the members discussed in this document are shown; the trailing
 * "..." stands for members that were left out of the listing.
 *
 * f_op and f_path are separate members: f_op points at the operations
 * table, f_path holds the path the file was opened through.
 */
struct file {
    const struct file_operations *f_op;       /* file operations */
    struct address_space    *f_mapping;       /* address space */
    void                    *private_data;    /* private data */
    struct inode            *f_inode;         /* associated inode */
    unsigned int            f_flags;          /* open flags */
    fmode_t                 f_mode;           /* file mode */
    loff_t                  f_pos;            /* file position */
    struct fown_struct      *f_owner;         /* file owner */
    const struct cred       *f_cred;          /* credentials */
    struct path             f_path;           /* file path */
    file_ref_t              f_ref;            /* reference count */
    // ...
};

2.2 struct file_operations

c
// include/linux/fs.h:1926
/*
 * struct file_operations - table of function pointers implementing the
 * operations on an open file. struct file refers to one of these tables
 * through its f_op member. A member left unset is NULL.
 */
struct file_operations {
    struct module *owner;

    /* Positioning and data transfer. */
    loff_t (*llseek)(struct file *file, loff_t off, int whence);
    ssize_t (*read)(struct file *file, char __user *ubuf, size_t len,
                    loff_t *ppos);
    ssize_t (*write)(struct file *file, const char __user *ubuf, size_t len,
                     loff_t *ppos);
    ssize_t (*read_iter)(struct kiocb *kiocb, struct iov_iter *dst);
    ssize_t (*write_iter)(struct kiocb *kiocb, struct iov_iter *src);

    /* Directory iteration, polling and ioctl. */
    int (*iterate_shared)(struct file *file, struct dir_context *dctx);
    __poll_t (*poll)(struct file *file, struct poll_table_struct *table);
    long (*unlocked_ioctl)(struct file *file, unsigned int cmd,
                           unsigned long arg);
    long (*compat_ioctl)(struct file *file, unsigned int cmd,
                         unsigned long arg);

    /* Mapping and open/close lifecycle. */
    int (*mmap)(struct file *file, struct vm_area_struct *area);
    int (*open)(struct inode *inode, struct file *file);
    int (*flush)(struct file *file, fl_owner_t owner_id);
    int (*release)(struct inode *inode, struct file *file);

    /* Synchronisation and asynchronous notification. */
    int (*fsync)(struct file *file, loff_t start, loff_t end, int datasync);
    int (*fasync)(int fd, struct file *file, int on);

    /* Splice, leases, preallocation and in-kernel copy. */
    ssize_t (*splice_read)(struct file *in, loff_t *ppos,
                           struct pipe_inode_info *pipe, size_t len,
                           unsigned int flags);
    ssize_t (*splice_write)(struct pipe_inode_info *pipe, struct file *out,
                            loff_t *ppos, size_t len, unsigned int flags);
    int (*setlease)(struct file *file, long arg, struct file_lease **lease,
                    void **priv);
    long (*fallocate)(struct file *file, int mode, loff_t off, loff_t len);
    ssize_t (*copy_file_range)(struct file *file_in, loff_t pos_in,
                               struct file *file_out, loff_t pos_out,
                               size_t len, unsigned int flags);
};

3. 文件打开

3.1 do_sys_open()

c
// fs/open.c:1000
/*
 * do_sys_open - open @filename relative to @dfd and bind it to a descriptor.
 * @dfd:      directory descriptor the lookup starts from
 * @filename: user-space pointer to the path name
 * @flags:    open flags
 * @mode:     creation mode
 *
 * Returns the new file descriptor (>= 0) on success or a negative errno.
 *
 * do_filp_open() returns a struct file pointer, not a descriptor, so a
 * descriptor number is reserved first and the opened file is installed
 * into it only after the open succeeded.
 */
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
    struct open_flags op;
    struct open_how how;
    struct filename *tmp;
    struct file *f;
    int fd;

    /* Parse the open flags. */
    build_open_how(flags, mode, &how);
    if (build_open_flags(flags, mode, &op))
        return -EINVAL;

    /* Fetch the file name. */
    tmp = getname(filename);
    if (IS_ERR(tmp))
        return PTR_ERR(tmp);

    /* Reserve a descriptor number; on failure fd already holds the errno. */
    fd = get_unused_fd_flags(flags);
    if (fd < 0)
        goto out;

    /* Open the file. */
    f = do_filp_open(dfd, tmp, &op);
    if (IS_ERR(f)) {
        /* Give the reserved number back and report the open error. */
        put_unused_fd(fd);
        fd = PTR_ERR(f);
        goto out;
    }

    /* Notify about the opened file, then publish it in the fd table. */
    fsnotify_open(f);
    fd_install(fd, f);

out:
    putname(tmp);
    return fd;
}

3.2 do_filp_open()

c
// fs/open.c:900
struct file *do_filp_open(int dfd, struct filename *pathname,
                          const struct open_flags *op)
{
    struct nameidata nd;
    struct file *filp;

    // 路径查找
    filp = path_lookupat(dfd, pathname->name, op->lookup_flags, &nd);
    if (IS_ERR(filp))
        return filp;

    // 打开文件
    return finish_open(filp, nd.path.dentry, NULL);
}

3.3 finish_open()

c
// fs/open.c:600
struct file *finish_open(struct file *filp, struct dentry *dentry,
                         int (*open)(struct inode *, struct file *))
{
    struct inode *inode = dentry->d_inode;
    int error;

    if (IS_ERR(filp))
        return filp;

    // 设置 inode
    filp->f_inode = inode;
    filp->f_path.dentry = dentry;

    // 调用文件系统的 open
    if (inode->i_op->open)
        error = inode->i_op->open(inode, filp);
    else
        error = generic_file_open(inode, filp);

    if (error) {
        filp_close(filp, NULL);
        return ERR_PTR(error);
    }

    // 设置文件操作
    if (inode->i_fop)
        filp->f_op = inode->i_fop;

    return filp;
}

4. 文件读取

4.1 sys_read() / ksys_read()

c
// fs/read_write.c:400
/*
 * ksys_read - read up to @count bytes from descriptor @fd into @buf.
 *
 * Resolves @fd with fdget_pos(), reads at the file's f_pos through
 * vfs_read() and releases the descriptor with fdput_pos().
 *
 * Returns whatever vfs_read() returns, or -EBADF when @fd does not
 * resolve to a file.
 */
ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
{
    struct fd f = fdget_pos(fd);
    ssize_t nread;

    if (!f.file)
        return -EBADF;

    nread = vfs_read(f.file, buf, count, &f.file->f_pos);
    fdput_pos(f);
    return nread;
}

4.2 vfs_read()

c
// fs/read_write.c:150
/*
 * vfs_read - read from @file into the user buffer @buf.
 * @file:  file to read from
 * @buf:   user-space destination buffer
 * @count: number of bytes requested; clamped to MAX_RW_COUNT
 * @pos:   file position passed on to the read method
 *
 * Uses the file's ->read method when present, otherwise ->read_iter via
 * new_sync_read(). A positive result triggers fsnotify_access().
 *
 * Returns the method's result, -EBADF if the file is not open for
 * reading, or -EINVAL if the file has neither read method.
 */
ssize_t vfs_read(struct file *file, char __user *buf, size_t count,
                  loff_t *pos)
{
    ssize_t ret;

    if (!(file->f_mode & FMODE_READ))
        return -EBADF;
    if (count > MAX_RW_COUNT)
        count = MAX_RW_COUNT;

    if (file->f_op->read)
        ret = file->f_op->read(file, buf, count, pos);
    else if (file->f_op->read_iter)
        ret = new_sync_read(file, buf, count, pos);
    else
        ret = -EINVAL;

    if (ret > 0)
        fsnotify_access(file);

    return ret;
}

4.3 new_sync_read()

c
// fs/read_write.c:100
/*
 * new_sync_read - synchronous read through the file's ->read_iter method.
 * @filp: file to read from
 * @buf:  user-space destination buffer
 * @len:  number of bytes to read
 * @ppos: in: position to read from; out: position after the read
 *
 * Wraps the flat user buffer in a single-segment iov_iter and a sync
 * kiocb, calls ->read_iter, and waits if the method reports
 * -EIOCBQUEUED.
 *
 * Returns the result of ->read_iter (or of the wait).
 */
static ssize_t new_sync_read(struct file *filp, char __user *buf,
                             size_t len, loff_t *ppos)
{
    /* iov_iter_init() takes an iovec array, so describe @buf as one segment. */
    struct iovec iov = { .iov_base = buf, .iov_len = len };
    struct kiocb kiocb;
    struct iov_iter iter;
    ssize_t ret;

    init_sync_kiocb(&kiocb, filp);
    /* Start at the caller's position; it is written back below. */
    kiocb.ki_pos = *ppos;
    iov_iter_init(&iter, ITER_DEST, &iov, 1, len);

    ret = filp->f_op->read_iter(&kiocb, &iter);
    if (-EIOCBQUEUED == ret)
        ret = wait_on_sync_kiocb(&kiocb);

    *ppos = kiocb.ki_pos;
    return ret;
}

5. 文件写入

5.1 sys_write() / ksys_write()

c
// fs/read_write.c:450
/*
 * ksys_write - write up to @count bytes from @buf to descriptor @fd.
 *
 * Resolves @fd with fdget_pos(), writes at the file's f_pos through
 * vfs_write() and releases the descriptor with fdput_pos().
 *
 * Returns whatever vfs_write() returns, or -EBADF when @fd does not
 * resolve to a file.
 */
ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count)
{
    struct fd f = fdget_pos(fd);
    ssize_t nwritten;

    if (!f.file)
        return -EBADF;

    nwritten = vfs_write(f.file, buf, count, &f.file->f_pos);
    fdput_pos(f);
    return nwritten;
}

5.2 vfs_write()

c
// fs/read_write.c:200
/*
 * vfs_write - write the user buffer @buf to @file.
 * @file:  file to write to
 * @buf:   user-space source buffer
 * @count: number of bytes requested; clamped to MAX_RW_COUNT
 * @pos:   file position passed on to the write method
 *
 * Uses the file's ->write method when present, otherwise ->write_iter
 * via new_sync_write(). A positive result triggers fsnotify_modify().
 *
 * Returns the method's result, -EBADF if the file is not open for
 * writing, or -EINVAL if the file has neither write method.
 */
ssize_t vfs_write(struct file *file, const char __user *buf,
                   size_t count, loff_t *pos)
{
    ssize_t ret;

    if (!(file->f_mode & FMODE_WRITE))
        return -EBADF;
    if (count > MAX_RW_COUNT)
        count = MAX_RW_COUNT;

    if (file->f_op->write)
        ret = file->f_op->write(file, buf, count, pos);
    else if (file->f_op->write_iter)
        ret = new_sync_write(file, buf, count, pos);
    else
        ret = -EINVAL;

    if (ret > 0)
        fsnotify_modify(file);

    return ret;
}

6. 文件同步

6.1 sys_fsync() / ksys_fsync()

c
// fs/sync.c:100
/*
 * ksys_fsync - synchronise the file behind descriptor @fd.
 *
 * Resolves @fd with fdget(), calls vfs_fsync() with @datasync and drops
 * the descriptor with fdput().
 *
 * Returns the result of vfs_fsync(), or -EBADF when @fd does not resolve
 * to a file.
 */
int ksys_fsync(unsigned int fd, int datasync)
{
    struct fd f = fdget(fd);
    int err = -EBADF;

    if (f.file) {
        err = vfs_fsync(f.file, datasync);
        fdput(f);
    }

    return err;
}

6.2 vfs_fsync()

c
// fs/sync.c:60
int vfs_fsync(struct file *file, int datasync)
{
    struct inode *inode = file_inode(file);
    int err;

    err = filemap_write_and_wait(inode->i_mapping);
    if (err)
        return err;

    if (!file->f_op->fsync)
        return -EINVAL;

    return file->f_op->fsync(file, 0, LLONG_MAX, datasync);
}

7. 文件关闭

7.1 sys_close() / ksys_close()

c
// fs/open.c:1100
/*
 * ksys_close - close descriptor @fd of the current task.
 *
 * close_fd_get_file() detaches the file from the descriptor slot and
 * hands it back; filp_close() then flushes it and drops that reference.
 * No separate fdget() is taken, so there is no extra reference to
 * release afterwards.
 *
 * Returns the result of filp_close(), or -EBADF when @fd has no file.
 */
int ksys_close(unsigned int fd)
{
    struct file *file = close_fd_get_file(fd);

    if (!file)
        return -EBADF;

    return filp_close(file, current->files);
}

7.2 filp_close()

c
// fs/open.c:500
/*
 * filp_close - flush @filp and drop one reference to it.
 * @filp: file to close
 * @id:   owner passed to the ->flush method
 *
 * Returns the result of ->flush, or 0 when the file has no ->flush
 * method or its reference count is already zero.
 */
int filp_close(struct file *filp, fl_owner_t id)
{
    int retval = 0;

    if (!file_count(filp)) {
        printk(KERN_ERR "VFS: Close: file count is 0\n");
        /* Nothing left to drop; fput() here would underflow the count. */
        return 0;
    }

    if (filp->f_op->flush)
        retval = filp->f_op->flush(filp, id);

    /* Drop the reference; the flush result is still reported. */
    fput(filp);
    return retval;
}

7.3 fput()

c
// fs/file_table.c:100
/*
 * fput - drop one reference to @file.
 *
 * When the count reaches zero, the final release is queued as task work
 * (____fput) on the current task, provided we are not in interrupt
 * context and the task is not a kernel thread. Otherwise, or if the task
 * work cannot be queued, __fput() is called directly so the file is not
 * leaked.
 *
 * NOTE(review): mainline appears to defer this fallback to a work queue
 * rather than calling __fput() inline — confirm against fs/file_table.c.
 */
void fput(struct file *file)
{
    struct task_struct *task = current;

    if (!atomic_long_dec_and_test(&file->f_ref))
        return;

    if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
        init_task_work(&file->f_u.fu_rcuhead, ____fput);
        /* task_work_add() returns 0 on success; non-zero means not queued. */
        if (!task_work_add(task, &file->f_u.fu_rcuhead, TWA_RESUME))
            return;
    }

    __fput(file);
}

8. 文件偏移

8.1 vfs_llseek()

c
// fs/read_write.c:300
/*
 * vfs_llseek - reposition @file through its ->llseek method.
 *
 * Returns the method's result, or -ESPIPE when the file has no ->llseek
 * method.
 */
loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
{
    loff_t (*seek)(struct file *, loff_t, int) = file->f_op->llseek;

    return seek ? seek(file, offset, whence) : -ESPIPE;
}

8.2 generic_file_llseek()

c
// mm/filemap.c:300
/*
 * generic_file_llseek - compute and store a new file position.
 * @file:   file whose f_pos is updated
 * @offset: offset relative to the base selected by @whence
 * @whence: SEEK_SET (absolute), SEEK_CUR (relative to f_pos) or
 *          SEEK_END (relative to the inode's i_size)
 *
 * Returns the new position, or -EINVAL for an unknown @whence or a
 * negative resulting position (f_pos is left untouched in that case).
 */
loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
{
    loff_t target;

    if (whence == SEEK_SET)
        target = offset;
    else if (whence == SEEK_CUR)
        target = file->f_pos + offset;
    else if (whence == SEEK_END)
        target = file_inode(file)->i_size + offset;
    else
        return -EINVAL;

    if (target < 0)
        return -EINVAL;

    file->f_pos = target;
    return target;
}

9. 文件引用计数

9.1 fget()

c
// fs/file_table.c:60
/*
 * fget - look up descriptor @fd and take a reference on its file.
 *
 * The lookup and the reference grab both happen inside an RCU read-side
 * section. If the reference count is already zero the increment is
 * refused and the lookup is treated as a miss.
 *
 * Returns the file with one extra reference, or NULL.
 */
struct file *fget(unsigned int fd)
{
    struct file *found;

    rcu_read_lock();
    found = fcheck(fd);
    if (found && !atomic_long_inc_not_zero(&found->f_ref))
        found = NULL;
    rcu_read_unlock();

    return found;
}

9.2 fget_light()

c
// fs/file_table.c:80
/*
 * fget_light - look up descriptor @fd and report whether fput() is owed.
 * @fd:          descriptor to look up
 * @fput_needed: set to 1 when a reference was taken and the caller must
 *               drop it, 0 otherwise
 *
 * The lookup and the reference grab run inside an RCU read-side section,
 * matching fget().
 *
 * NOTE(review): this listing always takes a reference; mainline is
 * believed to skip it when the descriptor table is not shared — confirm.
 *
 * Returns the file, or NULL if @fd has no file or its reference count
 * has already dropped to zero.
 */
struct file *fget_light(unsigned int fd, int *fput_needed)
{
    struct file *file;

    *fput_needed = 0;

    rcu_read_lock();
    file = fcheck(fd);
    if (file) {
        if (atomic_long_inc_not_zero(&file->f_ref))
            *fput_needed = 1;
        else
            file = NULL;    /* lost the race with the final put */
    }
    rcu_read_unlock();

    return file;
}

10. 文件锁

10.1 struct file_lock

c
// include/linux/fs.h:1800
/*
 * struct file_lock - description of one lock on a file (as listed in
 * this document).
 *
 * fl_start and fl_end are loff_t, the same type used for file offsets
 * elsewhere (f_pos, the fsync range), so the locked range is not
 * truncated where unsigned long is narrower than loff_t.
 *
 * NOTE(review): the member set is a simplified sketch and should be
 * checked against the struct in the targeted kernel version.
 */
struct file_lock {
    struct file_lock *fl_next;
    struct list_head fl_list;
    struct fasync_struct *fl_fasync;
    unsigned int fl_flags;
    unsigned char fl_type;
    unsigned int fl_pid;
    loff_t fl_start;
    loff_t fl_end;
    void (*fl_lmops)(struct file_lock *);
    union {
        struct nlm_lockowner *nlm_owner;
        void *fl_owner;
    } fl_u;
    struct file *fl_file;
    loff_t fl_posix;
};

10.2 fcntl_setlk()(POSIX 记录锁)

c
// fs/locks.c:500
int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *lck)
{
    struct file *filp = fget(fd);
    struct inode *inode = file_inode(filp);
    struct file_lock *fl;
    int error;

    // 创建文件锁
    fl = locks_alloc_lock();
    if (!fl)
        return -ENOLCK;

    error = flock_to_posix_lock(filp, fl, lck);
    if (error)
        goto out;

    // 获取 inode 锁
    inode_lock(inode);

    // 应用锁
    error = vfs_setlk(inode, fl);

    inode_unlock(inode);

out:
    locks_free_lock(fl);
    return error;
}

11. 文件操作示例

11.1 ext4 文件操作

c
// fs/ext4/file.c
/*
 * File operations table for ext4 files as listed in this document.
 * Members not named here are left NULL by the designated initializer.
 */
const struct file_operations ext4_file_operations = {
    /* open / close */
    .open           = ext4_file_open,
    .release        = ext4_release_file,

    /* positioning and data transfer */
    .llseek         = ext4_llseek,
    .read_iter      = ext4_file_read_iter,
    .write_iter     = ext4_file_write_iter,
    .splice_read    = generic_file_splice_read,
    .splice_write   = iter_file_splice_write,

    /* mapping, sync and control */
    .mmap           = ext4_file_mmap,
    .fsync          = ext4_sync_file,
    .unlocked_ioctl = ext4_ioctl,
};

11.2 pipe 文件操作

c
// fs/pipe.c
const struct file_operations pipefifo_fops = {
    .open           = fifo_open,
    .llseek         = no_llseek,
    .read_iter      = pipe_read_iter,
    .write_iter     = pipe_write_iter,
    .poll           = pipe_poll,
    .unlocked_ioctl = pipe_ioctl,
    .release        = pipe_release,
    .fasync         = pipe_fasync,
};

基于 VitePress 构建