Skip to content

VFS Mount Namespace

1. 模块架构

1.1 功能概述

Mount Namespace 允许每个进程拥有独立的挂载点视图。这是容器技术的基石之一,实现了进程间的文件系统隔离。

1.2 关键源文件

文件作用
fs/namespace.cmount namespace 实现
fs/pnode.c挂载传播
include/linux/mount.hmount 结构定义
include/linux/nsproxy.hnamespace proxy

2. 核心数据结构

2.1 struct vfsmount

c
// include/linux/mount.h:58
struct vfsmount {
    struct dentry           *mnt_root;         // 挂载树根目录
    struct super_block      *mnt_sb;          // 超级块
    int                     mnt_flags;        // 挂载标志
    struct mnt_idmap        *mnt_idmap;       // UID/GID 映射
    const struct seq_operations *mnt_seq_ops; // 序列号操作
    struct path             mnt_ex_mountpoint;// 导出挂载点
    // ...
};

2.2 struct mount

c
// include/linux/mount.h:120
struct mount {
    struct hlist_node mnt_hash;
    struct mount      *mnt_parent;         // 父挂载
    struct dentry     *mnt_mountpoint;     // 挂载点 dentry
    struct vfsmount    mnt;
    struct list_head   mnt_mounts;         // 子挂载链表
    struct list_head   mnt_child;          // 兄弟链表
    int               mnt_id;              // 挂载 ID
    int               mnt_parent_id;       // 父挂载 ID
    unsigned long     mnt_flags;           // 挂载标志
    char              *mnt_devname;        // 设备名
    struct list_head   mnt_list;           // namespace 链表
    struct list_head   mnt_expire_list;    // 到期待删除链表
    struct list_head   mnt_share;          // 共享链表
    struct list_head   mnt_slave_list;     // 从属链表
    struct list_head   mnt_slave_entry;    // 从属条目
    struct mount       *mnt_master;        // 主挂载
    struct mnt_namespace *mnt_ns;         // 所属 namespace
    // ...
};

2.3 struct mnt_namespace

c
// include/linux/mount.h:180
struct mnt_namespace {
    atomic_t                count;
    struct mount           *root;
    struct list_head        list;           // 挂载链表
    struct user_namespace   *user_ns;
    u64                     seq;            // 序列号
    wait_queue_head_t       poll;
    u64                     event;
};

2.4 挂载传播类型

c
// include/linux/mount.h:30
#define CL_MAKE_SHARED               0x100000
#define CL_PRIVATE                   0x200000
#define CL_SLAVE                     0x400000
#define CL_SHARED_TO_SLAVE           0x800000
#define CL_UNBINDABLE               0x1000000
#define CL_MOVE                    (1 << 1)
#define CL_COPY                    (1 << 2)
#define CL_COPY_FLAGS               (CL_COPY | CL_MOVE)

3. 挂载传播

3.1 传播类型说明

类型说明
SHARED挂载点会在所有 namespace 间共享
PRIVATE默认类型,不传播
SLAVE从主挂载点接收挂载/卸载事件
UNBINDABLE不能被绑定

3.2 propagate_mount()

c
// fs/pnode.c:300
static void propagate_mount(struct mount *mnt)
{
    struct mount *parent = mnt->mnt_parent;
    struct mount *m;

    list_for_each_entry(m, &parent->mnt_share, mnt_share) {
        // 共享挂载:传播到所有共享成员
        clone_mnt(mnt, m, CL_MAKE_SHARED);
    }

    list_for_each_entry(m, &parent->mnt_slave_list, mnt_slave_entry) {
        // 从属挂载:传播到所有从属成员
        if (mnt->mnt_master == m->mnt_master)
            clone_mnt(mnt, m, CL_SLAVE);
    }
}

3.3 clone_mnt()

c
// fs/pnode.c:100
static struct mount *clone_mnt(struct mount *old, struct mount *parent,
                              int flag)
{
    struct mount *mnt;

    // 分配新的 mount
    mnt = alloc_mnt(parent);
    if (!mnt)
        return ERR_PTR(-ENOMEM);

    // 复制 vfsmount
    mnt->mnt.mnt_flags = old->mnt.mnt_flags;
    mnt->mnt.mnt_sb = old->mnt.mnt_sb;
    mnt->mnt.mnt_root = old->mnt.mnt_root;
    mnt->mnt.mnt_parent = parent;

    // 设置传播类型
    if (flag & CL_MAKE_SHARED) {
        list_add(&mnt->mnt_share, &old->mnt_share);
        mnt->mnt_share = &mnt->mnt_share;
    }

    return mnt;
}

4. 挂载操作

4.1 do_mount()

c
// fs/namespace.c:2000
long do_mount(const char *dev_name, const char *dir_name,
             const char *type_page, unsigned long flags, void *data_page)
{
    struct path path;
    int ret;

    // 解析路径
    ret = kern_path(dir_name, LOOKUP_FOLLOW, &path);
    if (ret)
        return ret;

    // 执行挂载
    ret = do_new_mount(&path, type_page, flags, dev_name, data_page);

    path_put(&path);
    return ret;
}

4.2 do_new_mount()

c
// fs/namespace.c:1900
int do_new_mount(struct path *path, char *type, int flags,
                char *name, void *data)
{
    struct vfsmount *mnt;

    // 分配新的 vfsmount
    mnt = vfs_kern_mount(type, flags, name, data);
    if (IS_ERR(mnt))
        return PTR_ERR(mnt);

    // 将 vfsmount 挂载到路径
    return do_add_mount(mnt, path, MNTflags, NULL);
}

4.3 vfs_kern_mount()

c
// fs/namespace.c:1500
struct vfsmount *vfs_kern_mount(struct file_system_type *type,
                                int flags, const char *name, void *data)
{
    struct mount *mnt;

    if (!type)
        return ERR_PTR(-ENODEV);

    // 分配 mount 结构
    mnt = alloc_vfsmnt(name);
    if (!mnt)
        return ERR_PTR(-ENOMEM);

    // 获取或创建 superblock
    if (type->read_super)
        mnt->mnt.mnt_sb = type->read_super(type, data, flags);
    else
        mnt->mnt.mnt_sb = get_sb(type, data);

    if (IS_ERR(mnt->mnt.mnt_sb)) {
        mnt_free_id(mnt);
        return ERR_CAST(mnt->mnt.mnt_sb);
    }

    // 初始化 vfsmount
    mnt->mnt.mnt_root = mnt->mnt.mnt_sb->s_root;
    mnt->mnt.mnt_mountpoint = mnt->mnt.mnt_root;

    return &mnt->mnt;
}

4.4 do_add_mount()

c
// fs/namespace.c:1800
static int do_add_mount(struct vfsmount *newmnt, struct path *path,
                        int mnt_flags, struct mount *parent)
{
    int err;

    // 验证挂载点
    err = -EINVAL;
    if (!check_mnt(parent))
        goto unlock;

    // 创建挂载
    err = attach_recursive_mnt(newmnt, path, parent);
    if (err)
        goto unlock;

    // 添加到 namespace
    list_add_tail(&newmnt->mnt_instance, &current->nsproxy->mnt_ns->list);

unlock:
    return err;
}

5. 卸载操作

5.1 sys_umount()

c
// fs/namespace.c:1600
long sys_umount(char __user *name, int flags)
{
    struct path path;
    int ret;

    ret = kern_path(name, LOOKUP_FOLLOW, &path);
    if (ret)
        return ret;

    ret = do_umount(&path.mnt, flags);

    path_put(&path);
    return ret;
}

5.2 do_umount()

c
// fs/namespace.c:1550
int do_umount(struct vfsmount *mnt, int flags)
{
    struct mount *p;
    int ret;

    // 检查权限
    if (!capable(CAP_SYS_ADMIN))
        return -EPERM;

    // 获取 mount
    p = real_mount(mnt);

    // 如果有子挂载,不能卸载
    if (!list_empty(&p->mnt_mounts))
        return -EBUSY;

    // 如果是 busy 的,不能卸载
    if (!namespace_locked(mnt))
        return -EBUSY;

    // 执行卸载
    ret = umount_tree(p, flags);
    return ret;
}

5.3 umount_tree()

c
// fs/namespace.c:1400
static int umount_tree(struct mount *mnt, int flags)
{
    LIST_HEAD(umount_list);
    struct mount *p;
    int ret = 0;

    // 收集要卸载的挂载
    collect_mounts(mnt, &umount_list);

    // 传播卸载到从属挂载
    propagate_umount(&umount_list);

    // 实际卸载
    list_for_each_entry(p, &umount_list, mnt_umount_list) {
        struct super_block *sb = p->mnt.mnt_sb;

        if (sb->s_op->umount_begin)
            sb->s_op->umount_begin(sb);

        shrink_dcache_for_umount(sb);
        evict_inodes(sb);

        if (sb->s_op->put_super)
            sb->s_op->put_super(sb);
    }

    return ret;
}

6. Namespace 复制

6.1 copy_namespace()

c
// fs/namespace.c:2500
struct mnt_namespace *copy_namespace(unsigned long flags,
                                    struct user_namespace *user_ns,
                                    struct mnt_namespace *old_ns)
{
    struct mnt_namespace *ns;
    struct mount *old;
    struct mount *new;
    int ret;

    // 分配新的 namespace
    ns = create_mnt_ns(old_ns->root);
    if (IS_ERR(ns))
        return ns;

    // 复制所有挂载
    list_for_each_entry(old, &old_ns->list, mnt_list) {
        if (flags & CLONE_NS) {
            // 完全共享
            new = old;
        } else if (old->mnt.mnt_flags & MNT_SHARED) {
            // 复制共享挂载
            new = copy_tree(old, old->mnt_parent);
        } else {
            // 复制私有挂载
            new = copy_tree(old, NULL);
        }

        if (IS_ERR(new)) {
            put_mnt_ns(ns);
            return ERR_CAST(new);
        }

        list_add(&new->mnt_instance, &ns->list);
    }

    return ns;
}

6.2 copy_tree()

c
// fs/pnode.c:200
static struct mount *copy_tree(struct mount *mnt, struct mount *parent)
{
    struct mount *child, *p;
    int ret;

    // 复制自己
    child = clone_mnt(mnt, parent, 0);
    if (IS_ERR(child))
        return child;

    // 递归复制子挂载
    list_for_each_entry(p, &mnt->mnt_mounts, mnt_child) {
        struct mount *copy = copy_tree(p, child);
        if (IS_ERR(copy))
            return copy;
    }

    return child;
}

7. pivot_root

7.1 sys_pivot_root()

c
// fs/namespace.c:2200
long sys_pivot_root(const char __user *new_root, const char __user *put_old)
{
    struct path new_path, old_path;
    struct mount *new_mnt;
    int ret;

    // 获取新根路径
    ret = kern_path(new_root, LOOKUP_FOLLOW, &new_path);
    if (ret)
        return ret;

    // 获取旧根路径
    ret = kern_path(put_old, LOOKUP_FOLLOW, &old_path);
    if (ret)
        goto out1;

    // 执行 pivot_root
    ret = do_pivot_root(&new_path, &old_path);

    path_put(&old_path);
out1:
    path_put(&new_path);
    return ret;
}

7.2 do_pivot_root()

c
// fs/namespace.c:2100
static int do_pivot_root(struct path *new_path, struct path *old_path)
{
    struct mount *old_mnt = real_mount(old_path->mnt);
    struct mount *new_mnt = real_mount(new_path->mnt);

    // 交换挂载点
    swap(&new_mnt->mnt.mnt_root, &old_mnt->mnt_root);

    // 更新父指针
    old_mnt->mnt.mnt_parent = new_mnt;
    old_mnt->mnt_mountpoint = new_path->dentry;

    return 0;
}

8. 挂载属性

8.1 MNT 标志

c
// include/linux/mount.h:50
#define MNT_NOSUID      0x01   // 不执行 setuid
#define MNT_NODEV       0x02   // 不允许访问设备
#define MNT_NOEXEC      0x04   // 不允许执行
#define MNT_NOATIME     0x08   // 不更新访问时间
#define MNT_NODIRATIME  0x10   // 不更新目录访问时间
#define MNT_RELATIME    0x20   // 相对 atime
#define MNT_READONLY    0x40   // 只读
#define MNT_SHRINKABLE  0x100  // 可收缩
#define MNT_WRITE_HOLD  0x200  // 写持有

9. 挂载IDR管理

9.1 mnt_alloc_id()

c
// fs/namespace.c:100
static int mnt_alloc_id(struct mount *mnt)
{
    int id;

    id = idr_alloc(&mnt_idr, mnt, 0, 0, GFP_KERNEL);
    if (id < 0)
        return id;

    mnt->mnt_id = id;
    return 0;
}

9.2 mnt_free_id()

c
// fs/namespace.c:120
static void mnt_free_id(struct mount *mnt)
{
    idr_remove(&mnt_idr, mnt->mnt_id);
}

10. 挂载传播流程

创建共享挂载:
mount --bind /mnt/some/path /mnt/other/path

1. do_add_mount()
   |
2. attach_recursive_mnt()
   |
3. propagate_mount()
      |
      +---> 对于每个共享组中的成员
            clone_mnt(mnt, member, CL_MAKE_SHARED)
卸载传播:
umount /mnt/some/path

1. umount_tree()
   |
2. propagate_umount()
      |
      +---> 对于每个从属挂载
            递归卸载

基于 VitePress 构建