// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corporation, 2021
 *
 * Author: Mike Rapoport <[email protected]>
 */

#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/swap.h>
#include <linux/mount.h>
#include <linux/memfd.h>
#include <linux/bitops.h>
#include <linux/printk.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/pseudo_fs.h>
#include <linux/secretmem.h>
#include <linux/set_memory.h>
#include <linux/sched/signal.h>
#include <linux/refcount.h>

#include <uapi/linux/magic.h>

#include <asm/tlbflush.h>

#include "internal.h"

#undef pr_fmt
#define pr_fmt(fmt) "secretmem: " fmt

/*
 * Define mode and flag masks to allow validation of the system call
 * parameters.
 */
#define SECRETMEM_MODE_MASK	(0x0)
#define SECRETMEM_FLAGS_MASK	SECRETMEM_MODE_MASK

static bool secretmem_enable __ro_after_init;
module_param_named(enable, secretmem_enable, bool, 0400);
MODULE_PARM_DESC(secretmem_enable,
		 "Enable secretmem and memfd_secret(2) system call");

static refcount_t secretmem_users;

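/* true while at least one memfd_secret() file descriptor is still open */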
bool secretmem_active(void)
{
	return !!refcount_read(&secretmem_users);
}

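/*
 * Page fault handler for secretmem mappings: on first access, allocate a
 * zeroed page, remove it from the kernel direct map so it has no alias in
 * the linear mapping, and insert it into the file's page cache.
 */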
static vm_fault_t secretmem_fault(struct vm_fault *vmf)
{
	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	pgoff_t offset = vmf->pgoff;
	gfp_t gfp = vmf->gfp_mask;
	unsigned long addr;
	struct page *page;
	int err;

	if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
		return vmf_error(-EINVAL);

retry:
	page = find_lock_page(mapping, offset);
	if (!page) {
		page = alloc_page(gfp | __GFP_ZERO);
		if (!page)
			return VM_FAULT_OOM;

		err = set_direct_map_invalid_noflush(page);
		if (err) {
			put_page(page);
			return vmf_error(err);
		}

		__SetPageUptodate(page);
		err = add_to_page_cache_lru(page, mapping, offset, gfp);
		if (unlikely(err)) {
			put_page(page);
			/*
			 * If a split of large page was required, it
			 * already happened when we marked the page invalid
			 * which guarantees that this call won't fail
			 */
			set_direct_map_default_noflush(page);
			if (err == -EEXIST)
				goto retry;

			return vmf_error(err);
		}

		addr = (unsigned long)page_address(page);
		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
	}

	vmf->page = page;
	return VM_FAULT_LOCKED;
}

static const struct vm_operations_struct secretmem_vm_ops = {
	.fault = secretmem_fault,
};

static int secretmem_release(struct inode *inode, struct file *file)
{
	refcount_dec(&secretmem_users);
	return 0;
}

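/*
 * Only shared mappings are accepted, and the pages behave as if mlock()ed:
 * the mapping is checked against RLIMIT_MEMLOCK and excluded from core dumps.
 */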
static int secretmem_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long len = vma->vm_end - vma->vm_start;

	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
		return -EINVAL;

	if (mlock_future_check(vma->vm_mm, vma->vm_flags | VM_LOCKED, len))
		return -EAGAIN;

	vma->vm_flags |= VM_LOCKED | VM_DONTDUMP;
	vma->vm_ops = &secretmem_vm_ops;

	return 0;
}

bool vma_is_secretmem(struct vm_area_struct *vma)
{
	return vma->vm_ops == &secretmem_vm_ops;
}

static const struct file_operations secretmem_fops = {
	.release = secretmem_release,
	.mmap = secretmem_mmap,
};

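/*
 * Secretmem pages are removed from the direct map and must stay where they
 * are: refuse both isolation and migration so they are never moved.
 */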
static bool secretmem_isolate_page(struct page *page, isolate_mode_t mode)
{
	return false;
}

static int secretmem_migratepage(struct address_space *mapping,
				 struct page *newpage, struct page *page,
				 enum migrate_mode mode)
{
	return -EBUSY;
}

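/* restore the page's direct map entry and wipe its contents on release */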
static void secretmem_freepage(struct page *page)
{
	set_direct_map_default_noflush(page);
	clear_highpage(page);
}

const struct address_space_operations secretmem_aops = {
	.set_page_dirty = __set_page_dirty_no_writeback,
	.freepage = secretmem_freepage,
	.migratepage = secretmem_migratepage,
	.isolate_page = secretmem_isolate_page,
};

static struct vfsmount *secretmem_mnt;

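/*
 * Allocate an anonymous inode on the internal secretmem mount and wrap it in
 * a pseudo file.  Allocations avoid the movable zone (GFP_HIGHUSER) and the
 * mapping is marked unevictable, so its pages are not swapped out.
 */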
static struct file *secretmem_file_create(unsigned long flags)
{
	struct file *file = ERR_PTR(-ENOMEM);
	struct inode *inode;

	inode = alloc_anon_inode(secretmem_mnt->mnt_sb);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
				 O_RDWR, &secretmem_fops);
	if (IS_ERR(file))
		goto err_free_inode;

	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
	mapping_set_unevictable(inode->i_mapping);

	inode->i_mapping->a_ops = &secretmem_aops;

	/* pretend we are a normal file with zero size */
	inode->i_mode |= S_IFREG;
	inode->i_size = 0;

	return file;

err_free_inode:
	iput(inode);
	return file;
}

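/*
 * memfd_secret() returns a file descriptor backed by secretmem.  The memory
 * becomes usable once the caller sizes the file (e.g. with ftruncate()) and
 * mmap()s it; faulted-in pages are then dropped from the kernel direct map.
 * Illustrative userspace usage (sketch):
 *
 *	int fd = syscall(SYS_memfd_secret, 0);
 *	ftruncate(fd, len);
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 */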
SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
{
	struct file *file;
	int fd, err;

	/* make sure local flags do not conflict with global fcntl.h */
	BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);

	if (!secretmem_enable)
		return -ENOSYS;

	if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC))
		return -EINVAL;

	fd = get_unused_fd_flags(flags & O_CLOEXEC);
	if (fd < 0)
		return fd;

	file = secretmem_file_create(flags);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto err_put_fd;
	}

	file->f_flags |= O_LARGEFILE;

	fd_install(fd, file);
	refcount_inc(&secretmem_users);
	return fd;

err_put_fd:
	put_unused_fd(fd);
	return err;
}

static int secretmem_init_fs_context(struct fs_context *fc)
{
	return init_pseudo(fc, SECRETMEM_MAGIC) ? 0 : -ENOMEM;
}

static struct file_system_type secretmem_fs = {
	.name = "secretmem",
	.init_fs_context = secretmem_init_fs_context,
	.kill_sb = kill_anon_super,
};

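/*
 * Mount the internal secretmem pseudo filesystem at boot (fs_initcall) when
 * the "secretmem.enable" parameter is set; the mount is flagged MNT_NOEXEC so
 * secretmem mappings can never become executable.
 */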
static int secretmem_init(void)
{
	int ret = 0;

	if (!secretmem_enable)
		return ret;

	secretmem_mnt = kern_mount(&secretmem_fs);
	if (IS_ERR(secretmem_mnt))
		return PTR_ERR(secretmem_mnt);

	/* prevent secretmem mappings from ever getting PROT_EXEC */
	secretmem_mnt->mnt_flags |= MNT_NOEXEC;

	return ret;
}
fs_initcall(secretmem_init);