mirror of
https://github.com/torvalds/linux.git
synced 2026-01-24 23:16:46 +00:00
mm: introduce copy-on-fork VMAs and make VM_MAYBE_GUARD one
Gather all the VMA flags whose presence implies that page tables must be copied on fork into a single bitmap - VM_COPY_ON_FORK - and use this rather than specifying individual flags in vma_needs_copy(). We also add VM_MAYBE_GUARD to this list, as it being set on a VMA implies that there may be metadata contained in the page tables (that is - guard markers) which otherwise will not and cannot be propagated upon fork. This was already being done manually previously in vma_needs_copy(), but this makes it very explicit, alongside VM_PFNMAP, VM_MIXEDMAP and VM_UFFD_WP all of which imply the same. Note that VM_STICKY flags ought generally to be marked VM_COPY_ON_FORK too - because equally a flag being VM_STICKY indicates that the VMA contains metadata that is not propagated by being faulted in - i.e. that the VMA metadata does not fully describe the VMA alone, and thus we must propagate whatever metadata there is on a fork. However, for maximum flexibility, we do not make this necessarily the case here. 
Link: https://lkml.kernel.org/r/5d41b24e7bc622cda0af92b6d558d7f4c0d1bc8c.1763460113.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Reviewed-by: Pedro Falcato <pfalcato@suse.de> Reviewed-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: David Hildenbrand (Red Hat) <david@kernel.org> Cc: Andrei Vagin <avagin@gmail.com> Cc: Baolin Wang <baolin.wang@linux.alibaba.com> Cc: Barry Song <baohua@kernel.org> Cc: Dev Jain <dev.jain@arm.com> Cc: Jann Horn <jannh@google.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Lance Yang <lance.yang@linux.dev> Cc: Liam Howlett <liam.howlett@oracle.com> Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Mike Rapoport <rppt@kernel.org> Cc: Nico Pache <npache@redhat.com> Cc: Ryan Roberts <ryan.roberts@arm.com> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Zi Yan <ziy@nvidia.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
committed by
Andrew Morton
parent
64212ba02e
commit
ab04b530e7
@@ -555,6 +555,32 @@ extern unsigned int kobjsize(const void *objp);
|
||||
*/
|
||||
#define VM_IGNORE_MERGE (VM_SOFTDIRTY | VM_STICKY)
|
||||
|
||||
/*
|
||||
* Flags which should result in page tables being copied on fork. These are
|
||||
* flags which indicate that the VMA maps page tables which cannot be
|
||||
* reconstituted upon page fault, so necessitate page table copying upon fork.
|
||||
*
|
||||
* VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be
|
||||
* reasonably reconstructed on page fault.
|
||||
*
|
||||
* VM_UFFD_WP - Encodes metadata about an installed uffd
|
||||
* write protect handler, which cannot be
|
||||
* reconstructed on page fault.
|
||||
*
|
||||
* We always copy pgtables when dst_vma has uffd-wp
|
||||
* enabled even if it's file-backed
|
||||
* (e.g. shmem). Because when uffd-wp is enabled,
|
||||
* pgtable contains uffd-wp protection information,
|
||||
* that's something we can't retrieve from page cache,
|
||||
* and skipping the copy would lose that info.
|
||||
*
|
||||
* VM_MAYBE_GUARD - Could contain page guard region markers which
|
||||
* by design are a property of the page tables
|
||||
* only and thus cannot be reconstructed on page
|
||||
* fault.
|
||||
*/
|
||||
#define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD)
|
||||
|
||||
/*
|
||||
* mapping from the currently active vm_flags protection bits (the
|
||||
* low four bits) to a page protection mask.
|
||||
|
||||
18
mm/memory.c
18
mm/memory.c
@@ -1463,25 +1463,15 @@ copy_p4d_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
|
||||
static bool
|
||||
vma_needs_copy(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
|
||||
{
|
||||
if (src_vma->vm_flags & VM_COPY_ON_FORK)
|
||||
return true;
|
||||
/*
|
||||
* Always copy pgtables when dst_vma has uffd-wp enabled even if it's
|
||||
* file-backed (e.g. shmem). Because when uffd-wp is enabled, pgtable
|
||||
* contains uffd-wp protection information, that's something we can't
|
||||
* retrieve from page cache, and skip copying will lose those info.
|
||||
* The presence of an anon_vma indicates an anonymous VMA has page
|
||||
* tables which naturally cannot be reconstituted on page fault.
|
||||
*/
|
||||
if (userfaultfd_wp(dst_vma))
|
||||
return true;
|
||||
|
||||
if (src_vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
|
||||
return true;
|
||||
|
||||
if (src_vma->anon_vma)
|
||||
return true;
|
||||
|
||||
/* Guard regions have modified page tables that require copying. */
|
||||
if (src_vma->vm_flags & VM_MAYBE_GUARD)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Don't copy ptes where a page fault will fill them correctly. Fork
|
||||
* becomes much lighter when there are big shared or private readonly
|
||||
|
||||
@@ -145,6 +145,32 @@ extern unsigned long dac_mmap_min_addr;
|
||||
*/
|
||||
#define VM_IGNORE_MERGE (VM_SOFTDIRTY | VM_STICKY)
|
||||
|
||||
/*
|
||||
* Flags which should result in page tables being copied on fork. These are
|
||||
* flags which indicate that the VMA maps page tables which cannot be
|
||||
* reconstituted upon page fault, so necessitate page table copying upon fork.
|
||||
*
|
||||
* VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be
|
||||
* reasonably reconstructed on page fault.
|
||||
*
|
||||
* VM_UFFD_WP - Encodes metadata about an installed uffd
|
||||
* write protect handler, which cannot be
|
||||
* reconstructed on page fault.
|
||||
*
|
||||
* We always copy pgtables when dst_vma has uffd-wp
|
||||
* enabled even if it's file-backed
|
||||
* (e.g. shmem). Because when uffd-wp is enabled,
|
||||
* pgtable contains uffd-wp protection information,
|
||||
* that's something we can't retrieve from page cache,
|
||||
* and skipping the copy would lose that info.
|
||||
*
|
||||
* VM_MAYBE_GUARD - Could contain page guard region markers which
|
||||
* by design are a property of the page tables
|
||||
* only and thus cannot be reconstructed on page
|
||||
* fault.
|
||||
*/
|
||||
#define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD)
|
||||
|
||||
#define FIRST_USER_ADDRESS 0UL
|
||||
#define USER_PGTABLES_CEILING 0UL
|
||||
|
||||
|
||||
Reference in New Issue
Block a user