Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
a134e7e
userfaultfd: opportunistic TLB-flush batching for present pages in MOVE
SENSEIIIII Aug 13, 2025
4758ee6
arm64/mm: Elide TLB flush in certain pte protection transitions
Oct 17, 2025
4a8cd89
mm/huge_memory: move more common code into insert_pmd()
davidhildenbrand Aug 11, 2025
98a9172
mm/huge_memory: move more common code into insert_pud()
davidhildenbrand Aug 11, 2025
40610fa
mm/huge_memory: support huge zero folio in vmf_insert_folio_pmd()
davidhildenbrand Aug 11, 2025
5addf72
fs/dax: use vmf_insert_folio_pmd() to insert the huge zero folio
davidhildenbrand Aug 11, 2025
b151934
mm/huge_memory: mark PMD mappings of the huge zero folio special
davidhildenbrand Aug 11, 2025
a27a6d2
powerpc/ptdump: rename "struct pgtable_level" to "struct ptdump_pg_le…
davidhildenbrand Aug 11, 2025
9982277
mm/rmap: do __folio_mod_stat() in __folio_add_rmap()
RichardWeiYang Aug 4, 2025
c2d372c
mm/rmap: convert "enum rmap_level" to "enum pgtable_level"
davidhildenbrand Aug 11, 2025
87b6f7c
mm/memory: convert print_bad_pte() to print_bad_page_map()
davidhildenbrand Aug 11, 2025
9e087c0
mm/memory: factor out common code from vm_normal_page_*()
davidhildenbrand Aug 11, 2025
3b527c6
mm: introduce and use vm_normal_page_pud()
davidhildenbrand Aug 11, 2025
03dffc4
mm: rename vm_ops->find_special_page() to vm_ops->find_normal_page()
davidhildenbrand Aug 11, 2025
f805672
mm/hugetlb: fix hugetlb_pmd_shared()
Dec 23, 2025
027798f
mm/hugetlb: fix two comments related to huge_pmd_unshare()
Dec 23, 2025
f20d122
mm/rmap: fix two comments related to huge_pmd_unshare()
Dec 23, 2025
c012f56
mm/hugetlb: fix excessive IPI broadcasts when unsharing PMD tables us…
Dec 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions arch/arm64/include/asm/tlbflush.h
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,33 @@ static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *b
{
__flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, true, 3);
}

/*
 * Decide whether replacing the raw page-table entry value @oldval with
 * @newval calls for a TLB flush.
 *
 * Returns false when @oldval did not have PTE_VALID set, or when the two
 * values differ only in bits covered by PTE_SWBITS_MASK; true otherwise.
 */
static inline bool __pte_flags_need_flush(ptdesc_t oldval, ptdesc_t newval)
{
	/* An entry that was not valid before needs no flush when it changes. */
	if (!(oldval & PTE_VALID))
		return false;

	/* Changes confined to the software bits need no flush either. */
	return (oldval ^ newval) & ~PTE_SWBITS_MASK;
}

/*
 * PTE-level front end for __pte_flags_need_flush(): compares the raw
 * values of @oldpte and @newpte.
 */
static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
{
	ptdesc_t before = pte_val(oldpte);
	ptdesc_t after = pte_val(newpte);

	return __pte_flags_need_flush(before, after);
}
/* NOTE(review): presumably lets generic code detect this arch override - confirm. */
#define pte_needs_flush pte_needs_flush

/*
 * PMD-level front end for __pte_flags_need_flush(): compares the raw
 * values of @oldpmd and @newpmd.
 */
static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
{
	ptdesc_t before = pmd_val(oldpmd);
	ptdesc_t after = pmd_val(newpmd);

	return __pte_flags_need_flush(before, after);
}
/* NOTE(review): presumably lets generic code detect this arch override - confirm. */
#define huge_pmd_needs_flush huge_pmd_needs_flush

#endif

#endif
2 changes: 1 addition & 1 deletion arch/powerpc/mm/ptdump/8xx.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ static const struct flag_info flag_array[] = {
}
};

struct pgtable_level pg_level[5] = {
struct ptdump_pg_level pg_level[5] = {
{ /* pgd */
.flag = flag_array,
.num = ARRAY_SIZE(flag_array),
Expand Down
2 changes: 1 addition & 1 deletion arch/powerpc/mm/ptdump/book3s64.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ static const struct flag_info flag_array[] = {
}
};

struct pgtable_level pg_level[5] = {
struct ptdump_pg_level pg_level[5] = {
{ /* pgd */
.flag = flag_array,
.num = ARRAY_SIZE(flag_array),
Expand Down
4 changes: 2 additions & 2 deletions arch/powerpc/mm/ptdump/ptdump.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ struct flag_info {
int shift;
};

struct pgtable_level {
struct ptdump_pg_level {
const struct flag_info *flag;
size_t num;
u64 mask;
};

extern struct pgtable_level pg_level[5];
extern struct ptdump_pg_level pg_level[5];

void pt_dump_size(struct seq_file *m, unsigned long delta);
2 changes: 1 addition & 1 deletion arch/powerpc/mm/ptdump/shared.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ static const struct flag_info flag_array[] = {
}
};

struct pgtable_level pg_level[5] = {
struct ptdump_pg_level pg_level[5] = {
{ /* pgd */
.flag = flag_array,
.num = ARRAY_SIZE(flag_array),
Expand Down
1 change: 1 addition & 0 deletions drivers/xen/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ config XEN_GNTDEV
depends on XEN
default m
select MMU_NOTIFIER
select FIND_NORMAL_PAGE
help
Allows userspace processes to use grants.

Expand Down
5 changes: 3 additions & 2 deletions drivers/xen/gntdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
BUG_ON(pgnr >= map->count);
pte_maddr = arbitrary_virt_to_machine(pte).maddr;

/* Note: this will perform a pte_mkspecial() through the hypercall. */
gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
map->grants[pgnr].ref,
map->grants[pgnr].domid);
Expand Down Expand Up @@ -528,7 +529,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
gntdev_put_map(priv, map);
}

static struct page *gntdev_vma_find_special_page(struct vm_area_struct *vma,
static struct page *gntdev_vma_find_normal_page(struct vm_area_struct *vma,
unsigned long addr)
{
struct gntdev_grant_map *map = vma->vm_private_data;
Expand All @@ -539,7 +540,7 @@ static struct page *gntdev_vma_find_special_page(struct vm_area_struct *vma,
/*
 * VMA operations for grant mappings.
 *
 * Only the renamed ->find_normal_page() hook is wired up; the former
 * ->find_special_page() member and its gntdev_vma_find_special_page()
 * handler were renamed, so the stale initializer line is dropped.
 * The hook is only present with CONFIG_FIND_NORMAL_PAGE, which
 * XEN_GNTDEV selects.
 */
static const struct vm_operations_struct gntdev_vmops = {
	.open = gntdev_vma_open,
	.close = gntdev_vma_close,
	.find_normal_page = gntdev_vma_find_normal_page,
};

/* ------------------------------------------------------------------ */
Expand Down
47 changes: 10 additions & 37 deletions fs/dax.c
Original file line number Diff line number Diff line change
Expand Up @@ -1375,51 +1375,24 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
const struct iomap_iter *iter, void **entry)
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
unsigned long pmd_addr = vmf->address & PMD_MASK;
struct vm_area_struct *vma = vmf->vma;
struct inode *inode = mapping->host;
pgtable_t pgtable = NULL;
struct folio *zero_folio;
spinlock_t *ptl;
pmd_t pmd_entry;
unsigned long pfn;
vm_fault_t ret;

zero_folio = mm_get_huge_zero_folio(vmf->vma->vm_mm);

if (unlikely(!zero_folio))
goto fallback;

pfn = page_to_pfn(&zero_folio->page);
*entry = dax_insert_entry(xas, vmf, iter, *entry, pfn,
DAX_PMD | DAX_ZERO_PAGE);

if (arch_needs_pgtable_deposit()) {
pgtable = pte_alloc_one(vma->vm_mm);
if (!pgtable)
return VM_FAULT_OOM;
}

ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
if (!pmd_none(*(vmf->pmd))) {
spin_unlock(ptl);
goto fallback;
if (unlikely(!zero_folio)) {
trace_dax_pmd_load_hole_fallback(inode, vmf, zero_folio, *entry);
return VM_FAULT_FALLBACK;
}

if (pgtable) {
pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
mm_inc_nr_ptes(vma->vm_mm);
}
pmd_entry = folio_mk_pmd(zero_folio, vmf->vma->vm_page_prot);
set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
spin_unlock(ptl);
trace_dax_pmd_load_hole(inode, vmf, zero_folio, *entry);
return VM_FAULT_NOPAGE;
*entry = dax_insert_entry(xas, vmf, iter, *entry, folio_pfn(zero_folio),
DAX_PMD | DAX_ZERO_PAGE);

fallback:
if (pgtable)
pte_free(vma->vm_mm, pgtable);
trace_dax_pmd_load_hole_fallback(inode, vmf, zero_folio, *entry);
return VM_FAULT_FALLBACK;
ret = vmf_insert_folio_pmd(vmf, zero_folio, false);
if (ret == VM_FAULT_NOPAGE)
trace_dax_pmd_load_hole(inode, vmf, zero_folio, *entry);
return ret;
}
#else
static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
Expand Down
77 changes: 75 additions & 2 deletions include/asm-generic/tlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@
*
* The mmu_gather API consists of:
*
* - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_finish_mmu()
* - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_gather_mmu_vma() /
* tlb_finish_mmu()
*
* start and finish a mmu_gather
*
Expand Down Expand Up @@ -364,6 +365,20 @@ struct mmu_gather {
unsigned int vma_huge : 1;
unsigned int vma_pfn : 1;

/*
* Did we unshare (unmap) any shared page tables? For now only
* used for hugetlb PMD table sharing.
*/
unsigned int unshared_tables : 1;

/*
* Did we unshare any page tables such that they are now exclusive
* and could get reused+modified by the new owner? When setting this
* flag, "unshared_tables" will be set as well. For now only used
* for hugetlb PMD table sharing.
*/
unsigned int fully_unshared_tables : 1;

unsigned int batch_count;

#ifndef CONFIG_MMU_GATHER_NO_GATHER
Expand Down Expand Up @@ -400,6 +415,7 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb)
tlb->cleared_pmds = 0;
tlb->cleared_puds = 0;
tlb->cleared_p4ds = 0;
tlb->unshared_tables = 0;
/*
* Do not reset mmu_gather::vma_* fields here, we do not
* call into tlb_start_vma() again to set them if there is an
Expand Down Expand Up @@ -484,7 +500,7 @@ static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
* these bits.
*/
if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds ||
tlb->cleared_puds || tlb->cleared_p4ds))
tlb->cleared_puds || tlb->cleared_p4ds || tlb->unshared_tables))
return;

tlb_flush(tlb);
Expand Down Expand Up @@ -773,6 +789,63 @@ static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
}
#endif

#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
/*
 * Drop one sharer from the shared PMD table described by @pt and record
 * in @tlb that a flush is needed for the PUD-sized range containing @addr.
 *
 * Sets tlb->unshared_tables unconditionally, and tlb->fully_unshared_tables
 * when the table is no longer shared after the decrement.
 */
static inline void tlb_unshare_pmd_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt,
		unsigned long addr)
{
	unsigned long pud_start = addr & PUD_MASK;

	/*
	 * It is up to the caller to guarantee that nobody can concurrently
	 * unshare and then exclusively reuse this table before
	 * tlb_flush_unshared_tables() has been called.
	 */
	VM_WARN_ON_ONCE(!ptdesc_pmd_is_shared(pt));
	ptdesc_pmd_pts_dec(pt);

	/* Clearing a PUD pointing at a PMD table with PMD leaves. */
	tlb_flush_pmd_range(tlb, pud_start, PUD_SIZE);

	tlb->unshared_tables = true;

	/* Still shared by someone else: this was only a partial unshare. */
	if (ptdesc_pmd_is_shared(pt))
		return;

	/* The page table is now exclusively owned: it was fully unshared. */
	tlb->fully_unshared_tables = true;
}

/*
 * Perform the flushing that must happen after page tables were unshared
 * via @tlb and before the caller drops the locks that prevent their reuse.
 */
static inline void tlb_flush_unshared_tables(struct mmu_gather *tlb)
{
	/*
	 * Once the caller drops its locks, previously-shared tables may be
	 * reused, modified, and even end up outside of hugetlb context.
	 * Every page table walker (incl. TLB, GUP-fast) therefore has to
	 * learn about the change first.
	 *
	 * The TLB of the unsharer must be flushed now even when the PMD
	 * table was not fully unshared.
	 */
	if (tlb->unshared_tables)
		tlb_flush_mmu_tlbonly(tlb);

	/*
	 * Likewise, concurrent GUP-fast must not keep walking
	 * previously-shared page tables that are being modified+reused
	 * elsewhere, so broadcast an IPI to wait for any such walker.
	 *
	 * This is only done when we were the last sharer of a page table,
	 * because the IPI reaches all CPUs and thus any GUP-fast.
	 *
	 * On configs where tlb_remove_table_sync_one() is a NOP, the
	 * expectation is that tlb_flush_mmu_tlbonly() has already issued
	 * the required IPIs for us.
	 */
	if (!tlb->fully_unshared_tables)
		return;

	tlb_remove_table_sync_one();
	tlb->fully_unshared_tables = false;
}
#endif /* CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING */

#endif /* CONFIG_MMU */

#endif /* _ASM_GENERIC__TLB_H */
17 changes: 11 additions & 6 deletions include/linux/hugetlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz);
unsigned long hugetlb_mask_last_page(struct hstate *h);
int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep);
int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep);
void huge_pmd_unshare_flush(struct mmu_gather *tlb, struct vm_area_struct *vma);
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end);

Expand Down Expand Up @@ -302,13 +303,17 @@ static inline struct address_space *hugetlb_folio_mapping_lock_write(
return NULL;
}

/*
 * Stub variant of huge_pmd_unshare(): nothing is ever unshared, so it
 * always returns 0.
 *
 * Takes the mmu_gather (@tlb) rather than the mm, in line with the
 * mmu_gather-based huge_pmd_unshare() prototype and the
 * huge_pmd_unshare_flush() stub in this header.
 */
static inline int huge_pmd_unshare(struct mmu_gather *tlb,
		struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
	return 0;
}

/* Stub variant of huge_pmd_unshare_flush(): intentionally does nothing. */
static inline void huge_pmd_unshare_flush(struct mmu_gather *tlb,
					  struct vm_area_struct *vma)
{
	/* Nothing to flush. */
}

static inline void adjust_range_if_pmd_sharing_possible(
struct vm_area_struct *vma,
unsigned long *start, unsigned long *end)
Expand Down Expand Up @@ -1318,7 +1323,7 @@ static inline __init void hugetlb_cma_reserve(int order)
#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
/*
 * Report whether the page table that holds @pte is currently shared.
 *
 * The answer comes from the ptdesc share accounting, i.e. the same
 * ptdesc_pmd_is_shared() state that the unsharing code updates, instead
 * of the page reference count. The earlier page_count() test made the
 * ptdesc-based check unreachable.
 */
static inline bool hugetlb_pmd_shared(pte_t *pte)
{
	return ptdesc_pmd_is_shared(virt_to_ptdesc(pte));
}
#else
static inline bool hugetlb_pmd_shared(pte_t *pte)
Expand Down
20 changes: 15 additions & 5 deletions include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -648,13 +648,21 @@ struct vm_operations_struct {
struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
unsigned long addr, pgoff_t *ilx);
#endif
#ifdef CONFIG_FIND_NORMAL_PAGE
/*
* Called by vm_normal_page() for special PTEs to find the
* page for @addr. This is useful if the default behavior
* (using pte_page()) would not find the correct page.
* Called by vm_normal_page() for special PTEs in @vma at @addr. This
* allows for returning a "normal" page from vm_normal_page() even
* though the PTE indicates that the "struct page" either does not exist
* or should not be touched: "special".
*
* Do not add new users: this really only works when a "normal" page
* was mapped, but then the PTE got changed to something weird (+
* marked special) that would not make pte_pfn() identify the originally
* inserted page.
*/
struct page *(*find_special_page)(struct vm_area_struct *vma,
unsigned long addr);
struct page *(*find_normal_page)(struct vm_area_struct *vma,
unsigned long addr);
#endif /* CONFIG_FIND_NORMAL_PAGE */
};

#ifdef CONFIG_NUMA_BALANCING
Expand Down Expand Up @@ -2351,6 +2359,8 @@ struct folio *vm_normal_folio_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t pmd);
struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t pmd);
struct page *vm_normal_page_pud(struct vm_area_struct *vma, unsigned long addr,
pud_t pud);

void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
unsigned long size);
Expand Down
1 change: 1 addition & 0 deletions include/linux/mm_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -1444,6 +1444,7 @@ static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumas
struct mmu_gather;
extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
void tlb_gather_mmu_vma(struct mmu_gather *tlb, struct vm_area_struct *vma);
extern void tlb_finish_mmu(struct mmu_gather *tlb);

struct vm_fault;
Expand Down
Loading