Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Documentation/sysctl/vm.txt
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,8 @@ To free reclaimable slab objects (includes dentries and inodes):
echo 2 > /proc/sys/vm/drop_caches
To free slab objects and pagecache:
echo 3 > /proc/sys/vm/drop_caches
To scrape LRU pages from offlined memcgs:
echo 8 > /proc/sys/vm/drop_caches

This is a non-destructive operation and will not free any dirty objects.
To increase the number of objects freed by this operation, the user may run
Expand All @@ -249,6 +251,14 @@ used:
These are informational only. They do not mean that anything is wrong
with your system. To disable them, echo 4 (bit 3) into drop_caches.

Note that for offlined memcgs, kmem (slab) is reparented so that it
does not hold references which would in turn prevent those memcgs
from being released. However, reparenting does not apply to LRU pages
(pagecache), so those pages need to be scraped separately for
offlined memcgs. "echo 8" was introduced for this purpose. Unlike
"echo 1", it does not zap the pagecache of online memcgs and
therefore has no performance impact on them.

==============================================================

extfrag_threshold
Expand Down
20 changes: 20 additions & 0 deletions fs/drop_caches.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include <linux/writeback.h>
#include <linux/sysctl.h>
#include <linux/gfp.h>
#include <linux/memcontrol.h>
#include <linux/backing-dev.h>
#include "internal.h"

/* A global variable is a bit ugly, but it keeps the code simple */
Expand Down Expand Up @@ -66,6 +68,24 @@ int drop_caches_sysctl_handler(struct ctl_table *table, int write,
drop_slab();
count_vm_event(DROP_SLAB);
}
if (sysctl_drop_caches & 8) {
	/*
	 * Bit 3 (value 8): scrape LRU pages from offlined memcgs.
	 *
	 * Keep calling scrape_offlined_memcgs() until the estimated
	 * number of pages has been reclaimed.  Only passes that reclaim
	 * nothing consume a retry; each of them first waits HZ / 10.
	 */
	unsigned long remaining = offlined_memcg_nr_pages();
	int retries_left = MEM_CGROUP_RECLAIM_RETRIES;

	while (retries_left) {
		unsigned long reclaimed = scrape_offlined_memcgs(remaining);

		/* Goal reached (or exceeded) by this pass. */
		if (reclaimed >= remaining)
			break;

		remaining -= reclaimed;

		/* Forward progress does not use up a retry. */
		if (reclaimed)
			continue;

		congestion_wait(BLK_RW_ASYNC, HZ / 10);
		retries_left--;
	}
}
if (!stfu) {
pr_info("%s (%d): drop_caches: %d\n",
current->comm, task_pid_nr(current),
Expand Down
22 changes: 22 additions & 0 deletions include/linux/memcontrol.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ struct mem_cgroup_reclaim_cookie {
unsigned int generation;
};

#define MEM_CGROUP_RECLAIM_RETRIES 5

#ifdef CONFIG_MEMCG

#define MEM_CGROUP_ID_SHIFT 16
Expand Down Expand Up @@ -1150,6 +1152,15 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
unsigned long *total_scanned);

/* Defined in mm/memcontrol.c. */
extern atomic_t nr_offlined_memcgs;

/*
 * offlined_memcg_nr_pages - page budget for scraping offlined memcgs
 *
 * Returns the current value of nr_offlined_memcgs multiplied by
 * MEMCG_CHARGE_BATCH, i.e. MEMCG_CHARGE_BATCH pages are budgeted for
 * every memcg currently counted as offlined.
 */
static inline unsigned long offlined_memcg_nr_pages(void)
{
	/*
	 * atomic_read() yields an int; widen it first so the product is
	 * computed in unsigned long rather than being truncated to the
	 * width of int/unsigned int before the implicit conversion.
	 */
	return (unsigned long)atomic_read(&nr_offlined_memcgs) *
	       MEMCG_CHARGE_BATCH;
}

/*
 * Implemented in mm/vmscan.c.  Runs a reclaim pass that only targets
 * offlined memcgs, aiming for @nr_to_reclaim pages, and returns the
 * value reported by that pass.
 */
unsigned long scrape_offlined_memcgs(unsigned long nr_to_reclaim);

#else /* CONFIG_MEMCG */

#define MEM_CGROUP_ID_SHIFT 0
Expand Down Expand Up @@ -1526,6 +1537,17 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
{
return 0;
}

/* !CONFIG_MEMCG stub: always reports a budget of zero pages. */
static inline unsigned long offlined_memcg_nr_pages(void)
{
	return 0UL;
}

/*
 * !CONFIG_MEMCG stub: nothing is reclaimed, regardless of
 * @nr_to_reclaim.
 */
static inline unsigned long scrape_offlined_memcgs(unsigned long nr_to_reclaim)
{
	return 0UL;
}

#endif /* CONFIG_MEMCG */

static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx)
Expand Down
3 changes: 2 additions & 1 deletion kernel/sysctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ static int sixty = 60;
static int __maybe_unused neg_one = -1;
static int __maybe_unused two = 2;
static int __maybe_unused four = 4;
static int __maybe_unused eight = 8;
static unsigned long zero_ul;
static unsigned long one_ul = 1;
static unsigned long long_max = LONG_MAX;
Expand Down Expand Up @@ -1483,7 +1484,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = drop_caches_sysctl_handler,
.extra1 = SYSCTL_ONE,
.extra2 = &four,
.extra2 = &eight,
},
#ifdef CONFIG_COMPACTION
{
Expand Down
6 changes: 6 additions & 0 deletions mm/memcontrol.c
Original file line number Diff line number Diff line change
Expand Up @@ -5594,6 +5594,8 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
return 0;
}

/*
 * Count of memcgs that have gone through mem_cgroup_css_offline() but
 * not yet through mem_cgroup_css_released(): incremented in the former,
 * decremented in the latter.
 */
atomic_t nr_offlined_memcgs = ATOMIC_INIT(0);

static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
Expand Down Expand Up @@ -5621,13 +5623,17 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
drain_all_stock(memcg);

memcg_percpu_stats_disable(memcg);

atomic_inc(&nr_offlined_memcgs);
}

/*
 * Invalidate the reclaim iterators of the memcg behind @css and drop it
 * from the count of offlined memcgs.
 */
static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
{
	invalidate_reclaim_iterators(mem_cgroup_from_css(css));

	/* Pairs with the atomic_inc() in mem_cgroup_css_offline(). */
	atomic_dec(&nr_offlined_memcgs);
}

static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
Expand Down
34 changes: 34 additions & 0 deletions mm/vmscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ struct scan_control {
/* The file pages on the current node are dangerously low */
unsigned int file_is_tiny:1;

/* Scrape LRU pages from offlined memcgs */
unsigned int scrape_offlined_memcgs:1;

/* Allocation order */
s8 order;

Expand Down Expand Up @@ -3034,6 +3037,9 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
memcg_memory_event(memcg, MEMCG_LOW);
}

if (sc->scrape_offlined_memcgs && mem_cgroup_online(memcg))
continue;

reclaimed = sc->nr_reclaimed;
scanned = sc->nr_scanned;

Expand Down Expand Up @@ -4736,3 +4742,31 @@ void check_move_unevictable_pages(struct pagevec *pvec)
}
}
EXPORT_SYMBOL_GPL(check_move_unevictable_pages);

#ifdef CONFIG_MEMCG
/*
 * scrape_offlined_memcgs - reclaim LRU pages from offlined memcgs
 * @nr_to_reclaim: number of pages to aim for; raised to SWAP_CLUSTER_MAX
 *                 when smaller.
 *
 * Runs do_try_to_free_pages() on the current NUMA node's GFP_KERNEL
 * zonelist with root_mem_cgroup as the reclaim target and the
 * scrape_offlined_memcgs scan-control bit set, which makes
 * shrink_node_memcgs() skip every memcg that is still online.
 *
 * Returns whatever do_try_to_free_pages() returns.
 */
unsigned long scrape_offlined_memcgs(unsigned long nr_to_reclaim)
{
	struct scan_control sc = {
		.scrape_offlined_memcgs = true,
		.target_mem_cgroup = root_mem_cgroup,
		.nr_to_reclaim = max(nr_to_reclaim, SWAP_CLUSTER_MAX),
		.gfp_mask = GFP_KERNEL,
		.reclaim_idx = MAX_NR_ZONES - 1,
		.may_writepage = true,
		.may_unmap = true,
	};
	struct zonelist *zl = node_zonelist(numa_node_id(), sc.gfp_mask);
	unsigned int noreclaim_flag;
	unsigned long reclaimed;

	/*
	 * Install the reclaim state on current and save the noreclaim
	 * allocation flag for the duration of the pass; both are undone
	 * below in reverse order.
	 */
	set_task_reclaim_state(current, &sc.reclaim_state);
	noreclaim_flag = memalloc_noreclaim_save();

	reclaimed = do_try_to_free_pages(zl, &sc);

	memalloc_noreclaim_restore(noreclaim_flag);
	set_task_reclaim_state(current, NULL);

	return reclaimed;
}
#endif