From 940411fdf169501b442397853e470ba66cc3aa00 Mon Sep 17 00:00:00 2001
From: Brian Geffon <bgeffon@chromium.org>
Date: Tue, 17 Aug 2021 17:41:38 +0000
Subject: [PATCH] CHROMIUM: mm: per-process reclaim
These days there are many platforms in the embedded market that are smarter than
the kernel, which has only limited information about the working set, so they
want to be involved in memory management more heavily, as Android's low memory
killer, ashmem, and the many recent low-memory notifiers show.
One simple scenario for this userspace intelligence is a platform that manages
tasks as foreground and background: for the end user's *responsiveness* it is
better to reclaim a background task's pages even if they are frequently
referenced.
This patch adds a new knob, "reclaim", under /proc/<pid>/ so a task manager can
reclaim from any target process at any time, giving the platform another way to
use memory efficiently.
It avoids killing processes just to get free memory, which is a really terrible
experience; I once lost my best game score ever because I switched to a phone
call while playing.
Reclaim file-backed pages only.
echo file > /proc/PID/reclaim
Reclaim anonymous pages only.
echo anon > /proc/PID/reclaim
Reclaim shmem pages.
echo shmem > /proc/PID/reclaim
Reclaim all pages.
echo all > /proc/PID/reclaim
Note: for historical reasons, "all" reclaims file-backed and anonymous pages
only; it does not include shmem.
[Squashes in 75472663, 13a4a5c]
BUG=b:195001087
TEST=build 5.10 for x86_64, test on eve
Signed-off-by: Brian Geffon <bgeffon@chromium.org>
Change-Id: I8967866f06ac866f8f5291c585c172756e5a180b
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/3100666
Reviewed-by: Joel Fernandes <joelaf@google.com>
---
fs/proc/base.c | 3 +
fs/proc/internal.h | 1 +
fs/proc/task_mmu.c | 337 +++++++++++++++++++++++++++++++++++++++++++
include/linux/rmap.h | 4 +
mm/Kconfig | 11 ++
mm/swap.c | 1 +
6 files changed, 357 insertions(+)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 926f07ce11b7aa7c0ab91ac50faf1f3a85f7b370..b4efbb9576b1ee70bd3fca7eed581eb33ddfd284 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3286,6 +3286,9 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("mounts", S_IRUGO, proc_mounts_operations),
REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
REG("mountstats", S_IRUSR, proc_mountstats_operations),
+#ifdef CONFIG_PROCESS_RECLAIM
+ REG("reclaim", S_IWUGO, proc_reclaim_operations),
+#endif
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
REG("smaps", S_IRUGO, proc_pid_smaps_operations),
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index b701d0207edf098814a70400850e3675aee448f1..ec95cc8da708a69fe4d61fcf307479b97afe1ac1 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -216,6 +216,7 @@ struct pde_opener {
extern const struct inode_operations proc_link_inode_operations;
extern const struct inode_operations proc_pid_link_inode_operations;
extern const struct super_operations proc_sops;
+extern const struct file_operations proc_reclaim_operations;
void proc_init_kmemcache(void);
void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8a74cdcc9af00f7217f8a5ab3700537b5af6913d..b317bfd43fdeb1b61c346bc4d2ef6b485bffe884 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -19,6 +19,7 @@
#include <linux/shmem_fs.h>
#include <linux/uaccess.h>
#include <linux/pkeys.h>
+#include <linux/mm_inline.h>
#include <asm/elf.h>
#include <asm/tlb.h>
@@ -1763,6 +1764,342 @@ const struct file_operations proc_pagemap_operations = {
};
#endif /* CONFIG_PROC_PAGE_MONITOR */
+#ifdef CONFIG_PROCESS_RECLAIM
+enum reclaim_type {
+ RECLAIM_FILE = 1,
+ RECLAIM_ANON,
+ RECLAIM_ALL,
+ /*
+ * For safety and backwards compatibility, shmem reclaim mode
+ * is only possible by explicitly writing 'shmem'; 'all' does
+ * not include shmem.
+ */
+ RECLAIM_SHMEM,
+};
+
+struct walk_data {
+ enum reclaim_type type;
+};
+
+static int deactivate_pte_range(pmd_t *pmd, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ pte_t *orig_pte, *pte, ptent;
+ spinlock_t *ptl;
+ struct page *page;
+ struct vm_area_struct *vma = walk->vma;
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long next = pmd_addr_end(addr, end);
+
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
+ if (!pmd_present(*pmd))
+ goto huge_unlock;
+
+ if (is_huge_zero_pmd(*pmd))
+ goto huge_unlock;
+
+ page = pmd_page(*pmd);
+ if (page_mapcount(page) > 1)
+ goto huge_unlock;
+
+ if (next - addr != HPAGE_PMD_SIZE) {
+ int err;
+
+ get_page(page);
+ spin_unlock(ptl);
+ lock_page(page);
+ err = split_huge_page(page);
+ unlock_page(page);
+ put_page(page);
+ if (!err)
+ goto regular_page;
+ return 0;
+ }
+
+ pmdp_test_and_clear_young(vma, addr, pmd);
+ deactivate_page(page);
+huge_unlock:
+ spin_unlock(ptl);
+ return 0;
+ }
+
+ if (pmd_trans_unstable(pmd))
+ return 0;
+
+regular_page:
+ orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ for (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {
+ ptent = *pte;
+
+ if (!pte_present(ptent))
+ continue;
+
+ page = vm_normal_page(vma, addr, ptent);
+ if (!page)
+ continue;
+
+ if (PageTransCompound(page)) {
+ if (page_mapcount(page) != 1)
+ break;
+ get_page(page);
+ if (!trylock_page(page)) {
+ put_page(page);
+ break;
+ }
+ pte_unmap_unlock(orig_pte, ptl);
+ if (split_huge_page(page)) {
+ unlock_page(page);
+ put_page(page);
+ pte_offset_map_lock(mm, pmd, addr, &ptl);
+ break;
+ }
+ unlock_page(page);
+ put_page(page);
+ pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ pte--;
+ addr -= PAGE_SIZE;
+ continue;
+ }
+
+ VM_BUG_ON_PAGE(PageTransCompound(page), page);
+
+ if (page_mapcount(page) > 1)
+ continue;
+
+ ptep_test_and_clear_young(vma, addr, pte);
+ deactivate_page(page);
+ }
+ pte_unmap_unlock(orig_pte, ptl);
+ cond_resched();
+ return 0;
+}
+
+
+static int reclaim_pte_range(pmd_t *pmd, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ pte_t *orig_pte, *pte, ptent;
+ spinlock_t *ptl;
+ LIST_HEAD(page_list);
+ struct page *page;
+ int isolated = 0;
+ struct vm_area_struct *vma = walk->vma;
+ struct walk_data *data = (struct walk_data *)walk->private;
+ enum reclaim_type type = 0;
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long next = pmd_addr_end(addr, end);
+
+ if (data)
+ type = data->type;
+
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
+ if (!pmd_present(*pmd))
+ goto huge_unlock;
+
+ if (is_huge_zero_pmd(*pmd))
+ goto huge_unlock;
+
+ page = pmd_page(*pmd);
+ if (type != RECLAIM_SHMEM && page_mapcount(page) > 1)
+ goto huge_unlock;
+
+ if (next - addr != HPAGE_PMD_SIZE) {
+ int err;
+
+ get_page(page);
+ spin_unlock(ptl);
+ lock_page(page);
+ err = split_huge_page(page);
+ unlock_page(page);
+ put_page(page);
+ if (!err)
+ goto regular_page;
+ return 0;
+ }
+
+ if (isolate_lru_page(page))
+ goto huge_unlock;
+
+ /* Clear all the references to make sure it gets reclaimed */
+ pmdp_test_and_clear_young(vma, addr, pmd);
+ ClearPageReferenced(page);
+ test_and_clear_page_young(page);
+ list_add(&page->lru, &page_list);
+huge_unlock:
+ spin_unlock(ptl);
+ reclaim_pages(&page_list);
+ return 0;
+ }
+
+ if (pmd_trans_unstable(pmd))
+ return 0;
+
+regular_page:
+ orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ for (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {
+ ptent = *pte;
+ if (!pte_present(ptent))
+ continue;
+
+ page = vm_normal_page(vma, addr, ptent);
+ if (!page)
+ continue;
+
+ if (PageTransCompound(page)) {
+ if (type != RECLAIM_SHMEM && page_mapcount(page) != 1)
+ break;
+ get_page(page);
+ if (!trylock_page(page)) {
+ put_page(page);
+ break;
+ }
+ pte_unmap_unlock(orig_pte, ptl);
+
+ if (split_huge_page(page)) {
+ unlock_page(page);
+ put_page(page);
+ pte_offset_map_lock(mm, pmd, addr, &ptl);
+ break;
+ }
+ unlock_page(page);
+ put_page(page);
+ pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ pte--;
+ addr -= PAGE_SIZE;
+ continue;
+ }
+
+ VM_BUG_ON_PAGE(PageTransCompound(page), page);
+
+ if (!PageLRU(page))
+ continue;
+
+ if (type != RECLAIM_SHMEM && page_mapcount(page) > 1)
+ continue;
+
+ if (isolate_lru_page(page))
+ continue;
+
+ isolated++;
+ list_add(&page->lru, &page_list);
+ /* Clear all the references to make sure it gets reclaimed */
+ ptep_test_and_clear_young(vma, addr, pte);
+ ClearPageReferenced(page);
+ test_and_clear_page_young(page);
+ if (isolated >= SWAP_CLUSTER_MAX) {
+ pte_unmap_unlock(orig_pte, ptl);
+ reclaim_pages(&page_list);
+ isolated = 0;
+ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ orig_pte = pte;
+ }
+ }
+
+ pte_unmap_unlock(orig_pte, ptl);
+ reclaim_pages(&page_list);
+
+ cond_resched();
+ return 0;
+}
+
+static ssize_t reclaim_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task;
+ char buffer[PROC_NUMBUF];
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ enum reclaim_type type;
+ char *type_buf;
+
+ memset(buffer, 0, sizeof(buffer));
+ if (count > sizeof(buffer) - 1)
+ count = sizeof(buffer) - 1;
+
+ if (copy_from_user(buffer, buf, count))
+ return -EFAULT;
+
+ type_buf = strstrip(buffer);
+ if (!strcmp(type_buf, "file"))
+ type = RECLAIM_FILE;
+ else if (!strcmp(type_buf, "anon"))
+ type = RECLAIM_ANON;
+#ifdef CONFIG_SHMEM
+ else if (!strcmp(type_buf, "shmem"))
+ type = RECLAIM_SHMEM;
+#endif
+ else if (!strcmp(type_buf, "all"))
+ type = RECLAIM_ALL;
+ else
+ return -EINVAL;
+
+ task = get_proc_task(file->f_path.dentry->d_inode);
+ if (!task)
+ return -ESRCH;
+
+ mm = get_task_mm(task);
+ if (mm) {
+ struct mm_walk_ops reclaim_walk = {
+ .pmd_entry = reclaim_pte_range,
+ };
+
+ struct walk_data reclaim_data = {
+ .type = type,
+ };
+
+ mmap_read_lock(mm);
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if (is_vm_hugetlb_page(vma))
+ continue;
+
+ if (vma->vm_flags & VM_LOCKED)
+ continue;
+
+ if (type == RECLAIM_ANON && !vma_is_anonymous(vma))
+ continue;
+ if ((type == RECLAIM_FILE || type == RECLAIM_SHMEM)
+ && vma_is_anonymous(vma)) {
+ continue;
+ }
+
+ if (vma_is_anonymous(vma) || shmem_file(vma->vm_file)) {
+ if (get_nr_swap_pages() <= 0 ||
+ get_mm_counter(mm, MM_ANONPAGES) == 0) {
+ if (type == RECLAIM_ALL)
+ continue;
+ else
+ break;
+ }
+
+ if (shmem_file(vma->vm_file) && type != RECLAIM_SHMEM) {
+ continue;
+ }
+
+ reclaim_walk.pmd_entry = reclaim_pte_range;
+ } else {
+ reclaim_walk.pmd_entry = deactivate_pte_range;
+ }
+
+ walk_page_range(mm, vma->vm_start, vma->vm_end,
+ &reclaim_walk, (void *)&reclaim_data);
+ }
+ flush_tlb_mm(mm);
+ mmap_read_unlock(mm);
+ mmput(mm);
+ }
+ put_task_struct(task);
+
+ return count;
+}
+
+const struct file_operations proc_reclaim_operations = {
+ .write = reclaim_write,
+ .llseek = noop_llseek,
+};
+#endif
+
#ifdef CONFIG_NUMA
struct numa_maps {
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bd3504d11b15590f0e41232e234eebc1149baad4..b5e06da7e323d2d363110dafad14659f8e38454f 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -14,6 +14,10 @@
#include <linux/pagemap.h>
#include <linux/memremap.h>
+extern int isolate_lru_page(struct page *page);
+extern void putback_lru_page(struct page *page);
+extern unsigned long reclaim_pages(struct list_head *page_list);
+
/*
* The anon_vma heads a list of private "related" vmas, to scan if
* an anonymous page pointing to this anon_vma needs to be unmapped:
diff --git a/mm/Kconfig b/mm/Kconfig
index 14fb4f54df3af0d3cb9d632b525d518bce4bfa57..d5aae867e99d993893e8a1eaec1dcf19079752bd 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -590,6 +590,17 @@ config PAGE_REPORTING
those pages to another entity, such as a hypervisor, so that the
memory can be freed within the host for other uses.
+config PROCESS_RECLAIM
+ bool "Enable process reclaim"
+ depends on PROC_FS && MMU
+ help
+ This allows reclaiming a process's pages via /proc/PID/reclaim.
+
+ (echo file > /proc/PID/reclaim) reclaims file-backed pages only.
+ (echo anon > /proc/PID/reclaim) reclaims anonymous pages only.
+ (echo all > /proc/PID/reclaim) reclaims all pages.
+
+ Writing any other value returns -EINVAL.
#
# support for page migration
#
diff --git a/mm/swap.c b/mm/swap.c
index 955930f41d20c6d491bb3cf0347086443f16b4c6..711aa04043f433cb413325874dca3da39a943630 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -624,6 +624,7 @@ static void lru_deactivate_fn(struct lruvec *lruvec, struct folio *folio)
lruvec_del_folio(lruvec, folio);
folio_clear_active(folio);
folio_clear_referenced(folio);
+ folio_test_clear_young(folio);
lruvec_add_folio(lruvec, folio);
__count_vm_events(PGDEACTIVATE, nr_pages);
--
2.38.1.584.g0f3c55d4c2-goog