From e4d131aaa669b78184ef919136b27a618896299b Mon Sep 17 00:00:00 2001
From: Masahito S
Date: Sun, 27 Jul 2025 01:49:55 +0900
Subject: [PATCH] linux6.12.44-kcompressd-unofficial-0.5

---
 include/linux/mmzone.h |   7 +++
 include/linux/swap.h   |   2 +
 kernel/sysctl.c        |  12 ++++
 mm/mm_init.c           |   1 +
 mm/page_io.c           | 137 +++++++++++++++++++++++++++++++++++++++++
 mm/swap.h              |   6 ++
 mm/vmscan.c            |  27 ++++++++
 7 files changed, 192 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 80bc5640bb..3e23a92228 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -23,6 +23,7 @@
 #include <linux/page-flags.h>
 #include <linux/local_lock.h>
 #include <linux/zswap.h>
+#include <linux/kfifo.h>
 #include <asm/page.h>
 
 /* Free memory management - zoned buddy allocator.  */
@@ -1355,6 +1356,12 @@ typedef struct pglist_data {
 
 	int kswapd_failures;		/* Number of 'reclaimed == 0' runs */
 
+#define KCOMPRESS_FIFO_SIZE 256
+	wait_queue_head_t kcompressd_wait;
+	struct task_struct *kcompressd;
+	struct kfifo kcompress_fifo;
+	spinlock_t kcompress_fifo_lock;
+
 #ifdef CONFIG_COMPACTION
 	int kcompactd_max_order;
 	enum zone_type kcompactd_highest_zoneidx;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index f3e0ac20c2..d9049ee1ed 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -699,5 +699,7 @@ static inline bool mem_cgroup_swap_full(struct folio *folio)
 }
 #endif
 
+extern int vm_kcompressd;
+
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 79e6cb1d5c..bd856f5aa3 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2031,6 +2031,9 @@ static struct ctl_table kern_table[] = {
 #endif
 };
 
+int vm_kcompressd = 24;
+static int SYSCTL_KCOMPRESS_FIFO_SIZE = KCOMPRESS_FIFO_SIZE;
+
 static struct ctl_table vm_table[] = {
 	{
 		.procname	= "overcommit_memory",
@@ -2081,6 +2084,15 @@ static struct ctl_table vm_table[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_TWO_HUNDRED,
 	},
+	{
+		.procname	= "kcompressd",
+		.data		= &vm_kcompressd,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &SYSCTL_KCOMPRESS_FIFO_SIZE,
+	},
 #ifdef CONFIG_NUMA
 	{
 		.procname	= "numa_stat",
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 4ba5607aaf..770ed9cf7d 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1371,6 +1371,7 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
 	pgdat_init_kcompactd(pgdat);
 
 	init_waitqueue_head(&pgdat->kswapd_wait);
+	init_waitqueue_head(&pgdat->kcompressd_wait);
 	init_waitqueue_head(&pgdat->pfmemalloc_wait);
 
 	for (i = 0; i < NR_VMSCAN_THROTTLE; i++)
diff --git a/mm/page_io.c b/mm/page_io.c
index 01749b99fb..dfb9668653 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -25,6 +25,8 @@
 #include <linux/sched/task.h>
 #include <linux/delayacct.h>
 #include <linux/zswap.h>
+#include <linux/kfifo.h>
+#include <linux/kthread.h>
 #include "swap.h"
 
 static void __end_swap_bio_write(struct bio *bio)
@@ -234,6 +236,101 @@ static void swap_zeromap_folio_clear(struct folio *folio)
 	}
 }
 
+/*
+ * do_swapout() - Write a folio to swap space
+ * @folio: The folio to write out
+ *
+ * Write the folio to swap space, either through zswap or a plain
+ * synchronous write.  The folio is unlocked and its reference count
+ * dropped once the operation completes.
+ */
+static inline void do_swapout(struct folio *folio)
+{
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_NONE,
+		.nr_to_write = SWAP_CLUSTER_MAX,
+		.range_start = 0,
+		.range_end = LLONG_MAX,
+		.for_reclaim = 1,
+	};
+
+	if (zswap_store(folio))
+		folio_unlock(folio);
+	else
+		__swap_writepage(folio, &wbc);	/* Implies folio_unlock(folio) */
+
+	/* Drop the reference taken in kcompressd_store() */
+	folio_put(folio);
+}
+
+/*
+ * kcompressd_store() - Off-load folio compression to kcompressd
+ * @folio: The folio to compress
+ *
+ * Attempt to hand the folio over to the per-node kcompressd thread
+ * for compression.  If kcompressd is unavailable or the folio is not
+ * suitable for off-loading, the caller falls back to a synchronous
+ * write.
+ *
+ * Returns true if the folio was queued for compression,
+ * false otherwise.
+ */
+static bool kcompressd_store(struct folio *folio)
+{
+	pg_data_t *pgdat = NODE_DATA(numa_node_id());
+	unsigned int ret, sysctl_kcompressd = vm_kcompressd;
+	struct folio *head = NULL;
+
+	/* Only kswapd can use kcompressd */
+	if (!current_is_kswapd())
+		return false;
+
+	/* kcompressd must be enabled and running */
+	if (!sysctl_kcompressd || unlikely(!pgdat->kcompressd))
+		return false;
+
+	/* We can only off-load anon folios */
+	if (!folio_test_anon(folio))
+		return false;
+
+	/* Fall back so the synchronous path can return AOP_WRITEPAGE_ACTIVATE */
+	if (!mem_cgroup_zswap_writeback_enabled(folio_memcg(folio)))
+		return false;
+
+	/* Swap device must be sync-efficient */
+	if (!zswap_is_enabled() &&
+	    !data_race(swp_swap_info(folio->swap)->flags & SWP_SYNCHRONOUS_IO))
+		return false;
+
+	/*
+	 * If the kcompress_fifo is already at the configured limit, we
+	 * must dequeue the head folio to make room for the new one.
+	 */
+	scoped_guard(spinlock_irqsave, &pgdat->kcompress_fifo_lock)
+		if (kfifo_len(&pgdat->kcompress_fifo) >= sysctl_kcompressd * sizeof(folio) &&
+		    unlikely(!kfifo_out(&pgdat->kcompress_fifo, &head, sizeof(folio))))
+			/* Can't dequeue the head folio; fall back to synchronous write */
+			return false;
+
+	/* Take a reference so the folio cannot be freed while queued */
+	folio_get(folio);
+
+	/* Enqueue the folio for compression */
+	ret = kfifo_in(&pgdat->kcompress_fifo, &folio, sizeof(folio));
+	if (likely(ret))
+		/* Successfully enqueued; wake up kcompressd */
+		wake_up_interruptible(&pgdat->kcompressd_wait);
+	else
+		/* Enqueue failed, so drop the reference again */
+		folio_put(folio);
+
+	/*
+	 * If we had to dequeue the head folio, swap it out now.
+	 * This blocks until the folio has been written out.
+	 */
+	if (head)
+		do_swapout(head);
+
+	return ret;
+}
+
 /*
  * We may have stale swap cache pages in memory: notice
  * them here and get rid of the unnecessary final write.
  */
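Note on the queue-full policy above: when the FIFO already holds vm.kcompressd entries, the producer (kswapd) dequeues the oldest folio, enqueues the new one, and then writes the stolen head out synchronously itself, so reclaim always makes forward progress. The following self-contained userspace sketch illustrates just this pattern; it is not part of the patch, and its names (enqueue_or_fall_back, process_synchronously, FIFO_SIZE) are hypothetical stand-ins for kfifo_in()/kfifo_out() and do_swapout().

/*
 * Userspace sketch of the "steal the head when full" policy used by
 * kcompressd_store().  Build with: cc -pthread demo_fifo.c
 */
#include <stdio.h>
#include <pthread.h>

#define FIFO_SIZE 4			/* stands in for vm.kcompressd */

static int fifo[FIFO_SIZE];
static unsigned int head, tail;		/* fill level == tail - head */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void process_synchronously(int item)
{
	/* stands in for do_swapout(head) */
	printf("producer processed %d itself\n", item);
}

/* Mirrors the enqueue path of kcompressd_store(). */
static void enqueue_or_fall_back(int item)
{
	int stolen = -1;

	pthread_mutex_lock(&lock);
	if (tail - head == FIFO_SIZE) {
		/* Queue full: steal the oldest entry to make room. */
		stolen = fifo[head % FIFO_SIZE];
		head++;
	}
	fifo[tail % FIFO_SIZE] = item;	/* kfifo_in() analog */
	tail++;
	pthread_mutex_unlock(&lock);

	if (stolen != -1)
		process_synchronously(stolen);
}

int main(void)
{
	for (int i = 0; i < 8; i++)
		enqueue_or_fall_back(i);
	return 0;
}

Running it pushes eight items through a four-slot ring: items 4 through 7 each displace the then-oldest entry, which the producer handles inline, mirroring the do_swapout(head) call in the kernel code.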
@@ -276,6 +373,15 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
 		 */
 		swap_zeromap_folio_clear(folio);
 	}
+
+	/*
+	 * Compression within zswap and zram might block rmap and unmap
+	 * of both file and anon pages; try to do the compression
+	 * asynchronously if possible.
+	 */
+	if (kcompressd_store(folio))
+		return 0;
+
 	if (zswap_store(folio)) {
 		folio_unlock(folio);
 		return 0;
@@ -289,6 +395,37 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
 	return 0;
 }
 
+/*
+ * kcompressd() - Kernel thread for compressing folios
+ * @p: Pointer to the pg_data_t structure of this node
+ *
+ * Runs as a per-node kernel thread that waits for folios to be queued
+ * for compression and hands each one to do_swapout(), which performs
+ * the actual write to swap space.
+ */
+int kcompressd(void *p)
+{
+	pg_data_t *pgdat = (pg_data_t *)p;
+	struct folio *folio;
+
+	/*
+	 * kcompressd runs with PF_MEMALLOC and PF_KSWAPD set so that it
+	 * can allocate memory for compression without being restricted
+	 * by the current memory allocation context.  PF_KSWAPD also
+	 * prevents the Intel graphics driver from crashing the system
+	 * in i915_gem_shrinker.c:i915_gem_shrinker_scan().
+	 */
+	current->flags |= PF_MEMALLOC | PF_KSWAPD;
+
+	while (!kthread_should_stop()) {
+		wait_event_interruptible(pgdat->kcompressd_wait,
+				!kfifo_is_empty(&pgdat->kcompress_fifo) ||
+				kthread_should_stop());
+
+		while (kfifo_out_locked(&pgdat->kcompress_fifo,
+				&folio, sizeof(folio),
+				&pgdat->kcompress_fifo_lock))
+			do_swapout(folio);
+	}
+	return 0;
+}
+
 static inline void count_swpout_vm_event(struct folio *folio)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
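The wait/wake pairing above (wake_up_interruptible() in kcompressd_store(), wait_event_interruptible() in the thread, and a full drain of the FIFO before sleeping again) can be mimicked in userspace with a condition variable. The analog below is a sketch only, using pthreads instead of kernel wait queues, a counter instead of the kfifo, and hypothetical names throughout; it is not code from the patch.

/*
 * Userspace analog of the kcompressd() consumer loop.
 * Build with: cc -pthread demo_wait.c
 */
#include <stdio.h>
#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t nonempty = PTHREAD_COND_INITIALIZER;
static int queue_len;			/* stands in for the kfifo */
static int stop;			/* stands in for kthread_should_stop() */

static void *consumer(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (!stop) {
		/* wait_event_interruptible(..., !empty || should_stop) */
		while (queue_len == 0 && !stop)
			pthread_cond_wait(&nonempty, &lock);

		/* while (kfifo_out_locked(...)) do_swapout(folio); */
		while (queue_len > 0) {
			queue_len--;
			pthread_mutex_unlock(&lock);
			puts("consumer: wrote one folio out");
			pthread_mutex_lock(&lock);
		}
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, consumer, NULL);

	pthread_mutex_lock(&lock);	/* producer: enqueue three items */
	queue_len = 3;
	pthread_cond_signal(&nonempty);	/* wake_up_interruptible() analog */
	pthread_mutex_unlock(&lock);

	pthread_mutex_lock(&lock);	/* kthread_stop() analog */
	stop = 1;
	pthread_cond_signal(&nonempty);
	pthread_mutex_unlock(&lock);

	pthread_join(tid, NULL);
	return 0;
}

As in the kernel loop, the predicate is re-checked under the lock after every wakeup, so a wakeup racing with a concurrent drain is harmless. The stop flag is part of the wait predicate for the same reason kthread_should_stop() appears in the wait_event_interruptible() condition above: a stop request issued while the queue is empty must still terminate the thread.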
diff --git a/mm/swap.h b/mm/swap.h
index ad2f121de9..f2fcf2bbf9 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -21,6 +21,7 @@ static inline void swap_read_unplug(struct swap_iocb *plug)
 void swap_write_unplug(struct swap_iocb *sio);
 int swap_writepage(struct page *page, struct writeback_control *wbc);
 void __swap_writepage(struct folio *folio, struct writeback_control *wbc);
+int kcompressd(void *p);
 
 /* linux/mm/swap_state.c */
 /* One swap address space for each 64M swap space */
@@ -146,6 +147,11 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc)
 	return 0;
 }
 
+static inline int kcompressd(void *p)
+{
+	return 0;
+}
+
 static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr)
 {
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e3c1e2e156..86fa28b4f7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -7363,6 +7363,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 void __meminit kswapd_run(int nid)
 {
 	pg_data_t *pgdat = NODE_DATA(nid);
+	int ret;
 
 	pgdat_kswapd_lock(pgdat);
 	if (!pgdat->kswapd) {
@@ -7374,7 +7375,28 @@ void __meminit kswapd_run(int nid)
 			BUG_ON(system_state < SYSTEM_RUNNING);
 			pgdat->kswapd = NULL;
 		}
+		ret = kfifo_alloc(&pgdat->kcompress_fifo,
+				KCOMPRESS_FIFO_SIZE * sizeof(struct folio *),
+				GFP_KERNEL);
+		if (ret) {
+			pr_err("%s: failed to allocate kcompress_fifo\n", __func__);
+			goto out;
+		}
+
+		pr_info("Kcompressd-Unofficial 0.5 by Masahito Suzuki (forked from Kcompressd by Qun-Wei Lin from MediaTek)\n");
+		spin_lock_init(&pgdat->kcompress_fifo_lock);
+		pgdat->kcompressd = kthread_create_on_node(kcompressd, pgdat, nid,
+				"kcompressd%d", nid);
+		if (IS_ERR(pgdat->kcompressd)) {
+			pr_err("Failed to start kcompressd on node %d, ret=%ld\n",
+				nid, PTR_ERR(pgdat->kcompressd));
+			pgdat->kcompressd = NULL;
+			kfifo_free(&pgdat->kcompress_fifo);
+		} else {
+			wake_up_process(pgdat->kcompressd);
+		}
 	}
+out:
 	pgdat_kswapd_unlock(pgdat);
 }
 
@@ -7393,6 +7415,11 @@ void __meminit kswapd_stop(int nid)
 		kthread_stop(kswapd);
 		pgdat->kswapd = NULL;
 	}
+	if (pgdat->kcompressd) {
+		kthread_stop(pgdat->kcompressd);
+		pgdat->kcompressd = NULL;
+		kfifo_free(&pgdat->kcompress_fifo);
+	}
 	pgdat_kswapd_unlock(pgdat);
 }
-- 
2.34.1
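Usage note: with the patch applied, the off-load is governed by the vm.kcompressd sysctl added in the kernel/sysctl.c hunk (default 24; 0 disables off-loading entirely; the upper bound is KCOMPRESS_FIFO_SIZE, i.e. 256 queued folios per node). The small C program below shows how the knob can be inspected and tuned through procfs, equivalent to running sysctl vm.kcompressd; it assumes a patched kernel, and the write requires root.

/* Read and update vm.kcompressd via procfs (patched kernel only). */
#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/vm/kcompressd";
	FILE *f = fopen(path, "r+");
	int val;

	if (!f) {
		perror(path);		/* ENOENT: unpatched kernel; EACCES: not root */
		return 1;
	}
	if (fscanf(f, "%d", &val) == 1)
		printf("vm.kcompressd = %d\n", val);

	rewind(f);			/* reposition between read and write */
	fprintf(f, "%d\n", 32);		/* raise the queue-depth limit to 32 */
	fclose(f);
	return 0;
}

Setting the value back to 0 restores fully synchronous behavior in swap_writepage(), which is the simplest way to A/B-test the feature on a reclaim-heavy workload.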