From 569e0b22009b4987eb939bf43131392e92304e3f Mon Sep 17 00:00:00 2001
From: Masahito S
Date: Tue, 30 Sep 2025 02:15:00 +0900
Subject: [PATCH] linux6.17-kcompressd-unofficial-0.5
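
Off-load swap-out compression from kswapd to per-node "kcompressd"
kernel threads.

When kswapd writes an anon folio to zswap or to a SWP_SYNCHRONOUS_IO
device such as zram, the compression currently runs synchronously in
kswapd context, where it can stall rmap walks and the unmapping of
both file and anon pages. With this patch kswapd instead takes a
reference on the folio, pushes it into a per-node kfifo
(KCOMPRESS_FIFO_SIZE = 256 slots) and wakes kcompressd, which performs
the zswap_store() or __swap_writepage() call from its own context.

kcompressd_store() declines and lets the synchronous path run when the
caller is not kswapd, the folio is not anonymous, zswap writeback is
disabled for the folio's memcg, or neither zswap nor a synchronous
swap device is in use. If the fifo already holds vm.kcompressd
entries, kswapd dequeues the oldest folio and writes it out itself,
which provides natural backpressure.

The queue depth is capped by the new sysctl vm.kcompressd (default 24,
maximum KCOMPRESS_FIFO_SIZE; 0 disables off-loading).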
---
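Notes, kept below the fold so they stay out of the commit message: a
brief usage sketch. The vm.kcompressd sysctl path and the
"kcompressd<nid>" thread naming follow from the code added below; the
values used here are illustrative, not recommendations.

  # Cap the per-node async compression queue at 64 folios
  sysctl vm.kcompressd=64

  # 0 disables off-loading; kswapd then compresses synchronously
  sysctl vm.kcompressd=0

  # The per-node worker threads should be visible once kswapd runs
  ps -eo comm | grep kcompressd

With vm.kcompressd=0, kcompressd_store() returns false immediately and
swap_writeout() behaves exactly as it did before this patch.
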
 include/linux/mmzone.h |   7 +++
 include/linux/swap.h   |   2 +
 mm/mm_init.c           |   1 +
 mm/page_io.c           | 129 +++++++++++++++++++++++++++++++++++++++++
 mm/swap.h              |   6 ++
 mm/vmscan.c            |  39 +++++++++++++
 6 files changed, 184 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0c5da91419..bbfdd78d8c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include <linux/kfifo.h>
 #include
 
 /* Free memory management - zoned buddy allocator. */
@@ -1417,6 +1418,12 @@ typedef struct pglist_data {
 
 	int kswapd_failures;		/* Number of 'reclaimed == 0' runs */
 
+#define KCOMPRESS_FIFO_SIZE 256
+	wait_queue_head_t kcompressd_wait;
+	struct task_struct *kcompressd;
+	struct kfifo kcompress_fifo;
+	spinlock_t kcompress_fifo_lock;
+
 #ifdef CONFIG_COMPACTION
 	int kcompactd_max_order;
 	enum zone_type kcompactd_highest_zoneidx;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7012a0f758..7e1a1b12b4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -711,5 +711,7 @@ static inline bool mem_cgroup_swap_full(struct folio *folio)
 }
 #endif
 
+extern int vm_kcompressd;
+
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 5c21b3af21..3c34cb05f4 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1413,6 +1413,7 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
 	pgdat_init_kcompactd(pgdat);
 
 	init_waitqueue_head(&pgdat->kswapd_wait);
+	init_waitqueue_head(&pgdat->kcompressd_wait);
 	init_waitqueue_head(&pgdat->pfmemalloc_wait);
 
 	for (i = 0; i < NR_VMSCAN_THROTTLE; i++)
diff --git a/mm/page_io.c b/mm/page_io.c
index a2056a5ecb..5f51bf93cf 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -25,6 +25,8 @@
 #include
 #include
 #include
+#include <linux/kfifo.h>
+#include <linux/kthread.h>
 #include "swap.h"
 
 static void __end_swap_bio_write(struct bio *bio)
@@ -233,6 +235,94 @@ static void swap_zeromap_folio_clear(struct folio *folio)
 	}
 }
 
+/*
+ * do_swapout() - Write a folio to swap space
+ * @folio: The folio to write out
+ * @swap_plug: Swap plug to attach the write to, or NULL
+ *
+ * Writes the folio to swap space, either via zswap or a synchronous
+ * write, then unlocks the folio and drops its reference count.
+ */
+static inline void do_swapout(struct folio *folio, struct swap_iocb **swap_plug)
+{
+	if (zswap_store(folio)) {
+		count_mthp_stat(folio_order(folio), MTHP_STAT_ZSWPOUT);
+		folio_unlock(folio);
+	} else
+		__swap_writepage(folio, swap_plug); /* unlocks the folio */
+
+	/* Drop the reference taken when the folio was queued */
+	folio_put(folio);
+}
+
+/*
+ * kcompressd_store() - Off-load folio compression to kcompressd
+ * @folio: The folio to compress
+ * @swap_plug: Swap plug passed on for any synchronous write-out
+ *
+ * Attempts to off-load compression of the folio to the per-node
+ * kcompressd thread. If kcompressd is unavailable or the folio
+ * cannot be queued, the caller falls back to a synchronous write.
+ *
+ * Returns true if the folio was queued for compression, false otherwise.
+ */
+static bool kcompressd_store(struct folio *folio, struct swap_iocb **swap_plug)
+{
+	pg_data_t *pgdat = NODE_DATA(numa_node_id());
+	unsigned int ret, sysctl_kcompressd = READ_ONCE(vm_kcompressd);
+	struct folio *head = NULL;
+
+	/* Only kswapd may off-load to kcompressd */
+	if (!current_is_kswapd())
+		return false;
+
+	/* kcompressd must be enabled and running */
+	if (!sysctl_kcompressd || unlikely(!pgdat->kcompressd))
+		return false;
+
+	/* Only anon folios can be off-loaded */
+	if (!folio_test_anon(folio))
+		return false;
+
+	/* Let the synchronous path return AOP_WRITEPAGE_ACTIVATE instead */
+	if (!mem_cgroup_zswap_writeback_enabled(folio_memcg(folio)))
+		return false;
+
+	/* Need zswap, or a SWP_SYNCHRONOUS_IO device such as zram */
+	if (!zswap_is_enabled() &&
+	    !data_race(swp_swap_info(folio->swap)->flags & SWP_SYNCHRONOUS_IO))
+		return false;
+
+	/* If the fifo is full, dequeue the oldest folio and write it
+	 * out synchronously below to make room for the new one.
+	 */
+	scoped_guard(spinlock_irqsave, &pgdat->kcompress_fifo_lock)
+		if (kfifo_len(&pgdat->kcompress_fifo) >= sysctl_kcompressd * sizeof(folio) &&
+		    unlikely(!kfifo_out(&pgdat->kcompress_fifo, &head, sizeof(folio))))
+			/* Can't dequeue the head folio; fall back to a synchronous write */
+			return false;
+
+	/* Take a reference so the folio cannot be freed while queued */
+	folio_get(folio);
+
+	/* Enqueue the folio for compression */
+	ret = kfifo_in(&pgdat->kcompress_fifo, &folio, sizeof(folio));
+	if (likely(ret))
+		/* Successfully enqueued; wake up kcompressd */
+		wake_up_interruptible(&pgdat->kcompressd_wait);
+	else
+		/* Enqueue failed; drop the reference taken above */
+		folio_put(folio);
+
+	/* If we dequeued a head folio above, swap it out now. This
+	 * blocks until the folio has been handed off for write-out.
+	 */
+	if (head)
+		do_swapout(head, swap_plug);
+
+	return ret;
+}
+
 /*
  * We may have stale swap cache pages in memory: notice
  * them here and get rid of the unnecessary final write.
@@ -272,6 +362,14 @@ int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug)
 	 */
 	swap_zeromap_folio_clear(folio);
 
+	/*
+	 * Compression inside zswap and zram can stall rmap walks and
+	 * the unmapping of both file and anon pages; try to perform
+	 * the compression asynchronously when possible.
+	 */
+	if (kcompressd_store(folio, swap_plug))
+		return 0;
+
 	if (zswap_store(folio)) {
 		count_mthp_stat(folio_order(folio), MTHP_STAT_ZSWPOUT);
 		goto out_unlock;
@@ -288,6 +386,37 @@
 	return ret;
 }
 
+/*
+ * kcompressd() - Kernel thread that compresses queued folios
+ * @p: Pointer to this node's pg_data_t
+ *
+ * Waits for folios to be queued on kcompress_fifo, then hands each
+ * dequeued folio to do_swapout(), which performs the compression
+ * and the actual write to swap space.
+ */
+int kcompressd(void *p)
+{
+	pg_data_t *pgdat = (pg_data_t *)p;
+	struct folio *folio;
+	/*
+	 * Run with PF_MEMALLOC and PF_KSWAPD set so that memory can be
+	 * allocated for compression without being restricted by the
+	 * current memory allocation context. PF_KSWAPD also prevents
+	 * the Intel graphics driver from crashing the system in
+	 * i915_gem_shrinker.c:i915_gem_shrinker_scan().
+	 */
+	current->flags |= PF_MEMALLOC | PF_KSWAPD;
+
+	while (!kthread_should_stop()) {
+		wait_event_interruptible(pgdat->kcompressd_wait,
+			!kfifo_is_empty(&pgdat->kcompress_fifo) || kthread_should_stop());
+
+		while (kfifo_out_locked(&pgdat->kcompress_fifo, &folio,
+				sizeof(folio), &pgdat->kcompress_fifo_lock))
+			do_swapout(folio, NULL);
+	}
+	return 0;
+}
+
 static inline void count_swpout_vm_event(struct folio *folio)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/mm/swap.h b/mm/swap.h
index 911ad5ff0f..8685c6853b 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -24,6 +24,7 @@ static inline void swap_read_unplug(struct swap_iocb *plug)
 void swap_write_unplug(struct swap_iocb *sio);
 int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug);
 void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug);
+int kcompressd(void *p);
 
 /* linux/mm/swap_state.c */
 /* One swap address space for each 64M swap space */
@@ -168,6 +169,11 @@ static inline int swap_writeout(struct folio *folio,
 	return 0;
 }
 
+static inline int kcompressd(void *p)
+{
+	return 0;
+}
+
 static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr)
 {
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 674999999c..59447d6b8c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -7491,6 +7491,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 void __meminit kswapd_run(int nid)
 {
 	pg_data_t *pgdat = NODE_DATA(nid);
+	int ret;
 
 	pgdat_kswapd_lock(pgdat);
 	if (!pgdat->kswapd) {
@@ -7504,7 +7505,28 @@
 		} else {
 			wake_up_process(pgdat->kswapd);
 		}
+		ret = kfifo_alloc(&pgdat->kcompress_fifo,
+				KCOMPRESS_FIFO_SIZE * sizeof(struct folio *),
+				GFP_KERNEL);
+		if (ret) {
+			pr_err("%s: kfifo_alloc failed\n", __func__);
+			goto out;
+		}
+
+		pr_info("Kcompressd-Unofficial 0.5 by Masahito Suzuki (forked from Kcompressd by Qun-Wei Lin from MediaTek)\n");
+		spin_lock_init(&pgdat->kcompress_fifo_lock);
+		pgdat->kcompressd = kthread_create_on_node(kcompressd, pgdat, nid,
+				"kcompressd%d", nid);
+		if (IS_ERR(pgdat->kcompressd)) {
+			pr_err("Failed to start kcompressd on node %d, ret=%ld\n",
+				nid, PTR_ERR(pgdat->kcompressd));
+			pgdat->kcompressd = NULL;
+			kfifo_free(&pgdat->kcompress_fifo);
+		} else {
+			wake_up_process(pgdat->kcompressd);
+		}
 	}
+out:
 	pgdat_kswapd_unlock(pgdat);
 }
 
@@ -7523,9 +7545,17 @@ void __meminit kswapd_stop(int nid)
 		kthread_stop(kswapd);
 		pgdat->kswapd = NULL;
 	}
+	if (pgdat->kcompressd) {
+		kthread_stop(pgdat->kcompressd);
+		pgdat->kcompressd = NULL;
+		kfifo_free(&pgdat->kcompress_fifo);
+	}
 	pgdat_kswapd_unlock(pgdat);
 }
 
+int vm_kcompressd = 24;
+static int SYSCTL_KCOMPRESS_FIFO_SIZE = KCOMPRESS_FIFO_SIZE;
+
 static const struct ctl_table vmscan_sysctl_table[] = {
 	{
 		.procname = "swappiness",
@@ -7536,6 +7566,15 @@ static const struct ctl_table vmscan_sysctl_table[] = {
 		.extra1 = SYSCTL_ZERO,
 		.extra2 = SYSCTL_TWO_HUNDRED,
 	},
+	{
+		.procname = "kcompressd",
+		.data = &vm_kcompressd,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = SYSCTL_ZERO,
+		.extra2 = &SYSCTL_KCOMPRESS_FIFO_SIZE,
+	},
 #ifdef CONFIG_NUMA
 	{
 		.procname = "zone_reclaim_mode",
-- 
2.34.1