diff --git a/Makefile.rhelver b/Makefile.rhelver index 47a6bb1e4f1b9..76dfd0e62db5d 100644 --- a/Makefile.rhelver +++ b/Makefile.rhelver @@ -12,7 +12,7 @@ RHEL_MINOR = 10 # # Use this spot to avoid future merge conflicts. # Do not trim this comment. -RHEL_RELEASE = 553.87.1 +RHEL_RELEASE = 553.89.1 # # ZSTREAM diff --git a/ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/3b8abb32.failed b/ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/3b8abb32.failed new file mode 100644 index 0000000000000..f88bd657e0d24 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/3b8abb32.failed @@ -0,0 +1,203 @@ +mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required() + +jira KERNEL-325 +cve CVE-2023-53401 +Rebuild_History Non-Buildable kernel-4.18.0-553.89.1.el8_10 +commit-author Roman Gushchin +commit 3b8abb3239530c423c0b97e42af7f7e856e1ee96 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/3b8abb32.failed + +KCSAN found an issue in obj_stock_flush_required(): +stock->cached_objcg can be reset between the check and dereference: + +================================================================== +BUG: KCSAN: data-race in drain_all_stock / drain_obj_stock + +write to 0xffff888237c2a2f8 of 8 bytes by task 19625 on cpu 0: + drain_obj_stock+0x408/0x4e0 mm/memcontrol.c:3306 + refill_obj_stock+0x9c/0x1e0 mm/memcontrol.c:3340 + obj_cgroup_uncharge+0xe/0x10 mm/memcontrol.c:3408 + memcg_slab_free_hook mm/slab.h:587 [inline] + __cache_free mm/slab.c:3373 [inline] + __do_kmem_cache_free mm/slab.c:3577 [inline] + kmem_cache_free+0x105/0x280 mm/slab.c:3602 + __d_free fs/dcache.c:298 [inline] + dentry_free fs/dcache.c:375 [inline] + __dentry_kill+0x422/0x4a0 fs/dcache.c:621 + dentry_kill+0x8d/0x1e0 + dput+0x118/0x1f0 fs/dcache.c:913 + __fput+0x3bf/0x570 fs/file_table.c:329 + ____fput+0x15/0x20 fs/file_table.c:349 + task_work_run+0x123/0x160 kernel/task_work.c:179 + resume_user_mode_work include/linux/resume_user_mode.h:49 [inline] + exit_to_user_mode_loop+0xcf/0xe0 kernel/entry/common.c:171 + exit_to_user_mode_prepare+0x6a/0xa0 kernel/entry/common.c:203 + __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] + syscall_exit_to_user_mode+0x26/0x140 kernel/entry/common.c:296 + do_syscall_64+0x4d/0xc0 arch/x86/entry/common.c:86 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +read to 0xffff888237c2a2f8 of 8 bytes by task 19632 on cpu 1: + obj_stock_flush_required mm/memcontrol.c:3319 [inline] + drain_all_stock+0x174/0x2a0 mm/memcontrol.c:2361 + try_charge_memcg+0x6d0/0xd10 mm/memcontrol.c:2703 + try_charge mm/memcontrol.c:2837 [inline] + mem_cgroup_charge_skmem+0x51/0x140 mm/memcontrol.c:7290 + sock_reserve_memory+0xb1/0x390 net/core/sock.c:1025 + sk_setsockopt+0x800/0x1e70 net/core/sock.c:1525 + udp_lib_setsockopt+0x99/0x6c0 net/ipv4/udp.c:2692 + udp_setsockopt+0x73/0xa0 net/ipv4/udp.c:2817 + sock_common_setsockopt+0x61/0x70 net/core/sock.c:3668 + __sys_setsockopt+0x1c3/0x230 net/socket.c:2271 + __do_sys_setsockopt net/socket.c:2282 [inline] + __se_sys_setsockopt net/socket.c:2279 [inline] + __x64_sys_setsockopt+0x66/0x80 net/socket.c:2279 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +value changed: 0xffff8881382d52c0 -> 0xffff888138893740 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 1 PID: 19632 Comm: syz-executor.0 Not tainted 
6.3.0-rc2-syzkaller-00387-g534293368afa #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/02/2023 + +Fix it by using READ_ONCE()/WRITE_ONCE() for all accesses to +stock->cached_objcg. + +Link: https://lkml.kernel.org/r/20230502160839.361544-1-roman.gushchin@linux.dev +Fixes: bf4f059954dc ("mm: memcg/slab: obj_cgroup API") + Signed-off-by: Roman Gushchin + Reported-by: syzbot+774c29891415ab0fd29d@syzkaller.appspotmail.com + Reported-by: Dmitry Vyukov + Link: https://lore.kernel.org/linux-mm/CACT4Y+ZfucZhM60YPphWiCLJr6+SGFhT+jjm8k1P-a_8Kkxsjg@mail.gmail.com/T/#t + Reviewed-by: Yosry Ahmed + Acked-by: Shakeel Butt + Reviewed-by: Dmitry Vyukov + Signed-off-by: Andrew Morton +(cherry picked from commit 3b8abb3239530c423c0b97e42af7f7e856e1ee96) + Signed-off-by: Jonathan Maple + +# Conflicts: +# mm/memcontrol.c +diff --cc mm/memcontrol.c +index 6e2a077af4c1,c823c35c2ed4..000000000000 +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@@ -3272,8 -3208,8 +3272,13 @@@ void mod_objcg_state(struct obj_cgroup + * accumulating over a page of vmstat data or when pgdat or idx + * changes. + */ +++<<<<<<< HEAD + + if (stock->cached_objcg != objcg) { + + drain_obj_stock(stock); +++======= ++ if (READ_ONCE(stock->cached_objcg) != objcg) { ++ old = drain_obj_stock(stock); +++>>>>>>> 3b8abb323953 (mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required()) + obj_cgroup_get(objcg); + stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes) + ? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0; +@@@ -3322,11 -3260,14 +3327,18 @@@ + + static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) + { + - struct memcg_stock_pcp *stock; + unsigned long flags; + + struct obj_stock *stock = get_obj_stock(&flags); + bool ret = false; + +++<<<<<<< HEAD + + if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) { +++======= ++ local_lock_irqsave(&memcg_stock.stock_lock, flags); ++ ++ stock = this_cpu_ptr(&memcg_stock); ++ if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) { +++>>>>>>> 3b8abb323953 (mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required()) + stock->nr_bytes -= nr_bytes; + ret = true; + } +@@@ -3336,12 -3277,12 +3348,12 @@@ + return ret; + } + + -static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock) + +static void drain_obj_stock(struct obj_stock *stock) + { +- struct obj_cgroup *old = stock->cached_objcg; ++ struct obj_cgroup *old = READ_ONCE(stock->cached_objcg); + + if (!old) + - return NULL; + + return; + + if (stock->nr_bytes) { + unsigned int nr_pages = stock->nr_bytes >> PAGE_SHIFT; +@@@ -3383,22 -3332,22 +3395,37 @@@ + stock->cached_pgdat = NULL; + } + +++<<<<<<< HEAD + + obj_cgroup_put(old); + + stock->cached_objcg = NULL; +++======= ++ WRITE_ONCE(stock->cached_objcg, NULL); ++ /* ++ * The `old' objects needs to be released by the caller via ++ * obj_cgroup_put() outside of memcg_stock_pcp::stock_lock. 
++ */ ++ return old; +++>>>>>>> 3b8abb323953 (mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required()) + } + + static bool obj_stock_flush_required(struct memcg_stock_pcp *stock, + struct mem_cgroup *root_memcg) + { ++ struct obj_cgroup *objcg = READ_ONCE(stock->cached_objcg); + struct mem_cgroup *memcg; + +++<<<<<<< HEAD + + if (in_task() && stock->task_obj.cached_objcg) { + + memcg = obj_cgroup_memcg(stock->task_obj.cached_objcg); + + if (memcg && mem_cgroup_is_descendant(memcg, root_memcg)) + + return true; + + } + + if (stock->irq_obj.cached_objcg) { + + memcg = obj_cgroup_memcg(stock->irq_obj.cached_objcg); +++======= ++ if (objcg) { ++ memcg = obj_cgroup_memcg(objcg); +++>>>>>>> 3b8abb323953 (mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required()) + if (memcg && mem_cgroup_is_descendant(memcg, root_memcg)) + return true; + } +@@@ -3409,14 -3358,18 +3436,22 @@@ + static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, + bool allow_uncharge) + { + - struct memcg_stock_pcp *stock; + - struct obj_cgroup *old = NULL; + unsigned long flags; + + struct obj_stock *stock = get_obj_stock(&flags); + unsigned int nr_pages = 0; + +++<<<<<<< HEAD + + if (stock->cached_objcg != objcg) { /* reset if necessary */ + + drain_obj_stock(stock); +++======= ++ local_lock_irqsave(&memcg_stock.stock_lock, flags); ++ ++ stock = this_cpu_ptr(&memcg_stock); ++ if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */ ++ old = drain_obj_stock(stock); +++>>>>>>> 3b8abb323953 (mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required()) + obj_cgroup_get(objcg); +- stock->cached_objcg = objcg; ++ WRITE_ONCE(stock->cached_objcg, objcg); + stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes) + ? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0; + allow_uncharge = true; /* Allow uncharge when objcg changes */ +* Unmerged path mm/memcontrol.c diff --git a/ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/5d122db2.failed b/ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/5d122db2.failed new file mode 100644 index 0000000000000..501f011d3f1ec --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/5d122db2.failed @@ -0,0 +1,90 @@ +RDMA/rxe: Fix incomplete state save in rxe_requester + +jira KERNEL-325 +cve CVE-2023-53539 +Rebuild_History Non-Buildable kernel-4.18.0-553.89.1.el8_10 +commit-author Bob Pearson +commit 5d122db2ff80cd2aed4dcd630befb56b51ddf947 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/5d122db2.failed + +If a send packet is dropped by the IP layer in rxe_requester() +the call to rxe_xmit_packet() can fail with err == -EAGAIN. +To recover, the state of the wqe is restored to the state before +the packet was sent so it can be resent. However, the routines +that save and restore the state miss a significnt part of the +variable state in the wqe, the dma struct which is used to process +through the sge table. And, the state is not saved before the packet +is built which modifies the dma struct. + +Under heavy stress testing with many QPs on a fast node sending +large messages to a slow node dropped packets are observed and +the resent packets are corrupted because the dma struct was not +restored. This patch fixes this behavior and allows the test cases +to succeed. 
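For illustration only, and not part of the upstream patch text: a minimal sketch of the save/rollback pair once the dma state is included, mirroring the drivers/infiniband/sw/rxe/rxe_req.c hunks applied later in this commit. Field and type names are taken from that file.

	/* Capture everything the retry path needs, including the dma state that
	 * tracks progress through the sge table, *before* the packet is built
	 * (building the packet advances wqe->dma).
	 */
	static void save_state(struct rxe_send_wqe *wqe, struct rxe_qp *qp,
			       struct rxe_send_wqe *rollback_wqe, u32 *rollback_psn)
	{
		rollback_wqe->state     = wqe->state;
		rollback_wqe->first_psn = wqe->first_psn;
		rollback_wqe->last_psn  = wqe->last_psn;
		rollback_wqe->dma       = wqe->dma;	/* previously not saved */
		*rollback_psn           = qp->req.psn;
	}

	static void rollback_state(struct rxe_send_wqe *wqe, struct rxe_qp *qp,
				   struct rxe_send_wqe *rollback_wqe, u32 rollback_psn)
	{
		wqe->state     = rollback_wqe->state;
		wqe->first_psn = rollback_wqe->first_psn;
		wqe->last_psn  = rollback_wqe->last_psn;
		wqe->dma       = rollback_wqe->dma;	/* restore sge progress */
		qp->req.psn    = rollback_psn;
	}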
+ +Fixes: 3050b9985024 ("IB/rxe: Fix race condition between requester and completer") +Link: https://lore.kernel.org/r/20230721200748.4604-1-rpearsonhpe@gmail.com + Signed-off-by: Bob Pearson + Signed-off-by: Jason Gunthorpe +(cherry picked from commit 5d122db2ff80cd2aed4dcd630befb56b51ddf947) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/infiniband/sw/rxe/rxe_req.c +diff --cc drivers/infiniband/sw/rxe/rxe_req.c +index f63771207970,d8c41fd626a9..000000000000 +--- a/drivers/infiniband/sw/rxe/rxe_req.c ++++ b/drivers/infiniband/sw/rxe/rxe_req.c +@@@ -746,9 -799,12 +748,12 @@@ int rxe_requester(void *arg + pkt.mask = rxe_opcode[opcode].mask; + pkt.wqe = wqe; + ++ /* save wqe state before we build and send packet */ ++ save_state(wqe, qp, &rollback_wqe, &rollback_psn); ++ + av = rxe_get_av(&pkt, &ah); + if (unlikely(!av)) { + - rxe_dbg_qp(qp, "Failed no address vector\n"); + + pr_err("qp#%d Failed no address vector\n", qp_num(qp)); + wqe->status = IB_WC_LOC_QP_OP_ERR; + goto err; + } +@@@ -790,17 -840,23 +789,33 @@@ + + err = rxe_xmit_packet(qp, &pkt, skb); + if (err) { +++<<<<<<< HEAD + + qp->need_req_skb = 1; + + + + rollback_state(wqe, qp, &rollback_wqe, rollback_psn); + + + + if (err == -EAGAIN) { + + rxe_run_task(&qp->req.task, 1); + + goto exit; +++======= ++ if (err != -EAGAIN) { ++ wqe->status = IB_WC_LOC_QP_OP_ERR; ++ goto err; +++>>>>>>> 5d122db2ff80 (RDMA/rxe: Fix incomplete state save in rxe_requester) + } + +- wqe->status = IB_WC_LOC_QP_OP_ERR; +- goto err; ++ /* the packet was dropped so reset wqe to the state ++ * before we sent it so we can try to resend ++ */ ++ rollback_state(wqe, qp, &rollback_wqe, rollback_psn); ++ ++ /* force a delay until the dropped packet is freed and ++ * the send queue is drained below the low water mark ++ */ ++ qp->need_req_skb = 1; ++ ++ rxe_sched_task(&qp->req.task); ++ goto exit; + } + + update_state(qp, &pkt); +* Unmerged path drivers/infiniband/sw/rxe/rxe_req.c diff --git a/ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/6ab26555.failed b/ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/6ab26555.failed new file mode 100644 index 0000000000000..bd20c597dd752 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/6ab26555.failed @@ -0,0 +1,155 @@ +gfs2: Add proper lockspace locking + +jira KERNEL-325 +Rebuild_History Non-Buildable kernel-4.18.0-553.89.1.el8_10 +commit-author Andreas Gruenbacher +commit 6ab26555c9ffef96c56ca16356e55ac5ab61ec93 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/6ab26555.failed + +GFS2 has been calling functions like dlm_lock() even after the lockspace +that these functions operate on has been released with +dlm_release_lockspace(). It has always assumed that those functions +would return -EINVAL in that case, but that was never guaranteed, and it +certainly is no longer the case since commit 4db41bf4f04f ("dlm: remove +ls_local_handle from struct dlm_ls"). + +To fix that, add proper lockspace locking. 
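For illustration only, and not part of the upstream patch text: the locking pattern this change introduces, as it appears in the fs/gfs2 hunks applied later in this commit. ls->ls_sem is taken for read around every use of ls->ls_dlm and for write while the lockspace is torn down, so late callers see a NULL ls_dlm and get -ENODEV instead of dereferencing a released lockspace.

	/* reader side: every dlm_lock()/dlm_unlock() call site */
	down_read(&ls->ls_sem);
	error = -ENODEV;
	if (likely(ls->ls_dlm != NULL))
		error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls);
	up_read(&ls->ls_sem);

	/* writer side: gdlm_unmount(), before the lockspace goes away */
	down_write(&ls->ls_sem);
	if (ls->ls_dlm) {
		dlm_release_lockspace(ls->ls_dlm, 2);
		ls->ls_dlm = NULL;
	}
	up_write(&ls->ls_sem);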
+ +Fixes: 3e11e5304150 ("GFS2: ignore unlock failures after withdraw") + Signed-off-by: Andreas Gruenbacher + Reviewed-by: Andrew Price +(cherry picked from commit 6ab26555c9ffef96c56ca16356e55ac5ab61ec93) + Signed-off-by: Jonathan Maple + +# Conflicts: +# fs/gfs2/file.c +# fs/gfs2/lock_dlm.c +diff --cc fs/gfs2/file.c +index dd07e0f60888,bc67fa058c84..000000000000 +--- a/fs/gfs2/file.c ++++ b/fs/gfs2/file.c +@@@ -1414,28 -1442,44 +1414,46 @@@ static int gfs2_lock(struct file *file + struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); + struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); + struct lm_lockstruct *ls = &sdp->sd_lockstruct; ++ int ret; + + - if (!(fl->c.flc_flags & FL_POSIX)) + + if (!(fl->fl_flags & FL_POSIX)) + + return -ENOLCK; + + if (__mandatory_lock(&ip->i_inode) && fl->fl_type != F_UNLCK) + return -ENOLCK; + - if (gfs2_withdrawing_or_withdrawn(sdp)) { + - if (lock_is_unlock(fl)) + + + + if (cmd == F_CANCELLK) { + + /* Hack: */ + + cmd = F_SETLK; + + fl->fl_type = F_UNLCK; + + } + + if (unlikely(gfs2_withdrawn(sdp))) { + + if (fl->fl_type == F_UNLCK) + locks_lock_file_wait(file, fl); + return -EIO; + } +++<<<<<<< HEAD + + if (IS_GETLK(cmd)) + + return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); + + else if (fl->fl_type == F_UNLCK) + + return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl); + + else + + return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl); +++======= ++ down_read(&ls->ls_sem); ++ ret = -ENODEV; ++ if (likely(ls->ls_dlm != NULL)) { ++ if (cmd == F_CANCELLK) ++ ret = dlm_posix_cancel(ls->ls_dlm, ip->i_no_addr, file, fl); ++ else if (IS_GETLK(cmd)) ++ ret = dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); ++ else if (lock_is_unlock(fl)) ++ ret = dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl); ++ else ++ ret = dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl); ++ } ++ up_read(&ls->ls_sem); ++ return ret; + -} + - + -static void __flock_holder_uninit(struct file *file, struct gfs2_holder *fl_gh) + -{ + - struct gfs2_glock *gl = gfs2_glock_hold(fl_gh->gh_gl); + - + - /* + - * Make sure gfs2_glock_put() won't sleep under the file->f_lock + - * spinlock. 
+ - */ + - + - spin_lock(&file->f_lock); + - gfs2_holder_uninit(fl_gh); + - spin_unlock(&file->f_lock); + - gfs2_glock_put(gl); +++>>>>>>> 6ab26555c9ff (gfs2: Add proper lockspace locking) + } + + static int do_flock(struct file *file, int cmd, struct file_lock *fl) +diff --cc fs/gfs2/lock_dlm.c +index 0579fdbc9c63,3c7db20ce564..000000000000 +--- a/fs/gfs2/lock_dlm.c ++++ b/fs/gfs2/lock_dlm.c +@@@ -334,9 -363,17 +339,19 @@@ static void gdlm_put_lock(struct gfs2_g + return; + } + + - if (gl->gl_lksb.sb_lvbptr) + - flags |= DLM_LKF_VALBLK; + - + again: +++<<<<<<< HEAD + + error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, + + NULL, gl); +++======= ++ down_read(&ls->ls_sem); ++ error = -ENODEV; ++ if (likely(ls->ls_dlm != NULL)) { ++ error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, flags, ++ NULL, gl); ++ } ++ up_read(&ls->ls_sem); +++>>>>>>> 6ab26555c9ff (gfs2: Add proper lockspace locking) + if (error == -EBUSY) { + msleep(20); + goto again; +* Unmerged path fs/gfs2/file.c +diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c +index de182757764b..8581006a1023 100644 +--- a/fs/gfs2/glock.c ++++ b/fs/gfs2/glock.c +@@ -827,9 +827,8 @@ __acquires(&gl->gl_lockref.lock) + } + clear_bit(GLF_PENDING_REPLY, &gl->gl_flags); + +- if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED && +- target == LM_ST_UNLOCKED && +- test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) { ++ if (ret == -ENODEV && gl->gl_target == LM_ST_UNLOCKED && ++ target == LM_ST_UNLOCKED) { + /* + * The lockspace has been released and the lock has + * been unlocked implicitly. +diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h +index 6eca783f0691..36018c48b8f2 100644 +--- a/fs/gfs2/incore.h ++++ b/fs/gfs2/incore.h +@@ -665,6 +665,8 @@ struct lm_lockstruct { + struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */ + char *ls_lvb_bits; + ++ struct rw_semaphore ls_sem; ++ + spinlock_t ls_recover_spin; /* protects following fields */ + unsigned long ls_recover_flags; /* DFL_ */ + uint32_t ls_recover_mount; /* gen in first recover_done cb */ +* Unmerged path fs/gfs2/lock_dlm.c diff --git a/ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/fead2b86.failed b/ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/fead2b86.failed new file mode 100644 index 0000000000000..9bc5e719fb4a7 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/fead2b86.failed @@ -0,0 +1,167 @@ +mm/memcg: revert ("mm/memcg: optimize user context object stock access") + +jira KERNEL-325 +cve CVE-2023-53401 +Rebuild_History Non-Buildable kernel-4.18.0-553.89.1.el8_10 +commit-author Michal Hocko +commit fead2b869764f89d524b79dc8862e61d5191be55 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/fead2b86.failed + +Patch series "mm/memcg: Address PREEMPT_RT problems instead of disabling it", v5. + +This series aims to address the memcg related problem on PREEMPT_RT. + +I tested them on CONFIG_PREEMPT and CONFIG_PREEMPT_RT with the +tools/testing/selftests/cgroup/* tests and I haven't observed any +regressions (other than the lockdep report that is already there). + +This patch (of 6): + +The optimisation is based on a micro benchmark where local_irq_save() is +more expensive than a preempt_disable(). There is no evidence that it +is visible in a real-world workload and there are CPUs where the +opposite is true (local_irq_save() is cheaper than preempt_disable()). 
+ +Based on micro benchmarks, the optimisation makes sense on PREEMPT_NONE +where preempt_disable() is optimized away. There is no improvement with +PREEMPT_DYNAMIC since the preemption counter is always available. + +The optimization makes also the PREEMPT_RT integration more complicated +since most of the assumption are not true on PREEMPT_RT. + +Revert the optimisation since it complicates the PREEMPT_RT integration +and the improvement is hardly visible. + +[bigeasy@linutronix.de: patch body around Michal's diff] + +Link: https://lkml.kernel.org/r/20220226204144.1008339-1-bigeasy@linutronix.de +Link: https://lore.kernel.org/all/YgOGkXXCrD%2F1k+p4@dhcp22.suse.cz +Link: https://lkml.kernel.org/r/YdX+INO9gQje6d0S@linutronix.de +Link: https://lkml.kernel.org/r/20220226204144.1008339-2-bigeasy@linutronix.de + Signed-off-by: Michal Hocko + Signed-off-by: Sebastian Andrzej Siewior + Acked-by: Roman Gushchin + Acked-by: Johannes Weiner + Reviewed-by: Shakeel Butt + Acked-by: Michal Hocko + Cc: Johannes Weiner + Cc: Peter Zijlstra + Cc: Thomas Gleixner + Cc: Vladimir Davydov + Cc: Waiman Long + Cc: kernel test robot + Cc: Michal Hocko + Cc: Michal Koutný + Signed-off-by: Andrew Morton + Signed-off-by: Linus Torvalds +(cherry picked from commit fead2b869764f89d524b79dc8862e61d5191be55) + Signed-off-by: Jonathan Maple + +# Conflicts: +# mm/memcontrol.c +diff --cc mm/memcontrol.c +index 6e2a077af4c1,7bf204b2b053..000000000000 +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@@ -2232,18 -2061,27 +2232,21 @@@ static void __unlock_page_memcg(struct + } + + /** + - * folio_memcg_unlock - Release the binding between a folio and its memcg. + - * @folio: The folio. + - * + - * This releases the binding created by folio_memcg_lock(). This does + - * not change the accounting of this folio to its memcg, but it does + - * permit others to change it. 
+ + * unlock_page_memcg - unlock a page and memcg binding + + * @page: the page + */ + -void folio_memcg_unlock(struct folio *folio) + -{ + - __folio_memcg_unlock(folio_memcg(folio)); + -} + - + void unlock_page_memcg(struct page *page) + { + - folio_memcg_unlock(page_folio(page)); + + struct page *head = compound_head(page); + + + + __unlock_page_memcg(page_memcg(head)); + } + +EXPORT_SYMBOL(unlock_page_memcg); + +- struct obj_stock { ++ struct memcg_stock_pcp { ++ struct mem_cgroup *cached; /* this never be root cgroup */ ++ unsigned int nr_pages; ++ + #ifdef CONFIG_MEMCG_KMEM + struct obj_cgroup *cached_objcg; + struct pglist_data *cached_pgdat; +@@@ -2269,12 -2098,13 +2263,12 @@@ static DEFINE_PER_CPU(struct memcg_stoc + static DEFINE_MUTEX(percpu_charge_mutex); + + #ifdef CONFIG_MEMCG_KMEM +- static void drain_obj_stock(struct obj_stock *stock); ++ static void drain_obj_stock(struct memcg_stock_pcp *stock); + static bool obj_stock_flush_required(struct memcg_stock_pcp *stock, + struct mem_cgroup *root_memcg); + -static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages); + + #else +- static inline void drain_obj_stock(struct obj_stock *stock) ++ static inline void drain_obj_stock(struct memcg_stock_pcp *stock) + { + } + static bool obj_stock_flush_required(struct memcg_stock_pcp *stock, +@@@ -7219,22 -6782,21 +7180,30 @@@ static void uncharge_batch(const struc + css_put(&ug->memcg->css); + } + + -static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug) + +static void uncharge_page(struct page *page, struct uncharge_gather *ug) + { + - long nr_pages; + + unsigned long nr_pages; + struct mem_cgroup *memcg; + struct obj_cgroup *objcg; +++<<<<<<< HEAD + + bool use_objcg = PageMemcgKmem(page); +++======= +++>>>>>>> fead2b869764 (mm/memcg: revert ("mm/memcg: optimize user context object stock access")) + + - VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); + + VM_BUG_ON_PAGE(PageLRU(page), page); + + /* + * Nobody should be changing or seriously looking at + - * folio memcg or objcg at this point, we have fully + - * exclusive access to the folio. + + * page memcg or objcg at this point, we have fully + + * exclusive access to the page. + */ +++<<<<<<< HEAD + + if (use_objcg) { + + objcg = __page_objcg(page); +++======= ++ if (folio_memcg_kmem(folio)) { ++ objcg = __folio_objcg(folio); +++>>>>>>> fead2b869764 (mm/memcg: revert ("mm/memcg: optimize user context object stock access")) + /* + * This get matches the put at the end of the function and + * kmem pages do not hold memcg references anymore. 
+@@@ -7259,9 -6821,9 +7228,9 @@@ + css_get(&memcg->css); + } + + - nr_pages = folio_nr_pages(folio); + + nr_pages = compound_nr(page); + +- if (use_objcg) { ++ if (folio_memcg_kmem(folio)) { + ug->nr_memory += nr_pages; + ug->nr_kmem += nr_pages; + +* Unmerged path mm/memcontrol.c diff --git a/ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/rebuild.details.txt b/ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/rebuild.details.txt new file mode 100644 index 0000000000000..ac9cf26b13415 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.89.1.el8_10/rebuild.details.txt @@ -0,0 +1,26 @@ +Rebuild_History BUILDABLE +Rebuilding Kernel from rpm changelog with Fuzz Limit: 87.50% +Number of commits in upstream range v4.18~1..kernel-mainline: 581414 +Number of commits in rpm: 14 +Number of commits matched with upstream: 8 (57.14%) +Number of commits in upstream but not in rpm: 581406 +Number of commits NOT found in upstream: 6 (42.86%) + +Rebuilding Kernel on Branch rocky8_10_rebuild_kernel-4.18.0-553.89.1.el8_10 for kernel-4.18.0-553.89.1.el8_10 +Clean Cherry Picks: 4 (50.00%) +Empty Cherry Picks: 4 (50.00%) +_______________________________ + +__EMPTY COMMITS__________________________ +5d122db2ff80cd2aed4dcd630befb56b51ddf947 RDMA/rxe: Fix incomplete state save in rxe_requester +6ab26555c9ffef96c56ca16356e55ac5ab61ec93 gfs2: Add proper lockspace locking +fead2b869764f89d524b79dc8862e61d5191be55 mm/memcg: revert ("mm/memcg: optimize user context object stock access") +3b8abb3239530c423c0b97e42af7f7e856e1ee96 mm: kmem: fix a NULL pointer dereference in obj_stock_flush_required() + +__CHANGES NOT IN UPSTREAM________________ +Adding prod certs and changed cert date to 20210620 +Adding Rocky secure boot certs +Fixing vmlinuz removal +Fixing UEFI CA path +Porting to 8.10, debranding and Rocky branding +Fixing pesign_key_name values diff --git a/configs/kernel-4.18.0-aarch64-debug.config b/configs/kernel-4.18.0-aarch64-debug.config index 255d2aa8334b0..4653989a09fbc 100644 --- a/configs/kernel-4.18.0-aarch64-debug.config +++ b/configs/kernel-4.18.0-aarch64-debug.config @@ -5,7 +5,7 @@ # # -# Compiler: gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-5) +# Compiler: gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-11) # CONFIG_ARM64=y CONFIG_64BIT=y diff --git a/configs/kernel-4.18.0-aarch64.config b/configs/kernel-4.18.0-aarch64.config index 43aa0941d9e53..3a92f34063d74 100644 --- a/configs/kernel-4.18.0-aarch64.config +++ b/configs/kernel-4.18.0-aarch64.config @@ -5,7 +5,7 @@ # # -# Compiler: gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-5) +# Compiler: gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-11) # CONFIG_ARM64=y CONFIG_64BIT=y diff --git a/configs/kernel-4.18.0-ppc64le-debug.config b/configs/kernel-4.18.0-ppc64le-debug.config index e972ad333334f..91ab00c30f4cc 100644 --- a/configs/kernel-4.18.0-ppc64le-debug.config +++ b/configs/kernel-4.18.0-ppc64le-debug.config @@ -5,7 +5,7 @@ # # -# Compiler: gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-5) +# Compiler: gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-11) # CONFIG_PPC64=y diff --git a/configs/kernel-4.18.0-ppc64le.config b/configs/kernel-4.18.0-ppc64le.config index 497d687931d2f..d44cfdbd007a4 100644 --- a/configs/kernel-4.18.0-ppc64le.config +++ b/configs/kernel-4.18.0-ppc64le.config @@ -5,7 +5,7 @@ # # -# Compiler: gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-5) +# Compiler: gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-11) # CONFIG_PPC64=y diff --git a/configs/kernel-4.18.0-s390x-debug.config b/configs/kernel-4.18.0-s390x-debug.config index b8133c4154bed..f930210e545d8 100644 
--- a/configs/kernel-4.18.0-s390x-debug.config +++ b/configs/kernel-4.18.0-s390x-debug.config @@ -5,7 +5,7 @@ # # -# Compiler: gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-5) +# Compiler: gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-11) # CONFIG_MMU=y CONFIG_ZONE_DMA=y diff --git a/configs/kernel-4.18.0-s390x-zfcpdump.config b/configs/kernel-4.18.0-s390x-zfcpdump.config index d164adf552e14..c076043ca8466 100644 --- a/configs/kernel-4.18.0-s390x-zfcpdump.config +++ b/configs/kernel-4.18.0-s390x-zfcpdump.config @@ -5,7 +5,7 @@ # # -# Compiler: gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-5) +# Compiler: gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-11) # CONFIG_MMU=y CONFIG_ZONE_DMA=y diff --git a/configs/kernel-4.18.0-s390x.config b/configs/kernel-4.18.0-s390x.config index f0be5118f855b..9423959add43e 100644 --- a/configs/kernel-4.18.0-s390x.config +++ b/configs/kernel-4.18.0-s390x.config @@ -5,7 +5,7 @@ # # -# Compiler: gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-5) +# Compiler: gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-11) # CONFIG_MMU=y CONFIG_ZONE_DMA=y diff --git a/configs/kernel-4.18.0-x86_64-debug.config b/configs/kernel-4.18.0-x86_64-debug.config index 8c8a005c1e9ff..30086a3d7ef0a 100644 --- a/configs/kernel-4.18.0-x86_64-debug.config +++ b/configs/kernel-4.18.0-x86_64-debug.config @@ -5,7 +5,7 @@ # # -# Compiler: gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-5) +# Compiler: gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-11) # CONFIG_64BIT=y CONFIG_X86_64=y diff --git a/configs/kernel-4.18.0-x86_64.config b/configs/kernel-4.18.0-x86_64.config index a3441083602f2..4c9255b6f937c 100644 --- a/configs/kernel-4.18.0-x86_64.config +++ b/configs/kernel-4.18.0-x86_64.config @@ -5,7 +5,7 @@ # # -# Compiler: gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-5) +# Compiler: gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-11) # CONFIG_64BIT=y CONFIG_X86_64=y diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 11a4776bb3857..928d031d96cd1 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -99,6 +99,7 @@ static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf) kfree(mr->map[i]); kfree(mr->map); + mr->map = NULL; err1: return -ENOMEM; } @@ -123,7 +124,6 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, int num_buf; void *vaddr; int err; - int i; umem = ib_umem_get(udata, start, length, access); if (IS_ERR(umem)) { @@ -164,9 +164,8 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, pr_warn("%s: Unable to get virtual address\n", __func__); err = -ENOMEM; - goto err_cleanup_map; + goto err_release_umem; } - buf->addr = (uintptr_t)vaddr; buf->size = PAGE_SIZE; num_buf++; @@ -186,10 +185,6 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, return 0; -err_cleanup_map: - for (i = 0; i < mr->num_map; i++) - kfree(mr->map[i]); - kfree(mr->map); err_release_umem: ib_umem_release(umem); err_out: diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index f637712079705..d66bebc80fcf6 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -529,10 +529,11 @@ static void save_state(struct rxe_send_wqe *wqe, struct rxe_send_wqe *rollback_wqe, u32 *rollback_psn) { - rollback_wqe->state = wqe->state; + rollback_wqe->state = wqe->state; rollback_wqe->first_psn = wqe->first_psn; - rollback_wqe->last_psn = wqe->last_psn; - *rollback_psn = qp->req.psn; + rollback_wqe->last_psn = wqe->last_psn; + rollback_wqe->dma = wqe->dma; + 
*rollback_psn = qp->req.psn; } static void rollback_state(struct rxe_send_wqe *wqe, @@ -540,10 +541,11 @@ static void rollback_state(struct rxe_send_wqe *wqe, struct rxe_send_wqe *rollback_wqe, u32 rollback_psn) { - wqe->state = rollback_wqe->state; + wqe->state = rollback_wqe->state; wqe->first_psn = rollback_wqe->first_psn; - wqe->last_psn = rollback_wqe->last_psn; - qp->req.psn = rollback_psn; + wqe->last_psn = rollback_wqe->last_psn; + wqe->dma = rollback_wqe->dma; + qp->req.psn = rollback_psn; } static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt) @@ -746,6 +748,9 @@ int rxe_requester(void *arg) pkt.mask = rxe_opcode[opcode].mask; pkt.wqe = wqe; + /* save wqe state before we build and send packet */ + save_state(wqe, qp, &rollback_wqe, &rollback_psn); + av = rxe_get_av(&pkt, &ah); if (unlikely(!av)) { pr_err("qp#%d Failed no address vector\n", qp_num(qp)); @@ -778,29 +783,29 @@ int rxe_requester(void *arg) if (ah) rxe_put(ah); - /* - * To prevent a race on wqe access between requester and completer, - * wqe members state and psn need to be set before calling - * rxe_xmit_packet(). - * Otherwise, completer might initiate an unjustified retry flow. - */ - save_state(wqe, qp, &rollback_wqe, &rollback_psn); + /* update wqe state as though we had sent it */ update_wqe_state(qp, wqe, &pkt); update_wqe_psn(qp, wqe, &pkt, payload); err = rxe_xmit_packet(qp, &pkt, skb); if (err) { - qp->need_req_skb = 1; + if (err != -EAGAIN) { + wqe->status = IB_WC_LOC_QP_OP_ERR; + goto err; + } + /* the packet was dropped so reset wqe to the state + * before we sent it so we can try to resend + */ rollback_state(wqe, qp, &rollback_wqe, rollback_psn); - if (err == -EAGAIN) { - rxe_run_task(&qp->req.task, 1); - goto exit; - } + /* force a delay until the dropped packet is freed and + * the send queue is drained below the low water mark + */ + qp->need_req_skb = 1; - wqe->status = IB_WC_LOC_QP_OP_ERR; - goto err; + rxe_run_task(&qp->req.task, 1); + goto exit; } update_state(qp, &pkt); diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c index b8cd75a872eeb..8ca46e4aa8d63 100644 --- a/drivers/s390/scsi/zfcp_sysfs.c +++ b/drivers/s390/scsi/zfcp_sysfs.c @@ -450,6 +450,8 @@ static ssize_t zfcp_sysfs_unit_add_store(struct device *dev, if (kstrtoull(buf, 0, (unsigned long long *) &fcp_lun)) return -EINVAL; + flush_work(&port->rport_work); + retval = zfcp_unit_add(port, fcp_lun); if (retval) return retval; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index dd07e0f60888a..6aabdb450ddd9 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1414,6 +1414,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); struct lm_lockstruct *ls = &sdp->sd_lockstruct; + int ret; if (!(fl->fl_flags & FL_POSIX)) return -ENOLCK; @@ -1430,12 +1431,18 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) locks_lock_file_wait(file, fl); return -EIO; } - if (IS_GETLK(cmd)) - return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); - else if (fl->fl_type == F_UNLCK) - return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl); - else - return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl); + down_read(&ls->ls_sem); + ret = -ENODEV; + if (likely(ls->ls_dlm != NULL)) { + if (IS_GETLK(cmd)) + ret = dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); + else if (fl->fl_type == F_UNLCK) + ret = dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, 
fl); + else + ret = dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl); + } + up_read(&ls->ls_sem); + return ret; } static int do_flock(struct file *file, int cmd, struct file_lock *fl) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index aa0255232985a..8581006a10230 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -821,21 +821,22 @@ __acquires(&gl->gl_lockref.lock) ret = ls->ls_ops->lm_lock(gl, target, lck_flags); spin_lock(&gl->gl_lockref.lock); - if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED && - target == LM_ST_UNLOCKED && - test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) { + if (!ret) { + /* The operation will be completed asynchronously. */ + return; + } + clear_bit(GLF_PENDING_REPLY, &gl->gl_flags); + + if (ret == -ENODEV && gl->gl_target == LM_ST_UNLOCKED && + target == LM_ST_UNLOCKED) { /* * The lockspace has been released and the lock has * been unlocked implicitly. */ - } else if (ret) { + } else { fs_err(sdp, "lm_lock ret %d\n", ret); target = gl->gl_state | LM_OUT_ERROR; - } else { - /* The operation will be completed asynchronously. */ - return; } - clear_bit(GLF_PENDING_REPLY, &gl->gl_flags); } /* Complete the operation now. */ diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 6eca783f0691d..36018c48b8f22 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -665,6 +665,8 @@ struct lm_lockstruct { struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */ char *ls_lvb_bits; + struct rw_semaphore ls_sem; + spinlock_t ls_recover_spin; /* protects following fields */ unsigned long ls_recover_flags; /* DFL_ */ uint32_t ls_recover_mount; /* gen in first recover_done cb */ diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 0579fdbc9c633..fabc797f5ed5c 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -294,8 +294,13 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, */ again: - error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname, - GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); + down_read(&ls->ls_sem); + error = -ENODEV; + if (likely(ls->ls_dlm != NULL)) { + error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname, + GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); + } + up_read(&ls->ls_sem); if (error == -EBUSY) { msleep(20); goto again; @@ -335,8 +340,13 @@ static void gdlm_put_lock(struct gfs2_glock *gl) } again: - error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, - NULL, gl); + down_read(&ls->ls_sem); + error = -ENODEV; + if (likely(ls->ls_dlm != NULL)) { + error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, + DLM_LKF_VALBLK, NULL, gl); + } + up_read(&ls->ls_sem); if (error == -EBUSY) { msleep(20); goto again; @@ -352,7 +362,12 @@ static void gdlm_put_lock(struct gfs2_glock *gl) static void gdlm_cancel(struct gfs2_glock *gl) { struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; - dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl); + + down_read(&ls->ls_sem); + if (likely(ls->ls_dlm != NULL)) { + dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl); + } + up_read(&ls->ls_sem); } /* @@ -533,7 +548,11 @@ static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name) struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error; - error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls); + down_read(&ls->ls_sem); + error = -ENODEV; + if (likely(ls->ls_dlm != NULL)) + error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls); + up_read(&ls->ls_sem); if (error) { fs_err(sdp, "%s lkid %x error %d\n", name, 
lksb->sb_lkid, error); @@ -560,9 +579,14 @@ static int sync_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags, memset(strname, 0, GDLM_STRNAME_BYTES); snprintf(strname, GDLM_STRNAME_BYTES, "%8x%16x", LM_TYPE_NONDISK, num); - error = dlm_lock(ls->ls_dlm, mode, lksb, flags, - strname, GDLM_STRNAME_BYTES - 1, - 0, sync_wait_cb, ls, NULL); + down_read(&ls->ls_sem); + error = -ENODEV; + if (likely(ls->ls_dlm != NULL)) { + error = dlm_lock(ls->ls_dlm, mode, lksb, flags, + strname, GDLM_STRNAME_BYTES - 1, + 0, sync_wait_cb, ls, NULL); + } + up_read(&ls->ls_sem); if (error) { fs_err(sdp, "%s lkid %x flags %x mode %d error %d\n", name, lksb->sb_lkid, flags, mode, error); @@ -1288,6 +1312,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) */ INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func); + ls->ls_dlm = NULL; spin_lock_init(&ls->ls_recover_spin); ls->ls_recover_flags = 0; ls->ls_recover_mount = 0; @@ -1322,6 +1347,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) * create/join lockspace */ + init_rwsem(&ls->ls_sem); error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE, &gdlm_lockspace_ops, sdp, &ops_result, &ls->ls_dlm); @@ -1401,10 +1427,12 @@ static void gdlm_unmount(struct gfs2_sbd *sdp) /* mounted_lock and control_lock will be purged in dlm recovery */ release: + down_write(&ls->ls_sem); if (ls->ls_dlm) { dlm_release_lockspace(ls->ls_dlm, 2); ls->ls_dlm = NULL; } + up_write(&ls->ls_sem); free_recover_size(ls); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6e2a077af4c16..6bcc5a1cfd39c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2243,23 +2243,17 @@ void unlock_page_memcg(struct page *page) } EXPORT_SYMBOL(unlock_page_memcg); -struct obj_stock { +struct memcg_stock_pcp { + struct mem_cgroup *cached; /* this never be root cgroup */ + unsigned int nr_pages; + #ifdef CONFIG_MEMCG_KMEM struct obj_cgroup *cached_objcg; struct pglist_data *cached_pgdat; unsigned int nr_bytes; int nr_slab_reclaimable_b; int nr_slab_unreclaimable_b; -#else - int dummy[0]; #endif -}; - -struct memcg_stock_pcp { - struct mem_cgroup *cached; /* this never be root cgroup */ - unsigned int nr_pages; - struct obj_stock task_obj; - struct obj_stock irq_obj; struct work_struct work; unsigned long flags; @@ -2269,12 +2263,12 @@ static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock); static DEFINE_MUTEX(percpu_charge_mutex); #ifdef CONFIG_MEMCG_KMEM -static void drain_obj_stock(struct obj_stock *stock); +static void drain_obj_stock(struct memcg_stock_pcp *stock); static bool obj_stock_flush_required(struct memcg_stock_pcp *stock, struct mem_cgroup *root_memcg); #else -static inline void drain_obj_stock(struct obj_stock *stock) +static inline void drain_obj_stock(struct memcg_stock_pcp *stock) { } static bool obj_stock_flush_required(struct memcg_stock_pcp *stock, @@ -2307,7 +2301,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) local_irq_save(flags); stock = this_cpu_ptr(&memcg_stock); - if (memcg == stock->cached && stock->nr_pages >= nr_pages) { + if (memcg == READ_ONCE(stock->cached) && stock->nr_pages >= nr_pages) { stock->nr_pages -= nr_pages; ret = true; } @@ -2322,7 +2316,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) */ static void drain_stock(struct memcg_stock_pcp *stock) { - struct mem_cgroup *old = stock->cached; + struct mem_cgroup *old = READ_ONCE(stock->cached); if (!old) return; @@ -2335,7 +2329,7 @@ static void drain_stock(struct memcg_stock_pcp *stock) } 
css_put(&old->css); - stock->cached = NULL; + WRITE_ONCE(stock->cached, NULL); } static void drain_local_stock(struct work_struct *dummy) @@ -2350,9 +2344,7 @@ static void drain_local_stock(struct work_struct *dummy) local_irq_save(flags); stock = this_cpu_ptr(&memcg_stock); - drain_obj_stock(&stock->irq_obj); - if (in_task()) - drain_obj_stock(&stock->task_obj); + drain_obj_stock(stock); drain_stock(stock); clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); @@ -2371,10 +2363,10 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) local_irq_save(flags); stock = this_cpu_ptr(&memcg_stock); - if (stock->cached != memcg) { /* reset if necessary */ + if (READ_ONCE(stock->cached) != memcg) { /* reset if necessary */ drain_stock(stock); css_get(&memcg->css); - stock->cached = memcg; + WRITE_ONCE(stock->cached, memcg); } stock->nr_pages += nr_pages; @@ -2408,7 +2400,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) bool flush = false; rcu_read_lock(); - memcg = stock->cached; + memcg = READ_ONCE(stock->cached); if (memcg && stock->nr_pages && mem_cgroup_is_descendant(memcg, root_memcg)) flush = true; @@ -2950,41 +2942,6 @@ static struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg) */ #define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT) -/* - * Most kmem_cache_alloc() calls are from user context. The irq disable/enable - * sequence used in this case to access content from object stock is slow. - * To optimize for user context access, there are now two object stocks for - * task context and interrupt context access respectively. - * - * The task context object stock can be accessed by disabling preemption only - * which is cheap in non-preempt kernel. The interrupt context object stock - * can only be accessed after disabling interrupt. User context code can - * access interrupt object stock, but not vice versa. - */ -static inline struct obj_stock *get_obj_stock(unsigned long *pflags) -{ - struct memcg_stock_pcp *stock; - - if (likely(in_task())) { - *pflags = 0UL; - preempt_disable(); - stock = this_cpu_ptr(&memcg_stock); - return &stock->task_obj; - } - - local_irq_save(*pflags); - stock = this_cpu_ptr(&memcg_stock); - return &stock->irq_obj; -} - -static inline void put_obj_stock(unsigned long flags) -{ - if (likely(in_task())) - preempt_enable(); - else - local_irq_restore(flags); -} - /* * mod_objcg_mlstate() may be called with irq enabled, so * mod_memcg_lruvec_state() should be used. @@ -3263,21 +3220,24 @@ void __memcg_kmem_uncharge_page(struct page *page, int order) void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, enum node_stat_item idx, int nr) { + struct memcg_stock_pcp *stock; unsigned long flags; - struct obj_stock *stock = get_obj_stock(&flags); int *bytes; + local_irq_save(flags); + stock = this_cpu_ptr(&memcg_stock); + /* * Save vmstat data in stock and skip vmstat array update unless * accumulating over a page of vmstat data or when pgdat or idx * changes. */ - if (stock->cached_objcg != objcg) { + if (READ_ONCE(stock->cached_objcg) != objcg) { drain_obj_stock(stock); obj_cgroup_get(objcg); stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes) ? 
atomic_xchg(&objcg->nr_charged_bytes, 0) : 0; - stock->cached_objcg = objcg; + WRITE_ONCE(stock->cached_objcg, objcg); stock->cached_pgdat = pgdat; } else if (stock->cached_pgdat != pgdat) { /* Flush the existing cached vmstat data */ @@ -3317,28 +3277,31 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, if (nr) mod_objcg_mlstate(objcg, pgdat, idx, nr); - put_obj_stock(flags); + local_irq_restore(flags); } static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) { + struct memcg_stock_pcp *stock; unsigned long flags; - struct obj_stock *stock = get_obj_stock(&flags); bool ret = false; - if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) { + local_irq_save(flags); + + stock = this_cpu_ptr(&memcg_stock); + if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) { stock->nr_bytes -= nr_bytes; ret = true; } - put_obj_stock(flags); + local_irq_restore(flags); return ret; } -static void drain_obj_stock(struct obj_stock *stock) +static void drain_obj_stock(struct memcg_stock_pcp *stock) { - struct obj_cgroup *old = stock->cached_objcg; + struct obj_cgroup *old = READ_ONCE(stock->cached_objcg); if (!old) return; @@ -3384,21 +3347,17 @@ static void drain_obj_stock(struct obj_stock *stock) } obj_cgroup_put(old); - stock->cached_objcg = NULL; + WRITE_ONCE(stock->cached_objcg, NULL); } static bool obj_stock_flush_required(struct memcg_stock_pcp *stock, struct mem_cgroup *root_memcg) { + struct obj_cgroup *objcg = READ_ONCE(stock->cached_objcg); struct mem_cgroup *memcg; - if (in_task() && stock->task_obj.cached_objcg) { - memcg = obj_cgroup_memcg(stock->task_obj.cached_objcg); - if (memcg && mem_cgroup_is_descendant(memcg, root_memcg)) - return true; - } - if (stock->irq_obj.cached_objcg) { - memcg = obj_cgroup_memcg(stock->irq_obj.cached_objcg); + if (objcg) { + memcg = obj_cgroup_memcg(objcg); if (memcg && mem_cgroup_is_descendant(memcg, root_memcg)) return true; } @@ -3409,14 +3368,17 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock, static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, bool allow_uncharge) { + struct memcg_stock_pcp *stock; unsigned long flags; - struct obj_stock *stock = get_obj_stock(&flags); unsigned int nr_pages = 0; - if (stock->cached_objcg != objcg) { /* reset if necessary */ + local_irq_save(flags); + + stock = this_cpu_ptr(&memcg_stock); + if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */ drain_obj_stock(stock); obj_cgroup_get(objcg); - stock->cached_objcg = objcg; + WRITE_ONCE(stock->cached_objcg, objcg); stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes) ? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0; allow_uncharge = true; /* Allow uncharge when objcg changes */ @@ -3428,7 +3390,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, stock->nr_bytes &= (PAGE_SIZE - 1); } - put_obj_stock(flags); + local_irq_restore(flags); if (nr_pages) obj_cgroup_uncharge_pages(objcg, nr_pages);