From ec591654cb7971382996e5289f39d9154b25fce8 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Tue, 22 Oct 2024 19:08:16 -0400 Subject: [PATCH 1/5] selftests/mm temporary fix of hmm infinite loop jira SECO-170 In Rocky9 if you run ./run_vmtests.sh -t hmm it will fail and cause an infinite loop on ASSERTs in FIXTURE_TEARDOWN() This temporary fix is based on the discussion here https://patchwork.kernel.org/project/linux-kselftest/patch/26017fe3-5ad7-6946-57db-e5ec48063ceb@suse.cz/#25046055 We will investigate further kselftest updates that will resolve the root causes of this. Signed-off-by: Jonathan Maple --- tools/testing/selftests/mm/hmm-tests.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c index d2cfc9b494a0e..6f75c54564176 100644 --- a/tools/testing/selftests/mm/hmm-tests.c +++ b/tools/testing/selftests/mm/hmm-tests.c @@ -159,6 +159,10 @@ FIXTURE_TEARDOWN(hmm) { int ret = close(self->fd); + if (ret != 0) { + fprintf(stderr, "close returned (%d) fd is (%d)\n", ret, self->fd); + exit(1); + } ASSERT_EQ(ret, 0); self->fd = -1; } From aec56b994c0b62072e042868f2f28e5864205e72 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Mon, 9 Jun 2025 15:48:12 -0400 Subject: [PATCH 2/5] net: mana: Add support for Multi Vports on Bare metal jira LE-3208 feature net_mana commit-author Haiyang Zhang commit 290e5d3c49f687c1567bde634dc33d57b0674919 To support Multi Vports on Bare metal, increase the device config response version. And, skip the register HW vport, and register filter steps, when the Bare metal hostmode is set. Signed-off-by: Haiyang Zhang Link: https://patch.msgid.link/1747671636-5810-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Paolo Abeni (cherry picked from commit 290e5d3c49f687c1567bde634dc33d57b0674919) Signed-off-by: Jonathan Maple Signed-off-by: Jonathan Maple --- drivers/net/ethernet/microsoft/mana/mana_en.c | 24 ++++++++++++------- include/net/mana/mana.h | 4 +++- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index ae796e64d24ca..7f125c7289ebf 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -920,7 +920,7 @@ static void mana_pf_deregister_filter(struct mana_port_context *apc) static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, u32 proto_minor_ver, u32 proto_micro_ver, - u16 *max_num_vports) + u16 *max_num_vports, u8 *bm_hostmode) { struct gdma_context *gc = ac->gdma_dev->gdma_context; struct mana_query_device_cfg_resp resp = {}; @@ -931,7 +931,7 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG, sizeof(req), sizeof(resp)); - req.hdr.resp.msg_version = GDMA_MESSAGE_V2; + req.hdr.resp.msg_version = GDMA_MESSAGE_V3; req.proto_major_ver = proto_major_ver; req.proto_minor_ver = proto_minor_ver; @@ -955,11 +955,16 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, *max_num_vports = resp.max_num_vports; - if (resp.hdr.response.msg_version == GDMA_MESSAGE_V2) + if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V2) gc->adapter_mtu = resp.adapter_mtu; else gc->adapter_mtu = ETH_FRAME_LEN; + if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V3) + *bm_hostmode = resp.bm_hostmode; + else + *bm_hostmode = 0; + debugfs_create_u16("adapter-MTU", 0400, gc->mana_pci_debugfs, &gc->adapter_mtu); return 0; @@ -2439,7 +2444,7 @@ static void mana_destroy_vport(struct mana_port_context *apc) mana_destroy_txq(apc); mana_uncfg_vport(apc); - if (gd->gdma_context->is_pf) + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) mana_pf_deregister_hw_vport(apc); } @@ -2451,7 +2456,7 @@ static int mana_create_vport(struct mana_port_context *apc, apc->default_rxobj = INVALID_MANA_HANDLE; - if (gd->gdma_context->is_pf) { + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) { err = mana_pf_register_hw_vport(apc); if (err) return err; @@ -2675,7 +2680,7 @@ int mana_alloc_queues(struct net_device *ndev) if (err) goto destroy_vport; - if (gd->gdma_context->is_pf) { + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) { err = mana_pf_register_filter(apc); if (err) goto destroy_vport; @@ -2737,7 +2742,7 @@ static int mana_dealloc_queues(struct net_device *ndev) mana_chn_setxdp(apc, NULL); - if (gd->gdma_context->is_pf) + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) mana_pf_deregister_filter(apc); /* No packet can be transmitted now since apc->port_is_up is false. @@ -2978,6 +2983,7 @@ int mana_probe(struct gdma_dev *gd, bool resuming) struct gdma_context *gc = gd->gdma_context; struct mana_context *ac = gd->driver_data; struct device *dev = gc->dev; + u8 bm_hostmode = 0; u16 num_ports = 0; int err; int i; @@ -3004,10 +3010,12 @@ int mana_probe(struct gdma_dev *gd, bool resuming) goto out; err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION, - MANA_MICRO_VERSION, &num_ports); + MANA_MICRO_VERSION, &num_ports, &bm_hostmode); if (err) goto out; + ac->bm_hostmode = bm_hostmode; + if (!resuming) { ac->num_ports = num_ports; } else { diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index ceac201caa3a8..e74415c833791 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -405,6 +405,7 @@ struct mana_context { struct gdma_dev *gdma_dev; u16 num_ports; + u8 bm_hostmode; struct mana_eq *eqs; struct dentry *mana_eqs_debugfs; @@ -554,7 +555,8 @@ struct mana_query_device_cfg_resp { u64 pf_cap_flags4; u16 max_num_vports; - u16 reserved; + u8 bm_hostmode; /* response v3: Bare Metal Host Mode */ + u8 reserved; u32 max_num_eqs; /* response v2: */ From ba23d8d1b94a6882730ec024df1c74cfe64ba8c1 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Mon, 9 Jun 2025 15:49:43 -0400 Subject: [PATCH 3/5] tools: hv: Enable debug logs for hv_kvp_daemon jira LE-3207 feature tools_hv commit-author Shradha Gupta commit a9c0b33ef2306327dd2db02c6274107065ff9307 Allow the KVP daemon to log the KVP updates triggered in the VM with a new debug flag(-d). When the daemon is started with this flag, it logs updates and debug information in syslog with loglevel LOG_DEBUG. This information comes in handy for debugging issues where the key-value pairs for certain pools show mismatch/incorrect values. The distro-vendors can further consume these changes and modify the respective service files to redirect the logs to specific files as needed. Signed-off-by: Shradha Gupta Reviewed-by: Naman Jain Reviewed-by: Dexuan Cui Link: https://lore.kernel.org/r/1744715978-8185-1-git-send-email-shradhagupta@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <1744715978-8185-1-git-send-email-shradhagupta@linux.microsoft.com> (cherry picked from commit a9c0b33ef2306327dd2db02c6274107065ff9307) Signed-off-by: Jonathan Maple Signed-off-by: Jonathan Maple --- tools/hv/hv_kvp_daemon.c | 64 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 1e6fd6ca513bd..0e0c997134ec6 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -77,6 +77,7 @@ enum { }; static int in_hand_shake; +static int debug; static char *os_name = ""; static char *os_major = ""; @@ -172,6 +173,20 @@ static void kvp_update_file(int pool) kvp_release_lock(pool); } +static void kvp_dump_initial_pools(int pool) +{ + int i; + + syslog(LOG_DEBUG, "===Start dumping the contents of pool %d ===\n", + pool); + + for (i = 0; i < kvp_file_info[pool].num_records; i++) + syslog(LOG_DEBUG, "pool: %d, %d/%d key=%s val=%s\n", + pool, i + 1, kvp_file_info[pool].num_records, + kvp_file_info[pool].records[i].key, + kvp_file_info[pool].records[i].value); +} + static void kvp_update_mem_state(int pool) { FILE *filep; @@ -259,6 +274,8 @@ static int kvp_file_init(void) return 1; kvp_file_info[i].num_records = 0; kvp_update_mem_state(i); + if (debug) + kvp_dump_initial_pools(i); } return 0; @@ -286,6 +303,9 @@ static int kvp_key_delete(int pool, const __u8 *key, int key_size) * Found a match; just move the remaining * entries up. */ + if (debug) + syslog(LOG_DEBUG, "%s: deleting the KVP: pool=%d key=%s val=%s", + __func__, pool, record[i].key, record[i].value); if (i == (num_records - 1)) { kvp_file_info[pool].num_records--; kvp_update_file(pool); @@ -304,20 +324,36 @@ static int kvp_key_delete(int pool, const __u8 *key, int key_size) kvp_update_file(pool); return 0; } + + if (debug) + syslog(LOG_DEBUG, "%s: could not delete KVP: pool=%d key=%s. Record not found", + __func__, pool, key); + return 1; } static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size, const __u8 *value, int value_size) { - int i; - int num_records; struct kvp_record *record; + int num_records; int num_blocks; + int i; + + if (debug) + syslog(LOG_DEBUG, "%s: got a KVP: pool=%d key=%s val=%s", + __func__, pool, key, value); if ((key_size > HV_KVP_EXCHANGE_MAX_KEY_SIZE) || - (value_size > HV_KVP_EXCHANGE_MAX_VALUE_SIZE)) + (value_size > HV_KVP_EXCHANGE_MAX_VALUE_SIZE)) { + syslog(LOG_ERR, "%s: Too long key or value: key=%s, val=%s", + __func__, key, value); + + if (debug) + syslog(LOG_DEBUG, "%s: Too long key or value: pool=%d, key=%s, val=%s", + __func__, pool, key, value); return 1; + } /* * First update the in-memory state. @@ -337,6 +373,9 @@ static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size, */ memcpy(record[i].value, value, value_size); kvp_update_file(pool); + if (debug) + syslog(LOG_DEBUG, "%s: updated: pool=%d key=%s val=%s", + __func__, pool, key, value); return 0; } @@ -348,8 +387,10 @@ static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size, record = realloc(record, sizeof(struct kvp_record) * ENTRIES_PER_BLOCK * (num_blocks + 1)); - if (record == NULL) + if (!record) { + syslog(LOG_ERR, "%s: Memory alloc failure", __func__); return 1; + } kvp_file_info[pool].num_blocks++; } @@ -357,6 +398,11 @@ static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size, memcpy(record[i].key, key, key_size); kvp_file_info[pool].records = record; kvp_file_info[pool].num_records++; + + if (debug) + syslog(LOG_DEBUG, "%s: added: pool=%d key=%s val=%s", + __func__, pool, key, value); + kvp_update_file(pool); return 0; } @@ -1355,6 +1401,7 @@ void print_usage(char *argv[]) fprintf(stderr, "Usage: %s [options]\n" "Options are:\n" " -n, --no-daemon stay in foreground, don't daemonize\n" + " -d, --debug Enable debug logs(syslog debug by default)\n" " -h, --help print this help\n", argv[0]); } @@ -1376,10 +1423,11 @@ int main(int argc, char *argv[]) static struct option long_options[] = { {"help", no_argument, 0, 'h' }, {"no-daemon", no_argument, 0, 'n' }, + {"debug", no_argument, 0, 'd' }, {0, 0, 0, 0 } }; - while ((opt = getopt_long(argc, argv, "hn", long_options, + while ((opt = getopt_long(argc, argv, "hnd", long_options, &long_index)) != -1) { switch (opt) { case 'n': @@ -1388,6 +1436,9 @@ int main(int argc, char *argv[]) case 'h': print_usage(argv); exit(0); + case 'd': + debug = 1; + break; default: print_usage(argv); exit(EXIT_FAILURE); @@ -1410,6 +1461,9 @@ int main(int argc, char *argv[]) */ kvp_get_domain_name(full_domain_name, sizeof(full_domain_name)); + if (debug) + syslog(LOG_INFO, "Logging debug info in syslog(debug)"); + if (kvp_file_init()) { syslog(LOG_ERR, "Failed to initialize the pools"); exit(EXIT_FAILURE); From 193cf80dde2b0662f1bfde929a04f67931b8b082 Mon Sep 17 00:00:00 2001 From: Shreeya Patel Date: Tue, 19 Aug 2025 10:28:05 +0000 Subject: [PATCH 4/5] RDMA/mana_ib: use the correct page size for mapping user-mode doorbell page jira LE-3813 commit-author Long Li commit 4a3b99bc04e501b816db78f70064e26a01257910 When mapping doorbell page from user-mode, the driver should use the system page size as this memory is allocated via mmap() from user-mode. Cc: stable@vger.kernel.org Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") Signed-off-by: Long Li Link: https://patch.msgid.link/1725030993-16213-2-git-send-email-longli@linuxonhyperv.com Signed-off-by: Leon Romanovsky (cherry picked from commit 4a3b99bc04e501b816db78f70064e26a01257910) Signed-off-by: Shreeya Patel Signed-off-by: Jonathan Maple --- drivers/infiniband/hw/mana/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index d13abc954d2aa..148d74a8c3a87 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -511,13 +511,13 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) PAGE_SHIFT; prot = pgprot_writecombine(vma->vm_page_prot); - ret = rdma_user_mmap_io(ibcontext, vma, pfn, gc->db_page_size, prot, + ret = rdma_user_mmap_io(ibcontext, vma, pfn, PAGE_SIZE, prot, NULL); if (ret) ibdev_dbg(ibdev, "can't rdma_user_mmap_io ret %d\n", ret); else - ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %u, ret %d\n", - pfn, gc->db_page_size, ret); + ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %lu, ret %d\n", + pfn, PAGE_SIZE, ret); return ret; } From b4790268d6a9c342aad5b4a353262667deaf8948 Mon Sep 17 00:00:00 2001 From: Shreeya Patel Date: Tue, 19 Aug 2025 10:28:47 +0000 Subject: [PATCH 5/5] RDMA/mana_ib: use the correct page table index based on hardware page size jira LE-3813 commit-author Long Li commit 9e517a8e9d9a303bf9bde35e5c5374795544c152 MANA hardware uses 4k page size. When calculating the page table index, it should use the hardware page size, not the system page size. Cc: stable@vger.kernel.org Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") Signed-off-by: Long Li Link: https://patch.msgid.link/1725030993-16213-1-git-send-email-longli@linuxonhyperv.com Signed-off-by: Leon Romanovsky (cherry picked from commit 9e517a8e9d9a303bf9bde35e5c5374795544c152) Signed-off-by: Shreeya Patel Signed-off-by: Jonathan Maple --- drivers/infiniband/hw/mana/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index 148d74a8c3a87..67c2d43135a8a 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -383,7 +383,7 @@ static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem create_req->length = umem->length; create_req->offset_in_page = ib_umem_dma_offset(umem, page_sz); - create_req->gdma_page_type = order_base_2(page_sz) - PAGE_SHIFT; + create_req->gdma_page_type = order_base_2(page_sz) - MANA_PAGE_SHIFT; create_req->page_count = num_pages_total; ibdev_dbg(&dev->ib_dev, "size_dma_region %lu num_pages_total %lu\n",