1. 17 Dec, 2018 1 commit
    • Piotr Stankiewicz's avatar
      IB/hfi1: Fix an out-of-bounds access in get_hw_stats · 01a16601
      Piotr Stankiewicz authored
      commit 36d84219 upstream.
      
      When running with KASAN, the following trace is produced:
      
      [   62.535888]
      
      ==================================================================
      [   62.544930] BUG: KASAN: slab-out-of-bounds in
      get_hw_stats+0x122/0x230 [hfi1]
      [   62.553856] Write of size 8 at addr ffff88080e8d6330 by task
      kworker/0:1/14
      
      [   62.565333] CPU: 0 PID: 14 Comm: kworker/0:1 Not tainted
      4.19.0-test-build-kasan+ #8
      [   62.575087] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS
      SE5C610.86B.01.01.0019.101220160604 10/12/2016
      [   62.587951] Workqueue: events work_for_cpu_fn
      [   62.594050] Call Trace:
      [   62.598023]  dump_stack+0xc6/0x14c
      [   62.603089]  ? dump_stack_print_info.cold.1+0x2f/0x2f
      [   62.610041]  ? kmsg_dump_rewind_nolock+0x59/0x59
      [   62.616615]  ? get_hw_stats+0x122/0x230 [hfi1]
      [   62.622985]  print_address_description+0x6c/0x23c
      [   62.629744]  ? get_hw_stats+0x122/0x230 [hfi1]
      [   62.636108]  kasan_report.cold.6+0x241/0x308
      [   62.642365]  get_hw_stats+0x122/0x230 [hfi1]
      [   62.648703]  ? hfi1_alloc_rn+0x40/0x40 [hfi1]
      [   62.655088]  ? __kmalloc+0x110/0x240
      [   62.660695]  ? hfi1_alloc_rn+0x40/0x40 [hfi1]
      [   62.667142]  setup_hw_stats+0xd8/0x430 [ib_core]
      [   62.673972]  ? show_hfi+0x50/0x50 [hfi1]
      [   62.680026]  ib_device_register_sysfs+0x165/0x180 [ib_core]
      [   62.687995]  ib_register_device+0x5a2/0xa10 [ib_core]
      [   62.695340]  ? show_hfi+0x50/0x50 [hfi1]
      [   62.701421]  ? ib_unregister_device+0x2e0/0x2e0 [ib_core]
      [   62.709222]  ? __vmalloc_node_range+0x2d0/0x380
      [   62.716131]  ? rvt_driver_mr_init+0x11f/0x2d0 [rdmavt]
      [   62.723735]  ? vmalloc_node+0x5c/0x70
      [   62.729697]  ? rvt_driver_mr_init+0x11f/0x2d0 [rdmavt]
      [   62.737347]  ? rvt_driver_mr_init+0x1f5/0x2d0 [rdmavt]
      [   62.744998]  ? __rvt_alloc_mr+0x110/0x110 [rdmavt]
      [   62.752315]  ? rvt_rc_error+0x140/0x140 [rdmavt]
      [   62.759434]  ? rvt_vma_open+0x30/0x30 [rdmavt]
      [   62.766364]  ? mutex_unlock+0x1d/0x40
      [   62.772445]  ? kmem_cache_create_usercopy+0x15d/0x230
      [   62.780115]  rvt_register_device+0x1f6/0x360 [rdmavt]
      [   62.787823]  ? rvt_get_port_immutable+0x180/0x180 [rdmavt]
      [   62.796058]  ? __get_txreq+0x400/0x400 [hfi1]
      [   62.802969]  ? memcpy+0x34/0x50
      [   62.808611]  hfi1_register_ib_device+0xde6/0xeb0 [hfi1]
      [   62.816601]  ? hfi1_get_npkeys+0x10/0x10 [hfi1]
      [   62.823760]  ? hfi1_init+0x89f/0x9a0 [hfi1]
      [   62.830469]  ? hfi1_setup_eagerbufs+0xad0/0xad0 [hfi1]
      [   62.838204]  ? pcie_capability_clear_and_set_word+0xcd/0xe0
      [   62.846429]  ? pcie_capability_read_word+0xd0/0xd0
      [   62.853791]  ? hfi1_pcie_init+0x187/0x4b0 [hfi1]
      [   62.860958]  init_one+0x67f/0xae0 [hfi1]
      [   62.867301]  ? hfi1_init+0x9a0/0x9a0 [hfi1]
      [   62.873876]  ? wait_woken+0x130/0x130
      [   62.879860]  ? read_word_at_a_time+0xe/0x20
      [   62.886329]  ? strscpy+0x14b/0x280
      [   62.891998]  ? hfi1_init+0x9a0/0x9a0 [hfi1]
      [   62.898405]  local_pci_probe+0x70/0xd0
      [   62.904295]  ? pci_device_shutdown+0x90/0x90
      [   62.910833]  work_for_cpu_fn+0x29/0x40
      [   62.916750]  process_one_work+0x584/0x960
      [   62.922974]  ? rcu_work_rcufn+0x40/0x40
      [   62.928991]  ? __schedule+0x396/0xdc0
      [   62.934806]  ? __sched_text_start+0x8/0x8
      [   62.941020]  ? pick_next_task_fair+0x68b/0xc60
      [   62.947674]  ? run_rebalance_domains+0x260/0x260
      [   62.954471]  ? __list_add_valid+0x29/0xa0
      [   62.960607]  ? move_linked_works+0x1c7/0x230
      [   62.967077]  ?
      trace_event_raw_event_workqueue_execute_start+0x140/0x140
      [   62.976248]  ? mutex_lock+0xa6/0x100
      [   62.982029]  ? __mutex_lock_slowpath+0x10/0x10
      [   62.988795]  ? __switch_to+0x37a/0x710
      [   62.994731]  worker_thread+0x62e/0x9d0
      [   63.000602]  ? max_active_store+0xf0/0xf0
      [   63.006828]  ? __switch_to_asm+0x40/0x70
      [   63.012932]  ? __switch_to_asm+0x34/0x70
      [   63.019013]  ? __switch_to_asm+0x40/0x70
      [   63.025042]  ? __switch_to_asm+0x34/0x70
      [   63.031030]  ? __switch_to_asm+0x40/0x70
      [   63.037006]  ? __schedule+0x396/0xdc0
      [   63.042660]  ? kmem_cache_alloc_trace+0xf3/0x1f0
      [   63.049323]  ? kthread+0x59/0x1d0
      [   63.054594]  ? ret_from_fork+0x35/0x40
      [   63.060257]  ? __sched_text_start+0x8/0x8
      [   63.066212]  ? schedule+0xcf/0x250
      [   63.071529]  ? __wake_up_common+0x110/0x350
      [   63.077794]  ? __schedule+0xdc0/0xdc0
      [   63.083348]  ? wait_woken+0x130/0x130
      [   63.088963]  ? finish_task_switch+0x1f1/0x520
      [   63.095258]  ? kasan_unpoison_shadow+0x30/0x40
      [   63.101792]  ? __init_waitqueue_head+0xa0/0xd0
      [   63.108183]  ? replenish_dl_entity.cold.60+0x18/0x18
      [   63.115151]  ? _raw_spin_lock_irqsave+0x25/0x50
      [   63.121754]  ? max_active_store+0xf0/0xf0
      [   63.127753]  kthread+0x1ae/0x1d0
      [   63.132894]  ? kthread_bind+0x30/0x30
      [   63.138422]  ret_from_fork+0x35/0x40
      
      [   63.146973] Allocated by task 14:
      [   63.152077]  kasan_kmalloc+0xbf/0xe0
      [   63.157471]  __kmalloc+0x110/0x240
      [   63.162804]  init_cntrs+0x34d/0xdf0 [hfi1]
      [   63.168883]  hfi1_init_dd+0x29a3/0x2f90 [hfi1]
      [   63.175244]  init_one+0x551/0xae0 [hfi1]
      [   63.181065]  local_pci_probe+0x70/0xd0
      [   63.186759]  work_for_cpu_fn+0x29/0x40
      [   63.192310]  process_one_work+0x584/0x960
      [   63.198163]  worker_thread+0x62e/0x9d0
      [   63.203843]  kthread+0x1ae/0x1d0
      [   63.208874]  ret_from_fork+0x35/0x40
      
      [   63.217203] Freed by task 1:
      [   63.221844]  __kasan_slab_free+0x12e/0x180
      [   63.227844]  kfree+0x92/0x1a0
      [   63.232570]  single_release+0x3a/0x60
      [   63.238024]  __fput+0x1d9/0x480
      [   63.242911]  task_work_run+0x139/0x190
      [   63.248440]  exit_to_usermode_loop+0x191/0x1a0
      [   63.254814]  do_syscall_64+0x301/0x330
      [   63.260283]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
      
      [   63.270199] The buggy address belongs to the object at
      ffff88080e8d5500
       which belongs to the cache kmalloc-4096 of size 4096
      [   63.287247] The buggy address is located 3632 bytes inside of
       4096-byte region [ffff88080e8d5500, ffff88080e8d6500)
      [   63.303564] The buggy address belongs to the page:
      [   63.310447] page:ffffea00203a3400 count:1 mapcount:0
      mapping:ffff88081380e840 index:0x0 compound_mapcount: 0
      [   63.323102] flags: 0x2fffff80008100(slab|head)
      [   63.329775] raw: 002fffff80008100 0000000000000000 0000000100000001
      ffff88081380e840
      [   63.340175] raw: 0000000000000000 0000000000070007 00000001ffffffff
      0000000000000000
      [   63.350564] page dumped because: kasan: bad access detected
      
      [   63.361974] Memory state around the buggy address:
      [   63.369137]  ffff88080e8d6200: 00 00 00 00 00 00 00 00 00 00 00 00 00
      00 00 00
      [   63.379082]  ffff88080e8d6280: 00 00 00 00 00 00 00 00 00 00 00 00 00
      00 00 00
      [   63.389032] >ffff88080e8d6300: 00 00 00 00 00 00 fc fc fc fc fc fc fc
      fc fc fc
      [   63.398944]                                      ^
      [   63.406141]  ffff88080e8d6380: fc fc fc fc fc fc fc fc fc fc fc fc fc
      fc fc fc
      [   63.416109]  ffff88080e8d6400: fc fc fc fc fc fc fc fc fc fc fc fc fc
      fc fc fc
      [   63.426099]
      ==================================================================
      
      The trace happens because get_hw_stats() assumes there is room in the
      memory allocated in init_cntrs() to accommodate the driver counters.
      Unfortunately, that routine only allocated space for the device
      counters.
      
      Fix by ensuring the allocation has room for the additional driver
      counters.
      
      Cc: <Stable@vger.kernel.org> # v4.14+
      Fixes: b7481944 ("IB/hfi1: Show statistics counters under IB stats interface")
      Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
      Reviewed-by: Mike Ruhl <michael.j.ruhl@intel.com>
      Signed-off-by: Piotr Stankiewicz <piotr.stankiewicz@intel.com>
      Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
      Signed-off-by: Doug Ledford <dledford@redhat.com>
      Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
      01a16601
  2. 20 Oct, 2018 1 commit
    • Michael J. Ruhl's avatar
      IB/hfi1: Fix destroy_qp hang after a link down · 6edd85a7
      Michael J. Ruhl authored
      commit b4a4957d upstream.
      
      rvt_destroy_qp() cannot complete until all in process packets have
      been released from the underlying hardware.  If a link down event
      occurs, an application can hang with a kernel stack similar to:
      
      cat /proc/<app PID>/stack
       quiesce_qp+0x178/0x250 [hfi1]
       rvt_reset_qp+0x23d/0x400 [rdmavt]
       rvt_destroy_qp+0x69/0x210 [rdmavt]
       ib_destroy_qp+0xba/0x1c0 [ib_core]
       nvme_rdma_destroy_queue_ib+0x46/0x80 [nvme_rdma]
       nvme_rdma_free_queue+0x3c/0xd0 [nvme_rdma]
       nvme_rdma_destroy_io_queues+0x88/0xd0 [nvme_rdma]
       nvme_rdma_error_recovery_work+0x52/0xf0 [nvme_rdma]
       process_one_work+0x17a/0x440
       worker_thread+0x126/0x3c0
       kthread+0xcf/0xe0
       ret_from_fork+0x58/0x90
       0xffffffffffffffff
      
      quiesce_qp() waits until all outstanding packets have been freed.
      This wait should be momentary.  During a link down event, the cleanup
      handling does not ensure that all packets caught by the link down are
      flushed properly.
      
      This is caused by the fact that the freeze path and the link down
      event are handled the same.  This is not correct.  The freeze path
      waits until the HFI is unfrozen and then restarts PIO.  A link down
      is not a freeze event.  The link down path cannot restart the PIO
      until link is restored.  If the PIO path is restarted before the link
      comes up, the application (QP) using the PIO path will hang (until
      link is restored).
      
      Fix by separating the linkdown path from the freeze path and use the
      link down path for link down events.
      
      Close a race condition in sc_disable() by acquiring both the progress
      and release locks.
      
      Close a race condition in sc_stop() by moving the setting of the flag
      bits under the alloc lock.
      
      Cc: <stable@vger.kernel.org> # 4.9.x+
      Fixes: 77241056 ("IB/hfi1: add driver files")
      Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
      Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
      Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
      Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
      Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
      6edd85a7
  3. 03 Jul, 2018 1 commit
  4. 30 May, 2018 1 commit
    • Michael J. Ruhl's avatar
      IB/hfi1: Use after free race condition in send context error path · 7a5b3b91
      Michael J. Ruhl authored
      commit f9e76ca3 upstream.
      
      A pio send egress error can occur when the PSM library attempts to
      send a bad packet.  That issue is still being investigated.
      
      The pio error interrupt handler then attempts to progress the recovery
      of the errored pio send context.
      
      Code inspection reveals that the handling lacks the necessary locking
      if that recovery interleaves with a PSM close of the "context" object
      that contains the pio send context.
      
      The lack of the locking can cause the recovery to access the already
      freed pio send context object and incorrectly deduce that the pio
      send context is actually a kernel pio send context as shown by the
      NULL deref stack below:
      
      [<ffffffff8143d78c>] _dev_info+0x6c/0x90
      [<ffffffffc0613230>] sc_restart+0x70/0x1f0 [hfi1]
      [<ffffffff816ab124>] ? __schedule+0x424/0x9b0
      [<ffffffffc06133c5>] sc_halted+0x15/0x20 [hfi1]
      [<ffffffff810aa3ba>] process_one_work+0x17a/0x440
      [<ffffffff810ab086>] worker_thread+0x126/0x3c0
      [<ffffffff810aaf60>] ? manage_workers.isra.24+0x2a0/0x2a0
      [<ffffffff810b252f>] kthread+0xcf/0xe0
      [<ffffffff810b2460>] ? insert_kthread_work+0x40/0x40
      [<ffffffff816b8798>] ret_from_fork+0x58/0x90
      [<ffffffff810b2460>] ? insert_kthread_work+0x40/0x40
      
      This is the best case scenario and other scenarios can corrupt the
      already freed memory.
      
      Fix by adding the necessary locking in the pio send context error
      handler.
      
      Cc: <stable@vger.kernel.org> # 4.9.x
      Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
      Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
      Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
      Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
      Signed-off-by: Doug Ledford <dledford@redhat.com>
      Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
      7a5b3b91
  5. 26 Apr, 2018 1 commit
  6. 20 Dec, 2017 1 commit
  7. 30 Nov, 2017 1 commit
  8. 27 Sep, 2017 3 commits
  9. 28 Aug, 2017 1 commit
  10. 22 Aug, 2017 7 commits
  11. 18 Aug, 2017 1 commit
  12. 31 Jul, 2017 13 commits
  13. 17 Jul, 2017 1 commit
  14. 27 Jun, 2017 7 commits